From e89100c599034bf05af43ea7e35592e2df07bbed Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Thu, 30 Dec 2021 10:39:16 +0900 Subject: [PATCH 01/90] format all header files --- .clang-format | 6 + include/CL/sycl.hpp | 3 +- include/neoSYCL/extensions/nec/ve_device.hpp | 29 +-- include/neoSYCL/extensions/nec/ve_info.hpp | 7 +- include/neoSYCL/extensions/nec/ve_kernel.hpp | 93 +++++---- include/neoSYCL/extensions/nec/ve_queue.hpp | 30 +-- .../neoSYCL/extensions/nec/ve_selector.hpp | 11 +- include/neoSYCL/extensions/nec/ve_task.hpp | 8 +- .../extensions/nec/ve_task_handler.hpp | 103 +++++----- include/neoSYCL/sycl.hpp | 3 +- include/neoSYCL/sycl/access.hpp | 11 +- include/neoSYCL/sycl/accessor.hpp | 176 ++++++++-------- include/neoSYCL/sycl/allocator.hpp | 5 +- include/neoSYCL/sycl/buffer.hpp | 189 ++++++++++-------- include/neoSYCL/sycl/context.hpp | 26 +-- .../sycl/detail/container/array_nd.hpp | 128 ++++++------ .../sycl/detail/container/data_container.hpp | 22 +- .../detail/container/data_container_nd.hpp | 179 ++++++++--------- include/neoSYCL/sycl/detail/debug.hpp | 10 +- include/neoSYCL/sycl/detail/device_info.hpp | 29 +-- include/neoSYCL/sycl/detail/device_type.hpp | 7 +- .../neoSYCL/sycl/detail/highlight_func.hpp | 12 +- include/neoSYCL/sycl/detail/kernel.hpp | 4 +- include/neoSYCL/sycl/detail/kernel_arg.hpp | 9 +- include/neoSYCL/sycl/detail/platform_info.hpp | 16 +- .../sycl/detail/registered_platforms.hpp | 16 +- include/neoSYCL/sycl/detail/task.hpp | 9 +- include/neoSYCL/sycl/detail/task_counter.hpp | 10 +- include/neoSYCL/sycl/detail/task_handler.hpp | 50 ++--- include/neoSYCL/sycl/device.hpp | 70 +++---- include/neoSYCL/sycl/device_selector.hpp | 9 +- .../sycl/device_selector/cpu_selector.hpp | 10 +- include/neoSYCL/sycl/event.hpp | 21 +- include/neoSYCL/sycl/exception.hpp | 12 +- include/neoSYCL/sycl/extensions.hpp | 8 +- include/neoSYCL/sycl/handler.hpp | 98 +++++---- include/neoSYCL/sycl/id.hpp | 26 +-- include/neoSYCL/sycl/info/context.hpp | 10 +- include/neoSYCL/sycl/info/device.hpp | 4 +- include/neoSYCL/sycl/info/device_type.hpp | 3 +- include/neoSYCL/sycl/info/param_traits.hpp | 15 +- include/neoSYCL/sycl/info/platform.hpp | 8 +- include/neoSYCL/sycl/info/program.hpp | 11 +- include/neoSYCL/sycl/info/queue.hpp | 12 +- include/neoSYCL/sycl/item.hpp | 65 +++--- include/neoSYCL/sycl/nd_range.hpp | 32 ++- include/neoSYCL/sycl/op_def.hpp | 66 +++--- include/neoSYCL/sycl/platform.hpp | 32 +-- include/neoSYCL/sycl/program.hpp | 39 ++-- include/neoSYCL/sycl/property_list.hpp | 11 +- include/neoSYCL/sycl/queue.hpp | 78 ++++---- include/neoSYCL/sycl/range.hpp | 31 +-- include/neoSYCL/sycl/types.hpp | 23 +-- 53 files changed, 889 insertions(+), 1006 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..d71aab3 --- /dev/null +++ b/.clang-format @@ -0,0 +1,6 @@ +Language: Cpp +BasedOnStyle: LLVM +AlignConsecutiveAssignments: true +AlignTrailingComments: true +SortIncludes: false + diff --git a/include/CL/sycl.hpp b/include/CL/sycl.hpp index f91100f..85fd6f6 100644 --- a/include/CL/sycl.hpp +++ b/include/CL/sycl.hpp @@ -9,5 +9,4 @@ using namespace neosycl::sycl; } -#endif //CUSTOM_SYCL_INCLUDE_SYCL_SYCL_H_ - +#endif // CUSTOM_SYCL_INCLUDE_SYCL_SYCL_H_ diff --git a/include/neoSYCL/extensions/nec/ve_device.hpp b/include/neoSYCL/extensions/nec/ve_device.hpp index 3b2b1d2..9fe1a76 100644 --- a/include/neoSYCL/extensions/nec/ve_device.hpp +++ b/include/neoSYCL/extensions/nec/ve_device.hpp @@ -7,30 +7,19 @@ namespace neosycl::sycl { class ve_device : public device { - private: +private: int node_id; - public: +public: ve_device(int node_id) : node_id(node_id) {} - bool is_host() const override { - return false; - } - bool is_cpu() const override { - return false; - } - bool is_gpu() const override { - return false; - } - bool is_accelerator() const override { - return true; - } - int get_node_id() const { - return node_id; - } - + bool is_host() const override { return false; } + bool is_cpu() const override { return false; } + bool is_gpu() const override { return false; } + bool is_accelerator() const override { return true; } + int get_node_id() const { return node_id; } }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_info.hpp b/include/neoSYCL/extensions/nec/ve_info.hpp index d555ee5..e5ddd55 100644 --- a/include/neoSYCL/extensions/nec/ve_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_info.hpp @@ -5,10 +5,9 @@ namespace neosycl::sycl::extensions { -const int DEFAULT_VE_NODE = 0; +const int DEFAULT_VE_NODE = 0; const string_class DEFAULT_VE_LIB = "./kernel.so"; - struct VEProc { struct veo_proc_handle *ve_proc; uint64_t handle; @@ -17,6 +16,6 @@ struct VEProc { struct VEContext { struct veo_thr_ctxt *ve_ctx; }; -} +} // namespace neosycl::sycl::extensions -#endif //NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ +#endif // NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_kernel.hpp b/include/neoSYCL/extensions/nec/ve_kernel.hpp index f839a33..08f2afa 100644 --- a/include/neoSYCL/extensions/nec/ve_kernel.hpp +++ b/include/neoSYCL/extensions/nec/ve_kernel.hpp @@ -11,15 +11,16 @@ struct VEKernel : public Kernel { nec::VEContext ctx_create(nec::VEProc proc) { struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t) ctx); + DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); return nec::VEContext{ctx}; } void free_ctx(nec::VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t) ctx.ve_ctx); + DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", (size_t) ctx.ve_ctx, rt); + DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", + (size_t)ctx.ve_ctx, rt); PRINT_ERR("[VEContext] release ve ctx failed"); } } @@ -32,7 +33,8 @@ struct VEKernel : public Kernel { return argp; } - VEKernel(const vector_class &args, const string_class &kernel_name, const nec::VEProc &proc) + VEKernel(const vector_class &args, const string_class &kernel_name, + const nec::VEProc &proc) : Kernel(args, kernel_name), proc(proc) { ctx = ctx_create(proc); } @@ -47,32 +49,34 @@ struct VEKernel : public Kernel { vector_class ve_addr_list; for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; + KernelArg arg = args[i]; size_t size_in_byte = arg.container->get_size(); uint64_t ve_addr_int; int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] allocate VE memory size: {} failed, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] allocate VE memory failed"); throw nec::VEException("VE allocate return error"); } ve_addr_list.push_back(ve_addr_int); DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, - ve_addr_int - ); + size_in_byte, ve_addr_int); if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) ve_addr_int, - size_in_byte, - (size_t) arg.container->get_data_ptr() - ); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, arg.container->get_data_ptr(), size_in_byte); + DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " + "{:#x}, size: {}, host address: {:#x}", + (size_t)ve_addr_int, size_in_byte, + (size_t)arg.container->get_data_ptr()); + rt = veo_write_mem(proc.ve_proc, ve_addr_int, + arg.container->get_data_ptr(), size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy to ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] copy to ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] copy to ve memory failed"); throw nec::VEException("VE copy return error"); } @@ -84,26 +88,29 @@ struct VEKernel : public Kernel { void copy_out(vector_class ve_addr_list) { for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; + KernelArg arg = args[i]; size_t size_in_byte = arg.container->get_size(); uint64_t device_ptr = ve_addr_list[i]; if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) device_ptr, - size_in_byte, - (size_t) arg.container->get_data_ptr() - ); + DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " + "size: {}, host address: {:#x}", + (size_t)device_ptr, size_in_byte, + (size_t)arg.container->get_data_ptr()); // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_data_ptr(), device_ptr, size_in_byte); + int rt = veo_read_mem(proc.ve_proc, arg.container->get_data_ptr(), + device_ptr, size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy from ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] copy from ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] copy from ve memory failed"); throw nec::VEException("VE copy return error"); } } int rt = veo_free_mem(proc.ve_proc, device_ptr); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] free ve memory failed"); throw nec::VEException("VE free memory return error"); } @@ -114,47 +121,52 @@ struct VEKernel : public Kernel { DEBUG_INFO("[VEKernel] single task: {}", kernel_name); veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); try { vector_class ve_addr_list = copy_in(argp); DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, kernel_name.c_str(), argp); + uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + kernel_name.c_str(), argp); uint64_t ret_val; veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); + DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + ret_val); copy_out(ve_addr_list); } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; + std::cerr << "[VEKernel] kernel invoke failed, error message: " + << e.what() << std::endl; } veo_args_free(argp); - } void parallel_for(const range<1> &r) override { - DEBUG_INFO("[VEKernel] parallel for 1d {} with range: {}", kernel_name, r.size()); + DEBUG_INFO("[VEKernel] parallel for 1d {} with range: {}", kernel_name, + r.size()); veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); try { vector_class ve_addr_list = copy_in(argp); DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); set_arg_for_range(argp, r); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, kernel_name.c_str(), argp); + uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + kernel_name.c_str(), argp); uint64_t ret_val; veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); + DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + ret_val); copy_out(ve_addr_list); } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; + std::cerr << "[VEKernel] kernel invoke failed, error message: " + << e.what() << std::endl; } veo_args_free(argp); - } void parallel_for(const range<2> &r) override { DEBUG_INFO("[VEKernel] parallel_for 2d"); @@ -163,12 +175,9 @@ struct VEKernel : public Kernel { DEBUG_INFO("[VEKernel] parallel_for 3d"); } - virtual ~VEKernel() { - free_ctx(ctx); - } - + virtual ~VEKernel() { free_ctx(ctx); } }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_queue.hpp b/include/neoSYCL/extensions/nec/ve_queue.hpp index d1b04f9..0de9e6e 100644 --- a/include/neoSYCL/extensions/nec/ve_queue.hpp +++ b/include/neoSYCL/extensions/nec/ve_queue.hpp @@ -7,49 +7,51 @@ namespace neosycl::sycl { class ve_queue : public queue { - private: +private: ve_device dev; nec::VEProc proc; nec::VEProc proc_create(const string_class &lib_path, int ve_node) { struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); if (!ve_proc) { - throw nec::VEException("[VEProc] create ve proc on node: " + std::to_string(ve_node) + " failed.."); + throw nec::VEException("[VEProc] create ve proc on node: " + + std::to_string(ve_node) + " failed.."); } uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); - DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", (size_t) ve_proc, lib_path, ve_node); + DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", + (size_t)ve_proc, lib_path, ve_node); return nec::VEProc{ve_proc, handle}; } void free_proc(nec::VEProc proc) { - DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t) proc.ve_proc); + DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", (size_t) proc.ve_proc, rt); + DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", + (size_t)proc.ve_proc, rt); PRINT_ERR("[VEProc] release ve proc failed"); } } - public: - ve_queue(const string_class &path = nec::DEFAULT_VE_LIB) : dev(nec::DEFAULT_VE_NODE), queue() { +public: + ve_queue(const string_class &path = nec::DEFAULT_VE_LIB) + : dev(nec::DEFAULT_VE_NODE), queue() { proc = proc_create(path, nec::DEFAULT_VE_NODE); } - ve_queue(const ve_device &dev, const string_class &path = nec::DEFAULT_VE_LIB) : dev(dev), queue() { + ve_queue(const ve_device &dev, const string_class &path = nec::DEFAULT_VE_LIB) + : dev(dev), queue() { proc = proc_create(path, dev.get_node_id()); } - detail::Task *build_task() override { - return new detail::VETask(proc); - } + detail::Task *build_task() override { return new detail::VETask(proc); } virtual ~ve_queue() { wait(); free_proc(proc); } - }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_selector.hpp b/include/neoSYCL/extensions/nec/ve_selector.hpp index d0b5068..15b8bea 100644 --- a/include/neoSYCL/extensions/nec/ve_selector.hpp +++ b/include/neoSYCL/extensions/nec/ve_selector.hpp @@ -5,7 +5,7 @@ namespace neosycl::sycl { class ve_selector : public device_selector { - public: +public: int operator()(const device &dev) const override { if (dev.is_accelerator()) { return 1; @@ -13,12 +13,9 @@ class ve_selector : public device_selector { return -1; } - device select_device() const override { - return ve_device(); - } - + device select_device() const override { return ve_device(); } }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task.hpp b/include/neoSYCL/extensions/nec/ve_task.hpp index 499daae..c3698ae 100644 --- a/include/neoSYCL/extensions/nec/ve_task.hpp +++ b/include/neoSYCL/extensions/nec/ve_task.hpp @@ -10,15 +10,13 @@ struct VETask : public Task { VETask(const nec::VEProc &proc) : proc(proc) {} - bool is_cpu() override { - return false; - } + bool is_cpu() override { return false; } std::shared_ptr get_kernel(string_class name) override { return std::shared_ptr(new VEKernel(args, name, proc)); } }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 5cb49b3..b1706ba 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -10,22 +10,20 @@ namespace neosycl::sycl::extensions { struct task_handler_ve : public detail::task_handler { public: - - task_handler_ve(const VEProc &proc) : proc(proc) { - ctx = ctx_create(proc); - } + task_handler_ve(const VEProc &proc) : proc(proc) { ctx = ctx_create(proc); } VEContext ctx_create(VEProc proc) { struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t) ctx); + DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); return VEContext{ctx}; } void free_ctx(VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t) ctx.ve_ctx); + DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", (size_t) ctx.ve_ctx, rt); + DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", + (size_t)ctx.ve_ctx, rt); PRINT_ERR("[VEContext] release ve ctx failed"); } } @@ -38,36 +36,40 @@ struct task_handler_ve : public detail::task_handler { return argp; } - vector_class copy_in(struct veo_args *argp, shared_ptr_class k, VEProc proc) { + vector_class copy_in(struct veo_args *argp, + shared_ptr_class k, + VEProc proc) { vector_class ve_addr_list; for (int i = 0; i < k->args.size(); i++) { detail::KernelArg arg = k->args[i]; - size_t size_in_byte = arg.container->get_size(); + size_t size_in_byte = arg.container->get_size(); uint64_t ve_addr_int; int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] allocate VE memory size: {} failed, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] allocate VE memory failed"); throw exception("VE allocate return error"); } ve_addr_list.push_back(ve_addr_int); DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, - ve_addr_int - ); + size_in_byte, ve_addr_int); if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) ve_addr_int, - size_in_byte, - (size_t) arg.container->get_raw_ptr() - ); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, arg.container->get_raw_ptr(), size_in_byte); + DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " + "{:#x}, size: {}, host address: {:#x}", + (size_t)ve_addr_int, size_in_byte, + (size_t)arg.container->get_raw_ptr()); + rt = veo_write_mem(proc.ve_proc, ve_addr_int, + arg.container->get_raw_ptr(), size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy to ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] copy to ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] copy to ve memory failed"); throw exception("VE copy return error"); } @@ -77,36 +79,41 @@ struct task_handler_ve : public detail::task_handler { return ve_addr_list; } - void copy_out(vector_class ve_addr_list, shared_ptr_class k, VEProc proc) { + void copy_out(vector_class ve_addr_list, + shared_ptr_class k, VEProc proc) { for (int i = 0; i < k->args.size(); i++) { detail::KernelArg arg = k->args[i]; - size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = ve_addr_list[i]; + size_t size_in_byte = arg.container->get_size(); + uint64_t device_ptr = ve_addr_list[i]; if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) device_ptr, - size_in_byte, - (size_t) arg.container->get_raw_ptr() - ); + DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " + "size: {}, host address: {:#x}", + (size_t)device_ptr, size_in_byte, + (size_t)arg.container->get_raw_ptr()); // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_raw_ptr(), device_ptr, size_in_byte); + int rt = veo_read_mem(proc.ve_proc, arg.container->get_raw_ptr(), + device_ptr, size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy from ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO( + "[VEProc] copy from ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] copy from ve memory failed"); throw exception("VE copy return error"); } } int rt = veo_free_mem(proc.ve_proc, device_ptr); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", size_in_byte, rt); + DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", + size_in_byte, rt); PRINT_ERR("[VEProc] free ve memory failed"); throw exception("VE free memory return error"); } } } - void single_task(shared_ptr_class k, const std::function &func) override { - for (const detail::KernelArg &arg:k->args) { + void single_task(shared_ptr_class k, + const std::function &func) override { + for (const detail::KernelArg &arg : k->args) { arg.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); @@ -114,58 +121,56 @@ struct task_handler_ve : public detail::task_handler { DEBUG_INFO("[VEKernel] single task: {}", k->name.c_str()); veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); try { vector_class ve_addr_list = copy_in(argp, k, proc); DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, k->name.c_str(), argp); + uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + k->name.c_str(), argp); uint64_t ret_val; veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); + DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + ret_val); copy_out(ve_addr_list, k, proc); } catch (exception &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; + std::cerr << "[VEKernel] kernel invoke failed, error message: " + << e.what() << std::endl; } veo_args_free(argp); - for (const detail::KernelArg &arg:k->args) { + for (const detail::KernelArg &arg : k->args) { arg.release_access(); } } - void parallel_for_1d(shared_ptr_class k, - range<1> r, + void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { throw exception("not implemented"); }; - void parallel_for_2d(shared_ptr_class k, - range<2> r, + void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) override { throw exception("not implemented"); }; - void parallel_for_3d(shared_ptr_class k, - range<3> r, + void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) override { throw exception("not implemented"); }; - detail::SUPPORT_PLATFORM_TYPE type() override { - return detail::SX_AURORA; - } + detail::SUPPORT_PLATFORM_TYPE type() override { return detail::SX_AURORA; } private: VEContext ctx; VEProc proc; }; -} -#endif //NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP +} // namespace neosycl::sycl::extensions +#endif // NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index 47a0448..7c8c444 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -15,7 +15,6 @@ #include "sycl/device_selector.hpp" #include "sycl/device_selector/cpu_selector.hpp" - // include platform headers #include "sycl/platform.hpp" @@ -33,4 +32,4 @@ using namespace neosycl::sycl; } -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_HPP_ diff --git a/include/neoSYCL/sycl/access.hpp b/include/neoSYCL/sycl/access.hpp index 6a2d6fd..7058266 100644 --- a/include/neoSYCL/sycl/access.hpp +++ b/include/neoSYCL/sycl/access.hpp @@ -24,10 +24,7 @@ enum class mode { atomic }; -enum placeholder { - false_t, - true_t -}; +enum placeholder { false_t, true_t }; enum class address_space : int { global_space, @@ -36,8 +33,8 @@ enum class address_space : int { private_space }; -} +} // namespace access -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_ACCESS_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_ACCESS_HPP_ diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index 249fe01..b8d004b 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -9,149 +9,151 @@ namespace neosycl::sycl { -template -class buffer; +template class buffer; -template +template class accessor { public: - template - accessor(buffer &bufferRef, const property_list &propList = {}): - data(bufferRef.data), accessRange(bufferRef.get_range()) {} + template + accessor(buffer &bufferRef, + const property_list &propList = {}) + : data(bufferRef.data), accessRange(bufferRef.get_range()) {} - template + template accessor(buffer &bufferRef, - range accessRange, - const property_list &propList = {}): - data(bufferRef.data), accessRange(accessRange) {} + range accessRange, const property_list &propList = {}) + : data(bufferRef.data), accessRange(accessRange) {} - template + template accessor(buffer &bufferRef, - range accessRange, - id accessOffset, - const property_list &propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) {} + range accessRange, id accessOffset, + const property_list &propList = {}) + : data(bufferRef.data), accessRange(accessRange), + accessOffset(accessOffset) {} - template + template accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, - range accessRange, - const property_list &propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(0) {} + handler &commandGroupHandlerRef, range accessRange, + const property_list &propList = {}) + : data(bufferRef.data), accessRange(accessRange), accessOffset(0) {} - template + template accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, - range accessRange, - id accessOffset, - const property_list &propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) {} - - constexpr bool is_placeholder() const { - return isPlaceholder; - } + handler &commandGroupHandlerRef, range accessRange, + id accessOffset, const property_list &propList = {}) + : data(bufferRef.data), accessRange(accessRange), + accessOffset(accessOffset) {} - size_t get_size() const { - return data.get_size(); - } + constexpr bool is_placeholder() const { return isPlaceholder; } - size_t get_count() const { - return data.use_count(); - } + size_t get_size() const { return data.get_size(); } - range get_range() const { - return accessRange; - } + size_t get_count() const { return data.use_count(); } - id get_offset() const { - return accessOffset; - } + range get_range() const { return accessRange; } + + id get_offset() const { return accessOffset; } - /* Available only when: (accessMode == access::mode::read_write || accessMode == access::mode::discard_read_write) && dimensions == 0) */ - template> + /* Available only when: (accessMode == access::mode::read_write || accessMode + * == access::mode::discard_read_write) && dimensions == 0) */ + template > operator dataT &() const; - /* Available only when: (accessMode == access::mode::write || accessMode == access::mode::read_write || accessMode == access::mode::discard_write || accessMode == access::mode::discard_read_write) && dimensions > 0) */ - template 0)>> + /* Available only when: (accessMode == access::mode::write || accessMode == + * access::mode::read_write || accessMode == access::mode::discard_write || + * accessMode == access::mode::discard_read_write) && dimensions > 0) */ + template 0)>> dataT &operator[](id index) const { size_t index_val = id2index(index); DEBUG_INFO("[Accessor] access with index: %d", index_val); return (*data).get(index_val); } - template 0)>> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D > 0)>> dataT operator[](id index) const { size_t index_val = id2index(index); DEBUG_INFO("[Accessor] read access with index: %d", index_val); return (*data).get(index_val); } - /* Available only when: (accessMode == access::mode::write || accessMode == access::mode::read_write || accessMode == access::mode::discard_write || accessMode == access::mode::discard_read_write) && dimensions == 1) */ - template> + /* Available only when: (accessMode == access::mode::write || accessMode == + * access::mode::read_write || accessMode == access::mode::discard_write || + * accessMode == access::mode::discard_read_write) && dimensions == 1) */ + template > dataT &operator[](size_t index) const { return (*data)[index]; } - template> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 1)>> dataT operator[](size_t index) const { return (*data)[index]; } /* Available only when: dimensions > 1 */ - template> + template > dataT *operator[](size_t index) const { return (*data)[index]; } - template> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 2)>> const dataT *operator[](size_t index) const { return (*data)[index]; } /* Available only when: dimensions > 1 */ - template> + template > detail::container::AccessProxyND operator[](size_t index) const { return (*data)[index]; } - template> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 3)>> const dataT **operator[](size_t index) const { return (*data)[index]; } /* Available only when: accessMode == access::mode::read && dimensions == 0 */ - template> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 0)>> operator dataT() const; ~accessor() = default; @@ -173,6 +175,6 @@ class accessor { } }; -} +} // namespace neosycl::sycl -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP diff --git a/include/neoSYCL/sycl/allocator.hpp b/include/neoSYCL/sycl/allocator.hpp index ebd3c0e..b5a5ebc 100644 --- a/include/neoSYCL/sycl/allocator.hpp +++ b/include/neoSYCL/sycl/allocator.hpp @@ -3,9 +3,8 @@ namespace neosycl::sycl { -template -using buffer_allocator = std::allocator; +template using buffer_allocator = std::allocator; } -#endif //SYCL_INCLUDE_CL_SYCL_ALLOCATOR_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_ALLOCATOR_HPP_ diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index 91ba8d8..aa56e22 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -33,141 +33,160 @@ class context_bound { } // namespace buffer } // namespace property - -template> +template > class buffer { - friend accessor; - friend accessor; - friend accessor; - friend accessor; - friend accessor; - friend accessor; + friend accessor; + friend accessor; + friend accessor; + friend accessor; + friend accessor; + friend accessor; public: - using value_type = T; - using reference = value_type &; + using value_type = T; + using reference = value_type &; using const_reference = const value_type &; - using allocator_type = AllocatorT; + using allocator_type = AllocatorT; - buffer(const range &bufferRange, const property_list &propList = {}) : - buffer(bufferRange, allocator_type(), propList) {} + buffer(const range &bufferRange, + const property_list &propList = {}) + : buffer(bufferRange, allocator_type(), propList) {} - buffer(const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) + buffer(const range &bufferRange, AllocatorT allocator, + const property_list &propList = {}) : bufferRange(bufferRange), - data(new detail::container::DataContainerND(bufferRange.data, allocator)) {} + data(new detail::container::DataContainerND( + bufferRange.data, allocator)) {} - buffer(T *hostData, const range &bufferRange, const property_list &propList = {}) : - buffer(hostData, bufferRange, allocator_type(), propList) {} + buffer(T *hostData, const range &bufferRange, + const property_list &propList = {}) + : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(T *hostData, const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data, allocator)) {} + buffer(T *hostData, const range &bufferRange, + AllocatorT allocator, const property_list &propList = {}) + : bufferRange(bufferRange), + data(new detail::container::DataContainerND( + hostData, bufferRange.data, allocator)) {} - buffer(const T *hostData, const range &bufferRange, const property_list &propList = {}) : - buffer(hostData, bufferRange, allocator_type(), propList) {} + buffer(const T *hostData, const range &bufferRange, + const property_list &propList = {}) + : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(const T *hostData, - const range &bufferRange, - AllocatorT allocator, - const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data)) {} + buffer(const T *hostData, const range &bufferRange, + AllocatorT allocator, const property_list &propList = {}) + : bufferRange(bufferRange), + data(new detail::container::DataContainerND( + hostData, bufferRange.data)) {} buffer(const shared_ptr_class &hostData, - const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data, allocator)) {} + const range &bufferRange, AllocatorT allocator, + const property_list &propList = {}) + : bufferRange(bufferRange), + data(new detail::container::DataContainerND( + hostData, bufferRange.data, allocator)) {} - buffer(const shared_ptr_class &hostData, const range &bufferRange, const property_list &propList = {}) + buffer(const shared_ptr_class &hostData, + const range &bufferRange, + const property_list &propList = {}) : bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData.get(), bufferRange.data)) {} - - template> - buffer(InputIterator first, - InputIterator last, - AllocatorT allocator, - const property_list &propList = {}) : - bufferRange((last - first) / sizeof(T)), - data(new detail::container::DataContainerND - (first, detail::container::ArrayND<1>((last - first) / sizeof(T)), allocator)) {} - - template> - buffer(InputIterator first, InputIterator last, const property_list &propList = {}) : - bufferRange((last - first) / sizeof(T)), - data(new detail::container::DataContainerND - (first, detail::container::ArrayND<1>(last - first) / sizeof(T))) {} + data(new detail::container::DataContainerND( + hostData.get(), bufferRange.data)) {} + + template > + buffer(InputIterator first, InputIterator last, AllocatorT allocator, + const property_list &propList = {}) + : bufferRange((last - first) / sizeof(T)), + data(new detail::container::DataContainerND( + first, detail::container::ArrayND<1>((last - first) / sizeof(T)), + allocator)) {} + + template > + buffer(InputIterator first, InputIterator last, + const property_list &propList = {}) + : bufferRange((last - first) / sizeof(T)), + data(new detail::container::DataContainerND( + first, detail::container::ArrayND<1>(last - first) / sizeof(T))) {} buffer(buffer b, const id &baseIndex, const range &subRange); /* Available only when: dimensions == 1. */ -// buffer(cl_mem clMemObject, const context &syclContext, event availableEvent = {}); + // buffer(cl_mem clMemObject, const context &syclContext, event + // availableEvent = {}); -/* -- common interface members -- */ -/* -- property interface members -- */ - range get_range() const { - return bufferRange; - } + /* -- common interface members -- */ + /* -- property interface members -- */ + range get_range() const { return bufferRange; } - size_t get_count() const { - return bufferRange.size(); - } + size_t get_count() const { return bufferRange.size(); } - size_t get_size() const { - return get_count() * sizeof(T); - } + size_t get_size() const { return get_count() * sizeof(T); } - AllocatorT get_allocator() const { - return AllocatorT(); - } + AllocatorT get_allocator() const { return AllocatorT(); } - template - accessor get_access(handler &commandGroupHandler) { - commandGroupHandler.get_kernel()->args.push_back(detail::KernelArg(data, mode)); + template + accessor + get_access(handler &commandGroupHandler) { + commandGroupHandler.get_kernel()->args.push_back( + detail::KernelArg(data, mode)); return accessor(*this); } - template + template accessor get_access() { return accessor(*this); } - template - accessor get_access( - handler &commandGroupHandler, range accessRange, id accessOffset = {}) { - commandGroupHandler.get_kernel()->args.push_back(detail::KernelArg(data, mode)); - return accessor - (*this, commandGroupHandler, accessRange, accessOffset); + template + accessor + get_access(handler &commandGroupHandler, range accessRange, + id accessOffset = {}) { + commandGroupHandler.get_kernel()->args.push_back( + detail::KernelArg(data, mode)); + return accessor(*this, commandGroupHandler, + accessRange, accessOffset); } - template - accessor get_access( - range accessRange, id accessOffset = {}) { - return accessor - (*this, accessRange, accessOffset); + template + accessor + get_access(range accessRange, id accessOffset = {}) { + return accessor( + *this, accessRange, accessOffset); } - template + template void set_final_data(Destination finalData = nullptr); void set_write_back(bool flag = true); bool is_sub_buffer() const; - template - buffer reinterpret(range reinterpretRange) const; + template + buffer + reinterpret(range reinterpretRange) const; buffer(const buffer &rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} buffer(buffer &&rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} buffer &operator=(const buffer &rhs) { - data = rhs.data; + data = rhs.data; bufferRange = rhs.bufferRange; } buffer &operator=(buffer &&rhs) { - data = rhs.data; + data = rhs.data; bufferRange = rhs.bufferRange; } @@ -177,6 +196,6 @@ class buffer { std::shared_ptr> data; range bufferRange; }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ diff --git a/include/neoSYCL/sycl/context.hpp b/include/neoSYCL/sycl/context.hpp index 6b2060a..5a39cf0 100644 --- a/include/neoSYCL/sycl/context.hpp +++ b/include/neoSYCL/sycl/context.hpp @@ -12,29 +12,31 @@ class context { public: explicit context(const property_list &propList = {}); - context(async_handler asyncHandler, - const property_list &propList = {}); + context(async_handler asyncHandler, const property_list &propList = {}); context(const device &dev, const property_list &propList = {}); - context(const device &dev, async_handler asyncHandler, const property_list &propList = {}); + context(const device &dev, async_handler asyncHandler, + const property_list &propList = {}); context(const platform &plt, const property_list &propList = {}); - context(const platform &plt, async_handler asyncHandler, const property_list &propList = {}); - - context(const vector_class &deviceList, const property_list &propList = {}); + context(const platform &plt, async_handler asyncHandler, + const property_list &propList = {}); context(const vector_class &deviceList, - async_handler asyncHandler, const property_list &propList = {}); + const property_list &propList = {}); -// context(cl_context clContext, async_handler asyncHandler = {}); + context(const vector_class &deviceList, async_handler asyncHandler, + const property_list &propList = {}); - template - typename info::param_traits::return_type get_info() const; + // context(cl_context clContext, async_handler asyncHandler = {}); + template + typename info::param_traits::return_type + get_info() const; }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ diff --git a/include/neoSYCL/sycl/detail/container/array_nd.hpp b/include/neoSYCL/sycl/detail/container/array_nd.hpp index 30e598c..ea47511 100644 --- a/include/neoSYCL/sycl/detail/container/array_nd.hpp +++ b/include/neoSYCL/sycl/detail/container/array_nd.hpp @@ -1,89 +1,91 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ -#define DEFINE_ARRAY_ND_OP_CONST(op) \ - friend ArrayND operator op(const ArrayND &lhs, const ArrayND &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = (size_t)(lhs[i] op rhs[i]); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST(op) \ + friend ArrayND operator op(const ArrayND &lhs, \ + const ArrayND &rhs) { \ + ArrayND ret; \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(lhs[i] op rhs[i]); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T(op) \ - friend ArrayND operator op(const ArrayND &lhs, const size_t &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = (size_t)(lhs[i] op rhs); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T(op) \ + friend ArrayND operator op(const ArrayND &lhs, \ + const size_t &rhs) { \ + ArrayND ret; \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(lhs[i] op rhs); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_OP(op) \ - friend ArrayND &operator op(ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - lhs[i] = (size_t)(lhs[i] op rhs[i]); \ - } \ - return lhs; \ +#define DEFINE_ARRAY_ND_OP(op) \ + friend ArrayND &operator op(ArrayND &lhs, \ + const ArrayND &rhs) { \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + lhs[i] = (size_t)(lhs[i] op rhs[i]); \ + } \ + return lhs; \ }; -#define DEFINE_ARRAY_ND_OP_SIZE_T(op) \ - friend ArrayND &operator op(ArrayND &lhs, const size_t &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - lhs[i] = (size_t)(lhs[i] op rhs); \ - } \ - return lhs; \ +#define DEFINE_ARRAY_ND_OP_SIZE_T(op) \ + friend ArrayND &operator op(ArrayND &lhs, \ + const size_t &rhs) { \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + lhs[i] = (size_t)(lhs[i] op rhs); \ + } \ + return lhs; \ }; -#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T_LEFT(op) \ - friend ArrayND operator op(const size_t &lhs, ArrayND &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = (size_t)(rhs[i] op lhs); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T_LEFT(op) \ + friend ArrayND operator op(const size_t &lhs, \ + ArrayND &rhs) { \ + ArrayND ret; \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(rhs[i] op lhs); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_COMMON_BY_VALUE_SEMANTICS() \ -friend bool operator==(const ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - if(lhs[i]!=rhs[i]){ \ - return false; \ - } \ - } \ - return true; \ -} \ -friend bool operator!=(const ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - if(lhs[i]!=rhs[i]){ \ - return true; \ - } \ - } \ - return false; \ -} +#define DEFINE_ARRAY_ND_COMMON_BY_VALUE_SEMANTICS() \ + friend bool operator==(const ArrayND &lhs, \ + const ArrayND &rhs) { \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + if (lhs[i] != rhs[i]) { \ + return false; \ + } \ + } \ + return true; \ + } \ + friend bool operator!=(const ArrayND &lhs, \ + const ArrayND &rhs) { \ + for (std::size_t i = 0; i < dimensions; ++i) { \ + if (lhs[i] != rhs[i]) { \ + return true; \ + } \ + } \ + return false; \ + } namespace neosycl::sycl::detail::container { -template -struct ArrayND { +template struct ArrayND { ArrayND() : data{} {} - template> + template > ArrayND(size_t dim0) : data{dim0} {} - template> + template > ArrayND(size_t dim0, size_t dim1) : data{dim0, dim1} {} - template> + template > ArrayND(size_t dim0, size_t dim1, size_t dim2) : data{dim0, dim1, dim2} {} - size_t operator[](int dimension) const { - return data[dimension]; - } + size_t operator[](int dimension) const { return data[dimension]; } - size_t &operator[](int dimension) { - return data[dimension]; - } + size_t &operator[](int dimension) { return data[dimension]; } size_t get_liner() const { if (dimensions == 2) { @@ -178,6 +180,6 @@ struct ArrayND { size_t data[dimensions]; }; -} +} // namespace neosycl::sycl::detail::container -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ diff --git a/include/neoSYCL/sycl/detail/container/data_container.hpp b/include/neoSYCL/sycl/detail/container/data_container.hpp index d5a936a..542b230 100644 --- a/include/neoSYCL/sycl/detail/container/data_container.hpp +++ b/include/neoSYCL/sycl/detail/container/data_container.hpp @@ -13,31 +13,21 @@ class DataContainer { mutable std::shared_mutex mtx; public: + void lock_read() const { mtx.lock_shared(); } - void lock_read() const { - mtx.lock_shared(); - } + void unlock_read() const { mtx.unlock_shared(); } - void unlock_read() const { - mtx.unlock_shared(); - } + void lock_write() const { mtx.lock(); } - void lock_write() const { - mtx.lock(); - } - - void unlock_write() const { - mtx.unlock(); - } + void unlock_write() const { mtx.unlock(); } virtual void *get_raw_ptr() = 0; virtual size_t get_size() = 0; virtual size_t get_count() = 0; - }; -} +} // namespace neosycl::sycl::detail::container -#endif //SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_HPP_ diff --git a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp index bd55e20..fbc6ab0 100644 --- a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp +++ b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp @@ -13,142 +13,124 @@ namespace neosycl::sycl::detail::container { * @tparam dimensions Buffer dimensions * @tparam AllocatorT Buffer Allocator */ -template +template class DataContainerD : public DataContainer { public: - explicit DataContainerD(ArrayND r) : range(r) { + explicit DataContainerD(ArrayND r) : range(r) { allocate_ptr = shared_ptr_class(alloc.allocate(r.get_liner())); - ptr = allocate_ptr.get(); + ptr = allocate_ptr.get(); } - DataContainerD(ArrayND r, AllocatorT allocatorT) : alloc(allocatorT), range(r) { + DataContainerD(ArrayND r, AllocatorT allocatorT) + : alloc(allocatorT), range(r) { allocate_ptr = shared_ptr_class(alloc.allocate(r.get_liner())); - ptr = allocate_ptr.get(); + ptr = allocate_ptr.get(); } - DataContainerD(T *data, ArrayND r) : range(r), ptr(data), allocate_ptr(nullptr) {} + DataContainerD(T *data, ArrayND r) + : range(r), ptr(data), allocate_ptr(nullptr) {} - DataContainerD(T *data, ArrayND r, AllocatorT allocatorT) : - ptr(data), alloc(allocatorT), range(r), allocate_ptr(nullptr) {} + DataContainerD(T *data, ArrayND r, AllocatorT allocatorT) + : ptr(data), alloc(allocatorT), range(r), allocate_ptr(nullptr) {} - size_t get_size() override { - return sizeof(T) * range.get_liner(); - } + size_t get_size() override { return sizeof(T) * range.get_liner(); } - size_t get_count() override { - return range.get_liner(); - } + size_t get_count() override { return range.get_liner(); } - T *get_ptr() const { - return ptr; - } + T *get_ptr() const { return ptr; } - void *get_raw_ptr() override { - return (void *) get_ptr(); - } + void *get_raw_ptr() override { return (void *)get_ptr(); } - T *begin() const { - return ptr; - } + T *begin() const { return ptr; } - T *end() const { - return ptr + range.get_liner(); - } + T *end() const { return ptr + range.get_liner(); } - T &get(size_t x) const { - return ptr[x]; - } + T &get(size_t x) const { return ptr[x]; } - AllocatorT get_allocator() { - return alloc; - } + AllocatorT get_allocator() { return alloc; } - ArrayND get_range() const { - return range; - } + ArrayND get_range() const { return range; } - DataContainerD(const DataContainerD &rhs) : - range(rhs.range), - alloc(rhs.alloc) { + DataContainerD(const DataContainerD &rhs) + : range(rhs.range), alloc(rhs.alloc) { allocate_ptr = shared_ptr_class(alloc.allocate(range.get_liner())); - ptr = allocate_ptr.get(); + ptr = allocate_ptr.get(); memcpy(ptr, rhs.ptr, sizeof(T) * range.get_liner()); } - DataContainerD(DataContainerD &&rhs) : - range(rhs.range), - alloc(rhs.alloc), - allocate_ptr(rhs.allocate_ptr), - ptr(rhs.ptr) {} + DataContainerD(DataContainerD &&rhs) + : range(rhs.range), alloc(rhs.alloc), allocate_ptr(rhs.allocate_ptr), + ptr(rhs.ptr) {} DataContainerD &operator=(const DataContainerD &rhs) { - range = rhs.range; - alloc = rhs.alloc; - ptr = rhs.ptr; + range = rhs.range; + alloc = rhs.alloc; + ptr = rhs.ptr; allocate_ptr = rhs.allocate_ptr; } DataContainerD &operator=(DataContainerD &&rhs) { - range = rhs.range; - alloc = rhs.alloc; - ptr = rhs.ptr; + range = rhs.range; + alloc = rhs.alloc; + ptr = rhs.ptr; allocate_ptr = rhs.allocate_ptr; } private: - ArrayND range; + ArrayND range; AllocatorT alloc; T *ptr; - shared_ptr_class allocate_ptr; + shared_ptr_class allocate_ptr; }; -template> +template > class DataContainerND {}; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: DataContainerND(const ArrayND<1> &r) : DataContainerD(r) {} - DataContainerND(const ArrayND<1> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<1> &r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<1> &r) : - DataContainerD(data, r) {} + DataContainerND(T *data, const ArrayND<1> &r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<1> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T *data, const ArrayND<1> &r, AllocatorT alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD &rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD &&rhs) + : DataContainerD(rhs) {} - T &operator[](size_t x) const { - return this->get_ptr()[x]; - } + T &operator[](size_t x) const { return this->get_ptr()[x]; } }; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: DataContainerND(const ArrayND<2> &r) : DataContainerD(r) {} - DataContainerND(const ArrayND<2> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<2> &r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<2> &r) : - DataContainerD(data, r) {} + DataContainerND(T *data, const ArrayND<2> &r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<2> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T *data, const ArrayND<2> &r, AllocatorT alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD &rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD &&rhs) + : DataContainerD(rhs) {} T *operator[](size_t i) const { size_t x = this->get_range()[0]; @@ -163,11 +145,9 @@ class DataContainerND : public DataContainerD -struct AccessProxyND {}; +template struct AccessProxyND {}; -template -struct AccessProxyND { +template struct AccessProxyND { AccessProxyND(const ArrayND<3> &r, T *ptr) : range(r), base_ptr(ptr) {} T *operator[](size_t i) const { @@ -179,33 +159,34 @@ struct AccessProxyND { T *base_ptr; }; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: DataContainerND(const ArrayND<3> &r) : DataContainerD(r) {} - DataContainerND(const ArrayND<3> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<3> &r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<3> &r) : - DataContainerD(data, r) {} + DataContainerND(T *data, const ArrayND<3> &r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<3> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T *data, const ArrayND<3> &r, AllocatorT alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD &rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD &&rhs) + : DataContainerD(rhs) {} AccessProxyND operator[](size_t i) const { - size_t x = this->get_range()[0]; + size_t x = this->get_range()[0]; T *base_ptr = this->get_ptr() + i * x; return AccessProxyND(this->get_range(), base_ptr); } }; -} +} // namespace neosycl::sycl::detail::container -#endif //SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_ND_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_ND_HPP_ diff --git a/include/neoSYCL/sycl/detail/debug.hpp b/include/neoSYCL/sycl/detail/debug.hpp index 9c1dbe7..0bac32e 100644 --- a/include/neoSYCL/sycl/detail/debug.hpp +++ b/include/neoSYCL/sycl/detail/debug.hpp @@ -1,15 +1,15 @@ #ifndef SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ #define SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ - #ifdef DEBUG #include -#define DEBUG_INFO(...) std::cout<< "[DEBUG] "<< printf(__VA_ARGS__) < -void HIGHLIGHT_KERNEL_PARALLEL(Kernel k, const range &num_work_items) { -} +template +void HIGHLIGHT_KERNEL_PARALLEL(Kernel k, + const range &num_work_items) {} -template +template void HIGHLIGHT_KERNEL_SINGLE_TASK(Kernel k) {} -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ diff --git a/include/neoSYCL/sycl/detail/kernel.hpp b/include/neoSYCL/sycl/detail/kernel.hpp index cda8276..8b1d5df 100644 --- a/include/neoSYCL/sycl/detail/kernel.hpp +++ b/include/neoSYCL/sycl/detail/kernel.hpp @@ -11,6 +11,6 @@ struct kernel { string_class name; }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ diff --git a/include/neoSYCL/sycl/detail/kernel_arg.hpp b/include/neoSYCL/sycl/detail/kernel_arg.hpp index 693d57d..b896e09 100644 --- a/include/neoSYCL/sycl/detail/kernel_arg.hpp +++ b/include/neoSYCL/sycl/detail/kernel_arg.hpp @@ -7,8 +7,9 @@ namespace neosycl::sycl::detail { struct KernelArg { - KernelArg(std::shared_ptr arg, access::mode mode) : - container(std::move(arg)), mode(mode) {} + KernelArg(std::shared_ptr arg, + access::mode mode) + : container(std::move(arg)), mode(mode) {} void acquire_access() const { switch (mode) { @@ -38,6 +39,6 @@ struct KernelArg { access::mode mode; }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ diff --git a/include/neoSYCL/sycl/detail/platform_info.hpp b/include/neoSYCL/sycl/detail/platform_info.hpp index 263dcbd..5c2fd01 100644 --- a/include/neoSYCL/sycl/detail/platform_info.hpp +++ b/include/neoSYCL/sycl/detail/platform_info.hpp @@ -19,17 +19,11 @@ struct platform_info { struct cpu_platform_info : public platform_info { - bool is_host() override { - return true; - } + bool is_host() override { return true; } - SUPPORT_PLATFORM_TYPE type() override { - return SUPPORT_PLATFORM_TYPE::CPU; - } + SUPPORT_PLATFORM_TYPE type() override { return SUPPORT_PLATFORM_TYPE::CPU; } - bool has_extension(const string_class &extension) override { - return false; - } + bool has_extension(const string_class &extension) override { return false; } vector_class> list_devices() override { return {shared_ptr_class(new cpu_device_info())}; @@ -38,6 +32,6 @@ struct cpu_platform_info : public platform_info { using default_platform_info = cpu_platform_info; -} +} // namespace neosycl::sycl::detail -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_PLATFORM_INFO_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_PLATFORM_INFO_HPP diff --git a/include/neoSYCL/sycl/detail/registered_platforms.hpp b/include/neoSYCL/sycl/detail/registered_platforms.hpp index 820db95..0ee8bd8 100644 --- a/include/neoSYCL/sycl/detail/registered_platforms.hpp +++ b/include/neoSYCL/sycl/detail/registered_platforms.hpp @@ -12,17 +12,19 @@ namespace neosycl::sycl::detail { static shared_ptr_class REGISTERED_PLATFORMS[] = { - shared_ptr_class(new default_platform_info()) -}; + shared_ptr_class(new default_platform_info())}; -static std::map> PLATFORM_HANDLER_MAP = { - {SUPPORT_PLATFORM_TYPE::CPU, shared_ptr_class(new task_handler_cpu())}, +static std::map> + PLATFORM_HANDLER_MAP = { + {SUPPORT_PLATFORM_TYPE::CPU, + shared_ptr_class(new task_handler_cpu())}, #ifdef DBUILD_VE - {SUPPORT_PLATFORM_TYPE::SX_AURORA, shared_ptr_class(new task_handler_cpu())} + {SUPPORT_PLATFORM_TYPE::SX_AURORA, + shared_ptr_class(new task_handler_cpu())} #endif }; -} +} // namespace neosycl::sycl::detail -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H diff --git a/include/neoSYCL/sycl/detail/task.hpp b/include/neoSYCL/sycl/detail/task.hpp index caf223e..dd6b73c 100644 --- a/include/neoSYCL/sycl/detail/task.hpp +++ b/include/neoSYCL/sycl/detail/task.hpp @@ -9,9 +9,7 @@ struct task { std::condition_variable cond; std::mutex lock; - void start() { - waiting = true; - } + void start() { waiting = true; } void end() { std::unique_lock ul{lock}; @@ -24,9 +22,8 @@ struct task { std::unique_lock ul{lock}; cond.wait(ul, [&] { return !waiting; }); } - }; -} +} // namespace neosycl::sycl::detail -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_TASK_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_TASK_HPP diff --git a/include/neoSYCL/sycl/detail/task_counter.hpp b/include/neoSYCL/sycl/detail/task_counter.hpp index 4d469b9..3288585 100644 --- a/include/neoSYCL/sycl/detail/task_counter.hpp +++ b/include/neoSYCL/sycl/detail/task_counter.hpp @@ -16,12 +16,9 @@ class task_counter { std::mutex lock; public: - task_counter() : counter(0) {} - void incr() { - counter++; - } + void incr() { counter++; } void decr() { std::unique_lock ul{lock}; @@ -35,9 +32,8 @@ class task_counter { std::unique_lock ul{lock}; cond.wait(ul, [&] { return counter == 0; }); } - }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_QUEUE_QUEUE_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_QUEUE_QUEUE_HPP_ diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index c05be04..04ae7ea 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -7,60 +7,56 @@ namespace neosycl::sycl::detail { struct task_handler { - virtual void single_task(shared_ptr_class k, const std::function &func) = 0; + virtual void single_task(shared_ptr_class k, + const std::function &func) = 0; - virtual void parallel_for_1d(shared_ptr_class k, - range<1> r, + virtual void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) = 0; - virtual void parallel_for_2d(shared_ptr_class k, - range<2> r, + virtual void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) = 0; - virtual void parallel_for_3d(shared_ptr_class k, - range<3> r, + virtual void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) = 0; virtual SUPPORT_PLATFORM_TYPE type() = 0; - }; struct task_handler_cpu : public task_handler { - void single_task(shared_ptr_class k, const std::function &func) override { - for (const KernelArg &arg:k->args) { + void single_task(shared_ptr_class k, + const std::function &func) override { + for (const KernelArg &arg : k->args) { arg.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); func(); - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.release_access(); } } - void parallel_for_1d(shared_ptr_class k, - range<1> r, + void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { func(id<1>(x)); } - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.release_access(); } }; - void parallel_for_2d(shared_ptr_class k, - range<2> r, + void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) override { - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { @@ -68,16 +64,15 @@ struct task_handler_cpu : public task_handler { func(id<2>(x, y)); } } - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.release_access(); } }; - void parallel_for_3d(shared_ptr_class k, - range<3> r, + void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) override { - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { @@ -87,17 +82,14 @@ struct task_handler_cpu : public task_handler { } } } - for (const KernelArg &arg:k->args) { + for (const KernelArg &arg : k->args) { arg.release_access(); } }; - SUPPORT_PLATFORM_TYPE type() override { - return CPU; - } - + SUPPORT_PLATFORM_TYPE type() override { return CPU; } }; -} +} // namespace neosycl::sycl::detail -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP diff --git a/include/neoSYCL/sycl/device.hpp b/include/neoSYCL/sycl/device.hpp index 9197dfe..99c5175 100644 --- a/include/neoSYCL/sycl/device.hpp +++ b/include/neoSYCL/sycl/device.hpp @@ -15,56 +15,53 @@ class device { friend class handler; public: - device() : device_info(new detail::default_device_info()) {}; + device() : device_info(new detail::default_device_info()){}; - device(const shared_ptr_class &info) : device_info(info) {} + device(const shared_ptr_class &info) + : device_info(info) {} -// explicit device(cl_device_id deviceId); + // explicit device(cl_device_id deviceId); - explicit device(const device_selector &deviceSelector) {}; + explicit device(const device_selector &deviceSelector){}; /* -- common interface members -- */ -// cl_device_id get() const; + // cl_device_id get() const; - bool is_host() const { - return device_info->is_host(); - } + bool is_host() const { return device_info->is_host(); } - bool is_cpu() const { - return device_info->is_cpu(); - } + bool is_cpu() const { return device_info->is_cpu(); } - bool is_gpu() const { - return device_info->is_gpu(); - } + bool is_gpu() const { return device_info->is_gpu(); } - bool is_accelerator() const { - return device_info->is_accelerator(); - } + bool is_accelerator() const { return device_info->is_accelerator(); } platform get_platform() const; - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type + get_info() const; bool has_extension(const string_class &extension) const; -// Available only when prop == info::partition_property::partition_equally - template + // Available only when prop == info::partition_property::partition_equally + template vector_class create_sub_devices(size_t nbSubDev) const; -// Available only when prop == info::partition_property::partition_by_counts - template - vector_class create_sub_devices(const vector_class &counts) const; + // Available only when prop == info::partition_property::partition_by_counts + template + vector_class + create_sub_devices(const vector_class &counts) const; -// Available only when prop == info::partition_property::partition_by_affinity_domain -// template -// vector_class create_sub_devices(info::affinity_domain affinityDomain) const; + // Available only when prop == + // info::partition_property::partition_by_affinity_domain + // template + // vector_class create_sub_devices(info::affinity_domain + // affinityDomain) const; - static vector_class get_devices( - info::device_type deviceType = info::device_type::all) { + static vector_class + get_devices(info::device_type deviceType = info::device_type::all) { vector_class ret; - for (const platform &info: platform::get_platforms()) { - for (const device &dev:info.get_devices()) { + for (const platform &info : platform::get_platforms()) { + for (const device &dev : info.get_devices()) { ret.push_back(dev); } } @@ -75,18 +72,17 @@ class device { shared_ptr_class device_info; }; -device device_selector::select_device() const { - return device(); -} +device device_selector::select_device() const { return device(); } vector_class platform::get_devices(info::device_type) const { vector_class ret; - for (shared_ptr_class info:platform_info->list_devices()) { + for (shared_ptr_class info : + platform_info->list_devices()) { ret.push_back(device(info)); } return ret; } -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ diff --git a/include/neoSYCL/sycl/device_selector.hpp b/include/neoSYCL/sycl/device_selector.hpp index 1a7bc1e..2f51fe5 100644 --- a/include/neoSYCL/sycl/device_selector.hpp +++ b/include/neoSYCL/sycl/device_selector.hpp @@ -10,7 +10,6 @@ class device; class device_selector { public: - device_selector() = default; device_selector(const device_selector &rhs) = default; @@ -25,11 +24,11 @@ class device_selector { virtual int operator()(const device &device) const = 0; virtual shared_ptr_class get_platform_info() const { - return shared_ptr_class(new detail::cpu_platform_info); + return shared_ptr_class( + new detail::cpu_platform_info); } - }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ diff --git a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp index b8ac52f..abb1a4e 100644 --- a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp +++ b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp @@ -14,14 +14,14 @@ class cpu_selector : public device_selector { } device select_device() const { - return device(shared_ptr_class(new detail::cpu_device_info())); + return device( + shared_ptr_class(new detail::cpu_device_info())); } - }; using default_selector = cpu_selector; -using host_selector = cpu_selector; +using host_selector = cpu_selector; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ diff --git a/include/neoSYCL/sycl/event.hpp b/include/neoSYCL/sycl/event.hpp index 7a78f5c..4566aac 100644 --- a/include/neoSYCL/sycl/event.hpp +++ b/include/neoSYCL/sycl/event.hpp @@ -12,28 +12,21 @@ class event { ~event() {} - vector_class get_wait_list() { - throw unimplemented(); - } + vector_class get_wait_list() { throw unimplemented(); } - void wait() { - throw unimplemented(); - } + void wait() { throw unimplemented(); } - static void wait(const vector_class &eventList) { + static void wait(const vector_class &eventList) { throw unimplemented(); } - void wait_and_throw() { - throw unimplemented(); - } + void wait_and_throw() { throw unimplemented(); } - static void wait_and_throw(const vector_class &eventList) { + static void wait_and_throw(const vector_class &eventList) { throw unimplemented(); } - }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_EVENT_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_EVENT_HPP_ diff --git a/include/neoSYCL/sycl/exception.hpp b/include/neoSYCL/sycl/exception.hpp index c0d1f3f..65290a0 100644 --- a/include/neoSYCL/sycl/exception.hpp +++ b/include/neoSYCL/sycl/exception.hpp @@ -10,12 +10,9 @@ class context; class exception : public std::exception { public: - exception(const string_class &message) : error_msg(message) {} - const char *what() const noexcept override { - return error_msg.c_str(); - } + const char *what() const noexcept override { return error_msg.c_str(); } bool has_context() const; @@ -23,11 +20,10 @@ class exception : public std::exception { private: string_class error_msg; - }; using exception_list = vector_class; -using async_handler = function_class; +using async_handler = function_class; class runtime_error : public exception { using exception::exception; @@ -77,6 +73,6 @@ class unimplemented : public exception { unimplemented() : exception("not implemented") {} }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_EXCEPTION_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_EXCEPTION_HPP_ diff --git a/include/neoSYCL/sycl/extensions.hpp b/include/neoSYCL/sycl/extensions.hpp index 842d32b..1793f32 100644 --- a/include/neoSYCL/sycl/extensions.hpp +++ b/include/neoSYCL/sycl/extensions.hpp @@ -1,10 +1,6 @@ #ifndef NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ #define NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ -namespace neosycl::sycl { +namespace neosycl::sycl {} - - -} - -#endif //NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ +#endif // NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 0b8f008..d8dfd9b 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -26,109 +26,103 @@ namespace detail { * @tparam KernelName class * @return str */ -template -string_class get_kernel_name_from_class() { +template string_class get_kernel_name_from_class() { KernelName *p; string_class in = typeid(p).name(); return in; } -} +} // namespace detail class handler { public: - explicit handler(device dev, - shared_ptr_class counter) : - bind_device(std::move(dev)), - counter(std::move(counter)), - kernel(new detail::kernel()) {} + explicit handler(device dev, shared_ptr_class counter) + : bind_device(std::move(dev)), counter(std::move(counter)), + kernel(new detail::kernel()) {} - template + template void single_task(KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_task([f = kernelFunc, h = handler, k = kernel]() { - h->single_task(k, f); - }); + shared_ptr_class handler = + detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; + submit_task( + [f = kernelFunc, h = handler, k = kernel]() { h->single_task(k, f); }); } - template + template void submit_parallel_for(shared_ptr_class handler, - range<3> numWorkItems, - id<3> offset, + range<3> numWorkItems, id<3> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_3d(k, n, f, o); - }); + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_3d(k, n, f, o); }); } - template + template void submit_parallel_for(shared_ptr_class handler, - range<2> numWorkItems, - id<2> offset, + range<2> numWorkItems, id<2> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_2d(k, n, f, o); - }); + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_2d(k, n, f, o); }); } - template + template void submit_parallel_for(shared_ptr_class handler, - range<1> numWorkItems, - id<1> offset, + range<1> numWorkItems, id<1> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_1d(k, n, f, o); - }); + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_1d(k, n, f, o); }); } - template + template void parallel_for(range numWorkItems, KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; + shared_ptr_class handler = + detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; submit_parallel_for(handler, numWorkItems, id(), kernelFunc); } - template - void parallel_for(range numWorkItems, id workItemOffset, KernelType kernelFunc) { + template + void parallel_for(range numWorkItems, + id workItemOffset, KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; + shared_ptr_class handler = + detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; submit_parallel_for(handler, numWorkItems, workItemOffset, kernelFunc); } -// template -// void parallel_for(nd_range executionRange, KernelType kernelFunc); + // template + // void parallel_for(nd_range executionRange, KernelType + // kernelFunc); - template - void parallel_for_work_group(range numWorkGroups, WorkgroupFunctionType kernelFunc); + template + void parallel_for_work_group(range numWorkGroups, + WorkgroupFunctionType kernelFunc); - template + template void parallel_for_work_group(range numWorkGroups, range workGroupSize, WorkgroupFunctionType kernelFunc); //----- OpenCL interoperability interface // - template - void set_arg(int argIndex, T &&arg) { + template void set_arg(int argIndex, T &&arg) { kernel->args.insert(argIndex, arg); } - template - void set_args(Ts &&... args) { + template void set_args(Ts &&...args) { kernel->args.push_back(args...); } - shared_ptr_class get_kernel() { - return kernel; - } + shared_ptr_class get_kernel() { return kernel; } private: shared_ptr_class kernel; device bind_device; shared_ptr_class counter; - template - void submit_task(Func func) { + template void submit_task(Func func) { counter->incr(); std::thread t([f = func, c = counter]() { try { @@ -142,6 +136,6 @@ class handler { } }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_HANDLER_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_HANDLER_HPP_ diff --git a/include/neoSYCL/sycl/id.hpp b/include/neoSYCL/sycl/id.hpp index c622412..b8b66d5 100644 --- a/include/neoSYCL/sycl/id.hpp +++ b/include/neoSYCL/sycl/id.hpp @@ -9,17 +9,16 @@ namespace neosycl::sycl { -template -struct id { +template struct id { id() = default; - template> + template > id(size_t dim0) : data{dim0} {} - template> + template > id(size_t dim0, size_t dim1) : data{dim0, dim1} {} - template> + template > id(size_t dim0, size_t dim1, size_t dim2) : data{dim0, dim1, dim2} {} id(const range &range) { @@ -34,18 +33,11 @@ struct id { } } - size_t get(int dimension) const { - return data[dimension]; - } - - size_t &operator[](int dimension) { - return data[dimension]; - } + size_t get(int dimension) const { return data[dimension]; } - size_t operator[](int dimension) const { - return data[dimension]; - } + size_t &operator[](int dimension) { return data[dimension]; } + size_t operator[](int dimension) const { return data[dimension]; } // Where OP is: +, -, *, /, %, <<, >>, &, |, ˆ, &&, ||, <, >, <=, >=. DEFINE_OP_CONST(id, +); @@ -130,6 +122,6 @@ struct id { detail::container::ArrayND data; }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_ID_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_ID_HPP_ diff --git a/include/neoSYCL/sycl/info/context.hpp b/include/neoSYCL/sycl/info/context.hpp index 51a5768..b6ac2b7 100644 --- a/include/neoSYCL/sycl/info/context.hpp +++ b/include/neoSYCL/sycl/info/context.hpp @@ -5,12 +5,8 @@ namespace neosycl::sycl::info { using gl_context_interop = bool; -enum class context : int { - reference_count, - num_devices, - gl_interop -}; +enum class context : int { reference_count, num_devices, gl_interop }; -} +} // namespace neosycl::sycl::info -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_CONTEXT_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_CONTEXT_HPP_ diff --git a/include/neoSYCL/sycl/info/device.hpp b/include/neoSYCL/sycl/info/device.hpp index 6aa686a..607f49d 100644 --- a/include/neoSYCL/sycl/info/device.hpp +++ b/include/neoSYCL/sycl/info/device.hpp @@ -87,6 +87,6 @@ enum class partition_property : int { partition_by_affinity_domain }; -} +} // namespace neosycl::sycl::info -#endif //NEOSYCL_INCLUDE_CL_SYCL_INFO_DEVICE_HPP_ +#endif // NEOSYCL_INCLUDE_CL_SYCL_INFO_DEVICE_HPP_ diff --git a/include/neoSYCL/sycl/info/device_type.hpp b/include/neoSYCL/sycl/info/device_type.hpp index 380dada..4f96fa7 100644 --- a/include/neoSYCL/sycl/info/device_type.hpp +++ b/include/neoSYCL/sycl/info/device_type.hpp @@ -13,7 +13,6 @@ enum class device_type : unsigned int { all }; - } -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_DEVICE_TYPE_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_DEVICE_TYPE_HPP_ diff --git a/include/neoSYCL/sycl/info/param_traits.hpp b/include/neoSYCL/sycl/info/param_traits.hpp index 1a8b756..118c35f 100644 --- a/include/neoSYCL/sycl/info/param_traits.hpp +++ b/include/neoSYCL/sycl/info/param_traits.hpp @@ -6,21 +6,16 @@ namespace neosycl::sycl::info { -template -class param_traits { - using return_type = T; -}; +template class param_traits { using return_type = T; }; -template -class param_traits { +template class param_traits { using type = string_class; }; -template -class param_traits { +template class param_traits { using type = string_class; }; -} +} // namespace neosycl::sycl::info -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_PARAM_TRAITS_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_PARAM_TRAITS_HPP_ diff --git a/include/neoSYCL/sycl/info/platform.hpp b/include/neoSYCL/sycl/info/platform.hpp index a29d143..9287d6e 100644 --- a/include/neoSYCL/sycl/info/platform.hpp +++ b/include/neoSYCL/sycl/info/platform.hpp @@ -21,10 +21,10 @@ enum class platform : unsigned int { */ version, /** Returns the name of the platform (as a string_class) - */ + */ name, /** Returns the string provided by the platform vendor (as a string_class) - */ + */ vendor, /** Returns a space-separated list of extension names supported by the platform (as a string_class) @@ -32,6 +32,6 @@ enum class platform : unsigned int { extensions }; -} +} // namespace neosycl::sycl::info -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_PLATFORM_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_PLATFORM_HPP_ diff --git a/include/neoSYCL/sycl/info/program.hpp b/include/neoSYCL/sycl/info/program.hpp index 8c43946..9ed5488 100644 --- a/include/neoSYCL/sycl/info/program.hpp +++ b/include/neoSYCL/sycl/info/program.hpp @@ -5,14 +5,9 @@ namespace neosycl::sycl { namespace info { -enum class program : int { - reference_count, - context, - devices -}; +enum class program : int { reference_count, context, devices }; - -} } +} // namespace neosycl::sycl -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_INFO_PROGRAM_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_INFO_PROGRAM_HPP_ diff --git a/include/neoSYCL/sycl/info/queue.hpp b/include/neoSYCL/sycl/info/queue.hpp index 5848e2e..6523d04 100644 --- a/include/neoSYCL/sycl/info/queue.hpp +++ b/include/neoSYCL/sycl/info/queue.hpp @@ -5,14 +5,8 @@ namespace neosycl::sycl::info { using queue_profiling = bool; +enum class queue : int { context, device, reference_count, properties }; -enum class queue : int { - context, - device, - reference_count, - properties -}; +} // namespace neosycl::sycl::info -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_QUEUE_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_QUEUE_HPP_ diff --git a/include/neoSYCL/sycl/item.hpp b/include/neoSYCL/sycl/item.hpp index 4f81f92..36a5cfd 100644 --- a/include/neoSYCL/sycl/item.hpp +++ b/include/neoSYCL/sycl/item.hpp @@ -3,65 +3,58 @@ #include "neoSYCL/sycl/detail/container/array_nd.hpp" -#define DEFINE_ITEM_BY_VALUE_OP(cls) \ -friend bool operator ==(const cls &lhs, const cls &rhs) { \ - return (lhs.data == rhs.data) && (lhs.max_range == rhs.max_range) && (lhs.offset == rhs.offset); \ -} \ -friend bool operator !=(const cls &lhs, const cls &rhs) { \ - return (lhs.data != rhs.data) || (lhs.max_range != rhs.max_range) || (lhs.offset != rhs.offset); \ -} +#define DEFINE_ITEM_BY_VALUE_OP(cls) \ + friend bool operator==(const cls &lhs, \ + const cls &rhs) { \ + return (lhs.data == rhs.data) && (lhs.max_range == rhs.max_range) && \ + (lhs.offset == rhs.offset); \ + } \ + friend bool operator!=(const cls &lhs, \ + const cls &rhs) { \ + return (lhs.data != rhs.data) || (lhs.max_range != rhs.max_range) || \ + (lhs.offset != rhs.offset); \ + } namespace neosycl::sycl { -template -struct id; +template struct id; -template -struct item { +template struct item { item() = delete; - template> - item(const range &r, - const detail::container::ArrayND &index, - const detail::container::ArrayND &offsets) - : max_range(r), data{index}, offset{offsets} { - } + template > + item(const range &r, + const detail::container::ArrayND &index, + const detail::container::ArrayND &offsets) + : max_range(r), data{index}, offset{offsets} {} - id get_id() const { - return id(this); - }; + id get_id() const { return id(this); }; - size_t get_id(int dimension) const { - return this->index[dimension]; - }; + size_t get_id(int dimension) const { return this->index[dimension]; }; - size_t operator[](int dimension) const { - return this->index[dimension]; - } + size_t operator[](int dimension) const { return this->index[dimension]; } - range get_range() const { - return this->max_range; - } + range get_range() const { return this->max_range; } // only available if with_offset is true - template> + template > id get_offset() const { return this->get_id(); } // only available if with_offset is false - template> + template > operator item() const { return item(this->max_range, this->data, this->data); } DEFINE_ITEM_BY_VALUE_OP(item); - range max_range; - detail::container::ArrayND offset; - detail::container::ArrayND data; + range max_range; + detail::container::ArrayND offset; + detail::container::ArrayND data; }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_ITEM_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_ITEM_HPP_ diff --git a/include/neoSYCL/sycl/nd_range.hpp b/include/neoSYCL/sycl/nd_range.hpp index aaa621b..c1c47ac 100644 --- a/include/neoSYCL/sycl/nd_range.hpp +++ b/include/neoSYCL/sycl/nd_range.hpp @@ -5,30 +5,24 @@ namespace neosycl::sycl { -template -struct nd_range { - nd_range(range globalSize, range localSize, id offset = id()) : - global_range(globalSize), local_range(localSize), offset(offset) {} +template struct nd_range { + nd_range(range globalSize, range localSize, + id offset = id()) + : global_range(globalSize), local_range(localSize), offset(offset) {} - range get_global_range() const { - return global_range; - } + range get_global_range() const { return global_range; } - range get_local_range() const { - return local_range; - } + range get_local_range() const { return local_range; } - range get_group_range(); + range get_group_range(); - id get_offset() const { - return offset; - } + id get_offset() const { return offset; } - range global_range; - range local_range; - id offset; + range global_range; + range local_range; + id offset; }; -} +} // namespace neosycl::sycl -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_ND_RANGE_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_ND_RANGE_HPP_ diff --git a/include/neoSYCL/sycl/op_def.hpp b/include/neoSYCL/sycl/op_def.hpp index e1cdd45..70cb034 100644 --- a/include/neoSYCL/sycl/op_def.hpp +++ b/include/neoSYCL/sycl/op_def.hpp @@ -1,44 +1,52 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP -#define DEFINE_OP_CONST(cls, op) \ - friend cls operator op(const cls &lhs, const cls &rhs) { \ - cls ret; \ - ret.data = lhs.data op rhs.data; \ - return ret; \ +#define DEFINE_OP_CONST(cls, op) \ + friend cls operator op(const cls &lhs, \ + const cls &rhs) { \ + cls ret; \ + ret.data = lhs.data op rhs.data; \ + return ret; \ }; -#define DEFINE_OP_CONST_SIZE_T(cls, op) \ - friend cls operator op(const cls &lhs, const size_t &rhs) { \ - cls ret; \ - ret.data = lhs.data op rhs; \ - return ret; \ +#define DEFINE_OP_CONST_SIZE_T(cls, op) \ + friend cls operator op(const cls &lhs, \ + const size_t &rhs) { \ + cls ret; \ + ret.data = lhs.data op rhs; \ + return ret; \ }; -#define DEFINE_OP(cls, op) \ - friend cls &operator op(cls &lhs, const cls &rhs) { \ - lhs.data = lhs.data op rhs.data; \ - return lhs; \ +#define DEFINE_OP(cls, op) \ + friend cls &operator op(cls &lhs, \ + const cls &rhs) { \ + lhs.data = lhs.data op rhs.data; \ + return lhs; \ }; -#define DEFINE_OP_SIZE_T(cls, op) \ - friend cls &operator op(cls &lhs, const size_t &rhs) { \ - lhs.data = lhs.data op rhs; \ - return lhs; \ +#define DEFINE_OP_SIZE_T(cls, op) \ + friend cls &operator op(cls &lhs, \ + const size_t &rhs) { \ + lhs.data = lhs.data op rhs; \ + return lhs; \ }; -#define DEFINE_OP_CONST_SIZE_T_LEFT(cls, op) \ - friend cls operator op(const size_t &lhs, cls &rhs) { \ - cls ret; \ - ret.data = rhs.data op lhs; \ - return ret; \ +#define DEFINE_OP_CONST_SIZE_T_LEFT(cls, op) \ + friend cls operator op(const size_t &lhs, \ + cls &rhs) { \ + cls ret; \ + ret.data = rhs.data op lhs; \ + return ret; \ }; -#define DEFINE_COMMON_BY_VALUE_OP(cls, op) \ -friend bool operator op(const cls &lhs, const cls &rhs) { \ - return lhs.data op rhs.data; \ -} +#define DEFINE_COMMON_BY_VALUE_OP(cls, op) \ + friend bool operator op(const cls &lhs, \ + const cls &rhs) { \ + return lhs.data op rhs.data; \ + } -#define DEFINE_COMMON_BY_VALUE_SEMANTICS(cls) DEFINE_COMMON_BY_VALUE_OP(cls,==); DEFINE_COMMON_BY_VALUE_OP(cls,!=) +#define DEFINE_COMMON_BY_VALUE_SEMANTICS(cls) \ + DEFINE_COMMON_BY_VALUE_OP(cls, ==); \ + DEFINE_COMMON_BY_VALUE_OP(cls, !=) -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP diff --git a/include/neoSYCL/sycl/platform.hpp b/include/neoSYCL/sycl/platform.hpp index d73b69b..5fe765d 100644 --- a/include/neoSYCL/sycl/platform.hpp +++ b/include/neoSYCL/sycl/platform.hpp @@ -16,32 +16,34 @@ class platform { public: platform() : platform_info(new detail::default_platform_info) {} - platform(const shared_ptr_class &info) : platform_info(info) {} + platform(const shared_ptr_class &info) + : platform_info(info) {} -// explicit platform(cl_platform_id platformID); + // explicit platform(cl_platform_id platformID); - explicit platform(const device_selector &deviceSelector) : - platform_info(deviceSelector.get_platform_info()) {} + explicit platform(const device_selector &deviceSelector) + : platform_info(deviceSelector.get_platform_info()) {} -/* -- common interface members -- */ -// cl_platform_id get() const; + /* -- common interface members -- */ + // cl_platform_id get() const; - vector_class get_devices(info::device_type = info::device_type::all) const; + vector_class + get_devices(info::device_type = info::device_type::all) const; - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type + get_info() const; bool has_extension(const string_class &extension) const { return platform_info->has_extension(extension); } - bool is_host() const { - return platform_info->is_host(); - } + bool is_host() const { return platform_info->is_host(); } static vector_class get_platforms() { vector_class ret; - for (const shared_ptr_class &info: detail::REGISTERED_PLATFORMS) { + for (const shared_ptr_class &info : + detail::REGISTERED_PLATFORMS) { ret.push_back(platform(info)); } return ret; @@ -51,6 +53,6 @@ class platform { shared_ptr_class platform_info; }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ diff --git a/include/neoSYCL/sycl/program.hpp b/include/neoSYCL/sycl/program.hpp index c7e9302..9cc1e09 100644 --- a/include/neoSYCL/sycl/program.hpp +++ b/include/neoSYCL/sycl/program.hpp @@ -8,18 +8,13 @@ namespace neosycl::sycl { class kernel; -enum class program_state { - none, - compiled, - linked -}; +enum class program_state { none, compiled, linked }; class program { - public: +public: program() = delete; - explicit program(const context &context, - const property_list &propList = {}); + explicit program(const context &context, const property_list &propList = {}); program(const context &context, vector_class deviceList, const property_list &propList = {}); @@ -27,22 +22,21 @@ class program { program(vector_class &programList, const property_list &propList = {}); - program(vector_class &programList, - string_class linkOptions, + program(vector_class &programList, string_class linkOptions, const property_list &propList = {}); -// program(const context &context, cl_program clProgram); + // program(const context &context, cl_program clProgram); -// cl_program get() const; + // cl_program get() const; bool is_host() const; - template + template void compile_with_kernel_type(string_class compileOptions = ""); void compile_with_source(string_class kernelSource, string_class compileOptions = ""); - template + template void build_with_kernel_type(string_class buildOptions = ""); void build_with_source(string_class kernelSource, @@ -50,18 +44,19 @@ class program { void link(string_class linkOptions = ""); -// template -// bool has_kernel() const; + // template + // bool has_kernel() const; bool has_kernel(string_class kernelName) const; -// template -// kernel get_kernel() const; + // template + // kernel get_kernel() const; kernel get_kernel(string_class kernelName) const; - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type + get_info() const; vector_class> get_binaries() const; @@ -78,6 +73,6 @@ class program { program_state get_state() const; }; -} +} // namespace neosycl::sycl -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_PROGRAM_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_PROGRAM_HPP_ diff --git a/include/neoSYCL/sycl/property_list.hpp b/include/neoSYCL/sycl/property_list.hpp index f8a4372..bcc22fc 100644 --- a/include/neoSYCL/sycl/property_list.hpp +++ b/include/neoSYCL/sycl/property_list.hpp @@ -7,18 +7,15 @@ namespace neosycl::sycl { class property_list { - template - bool has_property() const { + template bool has_property() const { throw unimplemented(); } - template - propertyT get_property() const { + template propertyT get_property() const { throw unimplemented(); } - }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ diff --git a/include/neoSYCL/sycl/queue.hpp b/include/neoSYCL/sycl/queue.hpp index f4657c4..2033436 100644 --- a/include/neoSYCL/sycl/queue.hpp +++ b/include/neoSYCL/sycl/queue.hpp @@ -17,59 +17,64 @@ namespace neosycl::sycl { class queue { public: - explicit queue(const property_list &propList = {}) : - bind_device(), counter(new detail::task_counter()) {} + explicit queue(const property_list &propList = {}) + : bind_device(), counter(new detail::task_counter()) {} - explicit queue(const async_handler &asyncHandler, const property_list &propList = {}) : - bind_device(), counter(new detail::task_counter()), err_handler(asyncHandler) {} + explicit queue(const async_handler &asyncHandler, + const property_list &propList = {}) + : bind_device(), counter(new detail::task_counter()), + err_handler(asyncHandler) {} - explicit queue(const device_selector &deviceSelector, const property_list &propList = {}) - : bind_device(deviceSelector.select_device()), counter(new detail::task_counter()) {} + explicit queue(const device_selector &deviceSelector, + const property_list &propList = {}) + : bind_device(deviceSelector.select_device()), + counter(new detail::task_counter()) {} explicit queue(const device_selector &deviceSelector, - const async_handler &asyncHandler, const property_list &propList = {}) - : bind_device(deviceSelector.select_device()), counter(new detail::task_counter()), - err_handler(asyncHandler) {} + const async_handler &asyncHandler, + const property_list &propList = {}) + : bind_device(deviceSelector.select_device()), + counter(new detail::task_counter()), err_handler(asyncHandler) {} - explicit queue(const device &syclDevice, const property_list &propList = {}) : - bind_device(syclDevice), counter(new detail::task_counter()) {} + explicit queue(const device &syclDevice, const property_list &propList = {}) + : bind_device(syclDevice), counter(new detail::task_counter()) {} explicit queue(const device &syclDevice, const async_handler &asyncHandler, - const property_list &propList = {}) : - bind_device(syclDevice), counter(new detail::task_counter()), err_handler(asyncHandler) {} + const property_list &propList = {}) + : bind_device(syclDevice), counter(new detail::task_counter()), + err_handler(asyncHandler) {} - explicit queue(const context &syclContext, const device_selector &deviceSelector, + explicit queue(const context &syclContext, + const device_selector &deviceSelector, const property_list &propList = {}); - explicit queue(const context &syclContext, const device_selector &deviceSelector, - const async_handler &asyncHandler, const property_list &propList = {}); + explicit queue(const context &syclContext, + const device_selector &deviceSelector, + const async_handler &asyncHandler, + const property_list &propList = {}); explicit queue(const context &syclContext, const device &syclDevice, const property_list &propList = {}); explicit queue(const context &syclContext, const device &syclDevice, - const async_handler &asyncHandler, const property_list &propList = {}); + const async_handler &asyncHandler, + const property_list &propList = {}); -// explicit queue(cl_command_queue clQueue, const context &syclContext, -// const async_handler &asyncHandler = {}); + // explicit queue(cl_command_queue clQueue, const context &syclContext, + // const async_handler &asyncHandler = {}); -// cl_command_queue get() const; + // cl_command_queue get() const; context get_context() const; - device get_device() const { - return bind_device; - } + device get_device() const { return bind_device; } - bool is_host() const { - return bind_device.is_host(); - } + bool is_host() const { return bind_device.is_host(); } - template + template typename info::param_traits::return_type get_info() const; - template - event submit(T cgf) { + template event submit(T cgf) { try { handler command_group_handler(bind_device, counter); cgf(command_group_handler); @@ -79,20 +84,15 @@ class queue { return event(); } - template - event submit(T cgf, const queue &secondaryQueue); + template event submit(T cgf, const queue &secondaryQueue); - void wait() { - counter->wait(); - } + void wait() { counter->wait(); } void wait_and_throw(); void throw_asynchronous(); - virtual ~queue() { - counter->wait(); - } + virtual ~queue() { counter->wait(); } private: device bind_device; @@ -100,6 +100,6 @@ class queue { shared_ptr_class counter; }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_QUEUE_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_QUEUE_HPP_ diff --git a/include/neoSYCL/sycl/range.hpp b/include/neoSYCL/sycl/range.hpp index 1891c0e..d965296 100644 --- a/include/neoSYCL/sycl/range.hpp +++ b/include/neoSYCL/sycl/range.hpp @@ -6,32 +6,23 @@ namespace neosycl::sycl { -template -struct range { - template> - range(size_t dim0) :data(dim0) {} +template struct range { + template > + range(size_t dim0) : data(dim0) {} - template> + template > range(size_t dim0, size_t dim1) : data(dim0, dim1) {} - template> + template > range(size_t dim0, size_t dim1, size_t dim2) : data(dim0, dim1, dim2) {} - size_t get(int dimension) const { - return data[dimension]; - } + size_t get(int dimension) const { return data[dimension]; } - size_t &operator[](int dimension) { - return data[dimension]; - } + size_t &operator[](int dimension) { return data[dimension]; } - size_t operator[](int dimension) const { - return data[dimension]; - } + size_t operator[](int dimension) const { return data[dimension]; } - size_t size() const { - return data.get_liner(); - } + size_t size() const { return data.get_liner(); } // Where OP is: +, -, *, /, %, <<, >>, &, |, ˆ, &&, ||, <, >, <=, >=. DEFINE_OP_CONST(range, +); @@ -116,6 +107,6 @@ struct range { detail::container::ArrayND data; }; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ diff --git a/include/neoSYCL/sycl/types.hpp b/include/neoSYCL/sycl/types.hpp index 25ac2a3..ccdd3ba 100644 --- a/include/neoSYCL/sycl/types.hpp +++ b/include/neoSYCL/sycl/types.hpp @@ -11,33 +11,28 @@ namespace neosycl::sycl { -template > +template > using vector_class = std::vector; using string_class = std::string; -template +template using function_class = std::function; using mutex_class = std::mutex; -template -using shared_ptr_class = std::shared_ptr; +template using shared_ptr_class = std::shared_ptr; -template -using unique_ptr_class = std::unique_ptr; +template using unique_ptr_class = std::unique_ptr; -template -using weak_ptr_class = std::weak_ptr; +template using weak_ptr_class = std::weak_ptr; -template -using hash_class = std::hash; +template using hash_class = std::hash; using exception_ptr_class = std::exception_ptr; -template -using buffer_allocator = std::allocator; +template using buffer_allocator = std::allocator; -} +} // namespace neosycl::sycl -#endif //CUSTOM_SYCL_INCLUDE_SYCL_TYPES_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_TYPES_HPP_ From 24cb545620bc693b7ff5be0909cfed017c3f4eb0 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Fri, 31 Dec 2021 02:31:11 +0900 Subject: [PATCH 02/90] device_selector support --- CMakeLists.txt | 1 + examples/sequential_vector.cpp | 2 +- .../extensions/nec/ve_context_info.hpp | 59 +++++++++++++++++++ include/neoSYCL/extensions/nec/ve_device.hpp | 5 +- .../neoSYCL/extensions/nec/ve_device_info.hpp | 22 +++++++ include/neoSYCL/extensions/nec/ve_info.hpp | 11 +++- include/neoSYCL/extensions/nec/ve_kernel.hpp | 5 +- include/neoSYCL/extensions/nec/ve_queue.hpp | 5 +- .../neoSYCL/extensions/nec/ve_selector.hpp | 17 ++++-- include/neoSYCL/extensions/nec/ve_task.hpp | 5 +- .../extensions/nec/ve_task_handler.hpp | 28 ++------- include/neoSYCL/sycl.hpp | 28 +++++---- include/neoSYCL/sycl/accessor.hpp | 3 - include/neoSYCL/sycl/buffer.hpp | 8 --- include/neoSYCL/sycl/context.hpp | 17 ++++-- include/neoSYCL/sycl/detail/context_info.hpp | 21 +++++++ include/neoSYCL/sycl/detail/device_info.hpp | 5 ++ include/neoSYCL/sycl/detail/device_type.hpp | 2 +- .../sycl/detail/registered_platforms.hpp | 17 ------ include/neoSYCL/sycl/detail/task_handler.hpp | 8 ++- include/neoSYCL/sycl/device.hpp | 8 +-- include/neoSYCL/sycl/device_selector.hpp | 4 +- .../sycl/device_selector/cpu_selector.hpp | 8 ++- include/neoSYCL/sycl/handler.hpp | 57 ++++++++++-------- include/neoSYCL/sycl/id.hpp | 4 -- include/neoSYCL/sycl/platform.hpp | 2 +- include/neoSYCL/sycl/property_list.hpp | 2 - include/neoSYCL/sycl/queue.hpp | 27 ++++----- include/neoSYCL/sycl/range.hpp | 1 - 29 files changed, 242 insertions(+), 140 deletions(-) create mode 100644 include/neoSYCL/extensions/nec/ve_context_info.hpp create mode 100644 include/neoSYCL/extensions/nec/ve_device_info.hpp create mode 100644 include/neoSYCL/sycl/detail/context_info.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 278946a..38bad83 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ if (CMAKE_BUILD_TYPE STREQUAL Debug) message(STATUS "Enable debug mode") ADD_DEFINITIONS(-DDEBUG) include_directories(third_party/fmt/include) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") endif () include_directories(include) diff --git a/examples/sequential_vector.cpp b/examples/sequential_vector.cpp index 6013726..6e202d0 100644 --- a/examples/sequential_vector.cpp +++ b/examples/sequential_vector.cpp @@ -17,7 +17,7 @@ int main() { // all SYCL tasks must complete before exiting the block // Create a queue to work on - queue myQueue; + queue myQueue(ve_selector{}); // Create buffers from a & b vectors with 2 different syntax buffer A(a, range<1>(N)); diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp new file mode 100644 index 0000000..bd5dc2c --- /dev/null +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -0,0 +1,59 @@ +#ifndef NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_CONTEXT_INFO_HPP +#define NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_CONTEXT_INFO_HPP + +#include "neoSYCL/extensions/nec/ve_info.hpp" + +namespace neosycl::sycl::extensions::nec { + +class ve_context_info : public detail::context_info { + VEProc proc; + VEContext ctx; + + VEContext create_ctx(VEProc proc) { + struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); + DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); + return VEContext{ctx}; + } + + void free_ctx(VEContext ctx) { + DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); + int rt = veo_context_close(ctx.ve_ctx); + if (rt != veo_command_state::VEO_COMMAND_OK) { + DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", + (size_t)ctx.ve_ctx, rt); + PRINT_ERR("[VEContext] release ve ctx failed"); + } + } + + VEProc create_proc(const string_class &lib_path = DEFAULT_VE_LIB, + int ve_node = DEFAULT_VE_NODE) { + struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); + if (!ve_proc) { + DEBUG_INFO("[VEProc] create ve proc on node: %d failed..", ve_node); + throw ve_exception("[VEProc] create ve proc failed."); + } + uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); + DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", + (size_t)ve_proc, lib_path, ve_node); + return nec::VEProc{ve_proc, handle}; + } + + void free_proc(VEProc proc) { + DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t)proc.ve_proc); + int rt = veo_proc_destroy(proc.ve_proc); + if (rt != veo_command_state::VEO_COMMAND_OK) { + DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", + (size_t)proc.ve_proc, rt); + PRINT_ERR("[VEProc] release ve proc failed"); + } + } + +public: + ve_context_info() + : detail::context_info(), proc(create_proc()), ctx(create_ctx(proc)) { + task_handler = handler_type(new task_handler_ve(proc, ctx)); + } + ~ve_context_info() { free_proc(proc); } +}; +} // namespace neosycl::sycl::extensions::nec +#endif diff --git a/include/neoSYCL/extensions/nec/ve_device.hpp b/include/neoSYCL/extensions/nec/ve_device.hpp index 9fe1a76..547179c 100644 --- a/include/neoSYCL/extensions/nec/ve_device.hpp +++ b/include/neoSYCL/extensions/nec/ve_device.hpp @@ -1,6 +1,7 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ - +// obsolete +#if 0 #include "ve_offload.h" #include "neoSYCL/sycl/device.hpp" @@ -21,5 +22,5 @@ class ve_device : public device { }; } // namespace neosycl::sycl - +#endif #endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_device_info.hpp b/include/neoSYCL/extensions/nec/ve_device_info.hpp new file mode 100644 index 0000000..cda732a --- /dev/null +++ b/include/neoSYCL/extensions/nec/ve_device_info.hpp @@ -0,0 +1,22 @@ +#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_INFO_HPP_ +#define SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_INFO_HPP_ + +namespace neosycl::sycl { + +struct ve_device_info : public detail::device_info { + bool is_host() override { return false; } + + bool is_cpu() override { return true; } + bool is_gpu() override { return false; } + bool is_accelerator() override { return true; } + + detail::SUPPORT_PLATFORM_TYPE type() override { + return detail::SUPPORT_PLATFORM_TYPE::VE; + } + + detail::context_info *create_context_info() const override; +}; + +} // namespace neosycl::sycl + +#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_INFO_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_info.hpp b/include/neoSYCL/extensions/nec/ve_info.hpp index e5ddd55..a5de753 100644 --- a/include/neoSYCL/extensions/nec/ve_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_info.hpp @@ -2,8 +2,9 @@ #define NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ #include "ve_offload.h" +#include -namespace neosycl::sycl::extensions { +namespace neosycl::sycl::extensions::nec { const int DEFAULT_VE_NODE = 0; const string_class DEFAULT_VE_LIB = "./kernel.so"; @@ -16,6 +17,12 @@ struct VEProc { struct VEContext { struct veo_thr_ctxt *ve_ctx; }; -} // namespace neosycl::sycl::extensions + +class ve_exception : public sycl::exception { +public: + ve_exception(const string_class &msg) : sycl::exception(msg) {} +}; + +} // namespace neosycl::sycl::extensions::nec #endif // NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_kernel.hpp b/include/neoSYCL/extensions/nec/ve_kernel.hpp index 08f2afa..3cf4add 100644 --- a/include/neoSYCL/extensions/nec/ve_kernel.hpp +++ b/include/neoSYCL/extensions/nec/ve_kernel.hpp @@ -1,6 +1,7 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ - +// obsolete +#if 0 #include "ve_info.hpp" namespace neosycl::sycl::detail { @@ -179,5 +180,5 @@ struct VEKernel : public Kernel { }; } // namespace neosycl::sycl::detail - +#endif #endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_queue.hpp b/include/neoSYCL/extensions/nec/ve_queue.hpp index 0de9e6e..ba3c1ae 100644 --- a/include/neoSYCL/extensions/nec/ve_queue.hpp +++ b/include/neoSYCL/extensions/nec/ve_queue.hpp @@ -1,6 +1,7 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ - +// obsolete +#if 0 #include "ve_offload.h" #include "CL/SYCL/nec/ve_task.hpp" @@ -53,5 +54,5 @@ class ve_queue : public queue { }; } // namespace neosycl::sycl - +#endif #endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_selector.hpp b/include/neoSYCL/extensions/nec/ve_selector.hpp index 15b8bea..fbd0077 100644 --- a/include/neoSYCL/extensions/nec/ve_selector.hpp +++ b/include/neoSYCL/extensions/nec/ve_selector.hpp @@ -1,6 +1,10 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ +#include "neoSYCL/extensions/nec/ve_device_info.hpp" +#include "neoSYCL/sycl/detail/context_info.hpp" +#include "neoSYCL/extensions/nec/ve_context_info.hpp" + namespace neosycl::sycl { class ve_selector : public device_selector { @@ -8,14 +12,19 @@ class ve_selector : public device_selector { public: int operator()(const device &dev) const override { if (dev.is_accelerator()) { - return 1; + return true; } - return -1; + return false; + } + device select_device() const override { + return device(shared_ptr_class(new ve_device_info())); } - - device select_device() const override { return ve_device(); } }; +detail::context_info *ve_device_info::create_context_info() const { + return new extensions::nec::ve_context_info(); +} + } // namespace neosycl::sycl #endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task.hpp b/include/neoSYCL/extensions/nec/ve_task.hpp index c3698ae..02b6536 100644 --- a/include/neoSYCL/extensions/nec/ve_task.hpp +++ b/include/neoSYCL/extensions/nec/ve_task.hpp @@ -1,6 +1,7 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ - +// obsolete +#if 0 #include "ve_kernel.hpp" namespace neosycl::sycl::detail { @@ -18,5 +19,5 @@ struct VETask : public Task { }; } // namespace neosycl::sycl::detail - +#endif #endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index b1706ba..e4b9d25 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -5,28 +5,12 @@ #include "neoSYCL/sycl/detail/kernel_arg.hpp" #include "ve_offload.h" -namespace neosycl::sycl::extensions { +namespace neosycl::sycl::extensions::nec { -struct task_handler_ve : public detail::task_handler { +class task_handler_ve : public detail::task_handler { public: - task_handler_ve(const VEProc &proc) : proc(proc) { ctx = ctx_create(proc); } - - VEContext ctx_create(VEProc proc) { - struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); - return VEContext{ctx}; - } - - void free_ctx(VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); - int rt = veo_context_close(ctx.ve_ctx); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", - (size_t)ctx.ve_ctx, rt); - PRINT_ERR("[VEContext] release ve ctx failed"); - } - } + task_handler_ve(const VEProc &p, const VEContext &c) : proc(p), ctx(c) {} struct veo_args *create_ve_args() { struct veo_args *argp = veo_args_alloc(); @@ -35,7 +19,7 @@ struct task_handler_ve : public detail::task_handler { } return argp; } - + vector_class copy_in(struct veo_args *argp, shared_ptr_class k, VEProc proc) { @@ -165,12 +149,12 @@ struct task_handler_ve : public detail::task_handler { throw exception("not implemented"); }; - detail::SUPPORT_PLATFORM_TYPE type() override { return detail::SX_AURORA; } + detail::SUPPORT_PLATFORM_TYPE type() override { return detail::VE; } private: VEContext ctx; VEProc proc; }; -} // namespace neosycl::sycl::extensions +} // namespace neosycl::sycl::extensions::nec #endif // NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index 7c8c444..e652e88 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -3,29 +3,35 @@ // debug headers #include "sycl/detail/debug.hpp" -#include "sycl/types.hpp" +#include "sycl/types.hpp" #include "sycl/exception.hpp" - -// include device headers +#include "sycl/op_def.hpp" +#include "sycl/range.hpp" +#include "sycl/item.hpp" #include "sycl/id.hpp" -#include "sycl/device.hpp" -// include device selector headers #include "sycl/device_selector.hpp" +#include "sycl/platform.hpp" +#include "sycl/device.hpp" + #include "sycl/device_selector/cpu_selector.hpp" +#ifdef BUILD_VE +#include "extensions/nec/ve_task_handler.hpp" +#include "extensions/nec/ve_selector.hpp" +#endif -// include platform headers -#include "sycl/platform.hpp" +#include "sycl/property_list.hpp" +#include "sycl/context.hpp" +#include "sycl/handler.hpp" +#include "sycl/queue.hpp" // include buffer headers #include "sycl/accessor.hpp" -#include "sycl/accessor.hpp" +#include "sycl/access.hpp" +#include "sycl/allocator.hpp" #include "sycl/buffer.hpp" -#include "sycl/queue.hpp" -#include "sycl/handler.hpp" - namespace neosycl { using namespace neosycl::sycl; diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index b8d004b..25dec57 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -1,9 +1,6 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP -#include "neoSYCL/sycl/id.hpp" -#include "neoSYCL/sycl/property_list.hpp" -#include "neoSYCL/sycl/handler.hpp" #include "neoSYCL/sycl/detail/container/data_container.hpp" #include "neoSYCL/sycl/detail/container/data_container_nd.hpp" diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index aa56e22..b2bbcc7 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -1,14 +1,6 @@ #ifndef CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ #define CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/range.hpp" -#include "neoSYCL/sycl/access.hpp" -#include "neoSYCL/sycl/accessor.hpp" -#include "neoSYCL/sycl/allocator.hpp" -#include "neoSYCL/sycl/handler.hpp" -#include "neoSYCL/sycl/context.hpp" -#include "neoSYCL/sycl/property_list.hpp" #include "neoSYCL/sycl/detail/container/data_container.hpp" #include "neoSYCL/sycl/detail/container/data_container_nd.hpp" diff --git a/include/neoSYCL/sycl/context.hpp b/include/neoSYCL/sycl/context.hpp index 5a39cf0..1f04d45 100644 --- a/include/neoSYCL/sycl/context.hpp +++ b/include/neoSYCL/sycl/context.hpp @@ -1,20 +1,19 @@ #ifndef CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ #define CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ -#include "neoSYCL/sycl/exception.hpp" #include "neoSYCL/sycl/info/context.hpp" -#include "neoSYCL/sycl/property_list.hpp" +#include "neoSYCL/sycl/detail/context_info.hpp" namespace neosycl::sycl { class context { public: - explicit context(const property_list &propList = {}); + explicit context(const property_list &propList = {}) { init(device()); } context(async_handler asyncHandler, const property_list &propList = {}); - context(const device &dev, const property_list &propList = {}); + context(const device &dev, const property_list &propList = {}) { init(dev); } context(const device &dev, async_handler asyncHandler, const property_list &propList = {}); @@ -35,6 +34,16 @@ class context { template typename info::param_traits::return_type get_info() const; + + shared_ptr_class get_context_info() { return ctx_info; } + +private: + void init(const device &dev) { + ctx_info = shared_ptr_class( + dev.device_info->create_context_info()); + } + + shared_ptr_class ctx_info; }; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp new file mode 100644 index 0000000..c6df169 --- /dev/null +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -0,0 +1,21 @@ +#pragma once + +namespace neosycl::sycl::detail { +class context_info { +protected: + using handler_type = shared_ptr_class; + context_info() {} + +public: + ~context_info() = default; + handler_type task_handler; +}; + +class cpu_context_info : public context_info { +public: + cpu_context_info() : context_info() { + task_handler = handler_type(new task_handler_cpu()); + } + ~cpu_context_info() = default; +}; +} // namespace neosycl::sycl::detail \ No newline at end of file diff --git a/include/neoSYCL/sycl/detail/device_info.hpp b/include/neoSYCL/sycl/detail/device_info.hpp index 3355f5c..bf84dc3 100644 --- a/include/neoSYCL/sycl/detail/device_info.hpp +++ b/include/neoSYCL/sycl/detail/device_info.hpp @@ -4,6 +4,7 @@ #include "neoSYCL/sycl/detail/device_type.hpp" namespace neosycl::sycl::detail { +class context_info; struct device_info { virtual bool is_host() = 0; @@ -15,6 +16,8 @@ struct device_info { virtual bool is_accelerator() = 0; virtual SUPPORT_PLATFORM_TYPE type() = 0; + + virtual context_info *create_context_info() const = 0; }; struct cpu_device_info : public device_info { @@ -25,6 +28,8 @@ struct cpu_device_info : public device_info { bool is_accelerator() override { return false; } SUPPORT_PLATFORM_TYPE type() override { return SUPPORT_PLATFORM_TYPE::CPU; } + + context_info *create_context_info() const override; }; using default_device_info = cpu_device_info; diff --git a/include/neoSYCL/sycl/detail/device_type.hpp b/include/neoSYCL/sycl/detail/device_type.hpp index bea606e..5ad2115 100644 --- a/include/neoSYCL/sycl/detail/device_type.hpp +++ b/include/neoSYCL/sycl/detail/device_type.hpp @@ -3,7 +3,7 @@ namespace neosycl::sycl::detail { -enum SUPPORT_PLATFORM_TYPE : int { CPU, SX_AURORA }; +enum SUPPORT_PLATFORM_TYPE : int { CPU, VE }; } diff --git a/include/neoSYCL/sycl/detail/registered_platforms.hpp b/include/neoSYCL/sycl/detail/registered_platforms.hpp index 0ee8bd8..8888da0 100644 --- a/include/neoSYCL/sycl/detail/registered_platforms.hpp +++ b/include/neoSYCL/sycl/detail/registered_platforms.hpp @@ -2,29 +2,12 @@ #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H #include "neoSYCL/sycl/detail/platform_info.hpp" -#include "neoSYCL/sycl/detail/task_handler.hpp" -#include - -#ifdef BUILD_VE -#include "neoSYCL/extensions/nec/ve_task_handler.hpp" -#endif namespace neosycl::sycl::detail { static shared_ptr_class REGISTERED_PLATFORMS[] = { shared_ptr_class(new default_platform_info())}; -static std::map> - PLATFORM_HANDLER_MAP = { - {SUPPORT_PLATFORM_TYPE::CPU, - shared_ptr_class(new task_handler_cpu())}, - -#ifdef DBUILD_VE - {SUPPORT_PLATFORM_TYPE::SX_AURORA, - shared_ptr_class(new task_handler_cpu())} -#endif -}; - } // namespace neosycl::sycl::detail #endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index 04ae7ea..d5e9f08 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -5,7 +5,9 @@ namespace neosycl::sycl::detail { -struct task_handler { +class task_handler { +public: + explicit task_handler() {} virtual void single_task(shared_ptr_class k, const std::function &func) = 0; @@ -25,7 +27,9 @@ struct task_handler { virtual SUPPORT_PLATFORM_TYPE type() = 0; }; -struct task_handler_cpu : public task_handler { +class task_handler_cpu : public task_handler { +public: + task_handler_cpu() {} void single_task(shared_ptr_class k, const std::function &func) override { diff --git a/include/neoSYCL/sycl/device.hpp b/include/neoSYCL/sycl/device.hpp index 99c5175..3084561 100644 --- a/include/neoSYCL/sycl/device.hpp +++ b/include/neoSYCL/sycl/device.hpp @@ -1,9 +1,6 @@ #ifndef CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ #define CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ -#include "neoSYCL/sycl/exception.hpp" -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/platform.hpp" #include "neoSYCL/sycl/info/device_type.hpp" #include "neoSYCL/sycl/info/device.hpp" #include "neoSYCL/sycl/info/param_traits.hpp" @@ -13,6 +10,7 @@ namespace neosycl::sycl { class device { friend class handler; + friend class context; public: device() : device_info(new detail::default_device_info()){}; @@ -68,12 +66,12 @@ class device { return ret; } + detail::SUPPORT_PLATFORM_TYPE type() const { return device_info->type(); } + private: shared_ptr_class device_info; }; -device device_selector::select_device() const { return device(); } - vector_class platform::get_devices(info::device_type) const { vector_class ret; for (shared_ptr_class info : diff --git a/include/neoSYCL/sycl/device_selector.hpp b/include/neoSYCL/sycl/device_selector.hpp index 2f51fe5..9e2beda 100644 --- a/include/neoSYCL/sycl/device_selector.hpp +++ b/include/neoSYCL/sycl/device_selector.hpp @@ -1,7 +1,6 @@ #ifndef CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ #define CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ -#include "neoSYCL/sycl/device.hpp" #include "neoSYCL/sycl/detail/platform_info.hpp" namespace neosycl::sycl { @@ -18,8 +17,7 @@ class device_selector { virtual ~device_selector() = default; - // defined in device.hpp - device select_device() const; + virtual device select_device() const = 0; virtual int operator()(const device &device) const = 0; diff --git a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp index abb1a4e..c8348e5 100644 --- a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp +++ b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp @@ -1,6 +1,8 @@ #ifndef SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ #define SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ +#include "neoSYCL/sycl/detail/context_info.hpp" + namespace neosycl::sycl { class cpu_selector : public device_selector { @@ -13,7 +15,7 @@ class cpu_selector : public device_selector { return false; } - device select_device() const { + device select_device() const override { return device( shared_ptr_class(new detail::cpu_device_info())); } @@ -22,6 +24,10 @@ class cpu_selector : public device_selector { using default_selector = cpu_selector; using host_selector = cpu_selector; +detail::context_info *detail::cpu_device_info::create_context_info() const { + return new detail::cpu_context_info(); +} + } // namespace neosycl::sycl #endif // SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index d8dfd9b..10d81f0 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -15,6 +15,7 @@ #include "neoSYCL/sycl/detail/task_handler.hpp" #include "neoSYCL/sycl/detail/registered_platforms.hpp" #include "neoSYCL/sycl/detail/task_counter.hpp" +#include namespace neosycl::sycl { @@ -28,49 +29,57 @@ namespace detail { */ template string_class get_kernel_name_from_class() { KernelName *p; +#if 0 + int status; + char *pc = abi::__cxa_demangle(typeid(p).name(), 0, 0, &status); + string_class in(pc); + free(pc); +#else string_class in = typeid(p).name(); +#endif return in; } } // namespace detail class handler { + using handler_type = shared_ptr_class; + using counter_type = shared_ptr_class; + using kernel_type = shared_ptr_class; + public: - explicit handler(device dev, shared_ptr_class counter) + explicit handler(device dev, counter_type counter, context c) : bind_device(std::move(dev)), counter(std::move(counter)), - kernel(new detail::kernel()) {} + kernel(new detail::kernel()), ctx(c) {} template void single_task(KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = - detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_task( - [f = kernelFunc, h = handler, k = kernel]() { h->single_task(k, f); }); + handler_type task_handler = ctx.get_context_info()->task_handler; + submit_task([f = kernelFunc, h = task_handler, k = kernel]() { + h->single_task(k, f); + }); } template - void submit_parallel_for(shared_ptr_class handler, - range<3> numWorkItems, id<3> offset, - KernelType kernelFunc) { + void submit_parallel_for(handler_type handler, range<3> numWorkItems, + id<3> offset, KernelType kernelFunc) { submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { h->parallel_for_3d(k, n, f, o); }); } template - void submit_parallel_for(shared_ptr_class handler, - range<2> numWorkItems, id<2> offset, - KernelType kernelFunc) { + void submit_parallel_for(handler_type handler, range<2> numWorkItems, + id<2> offset, KernelType kernelFunc) { submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { h->parallel_for_2d(k, n, f, o); }); } template - void submit_parallel_for(shared_ptr_class handler, - range<1> numWorkItems, id<1> offset, - KernelType kernelFunc) { + void submit_parallel_for(handler_type handler, range<1> numWorkItems, + id<1> offset, KernelType kernelFunc) { submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { h->parallel_for_1d(k, n, f, o); }); @@ -79,18 +88,17 @@ class handler { template void parallel_for(range numWorkItems, KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = - detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_parallel_for(handler, numWorkItems, id(), kernelFunc); + handler_type task_handler = ctx.get_context_info()->task_handler; + submit_parallel_for(task_handler, numWorkItems, id(), + kernelFunc); } template void parallel_for(range numWorkItems, id workItemOffset, KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = - detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_parallel_for(handler, numWorkItems, workItemOffset, kernelFunc); + handler_type task_handler = ctx.get_context_info()->task_handler; + submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); } // template @@ -115,12 +123,13 @@ class handler { kernel->args.push_back(args...); } - shared_ptr_class get_kernel() { return kernel; } + kernel_type get_kernel() { return kernel; } private: - shared_ptr_class kernel; + kernel_type kernel; device bind_device; - shared_ptr_class counter; + counter_type counter; + context ctx; template void submit_task(Func func) { counter->incr(); diff --git a/include/neoSYCL/sycl/id.hpp b/include/neoSYCL/sycl/id.hpp index b8b66d5..7886806 100644 --- a/include/neoSYCL/sycl/id.hpp +++ b/include/neoSYCL/sycl/id.hpp @@ -1,10 +1,6 @@ #ifndef SYCL_INCLUDE_CL_SYCL_ID_HPP_ #define SYCL_INCLUDE_CL_SYCL_ID_HPP_ -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/range.hpp" -#include "neoSYCL/sycl/item.hpp" -#include "neoSYCL/sycl/op_def.hpp" #include "neoSYCL/sycl/detail/container/array_nd.hpp" namespace neosycl::sycl { diff --git a/include/neoSYCL/sycl/platform.hpp b/include/neoSYCL/sycl/platform.hpp index 5fe765d..96e8619 100644 --- a/include/neoSYCL/sycl/platform.hpp +++ b/include/neoSYCL/sycl/platform.hpp @@ -1,11 +1,11 @@ #ifndef CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ #define CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ -#include "neoSYCL/sycl/device_selector.hpp" #include "neoSYCL/sycl/info/device_type.hpp" #include "neoSYCL/sycl/info/param_traits.hpp" #include "neoSYCL/sycl/detail/platform_info.hpp" #include "neoSYCL/sycl/detail/device_type.hpp" +#include "neoSYCL/sycl/detail/task_handler.hpp" #include "neoSYCL/sycl/detail/registered_platforms.hpp" namespace neosycl::sycl { diff --git a/include/neoSYCL/sycl/property_list.hpp b/include/neoSYCL/sycl/property_list.hpp index bcc22fc..2509846 100644 --- a/include/neoSYCL/sycl/property_list.hpp +++ b/include/neoSYCL/sycl/property_list.hpp @@ -1,8 +1,6 @@ #ifndef SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ #define SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ -#include "neoSYCL/sycl/exception.hpp" - namespace neosycl::sycl { class property_list { diff --git a/include/neoSYCL/sycl/queue.hpp b/include/neoSYCL/sycl/queue.hpp index 2033436..581a5e1 100644 --- a/include/neoSYCL/sycl/queue.hpp +++ b/include/neoSYCL/sycl/queue.hpp @@ -3,46 +3,40 @@ #include -#include "neoSYCL/sycl/exception.hpp" -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/device_selector.hpp" #include "neoSYCL/sycl/info/queue.hpp" -#include "neoSYCL/sycl/property_list.hpp" -#include "neoSYCL/sycl/handler.hpp" -#include "neoSYCL/sycl/platform.hpp" -#include "neoSYCL/sycl/context.hpp" #include "neoSYCL/sycl/detail/task_counter.hpp" - namespace neosycl::sycl { class queue { public: explicit queue(const property_list &propList = {}) - : bind_device(), counter(new detail::task_counter()) {} + : bind_device(), counter(new detail::task_counter()), ctx(bind_device) {} explicit queue(const async_handler &asyncHandler, const property_list &propList = {}) : bind_device(), counter(new detail::task_counter()), - err_handler(asyncHandler) {} + err_handler(asyncHandler), ctx(bind_device) {} explicit queue(const device_selector &deviceSelector, const property_list &propList = {}) : bind_device(deviceSelector.select_device()), - counter(new detail::task_counter()) {} + counter(new detail::task_counter()), ctx(bind_device) {} explicit queue(const device_selector &deviceSelector, const async_handler &asyncHandler, const property_list &propList = {}) : bind_device(deviceSelector.select_device()), - counter(new detail::task_counter()), err_handler(asyncHandler) {} + counter(new detail::task_counter()), err_handler(asyncHandler), + ctx(bind_device) {} explicit queue(const device &syclDevice, const property_list &propList = {}) - : bind_device(syclDevice), counter(new detail::task_counter()) {} + : bind_device(syclDevice), counter(new detail::task_counter()), + ctx(bind_device) {} explicit queue(const device &syclDevice, const async_handler &asyncHandler, const property_list &propList = {}) : bind_device(syclDevice), counter(new detail::task_counter()), - err_handler(asyncHandler) {} + err_handler(asyncHandler), ctx(bind_device) {} explicit queue(const context &syclContext, const device_selector &deviceSelector, @@ -65,7 +59,7 @@ class queue { // cl_command_queue get() const; - context get_context() const; + context get_context() const { return ctx; } device get_device() const { return bind_device; } @@ -76,7 +70,7 @@ class queue { template event submit(T cgf) { try { - handler command_group_handler(bind_device, counter); + handler command_group_handler(bind_device, counter, ctx); cgf(command_group_handler); } catch (...) { throw; @@ -98,6 +92,7 @@ class queue { device bind_device; async_handler err_handler; shared_ptr_class counter; + context ctx; }; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/range.hpp b/include/neoSYCL/sycl/range.hpp index d965296..5366f5b 100644 --- a/include/neoSYCL/sycl/range.hpp +++ b/include/neoSYCL/sycl/range.hpp @@ -2,7 +2,6 @@ #define CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ #include "neoSYCL/sycl/detail/container/array_nd.hpp" -#include "neoSYCL/sycl/op_def.hpp" namespace neosycl::sycl { From b565e2c2efd39248d653d8b5b8f1f78e2a6012fd Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sun, 2 Jan 2022 01:48:02 +0900 Subject: [PATCH 03/90] supporting parallel_for_1d and kernelName --- .../extensions/nec/ve_task_handler.hpp | 42 ++++++++++- include/neoSYCL/sycl/handler.hpp | 12 +-- kernel_generator/include/kernel.h | 32 ++++---- kernel_generator/src/clang_tool.cpp | 75 +++++++++++-------- kernel_generator/src/kernel.cpp | 40 +++++----- kernel_generator/src/parallel_task.cpp | 40 +++++++--- kernel_generator/src/single_task.cpp | 40 +++++++--- kernel_generator/src/ve_kernel_translator.cpp | 18 +++-- tests/test_parallel_for.cpp | 10 ++- 9 files changed, 202 insertions(+), 107 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index e4b9d25..ecafc67 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -19,7 +19,7 @@ class task_handler_ve : public detail::task_handler { } return argp; } - + vector_class copy_in(struct veo_args *argp, shared_ptr_class k, VEProc proc) { @@ -131,10 +131,48 @@ class task_handler_ve : public detail::task_handler { } } + void set_arg_for_range(const vector_class &args, + struct veo_args *argp, const range<1> &r) { + int index = args.size(); + veo_args_set_i64(argp, index, r.size()); + veo_args_set_i64(argp, index + 1, 1); + } + void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - throw exception("not implemented"); + for (const detail::KernelArg &arg : k->args) { + arg.acquire_access(); + } + DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), + k->name.c_str()); + + DEBUG_INFO("[VEKernel] parallel task: {}", k->name.c_str()); + + veo_args *argp = create_ve_args(); + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); + + try { + + vector_class ve_addr_list = copy_in(argp, k, proc); + set_arg_for_range(k->args, argp, r); + DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); + uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + k->name.c_str(), argp); + uint64_t ret_val; + veo_call_wait_result(ctx.ve_ctx, id, &ret_val); + DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + ret_val); + copy_out(ve_addr_list, k, proc); + + } catch (exception &e) { + std::cerr << "[VEKernel] kernel invoke failed, error message: " + << e.what() << std::endl; + } + veo_args_free(argp); + for (const detail::KernelArg &arg : k->args) { + arg.release_access(); + } }; void parallel_for_2d(shared_ptr_class k, range<2> r, diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 10d81f0..86f5dea 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -3,7 +3,7 @@ #include #include - +#include #include "neoSYCL/sycl/nd_range.hpp" #include "neoSYCL/sycl/types.hpp" #include "neoSYCL/sycl/event.hpp" @@ -29,14 +29,16 @@ namespace detail { */ template string_class get_kernel_name_from_class() { KernelName *p; -#if 0 int status; char *pc = abi::__cxa_demangle(typeid(p).name(), 0, 0, &status); string_class in(pc); free(pc); -#else - string_class in = typeid(p).name(); -#endif + std::regex re("([^\\s\\:]+)\\*$"); + std::smatch result; + if (std::regex_search(in, result, re)) { + in = result[1].str(); + } + return in; } diff --git a/kernel_generator/include/kernel.h b/kernel_generator/include/kernel.h index fe0d329..c86912f 100644 --- a/kernel_generator/include/kernel.h +++ b/kernel_generator/include/kernel.h @@ -45,22 +45,15 @@ struct KernelInfo { std::string index_name; bool parallel; - KernelInfo(std::vector params, - std::string kernel_name, + KernelInfo(std::vector params, std::string kernel_name, std::string kernel_body) - : params(std::move(params)), - kernel_name(std::move(kernel_name)), - kernel_body(std::move(kernel_body)), - parallel(false) {} + : params(std::move(params)), kernel_name(std::move(kernel_name)), + kernel_body(std::move(kernel_body)), parallel(false) {} - KernelInfo(std::vector params, - std::string kernel_name, - std::string kernel_body, - std::string index_name) - : params(std::move(params)), - kernel_name(std::move(kernel_name)), - kernel_body(std::move(kernel_body)), - index_name(std::move(index_name)), + KernelInfo(std::vector params, std::string kernel_name, + std::string kernel_body, std::string index_name) + : params(std::move(params)), kernel_name(std::move(kernel_name)), + kernel_body(std::move(kernel_body)), index_name(std::move(index_name)), parallel(true) {} }; @@ -69,10 +62,11 @@ struct ProgramContext { std::map structs; }; -std::vector analyze_arguments_dependency(CompilerInstance &ci, - const CXXRecordDecl *lambda_func_decl, - ProgramContext &context); +std::vector +analyze_arguments_dependency(CompilerInstance &ci, + const CXXRecordDecl *lambda_func_decl, + ProgramContext &context); -} +} // namespace sycl -#endif //CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ +#endif // CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ diff --git a/kernel_generator/src/clang_tool.cpp b/kernel_generator/src/clang_tool.cpp index 08a1d3f..56b6b3a 100644 --- a/kernel_generator/src/clang_tool.cpp +++ b/kernel_generator/src/clang_tool.cpp @@ -23,16 +23,21 @@ #include "ve_kernel_translator.h" static llvm::cl::OptionCategory MyToolCategory("Additional options"); -static llvm::cl::opt OutputNameOption("o", - llvm::cl::desc("output filename"), - llvm::cl::value_desc("filename"), - llvm::cl::cat(MyToolCategory)); +static llvm::cl::opt + OutputNameOption("o", llvm::cl::desc("output filename"), + llvm::cl::value_desc("filename"), + llvm::cl::cat(MyToolCategory)); -static llvm::cl::extrahelp CommonHelp(clang::tooling::CommonOptionsParser::HelpMessage); +static llvm::cl::extrahelp + CommonHelp(clang::tooling::CommonOptionsParser::HelpMessage); static llvm::cl::extrahelp MoreHelp("SYCL Kernel generate tool"); -const static std::string KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME = "HIGHLIGHT_KERNEL_SINGLE_TASK"; -const static std::string KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME = "HIGHLIGHT_KERNEL_PARALLEL"; +const static std::string KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME = +"single_task"; + // "HIGHLIGHT_KERNEL_SINGLE_TASK"; +const static std::string KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME = +"parallel_for"; + //"HIGHLIGHT_KERNEL_PARALLEL"; const static std::string DEFAULT_OUTPUT_NAME = "kernel"; using namespace sycl; @@ -45,9 +50,9 @@ class SYCLVisitor : public clang::RecursiveASTVisitor { ProgramContext context; public: - SYCLVisitor(clang::CompilerInstance &ci, clang::SourceManager &sm, clang::Rewriter &re) - : instance(ci), rewriter(re), manager(sm) { - } + SYCLVisitor(clang::CompilerInstance &ci, clang::SourceManager &sm, + clang::Rewriter &re) + : instance(ci), rewriter(re), manager(sm) {} virtual bool shouldVisitTemplateInstantiations() { return true; } @@ -65,19 +70,25 @@ class SYCLVisitor : public clang::RecursiveASTVisitor { try { if (clang::isa(s)) { - clang::CallExpr *call_expr = clang::cast(s); + clang::CallExpr *call_expr = clang::cast(s); clang::FunctionDecl *callee = call_expr->getDirectCallee(); if (callee && callee->getIdentifier()) { // Get the func which name start with SYCL_PREFIX - if (callee->getName().compare(KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME) == 0) { - KernelInfo info = parse_single_task_func(this->instance, callee, context); + if (callee->getName().compare( + KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME) == 0) { + KernelInfo info = + parse_single_task_func(this->instance, callee, context); if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert(std::pair(info.kernel_name, info)); + context.kernels.insert( + std::pair(info.kernel_name, info)); } - } else if (callee->getName().compare(KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME) == 0) { - KernelInfo info = parse_parallel_task_func(this->instance, callee, context); + } else if (callee->getName().compare( + KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME) == 0) { + KernelInfo info = + parse_parallel_task_func(this->instance, callee, context); if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert(std::pair(info.kernel_name, info)); + context.kernels.insert( + std::pair(info.kernel_name, info)); } } } @@ -89,9 +100,7 @@ class SYCLVisitor : public clang::RecursiveASTVisitor { return true; } - ProgramContext get_context() { - return context; - } + ProgramContext get_context() { return context; } }; class SYCLASTConsumer : public clang::ASTConsumer { @@ -104,9 +113,9 @@ class SYCLASTConsumer : public clang::ASTConsumer { public: explicit SYCLASTConsumer(clang::CompilerInstance &ci) - : instance(ci), - manager(ci.getSourceManager()), - visitor(std::make_unique(ci, ci.getSourceManager(), this->rewriter)) { + : instance(ci), manager(ci.getSourceManager()), + visitor(std::make_unique(ci, ci.getSourceManager(), + this->rewriter)) { this->rewriter.setSourceMgr(ci.getSourceManager(), ci.getLangOpts()); } // Retrieve the AST analysis result @@ -132,15 +141,18 @@ class SYCLASTConsumer : public clang::ASTConsumer { // Write include headers here std::string kernel_code; - kernel_code.append(translator.before_kernel(program_context)).append(LINE_BREAK); + kernel_code.append(translator.before_kernel(program_context)) + .append(LINE_BREAK); // Output all kernels auto kernels = program_context.kernels; for (auto &kernel : kernels) { - std::string kernel_str = translator.body_to_decl_str(program_context, kernel.second); + std::string kernel_str = + translator.body_to_decl_str(program_context, kernel.second); kernel_code.append(kernel_str).append(LINE_BREAK); } - kernel_code.append(translator.after_kernel(program_context)).append(LINE_BREAK); + kernel_code.append(translator.after_kernel(program_context)) + .append(LINE_BREAK); // write kernel code kernel_out << kernel_code << std::endl; @@ -163,8 +175,11 @@ class SYCLFrontendAction : public clang::PluginASTAction { }; int main(int argc, const char **argv) { - llvm::Expected - op = clang::tooling::CommonOptionsParser::create(argc, argv, MyToolCategory, llvm::cl::OneOrMore); - clang::tooling::ClangTool tool(op->getCompilations(), op->getSourcePathList()); - return tool.run(clang::tooling::newFrontendActionFactory().get()); + llvm::Expected op = + clang::tooling::CommonOptionsParser::create(argc, argv, MyToolCategory, + llvm::cl::OneOrMore); + clang::tooling::ClangTool tool(op->getCompilations(), + op->getSourcePathList()); + return tool.run( + clang::tooling::newFrontendActionFactory().get()); } \ No newline at end of file diff --git a/kernel_generator/src/kernel.cpp b/kernel_generator/src/kernel.cpp index 9bfe5f7..f2c59d1 100644 --- a/kernel_generator/src/kernel.cpp +++ b/kernel_generator/src/kernel.cpp @@ -2,34 +2,37 @@ namespace sycl { -std::vector analyze_arguments_dependency( - CompilerInstance &ci, const CXXRecordDecl *lambda_func_decl, ProgramContext &context) { +std::vector +analyze_arguments_dependency(CompilerInstance &ci, + const CXXRecordDecl *lambda_func_decl, + ProgramContext &context) { std::vector args; // we decide the argument order by parent context - for (Decl *d:lambda_func_decl->getParent()->decls()) { + for (Decl *d : lambda_func_decl->getParent()->decls()) { if (isa(d)) { - VarDecl *var = cast(d); + VarDecl *var = cast(d); CXXRecordDecl *raw_decl = var->getType()->getAsCXXRecordDecl(); if (!raw_decl) { continue; } std::string name = var->getIdentifier()->getName().str(); - ClassTemplateSpecializationDecl *template_decl = clang_cast(raw_decl); + ClassTemplateSpecializationDecl *template_decl = + clang_cast(raw_decl); auto template_args = template_decl->getTemplateArgs().asArray(); - if (template_args.size() != 4) { + if (template_args.size() < 4) { throw KernelValidateException("Accessor should have 4 template args"); } TemplateArgument accessor_type_tmp = template_args[0]; - TemplateArgument dimensions_tmp = template_args[1]; - TemplateArgument mode_tmp = template_args[2]; - TemplateArgument target_tmp = template_args[3]; + TemplateArgument dimensions_tmp = template_args[1]; + TemplateArgument mode_tmp = template_args[2]; + TemplateArgument target_tmp = template_args[3]; QualType accessor_type = accessor_type_tmp.getAsType(); - int field_dimensions = dimensions_tmp.getAsIntegral().getExtValue(); + int field_dimensions = dimensions_tmp.getAsIntegral().getExtValue(); std::string field_type = accessor_type.getAsString(); if (!accessor_type->isBuiltinType()) { @@ -41,14 +44,17 @@ std::vector analyze_arguments_dependency( std::string def_body = decl2str(ci, type_decl); if (context.structs.count(def_name) == 0) { - context.structs.insert(std::pair(def_name, def_body)); + context.structs.insert( + std::pair(def_name, def_body)); } #ifdef DEBUG std::cout << "Definition name: " << def_name << std::endl; - std::cout << "========= Definition body start =========" << std::endl; + std::cout << "========= Definition body start =========" + << std::endl; std::cout << def_body << std::endl; - std::cout << "========= Definition body end =========" << std::endl; + std::cout << "========= Definition body end =========" + << std::endl; #endif } } @@ -56,13 +62,13 @@ std::vector analyze_arguments_dependency( args.push_back(KernelArgument{name, field_type, field_dimensions}); - PRINT_INFO("Found a lambda field decl, Type: {}, Name: {}, Dimensions: {}", field_type, name, field_dimensions); + PRINT_INFO( + "Found a lambda field decl, Type: {}, Name: {}, Dimensions: {}", + field_type, name, field_dimensions); } } return args; }; -} - - +} // namespace sycl diff --git a/kernel_generator/src/parallel_task.cpp b/kernel_generator/src/parallel_task.cpp index 050e72b..b3f9f75 100644 --- a/kernel_generator/src/parallel_task.cpp +++ b/kernel_generator/src/parallel_task.cpp @@ -1,17 +1,21 @@ #include "parallel_task.h" +#include namespace sycl { -KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context) { +KernelInfo parse_parallel_task_func(CompilerInstance &ci, + const FunctionDecl *callee, + ProgramContext &context) { if (callee->getNumParams() != 2) { throw KernelValidateException("Parallel kernel must have 2 param"); } - const TemplateArgumentList *template_args = callee->getTemplateSpecializationArgs(); + const TemplateArgumentList *template_args = + callee->getTemplateSpecializationArgs(); if (template_args == nullptr || template_args->size() != 3) { throw KernelValidateException("Parallel kernel must have 3 template args"); } - const TemplateArgument &classname_arg = template_args->get(0); + const TemplateArgument &classname_arg = template_args->get(0); const TemplateArgument &lambda_func_arg = template_args->get(1); if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { @@ -22,34 +26,48 @@ KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *ca throw KernelValidateException("Template 'ArgKind' must be 'Type'"); } - QualType classname_type = classname_arg.getAsType(); + QualType classname_type = classname_arg.getAsType(); QualType lambda_func_type = lambda_func_arg.getAsType(); +#if 0 std::string mangledName; - clang::MangleContext *mangleContext = ci.getASTContext().createMangleContext(); + clang::MangleContext *mangleContext = + ci.getASTContext().createMangleContext(); llvm::raw_string_ostream ostream(mangledName); mangleContext->mangleCXXRTTI(classname_type, ostream); ostream.flush(); std::string kernelName = mangledName.substr(4, mangledName.size()); +#else + std::string kernelName = classname_type.getAsString(); + std::regex re("([^\\s\\:]+)$"); + std::smatch result; + if(std::regex_search(kernelName, result, re)){ + kernelName = result[1].str(); + } +#endif // Get classname here, will be used as kernel func name - std::string classname = classname_type->getAsRecordDecl()->getDeclName().getAsString(); + std::string classname = + classname_type->getAsRecordDecl()->getDeclName().getAsString(); CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - std::vector kernel_arguments = analyze_arguments_dependency(ci, lambda_func_decl, context); + std::vector kernel_arguments = + analyze_arguments_dependency(ci, lambda_func_decl, context); CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); + std::string func_body = decl2str(ci, lambda_decl->getBody()); if (lambda_decl->getNumParams() != 1) { throw KernelValidateException("Parallel Kernel should have Index"); } - std::string index_name = lambda_decl->getParamDecl(0)->getIdentifier()->getName().str(); + std::string index_name = + lambda_decl->getParamDecl(0)->getIdentifier()->getName().str(); #ifdef DEBUG - std::cout << "Parallel kernel name: " << kernelName << " , index_name: " << index_name << std::endl; + std::cout << "Parallel kernel name: " << kernelName + << " , index_name: " << index_name << std::endl; std::cout << "========= Parallel kernel body start =========" << std::endl; std::cout << func_body << std::endl; std::cout << "========= Parallel kernel body end =========" << std::endl; @@ -60,4 +78,4 @@ KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *ca return info; } -} \ No newline at end of file +} // namespace sycl \ No newline at end of file diff --git a/kernel_generator/src/single_task.cpp b/kernel_generator/src/single_task.cpp index 6f4e5f1..37b55cb 100644 --- a/kernel_generator/src/single_task.cpp +++ b/kernel_generator/src/single_task.cpp @@ -1,17 +1,22 @@ #include "single_task.h" +#include namespace sycl { -KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context) { +KernelInfo parse_single_task_func(CompilerInstance &ci, + const FunctionDecl *callee, + ProgramContext &context) { if (callee->getNumParams() != 1) { throw KernelValidateException("Single-task kernel must have 1 param"); } - const TemplateArgumentList *template_args = callee->getTemplateSpecializationArgs(); + const TemplateArgumentList *template_args = + callee->getTemplateSpecializationArgs(); if (template_args == nullptr || template_args->size() != 2) { - throw KernelValidateException("Single-task kernel must have 2 template args"); + throw KernelValidateException( + "Single-task kernel must have 2 template args"); } - const TemplateArgument &classname_arg = template_args->get(0); + const TemplateArgument &classname_arg = template_args->get(0); const TemplateArgument &lambda_func_arg = template_args->get(1); if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { @@ -22,26 +27,40 @@ KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *call throw KernelValidateException("Template 'ArgKind' must be 'Type'"); } - QualType classname_type = classname_arg.getAsType(); + QualType classname_type = classname_arg.getAsType(); QualType lambda_func_type = lambda_func_arg.getAsType(); +#if 0 + // manginling rule could be compiler-depedent. + // so it's better to avoid using it as the kenrel name. std::string mangledName; - clang::MangleContext *mangleContext = ci.getASTContext().createMangleContext(); + clang::MangleContext *mangleContext = + ci.getASTContext().createMangleContext(); llvm::raw_string_ostream ostream(mangledName); mangleContext->mangleCXXRTTI(classname_type, ostream); ostream.flush(); std::string kernelName = mangledName.substr(4, mangledName.size()); +#else + std::string kernelName= classname_type.getAsString(); + std::regex re("([^\\s\\:]+)$"); + std::smatch result; + if(std::regex_search(kernelName, result, re)){ + kernelName = result[1].str(); + } +#endif // Get classname here, will be used as kernel func name std::cout << classname_type->getTypeClassName() << std::endl; - std::string classname = classname_type->getAsRecordDecl()->getDeclName().getAsString(); + std::string classname = + classname_type->getAsRecordDecl()->getDeclName().getAsString(); CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - std::vector kernel_arguments = analyze_arguments_dependency(ci, lambda_func_decl, context); + std::vector kernel_arguments = + analyze_arguments_dependency(ci, lambda_func_decl, context); CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); + std::string func_body = decl2str(ci, lambda_decl->getBody()); #ifdef DEBUG std::cout << "Single kernel name: " << kernelName << std::endl; @@ -55,5 +74,4 @@ KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *call return info; }; -} - +} // namespace sycl diff --git a/kernel_generator/src/ve_kernel_translator.cpp b/kernel_generator/src/ve_kernel_translator.cpp index 20b578f..bc86be9 100644 --- a/kernel_generator/src/ve_kernel_translator.cpp +++ b/kernel_generator/src/ve_kernel_translator.cpp @@ -2,11 +2,12 @@ namespace sycl { -std::string VEKernelTranslator::body_to_decl_str(const ProgramContext &context, const KernelInfo &info) { +std::string VEKernelTranslator::body_to_decl_str(const ProgramContext &context, + const KernelInfo &info) { std::string func_params; // generate function params - for (const KernelArgument &arg:info.params) { + for (const KernelArgument &arg : info.params) { func_params += fmt::format("{} *{}, ", arg.type, arg.name); } @@ -15,13 +16,15 @@ std::string VEKernelTranslator::body_to_decl_str(const ProgramContext &context, std::string body = fmt::format("\nfor(int {0}=0;{0}").append(LINE_BREAK); ret.append("#include ").append(LINE_BREAK); - for (auto &def:context.structs) { + for (auto &def : context.structs) { ret.append(def.second).append(";").append(LINE_BREAK); } return ret; @@ -40,5 +43,4 @@ std::string VEKernelTranslator::after_kernel(const ProgramContext &context) { return ""; } -} - +} // namespace sycl diff --git a/tests/test_parallel_for.cpp b/tests/test_parallel_for.cpp index 25bc507..93e65a2 100644 --- a/tests/test_parallel_for.cpp +++ b/tests/test_parallel_for.cpp @@ -11,10 +11,12 @@ TEST(parallel_for, simple_test) { Vector b = {5, 6, 8}; Vector c; - queue q; + queue q(ve_selector{}); - buffer A{std::begin(a), std::end(a)}; - buffer B{std::begin(b), std::end(b)}; + buffer A(a, range<1>(N)); + buffer B(b, range<1>(N)); + //buffer A{std::begin(a), std::end(a)}; // doesn't work for ve + //buffer B{std::begin(b), std::end(b)}; // doesn't work for ve { buffer C{c, N}; @@ -42,4 +44,4 @@ TEST(parallel_for, simple_test) { for (size_t i = 0; i < N; i++) { EXPECT_EQ(c[i], a[i] + b[i]); } -} \ No newline at end of file +} From 1a06109341ddca5318e36d133019082c407264c6 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 8 Jan 2022 01:45:40 +0900 Subject: [PATCH 04/90] Add accessor::devptr --- .../extensions/nec/ve_context_info.hpp | 9 ++++--- include/neoSYCL/sycl.hpp | 4 +-- include/neoSYCL/sycl/accessor.hpp | 27 +++++++++++++++---- .../neoSYCL/sycl/detail/highlight_func.hpp | 4 +-- include/neoSYCL/sycl/detail/kernel_arg.hpp | 7 ++--- include/neoSYCL/sycl/handler.hpp | 7 ++++- 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index bd5dc2c..d30f512 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -15,7 +15,7 @@ class ve_context_info : public detail::context_info { return VEContext{ctx}; } - void free_ctx(VEContext ctx) { + void free_ctx() { DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { @@ -38,7 +38,7 @@ class ve_context_info : public detail::context_info { return nec::VEProc{ve_proc, handle}; } - void free_proc(VEProc proc) { + void free_proc() { DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { @@ -53,7 +53,10 @@ class ve_context_info : public detail::context_info { : detail::context_info(), proc(create_proc()), ctx(create_ctx(proc)) { task_handler = handler_type(new task_handler_ve(proc, ctx)); } - ~ve_context_info() { free_proc(proc); } + ~ve_context_info() { + free_ctx(); + free_proc(); + } }; } // namespace neosycl::sycl::extensions::nec #endif diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index e652e88..e156879 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -23,12 +23,12 @@ #include "sycl/property_list.hpp" #include "sycl/context.hpp" +#include "sycl/accessor.hpp" #include "sycl/handler.hpp" #include "sycl/queue.hpp" // include buffer headers -#include "sycl/accessor.hpp" -#include "sycl/access.hpp" +// #include "sycl/access.hpp" #include "sycl/allocator.hpp" #include "sycl/buffer.hpp" diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index 25dec57..7b6d5bc 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -8,6 +8,9 @@ namespace neosycl::sycl { template class buffer; +// prototype decl +class handler; + template @@ -17,32 +20,33 @@ class accessor { template accessor(buffer &bufferRef, const property_list &propList = {}) - : data(bufferRef.data), accessRange(bufferRef.get_range()) {} + : data(bufferRef.data), accessRange(bufferRef.get_range()), devptr(0) {} template accessor(buffer &bufferRef, range accessRange, const property_list &propList = {}) - : data(bufferRef.data), accessRange(accessRange) {} + : data(bufferRef.data), accessRange(accessRange), devptr(0) {} template accessor(buffer &bufferRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset) {} + accessOffset(accessOffset), devptr(0) {} template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, const property_list &propList = {}) - : data(bufferRef.data), accessRange(accessRange), accessOffset(0) {} + : data(bufferRef.data), accessRange(accessRange), accessOffset(0), + devptr(0) {} template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset) {} + accessOffset(accessOffset), devptr(0) {} constexpr bool is_placeholder() const { return isPlaceholder; } @@ -153,12 +157,25 @@ class accessor { typename = std::enable_if_t<(Mode == access::mode::read) && (D == 0)>> operator dataT() const; + template > + dataT *get_pointer() const { + return data.get(); + } + + template > + void *get_pointer() const { + return reinterpret_cast(devptr); + } ~accessor() = default; private: std::shared_ptr> data; range accessRange; id accessOffset; + void *devptr; size_t id2index(id index) const { size_t x = this->accessRange.get(0); diff --git a/include/neoSYCL/sycl/detail/highlight_func.hpp b/include/neoSYCL/sycl/detail/highlight_func.hpp index adc4211..9ddffd9 100644 --- a/include/neoSYCL/sycl/detail/highlight_func.hpp +++ b/include/neoSYCL/sycl/detail/highlight_func.hpp @@ -1,6 +1,6 @@ #ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ #define SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ - +#if 0 namespace neosycl::sycl::detail { template @@ -11,5 +11,5 @@ template void HIGHLIGHT_KERNEL_SINGLE_TASK(Kernel k) {} } // namespace neosycl::sycl::detail - +#endif #endif // SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ diff --git a/include/neoSYCL/sycl/detail/kernel_arg.hpp b/include/neoSYCL/sycl/detail/kernel_arg.hpp index b896e09..5481f3b 100644 --- a/include/neoSYCL/sycl/detail/kernel_arg.hpp +++ b/include/neoSYCL/sycl/detail/kernel_arg.hpp @@ -7,8 +7,9 @@ namespace neosycl::sycl::detail { struct KernelArg { - KernelArg(std::shared_ptr arg, - access::mode mode) + using container_type = std::shared_ptr; + + KernelArg(container_type arg, access::mode mode) : container(std::move(arg)), mode(mode) {} void acquire_access() const { @@ -35,7 +36,7 @@ struct KernelArg { } } - std::shared_ptr container; + container_type container; access::mode mode; }; diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 86f5dea..ea482d1 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -9,7 +9,7 @@ #include "neoSYCL/sycl/event.hpp" #include "neoSYCL/sycl/id.hpp" #include "neoSYCL/sycl/allocator.hpp" -#include "neoSYCL/sycl/detail/highlight_func.hpp" +//#include "neoSYCL/sycl/detail/highlight_func.hpp" #include "neoSYCL/sycl/detail/kernel.hpp" #include "neoSYCL/sycl/detail/task.hpp" #include "neoSYCL/sycl/detail/task_handler.hpp" @@ -127,6 +127,11 @@ class handler { kernel_type get_kernel() { return kernel; } + template + void require(sycl::accessor acc) { + kernel->args.push_back(detail::KernelArg(acc, m)); + } + private: kernel_type kernel; device bind_device; From f46490dc064bd9b3cae1863415247b366a37f754 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sun, 9 Jan 2022 03:19:42 +0900 Subject: [PATCH 05/90] redundant data copy avoidance --- .../extensions/nec/ve_task_handler.hpp | 62 ++++++++++++------- include/neoSYCL/sycl/accessor.hpp | 16 +++-- include/neoSYCL/sycl/buffer.hpp | 4 +- .../{kernel_arg.hpp => accessor_info.hpp} | 11 ++-- include/neoSYCL/sycl/detail/kernel.hpp | 4 +- include/neoSYCL/sycl/detail/task_handler.hpp | 16 ++--- include/neoSYCL/sycl/handler.hpp | 3 +- 7 files changed, 65 insertions(+), 51 deletions(-) rename include/neoSYCL/sycl/detail/{kernel_arg.hpp => accessor_info.hpp} (76%) diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index ecafc67..dd09ce2 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -2,16 +2,22 @@ #define NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP #include "neoSYCL/extensions/nec/ve_info.hpp" -#include "neoSYCL/sycl/detail/kernel_arg.hpp" +#include "neoSYCL/sycl/detail/accessor_info.hpp" #include "ve_offload.h" namespace neosycl::sycl::extensions::nec { class task_handler_ve : public detail::task_handler { - + struct buf_info{ + detail::accessor_info arg; + uint64_t ptr; + }; + using buffer_type = std::vector; public: task_handler_ve(const VEProc &p, const VEContext &c) : proc(p), ctx(c) {} - + ~task_handler_ve(){ + copy_out(); + } struct veo_args *create_ve_args() { struct veo_args *argp = veo_args_alloc(); if (!argp) { @@ -20,13 +26,21 @@ class task_handler_ve : public detail::task_handler { return argp; } - vector_class copy_in(struct veo_args *argp, + void copy_in(struct veo_args *argp, shared_ptr_class k, VEProc proc) { - vector_class ve_addr_list; + int i, j; - for (int i = 0; i < k->args.size(); i++) { - detail::KernelArg arg = k->args[i]; + for (i = 0; i < k->args.size(); i++) { + detail::accessor_info arg = k->args[i]; + for(j = 0; j< bufs.size(); j++){ + if(arg.container->get_raw_ptr() == bufs[j].arg.container->get_raw_ptr()) + break; + } + if(j!=bufs.size()){ + veo_args_set_i64(argp, i, bufs[j].ptr); + continue; + } size_t size_in_byte = arg.container->get_size(); uint64_t ve_addr_int; @@ -38,7 +52,9 @@ class task_handler_ve : public detail::task_handler { PRINT_ERR("[VEProc] allocate VE memory failed"); throw exception("VE allocate return error"); } - ve_addr_list.push_back(ve_addr_int); + //ve_addr_list.push_back(ve_addr_int); + buf_info bi{arg,ve_addr_int}; + bufs.push_back(bi); DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", size_in_byte, ve_addr_int); @@ -60,15 +76,14 @@ class task_handler_ve : public detail::task_handler { } veo_args_set_i64(argp, i, ve_addr_int); } - return ve_addr_list; + return; } - void copy_out(vector_class ve_addr_list, - shared_ptr_class k, VEProc proc) { - for (int i = 0; i < k->args.size(); i++) { - detail::KernelArg arg = k->args[i]; + void copy_out() { + for (int i = 0; i < bufs.size(); i++) { + detail::accessor_info arg = bufs[i].arg; size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = ve_addr_list[i]; + uint64_t device_ptr = bufs[i].ptr; if (arg.mode != access::mode::read) { DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " "size: {}, host address: {:#x}", @@ -97,7 +112,7 @@ class task_handler_ve : public detail::task_handler { void single_task(shared_ptr_class k, const std::function &func) override { - for (const detail::KernelArg &arg : k->args) { + for (const detail::accessor_info &arg : k->args) { arg.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); @@ -109,7 +124,7 @@ class task_handler_ve : public detail::task_handler { try { - vector_class ve_addr_list = copy_in(argp, k, proc); + copy_in(argp, k, proc); DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, k->name.c_str(), argp); @@ -117,7 +132,7 @@ class task_handler_ve : public detail::task_handler { veo_call_wait_result(ctx.ve_ctx, id, &ret_val); DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - copy_out(ve_addr_list, k, proc); + //copy_out(ve_addr_list, k, proc); } catch (exception &e) { std::cerr << "[VEKernel] kernel invoke failed, error message: " @@ -126,12 +141,12 @@ class task_handler_ve : public detail::task_handler { veo_args_free(argp); - for (const detail::KernelArg &arg : k->args) { + for (const detail::accessor_info &arg : k->args) { arg.release_access(); } } - void set_arg_for_range(const vector_class &args, + void set_arg_for_range(const vector_class &args, struct veo_args *argp, const range<1> &r) { int index = args.size(); veo_args_set_i64(argp, index, r.size()); @@ -141,7 +156,7 @@ class task_handler_ve : public detail::task_handler { void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - for (const detail::KernelArg &arg : k->args) { + for (const detail::accessor_info &arg : k->args) { arg.acquire_access(); } DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), @@ -154,7 +169,7 @@ class task_handler_ve : public detail::task_handler { try { - vector_class ve_addr_list = copy_in(argp, k, proc); + copy_in(argp, k, proc); set_arg_for_range(k->args, argp, r); DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, @@ -163,14 +178,14 @@ class task_handler_ve : public detail::task_handler { veo_call_wait_result(ctx.ve_ctx, id, &ret_val); DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - copy_out(ve_addr_list, k, proc); + //copy_out(ve_addr_list, k, proc); } catch (exception &e) { std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; } veo_args_free(argp); - for (const detail::KernelArg &arg : k->args) { + for (const detail::accessor_info &arg : k->args) { arg.release_access(); } }; @@ -192,6 +207,7 @@ class task_handler_ve : public detail::task_handler { private: VEContext ctx; VEProc proc; + buffer_type bufs; }; } // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index 7b6d5bc..f44fe6f 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -20,33 +20,33 @@ class accessor { template accessor(buffer &bufferRef, const property_list &propList = {}) - : data(bufferRef.data), accessRange(bufferRef.get_range()), devptr(0) {} + : data(bufferRef.data), accessRange(bufferRef.get_range()), handler_(0) {} template accessor(buffer &bufferRef, range accessRange, const property_list &propList = {}) - : data(bufferRef.data), accessRange(accessRange), devptr(0) {} + : data(bufferRef.data), accessRange(accessRange), handler_(0) {} template accessor(buffer &bufferRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset), devptr(0) {} + accessOffset(accessOffset), handler_(0) {} template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), accessOffset(0), - devptr(0) {} + handler_(&commandGroupHandlerRef) {} template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset), devptr(0) {} + accessOffset(accessOffset), handler_(&commandGroupHandlerRef) {} constexpr bool is_placeholder() const { return isPlaceholder; } @@ -166,16 +166,14 @@ class accessor { template > - void *get_pointer() const { - return reinterpret_cast(devptr); - } + void *get_pointer() const; ~accessor() = default; private: std::shared_ptr> data; range accessRange; id accessOffset; - void *devptr; + handler* handler_; size_t id2index(id index) const { size_t x = this->accessRange.get(0); diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index b2bbcc7..8656d78 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -130,7 +130,7 @@ class buffer { accessor get_access(handler &commandGroupHandler) { commandGroupHandler.get_kernel()->args.push_back( - detail::KernelArg(data, mode)); + detail::accessor_info(data, mode)); return accessor(*this); } @@ -145,7 +145,7 @@ class buffer { get_access(handler &commandGroupHandler, range accessRange, id accessOffset = {}) { commandGroupHandler.get_kernel()->args.push_back( - detail::KernelArg(data, mode)); + detail::accessor_info(data, mode)); return accessor(*this, commandGroupHandler, accessRange, accessOffset); } diff --git a/include/neoSYCL/sycl/detail/kernel_arg.hpp b/include/neoSYCL/sycl/detail/accessor_info.hpp similarity index 76% rename from include/neoSYCL/sycl/detail/kernel_arg.hpp rename to include/neoSYCL/sycl/detail/accessor_info.hpp index 5481f3b..f70129a 100644 --- a/include/neoSYCL/sycl/detail/kernel_arg.hpp +++ b/include/neoSYCL/sycl/detail/accessor_info.hpp @@ -1,15 +1,14 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ -#define SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ - +#ifndef SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ +#define SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ #include "neoSYCL/sycl/detail/container/data_container.hpp" #include "neoSYCL/sycl/access.hpp" namespace neosycl::sycl::detail { -struct KernelArg { +struct accessor_info { using container_type = std::shared_ptr; - KernelArg(container_type arg, access::mode mode) + accessor_info(container_type arg, access::mode mode) : container(std::move(arg)), mode(mode) {} void acquire_access() const { @@ -42,4 +41,4 @@ struct KernelArg { } // namespace neosycl::sycl::detail -#endif // SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ +#endif //SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ \ No newline at end of file diff --git a/include/neoSYCL/sycl/detail/kernel.hpp b/include/neoSYCL/sycl/detail/kernel.hpp index 8b1d5df..4fcfaf8 100644 --- a/include/neoSYCL/sycl/detail/kernel.hpp +++ b/include/neoSYCL/sycl/detail/kernel.hpp @@ -2,12 +2,12 @@ #define SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ #include -#include "neoSYCL/sycl/detail/kernel_arg.hpp" +#include "neoSYCL/sycl/detail/accessor_info.hpp" namespace neosycl::sycl::detail { struct kernel { - vector_class args; + vector_class args; string_class name; }; diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index d5e9f08..8fc543e 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -33,12 +33,12 @@ class task_handler_cpu : public task_handler { void single_task(shared_ptr_class k, const std::function &func) override { - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); func(); - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.release_access(); } } @@ -46,13 +46,13 @@ class task_handler_cpu : public task_handler { void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { func(id<1>(x)); } - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.release_access(); } }; @@ -60,7 +60,7 @@ class task_handler_cpu : public task_handler { void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) override { - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { @@ -68,7 +68,7 @@ class task_handler_cpu : public task_handler { func(id<2>(x, y)); } } - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.release_access(); } }; @@ -76,7 +76,7 @@ class task_handler_cpu : public task_handler { void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) override { - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.acquire_access(); } for (size_t x = offset.get(0); x < r.get(0); x++) { @@ -86,7 +86,7 @@ class task_handler_cpu : public task_handler { } } } - for (const KernelArg &arg : k->args) { + for (const accessor_info &arg : k->args) { arg.release_access(); } }; diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index ea482d1..dabdc52 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -129,7 +129,8 @@ class handler { template void require(sycl::accessor acc) { - kernel->args.push_back(detail::KernelArg(acc, m)); + acc.handler_ = this; + kernel->args.push_back(detail::accessor_info(acc, m)); } private: From b7787add2adc2e68a1d079a851c6344c18bca3d1 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Mon, 10 Jan 2022 00:29:22 +0900 Subject: [PATCH 06/90] Use sycl::buffer to manage device buffer --- .../extensions/nec/ve_task_handler.hpp | 272 ++++++++++-------- include/neoSYCL/sycl.hpp | 2 +- include/neoSYCL/sycl/accessor.hpp | 17 +- include/neoSYCL/sycl/buffer.hpp | 18 +- include/neoSYCL/sycl/detail/accessor_info.hpp | 2 +- include/neoSYCL/sycl/detail/context_info.hpp | 10 + include/neoSYCL/sycl/detail/task_handler.hpp | 16 ++ include/neoSYCL/sycl/handler.hpp | 8 + 8 files changed, 212 insertions(+), 133 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index dd09ce2..0e96cfa 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -8,141 +8,63 @@ namespace neosycl::sycl::extensions::nec { class task_handler_ve : public detail::task_handler { - struct buf_info{ - detail::accessor_info arg; + struct buf_info { + container_type buf; uint64_t ptr; + bool updated; }; using buffer_type = std::vector; + public: - task_handler_ve(const VEProc &p, const VEContext &c) : proc(p), ctx(c) {} - ~task_handler_ve(){ - copy_out(); - } - struct veo_args *create_ve_args() { + task_handler_ve(const VEProc &p, const VEContext &c) : proc_(p), ctx_(c) {} + ~task_handler_ve() { free_mem(); } + + struct veo_args *alloc_veo_args() { struct veo_args *argp = veo_args_alloc(); if (!argp) { throw exception("ve args return nullptr"); } + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); return argp; } - void copy_in(struct veo_args *argp, - shared_ptr_class k, - VEProc proc) { - int i, j; + struct veo_args *create_ve_args(shared_ptr_class k) { + struct veo_args *argp = alloc_veo_args(); - for (i = 0; i < k->args.size(); i++) { - detail::accessor_info arg = k->args[i]; - for(j = 0; j< bufs.size(); j++){ - if(arg.container->get_raw_ptr() == bufs[j].arg.container->get_raw_ptr()) - break; - } - if(j!=bufs.size()){ - veo_args_set_i64(argp, i, bufs[j].ptr); - continue; - } - size_t size_in_byte = arg.container->get_size(); + for (int i = 0; i < k->args.size(); i++) { + detail::accessor_info acc = k->args[i]; - uint64_t ve_addr_int; - int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] allocate VE memory size: {} failed, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] allocate VE memory failed"); - throw exception("VE allocate return error"); - } - //ve_addr_list.push_back(ve_addr_int); - buf_info bi{arg,ve_addr_int}; - bufs.push_back(bi); - - DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, ve_addr_int); - - if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " - "{:#x}, size: {}, host address: {:#x}", - (size_t)ve_addr_int, size_in_byte, - (size_t)arg.container->get_raw_ptr()); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, - arg.container->get_raw_ptr(), size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy to ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy to ve memory failed"); - throw exception("VE copy return error"); - } - } + void *ve_addr = alloc_mem(acc.container, acc.mode); + uint64_t ve_addr_int = reinterpret_cast(ve_addr); veo_args_set_i64(argp, i, ve_addr_int); } - return; - } - - void copy_out() { - for (int i = 0; i < bufs.size(); i++) { - detail::accessor_info arg = bufs[i].arg; - size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = bufs[i].ptr; - if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " - "size: {}, host address: {:#x}", - (size_t)device_ptr, size_in_byte, - (size_t)arg.container->get_raw_ptr()); - // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_raw_ptr(), - device_ptr, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy from ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy from ve memory failed"); - throw exception("VE copy return error"); - } - } - int rt = veo_free_mem(proc.ve_proc, device_ptr); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] free ve memory failed"); - throw exception("VE free memory return error"); - } - } + return argp; } void single_task(shared_ptr_class k, const std::function &func) override { - for (const detail::accessor_info &arg : k->args) { - arg.acquire_access(); + for (const detail::accessor_info &acc : k->args) { + acc.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); - DEBUG_INFO("[VEKernel] single task: {}", k->name.c_str()); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); - try { - - copy_in(argp, k, proc); + struct veo_args *argp = create_ve_args(k); DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); + veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - //copy_out(ve_addr_list, k, proc); - + // copy_out(ve_addr_list, k, proc); + veo_args_free(argp); } catch (exception &e) { std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; } - - veo_args_free(argp); - - for (const detail::accessor_info &arg : k->args) { - arg.release_access(); + for (const detail::accessor_info &acc : k->args) { + acc.release_access(); } } @@ -161,53 +83,157 @@ class task_handler_ve : public detail::task_handler { } DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), k->name.c_str()); - DEBUG_INFO("[VEKernel] parallel task: {}", k->name.c_str()); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); - try { - - copy_in(argp, k, proc); + struct veo_args *argp = create_ve_args(k); + DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); set_arg_for_range(k->args, argp, r); DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, + uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); + veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - //copy_out(ve_addr_list, k, proc); - + // copy_out(ve_addr_list, k, proc); + veo_args_free(argp); } catch (exception &e) { std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; } - veo_args_free(argp); - for (const detail::accessor_info &arg : k->args) { - arg.release_access(); + + for (const detail::accessor_info &acc : k->args) { + acc.release_access(); } - }; + } void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) override { throw exception("not implemented"); - }; + } void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) override { throw exception("not implemented"); - }; + } detail::SUPPORT_PLATFORM_TYPE type() override { return detail::VE; } + int find_buf(container_type d) { + for (int j = 0; j < bufs_.size(); j++) { + if (d->get_raw_ptr() == bufs_[j].buf->get_raw_ptr()) { + return j; + } + } + return -1; + } + + void free_mem() { + while (bufs_.size() > 0) { + free_mem(bufs_[0].buf); + } + } + + void free_mem(container_type d) { + int index = find_buf(d); + if (index < 0) + return; + copy_back(bufs_[index]); + uint64_t device_ptr = bufs_[index].ptr; + + int rt = veo_free_mem(proc_.ve_proc, device_ptr); + if (rt != VEO_COMMAND_OK) { + DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", + bufs_[index].buf->get_size(), rt); + PRINT_ERR("[VEProc] free ve memory failed"); + throw exception("VE free memory return error"); + } + bufs_.erase(bufs_.begin() + index); + } + + void *alloc_mem(container_type d, access::mode mode = access::mode::read) { + int index = find_buf(d); + bool to_be_updated = (mode != access::mode::read); + if (index >= 0) { + bufs_[index].updated = to_be_updated; + return reinterpret_cast(bufs_[index].ptr); + } + + size_t size_in_byte = d->get_size(); + uint64_t ve_addr_int; + + int rt = veo_alloc_mem(proc_.ve_proc, &ve_addr_int, size_in_byte); + if (rt != VEO_COMMAND_OK) { + DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", + size_in_byte, rt); + PRINT_ERR("[VEProc] allocate VE memory failed"); + throw exception("VE allocate return error"); + } + DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", + size_in_byte, ve_addr_int); + buf_info bi{d, ve_addr_int, to_be_updated}; + bufs_.push_back(bi); + + if (mode != access::mode::discard_write && + mode != access::mode::discard_read_write) { + DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " + "{:#x}, size: {}, host address: {:#x}", + (size_t)ve_addr_int, size_in_byte, (size_t)d->get_raw_ptr()); + rt = veo_write_mem(proc_.ve_proc, ve_addr_int, d->get_raw_ptr(), + size_in_byte); + if (rt != VEO_COMMAND_OK) { + DEBUG_INFO( + "[VEProc] copy to ve memory failed, size: {}, return code: {}", + size_in_byte, rt); + PRINT_ERR("[VEProc] copy to ve memory failed"); + throw exception("VE copy return error"); + } + } + + return reinterpret_cast(bi.ptr); + } + + void *get_pointer(container_type d) { + int index = find_buf(d); + if (index < 0) + return nullptr; + return reinterpret_cast(bufs_[index].ptr); + } + + void copy_back(buf_info &bi) { + if (bi.updated) { + size_t size_in_byte = bi.buf->get_size(); + uint64_t device_ptr = bi.ptr; + DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " + "size: {}, host address: {:#x}", + (size_t)device_ptr, size_in_byte, + (size_t)bi.buf->get_raw_ptr()); + // do copy + int rt = veo_read_mem(proc_.ve_proc, bi.buf->get_raw_ptr(), device_ptr, + size_in_byte); + if (rt != veo_command_state::VEO_COMMAND_OK) { + DEBUG_INFO( + "[VEProc] copy from ve memory failed, size: {}, return code: {}", + size_in_byte, rt); + PRINT_ERR("[VEProc] copy from ve memory failed"); + throw exception("VE copy return error"); + } + bi.updated = false; + } + } + + void copy_back() { + for (int i = 0; i < bufs_.size(); i++) { + copy_back(bufs_[i]); + } + } + private: - VEContext ctx; - VEProc proc; - buffer_type bufs; + VEContext ctx_; + VEProc proc_; + buffer_type bufs_; }; } // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index e156879..d3c80d8 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -23,12 +23,12 @@ #include "sycl/property_list.hpp" #include "sycl/context.hpp" +#include "sycl/access.hpp" #include "sycl/accessor.hpp" #include "sycl/handler.hpp" #include "sycl/queue.hpp" // include buffer headers -// #include "sycl/access.hpp" #include "sycl/allocator.hpp" #include "sycl/buffer.hpp" diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index f44fe6f..e98cce5 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -20,33 +20,36 @@ class accessor { template accessor(buffer &bufferRef, const property_list &propList = {}) - : data(bufferRef.data), accessRange(bufferRef.get_range()), handler_(0) {} + : data(bufferRef.data), accessRange(bufferRef.get_range()) {} template accessor(buffer &bufferRef, range accessRange, const property_list &propList = {}) - : data(bufferRef.data), accessRange(accessRange), handler_(0) {} + : data(bufferRef.data), accessRange(accessRange) {} template accessor(buffer &bufferRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset), handler_(0) {} + accessOffset(accessOffset) {} template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, const property_list &propList = {}) - : data(bufferRef.data), accessRange(accessRange), accessOffset(0), - handler_(&commandGroupHandlerRef) {} + : data(bufferRef.data), accessRange(accessRange), accessOffset(0) { + bufferRef.push_context(commandGroupHandlerRef, accessMode); + } template accessor(buffer &bufferRef, handler &commandGroupHandlerRef, range accessRange, id accessOffset, const property_list &propList = {}) : data(bufferRef.data), accessRange(accessRange), - accessOffset(accessOffset), handler_(&commandGroupHandlerRef) {} + accessOffset(accessOffset) { + bufferRef.push_context(commandGroupHandlerRef, accessMode); + } constexpr bool is_placeholder() const { return isPlaceholder; } @@ -167,13 +170,13 @@ class accessor { typename = std::enable_if_t<(T == access::target::global_buffer) || (T == access::target::constant_buffer)>> void *get_pointer() const; + ~accessor() = default; private: std::shared_ptr> data; range accessRange; id accessOffset; - handler* handler_; size_t id2index(id index) const { size_t x = this->accessRange.get(0); diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index 8656d78..e616642 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -28,6 +28,7 @@ class context_bound { template > class buffer { + friend class handler; friend accessor; friend accessor accessor get_access(handler &commandGroupHandler) { + push_context(commandGroupHandler.get_context(), mode); commandGroupHandler.get_kernel()->args.push_back( detail::accessor_info(data, mode)); return accessor(*this); @@ -144,6 +146,7 @@ class buffer { accessor get_access(handler &commandGroupHandler, range accessRange, id accessOffset = {}) { + push_context(commandGroupHandler.get_context(), mode); commandGroupHandler.get_kernel()->args.push_back( detail::accessor_info(data, mode)); return accessor(*this, commandGroupHandler, @@ -182,11 +185,24 @@ class buffer { bufferRange = rhs.bufferRange; } - ~buffer() = default; + ~buffer() { + for (auto &it : ctx_) { + it.get_context_info()->free_mem(data); + } + } private: std::shared_ptr> data; range bufferRange; + std::vector ctx_; + + void push_context(context c, access::mode m = access::mode::read) { + c.get_context_info()->alloc_mem(data, m); + ctx_.push_back(c); + } + void push_context(handler h, access::mode m = access::mode::read) { + push_context(h.get_context()); + } }; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/accessor_info.hpp b/include/neoSYCL/sycl/detail/accessor_info.hpp index f70129a..13fe99e 100644 --- a/include/neoSYCL/sycl/detail/accessor_info.hpp +++ b/include/neoSYCL/sycl/detail/accessor_info.hpp @@ -41,4 +41,4 @@ struct accessor_info { } // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ \ No newline at end of file +#endif // SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ \ No newline at end of file diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp index c6df169..c60bf7c 100644 --- a/include/neoSYCL/sycl/detail/context_info.hpp +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -2,12 +2,22 @@ namespace neosycl::sycl::detail { class context_info { + using container_type = std::shared_ptr; + protected: using handler_type = shared_ptr_class; context_info() {} public: ~context_info() = default; + void *get_pointer(container_type c) const { + return task_handler->get_pointer(c); + } + void *alloc_mem(container_type c, access::mode m = access::mode::read) const { + return task_handler->alloc_mem(c, m); + } + void free_mem(container_type c) const { task_handler->free_mem(c); } + handler_type task_handler; }; diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index 8fc543e..fb0005d 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -6,6 +6,9 @@ namespace neosycl::sycl::detail { class task_handler { +protected: + using container_type = std::shared_ptr; + public: explicit task_handler() {} @@ -25,6 +28,12 @@ class task_handler { id<3> offset) = 0; virtual SUPPORT_PLATFORM_TYPE type() = 0; + + virtual void *get_pointer(container_type) = 0; + virtual void *alloc_mem(container_type, + access::mode = access::mode::read) = 0; + virtual void free_mem(container_type) = 0; + virtual void copy_back() = 0; }; class task_handler_cpu : public task_handler { @@ -92,6 +101,13 @@ class task_handler_cpu : public task_handler { }; SUPPORT_PLATFORM_TYPE type() override { return CPU; } + + void *get_pointer(container_type p) { return p->get_raw_ptr(); } + void *alloc_mem(container_type p, access::mode = access::mode::read) { + return p->get_raw_ptr(); + } + void free_mem(container_type) {} + void copy_back() {} }; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index dabdc52..9540611 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -133,6 +133,14 @@ class handler { kernel->args.push_back(detail::accessor_info(acc, m)); } + template + T *get_pointer(sycl::accessor acc) { + return ctx.get_context_info()->get_pointer(acc.data); + } + + context get_context() { return ctx; } + private: kernel_type kernel; device bind_device; From 45725dc78c914b13f90e98062a965b3f2cae93d2 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Mon, 4 Apr 2022 16:35:56 +0900 Subject: [PATCH 07/90] Dtor of a context_info variant called properly. --- include/neoSYCL/extensions/nec/ve_context_info.hpp | 6 +++--- include/neoSYCL/sycl/context.hpp | 2 ++ include/neoSYCL/sycl/detail/context_info.hpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index d30f512..5e1b8a5 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -10,9 +10,9 @@ class ve_context_info : public detail::context_info { VEContext ctx; VEContext create_ctx(VEProc proc) { - struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); - return VEContext{ctx}; + struct veo_thr_ctxt *c = veo_context_open(proc.ve_proc); + DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)c); + return VEContext{c}; } void free_ctx() { diff --git a/include/neoSYCL/sycl/context.hpp b/include/neoSYCL/sycl/context.hpp index 1f04d45..7d3fdac 100644 --- a/include/neoSYCL/sycl/context.hpp +++ b/include/neoSYCL/sycl/context.hpp @@ -11,6 +11,8 @@ class context { public: explicit context(const property_list &propList = {}) { init(device()); } + ~context() = default; + context(async_handler asyncHandler, const property_list &propList = {}); context(const device &dev, const property_list &propList = {}) { init(dev); } diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp index c60bf7c..41ef9a1 100644 --- a/include/neoSYCL/sycl/detail/context_info.hpp +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -9,7 +9,7 @@ class context_info { context_info() {} public: - ~context_info() = default; + virtual ~context_info() = default; void *get_pointer(container_type c) const { return task_handler->get_pointer(c); } From 6f6e4a331cbdab9ec3c9670273106d4c83bb22cd Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 7 May 2022 13:31:48 +0900 Subject: [PATCH 08/90] Remove compiler warnings --- include/neoSYCL/sycl/detail/task_handler.hpp | 8 ++++---- tests/test_parallel_for.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index fb0005d..da31327 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -102,12 +102,12 @@ class task_handler_cpu : public task_handler { SUPPORT_PLATFORM_TYPE type() override { return CPU; } - void *get_pointer(container_type p) { return p->get_raw_ptr(); } - void *alloc_mem(container_type p, access::mode = access::mode::read) { + void *get_pointer(container_type p) override { return p->get_raw_ptr(); } + void *alloc_mem(container_type p, access::mode = access::mode::read) override { return p->get_raw_ptr(); } - void free_mem(container_type) {} - void copy_back() {} + void free_mem(container_type) override {} + void copy_back() override {} }; } // namespace neosycl::sycl::detail diff --git a/tests/test_parallel_for.cpp b/tests/test_parallel_for.cpp index 93e65a2..ff5a083 100644 --- a/tests/test_parallel_for.cpp +++ b/tests/test_parallel_for.cpp @@ -32,9 +32,9 @@ TEST(parallel_for, simple_test) { kc[index] = ka[index] + kb[index]; }); }); + q.wait(); } - q.wait(); std::cout << std::endl << "Result:" << std::endl; for (auto e : c) From a82c75d82c2de777ce2e7d94442993c08456af6b Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 7 May 2022 13:52:48 +0900 Subject: [PATCH 09/90] Don't use {fmt} for DEBUG_INFO --- .../extensions/nec/ve_context_info.hpp | 14 ++++---- include/neoSYCL/extensions/nec/ve_queue.hpp | 8 ++--- .../extensions/nec/ve_task_handler.hpp | 36 +++++++++---------- include/neoSYCL/sycl/detail/debug.hpp | 6 ++-- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index 5e1b8a5..be85742 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -11,15 +11,15 @@ class ve_context_info : public detail::context_info { VEContext create_ctx(VEProc proc) { struct veo_thr_ctxt *c = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)c); + DEBUG_INFO("[VEContext] create ve context: %#x", (size_t)c); return VEContext{c}; } void free_ctx() { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); + DEBUG_INFO("[VEContext] release ve ctx: %#x", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", + DEBUG_INFO("[VEContext] release ve ctx: %#x failed, return code: %d", (size_t)ctx.ve_ctx, rt); PRINT_ERR("[VEContext] release ve ctx failed"); } @@ -33,16 +33,16 @@ class ve_context_info : public detail::context_info { throw ve_exception("[VEProc] create ve proc failed."); } uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); - DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", - (size_t)ve_proc, lib_path, ve_node); + DEBUG_INFO("[VEProc] create ve proc: %#x and load lib: %s on node: %d", + (size_t)ve_proc, lib_path.c_str(), ve_node); return nec::VEProc{ve_proc, handle}; } void free_proc() { - DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t)proc.ve_proc); + DEBUG_INFO("[VEProc] release ve proc: %#x", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", + DEBUG_INFO("[VEProc] release ve proc: %#x failed, return code: %d", (size_t)proc.ve_proc, rt); PRINT_ERR("[VEProc] release ve proc failed"); } diff --git a/include/neoSYCL/extensions/nec/ve_queue.hpp b/include/neoSYCL/extensions/nec/ve_queue.hpp index ba3c1ae..5934a1d 100644 --- a/include/neoSYCL/extensions/nec/ve_queue.hpp +++ b/include/neoSYCL/extensions/nec/ve_queue.hpp @@ -19,16 +19,16 @@ class ve_queue : public queue { std::to_string(ve_node) + " failed.."); } uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); - DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", - (size_t)ve_proc, lib_path, ve_node); + DEBUG_INFO("[VEProc] create ve proc: %#x and load lib: %s on node: %d", + (size_t)ve_proc, lib_path.c_str(), ve_node); return nec::VEProc{ve_proc, handle}; } void free_proc(nec::VEProc proc) { - DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t)proc.ve_proc); + DEBUG_INFO("[VEProc] release ve proc: %#x", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", + DEBUG_INFO("[VEProc] release ve proc: %#x failed, return code: %d", (size_t)proc.ve_proc, rt); PRINT_ERR("[VEProc] release ve proc failed"); } diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 0e96cfa..201cf99 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -24,7 +24,7 @@ class task_handler_ve : public detail::task_handler { if (!argp) { throw exception("ve args return nullptr"); } - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); + DEBUG_INFO("[VEKernel] create ve args: %#x", (size_t)argp); return argp; } @@ -47,15 +47,15 @@ class task_handler_ve : public detail::task_handler { acc.acquire_access(); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); - DEBUG_INFO("[VEKernel] single task: {}", k->name.c_str()); + DEBUG_INFO("[VEKernel] single task: %s", k->name.c_str()); try { struct veo_args *argp = create_ve_args(k); - DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); + DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); uint64_t ret_val; veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + DEBUG_INFO("[VEKernel] ve func finished, id: %lu, ret val: %lu", id, ret_val); // copy_out(ve_addr_list, k, proc); veo_args_free(argp); @@ -83,23 +83,23 @@ class task_handler_ve : public detail::task_handler { } DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), k->name.c_str()); - DEBUG_INFO("[VEKernel] parallel task: {}", k->name.c_str()); + DEBUG_INFO("[VEKernel] parallel task: %s", k->name.c_str()); try { struct veo_args *argp = create_ve_args(k); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); + DEBUG_INFO("[VEKernel] create ve args: %#x", (size_t)argp); set_arg_for_range(k->args, argp, r); - DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); + DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); uint64_t ret_val; veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, + DEBUG_INFO("[VEKernel] ve func finished, id: %lu, ret val: %lu", id, ret_val); // copy_out(ve_addr_list, k, proc); veo_args_free(argp); } catch (exception &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " - << e.what() << std::endl; + PRINT_ERR("[VEKernel] kernel invoke failed"); + throw exception("kernel invocation error"); } for (const detail::accessor_info &acc : k->args) { @@ -145,7 +145,7 @@ class task_handler_ve : public detail::task_handler { int rt = veo_free_mem(proc_.ve_proc, device_ptr); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", + DEBUG_INFO("[VEProc] free ve memory failed, size: %lu, return code: %d", bufs_[index].buf->get_size(), rt); PRINT_ERR("[VEProc] free ve memory failed"); throw exception("VE free memory return error"); @@ -166,12 +166,12 @@ class task_handler_ve : public detail::task_handler { int rt = veo_alloc_mem(proc_.ve_proc, &ve_addr_int, size_in_byte); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", + DEBUG_INFO("[VEProc] allocate VE memory size: %lu failed, return code: %d", size_in_byte, rt); PRINT_ERR("[VEProc] allocate VE memory failed"); throw exception("VE allocate return error"); } - DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", + DEBUG_INFO("[VEKernel] allocate ve memory, size: %lu, ve address: %#x", size_in_byte, ve_addr_int); buf_info bi{d, ve_addr_int, to_be_updated}; bufs_.push_back(bi); @@ -179,13 +179,13 @@ class task_handler_ve : public detail::task_handler { if (mode != access::mode::discard_write && mode != access::mode::discard_read_write) { DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " - "{:#x}, size: {}, host address: {:#x}", + "%#x, size: %lu, host address: %#x", (size_t)ve_addr_int, size_in_byte, (size_t)d->get_raw_ptr()); rt = veo_write_mem(proc_.ve_proc, ve_addr_int, d->get_raw_ptr(), size_in_byte); if (rt != VEO_COMMAND_OK) { DEBUG_INFO( - "[VEProc] copy to ve memory failed, size: {}, return code: {}", + "[VEProc] copy to ve memory failed, size: %lu, return code: %d", size_in_byte, rt); PRINT_ERR("[VEProc] copy to ve memory failed"); throw exception("VE copy return error"); @@ -206,8 +206,8 @@ class task_handler_ve : public detail::task_handler { if (bi.updated) { size_t size_in_byte = bi.buf->get_size(); uint64_t device_ptr = bi.ptr; - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " - "size: {}, host address: {:#x}", + DEBUG_INFO("[VEKernel] copy from ve memory, device address: %#x, " + "size: %lu, host address: %#x", (size_t)device_ptr, size_in_byte, (size_t)bi.buf->get_raw_ptr()); // do copy @@ -215,7 +215,7 @@ class task_handler_ve : public detail::task_handler { size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { DEBUG_INFO( - "[VEProc] copy from ve memory failed, size: {}, return code: {}", + "[VEProc] copy from ve memory failed, size: %lu, return code: %d", size_in_byte, rt); PRINT_ERR("[VEProc] copy from ve memory failed"); throw exception("VE copy return error"); diff --git a/include/neoSYCL/sycl/detail/debug.hpp b/include/neoSYCL/sycl/detail/debug.hpp index 0bac32e..72fc2da 100644 --- a/include/neoSYCL/sycl/detail/debug.hpp +++ b/include/neoSYCL/sycl/detail/debug.hpp @@ -4,12 +4,12 @@ #ifdef DEBUG #include #define DEBUG_INFO(...) \ - std::cout << "[DEBUG] " << printf(__VA_ARGS__) << std::endl + std::cerr << "[DEBUG] " << printf(__VA_ARGS__) << std::endl #else #define DEBUG_INFO(format, ...) #endif -#define PRINT_INFO(...) std::cout << "[INFO] " << __VA_ARGS__ << std::endl -#define PRINT_ERR(...) std::cout << "[ERROR] " << __VA_ARGS__ << std::endl +#define PRINT_INFO(...) std::cerr << "[INFO] " << __VA_ARGS__ << std::endl +#define PRINT_ERR(...) std::cerr << "[ERROR] " << __VA_ARGS__ << std::endl #endif // SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ From f80641b9abdb9abbbd62cdda01bef78de8cc7e33 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 7 May 2022 19:42:36 +0900 Subject: [PATCH 10/90] Capture support assuming host code transformation --- examples/sequential_vector.cpp | 2 +- .../extensions/nec/ve_context_info.hpp | 6 +- include/neoSYCL/extensions/nec/ve_device.hpp | 26 --- include/neoSYCL/extensions/nec/ve_info.hpp | 4 +- include/neoSYCL/extensions/nec/ve_kernel.hpp | 184 ------------------ include/neoSYCL/extensions/nec/ve_queue.hpp | 58 ------ .../neoSYCL/extensions/nec/ve_selector.hpp | 2 + include/neoSYCL/extensions/nec/ve_task.hpp | 23 --- .../extensions/nec/ve_task_handler.hpp | 49 ++--- include/neoSYCL/sycl.hpp | 1 - include/neoSYCL/sycl/accessor.hpp | 3 +- include/neoSYCL/sycl/buffer.hpp | 2 +- include/neoSYCL/sycl/detail/debug.hpp | 2 +- include/neoSYCL/sycl/detail/task_handler.hpp | 1 + include/neoSYCL/sycl/handler.hpp | 8 +- 15 files changed, 46 insertions(+), 325 deletions(-) delete mode 100644 include/neoSYCL/extensions/nec/ve_device.hpp delete mode 100644 include/neoSYCL/extensions/nec/ve_kernel.hpp delete mode 100644 include/neoSYCL/extensions/nec/ve_queue.hpp delete mode 100644 include/neoSYCL/extensions/nec/ve_task.hpp diff --git a/examples/sequential_vector.cpp b/examples/sequential_vector.cpp index 6e202d0..465fb82 100644 --- a/examples/sequential_vector.cpp +++ b/examples/sequential_vector.cpp @@ -41,7 +41,7 @@ int main() { } }); }); // End of our commands for this queue - + myQueue.wait(); } // End scope, so we wait for the queue to complete std::cout << "Result:" << std::endl; diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index be85742..c187bf0 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -32,9 +32,11 @@ class ve_context_info : public detail::context_info { DEBUG_INFO("[VEProc] create ve proc on node: %d failed..", ve_node); throw ve_exception("[VEProc] create ve proc failed."); } - uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); + const char* env = getenv(ENV_VE_KERNEL); + string_class fn(env?env:lib_path); + uint64_t handle = veo_load_library(ve_proc, fn.c_str()); DEBUG_INFO("[VEProc] create ve proc: %#x and load lib: %s on node: %d", - (size_t)ve_proc, lib_path.c_str(), ve_node); + (size_t)ve_proc, fn.c_str(), ve_node); return nec::VEProc{ve_proc, handle}; } diff --git a/include/neoSYCL/extensions/nec/ve_device.hpp b/include/neoSYCL/extensions/nec/ve_device.hpp deleted file mode 100644 index 547179c..0000000 --- a/include/neoSYCL/extensions/nec/ve_device.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ -// obsolete -#if 0 -#include "ve_offload.h" -#include "neoSYCL/sycl/device.hpp" - -namespace neosycl::sycl { - -class ve_device : public device { -private: - int node_id; - -public: - ve_device(int node_id) : node_id(node_id) {} - - bool is_host() const override { return false; } - bool is_cpu() const override { return false; } - bool is_gpu() const override { return false; } - bool is_accelerator() const override { return true; } - int get_node_id() const { return node_id; } -}; - -} // namespace neosycl::sycl -#endif -#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_info.hpp b/include/neoSYCL/extensions/nec/ve_info.hpp index a5de753..8f49a48 100644 --- a/include/neoSYCL/extensions/nec/ve_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_info.hpp @@ -6,9 +6,9 @@ namespace neosycl::sycl::extensions::nec { -const int DEFAULT_VE_NODE = 0; +const int DEFAULT_VE_NODE = -1; const string_class DEFAULT_VE_LIB = "./kernel.so"; - +const char* ENV_VE_KERNEL = "NEOSYCL_VE_KERNEL"; struct VEProc { struct veo_proc_handle *ve_proc; uint64_t handle; diff --git a/include/neoSYCL/extensions/nec/ve_kernel.hpp b/include/neoSYCL/extensions/nec/ve_kernel.hpp deleted file mode 100644 index 3cf4add..0000000 --- a/include/neoSYCL/extensions/nec/ve_kernel.hpp +++ /dev/null @@ -1,184 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ -// obsolete -#if 0 -#include "ve_info.hpp" - -namespace neosycl::sycl::detail { - -struct VEKernel : public Kernel { - nec::VEProc proc; - nec::VEContext ctx; - - nec::VEContext ctx_create(nec::VEProc proc) { - struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t)ctx); - return nec::VEContext{ctx}; - } - - void free_ctx(nec::VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t)ctx.ve_ctx); - int rt = veo_context_close(ctx.ve_ctx); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", - (size_t)ctx.ve_ctx, rt); - PRINT_ERR("[VEContext] release ve ctx failed"); - } - } - - struct veo_args *create_ve_args() { - struct veo_args *argp = veo_args_alloc(); - if (!argp) { - throw nec::VEException("ve args return nullptr"); - } - return argp; - } - - VEKernel(const vector_class &args, const string_class &kernel_name, - const nec::VEProc &proc) - : Kernel(args, kernel_name), proc(proc) { - ctx = ctx_create(proc); - } - - void set_arg_for_range(struct veo_args *argp, const range<1> &r) { - int index = args.size(); - veo_args_set_i64(argp, index, r.size()); - veo_args_set_i64(argp, index + 1, 1); - } - - vector_class copy_in(struct veo_args *argp) { - vector_class ve_addr_list; - - for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; - size_t size_in_byte = arg.container->get_size(); - - uint64_t ve_addr_int; - int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] allocate VE memory size: {} failed, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] allocate VE memory failed"); - throw nec::VEException("VE allocate return error"); - } - ve_addr_list.push_back(ve_addr_int); - - DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, ve_addr_int); - - if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " - "{:#x}, size: {}, host address: {:#x}", - (size_t)ve_addr_int, size_in_byte, - (size_t)arg.container->get_data_ptr()); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, - arg.container->get_data_ptr(), size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy to ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy to ve memory failed"); - throw nec::VEException("VE copy return error"); - } - } - veo_args_set_i64(argp, i, ve_addr_int); - } - return ve_addr_list; - } - - void copy_out(vector_class ve_addr_list) { - for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; - size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = ve_addr_list[i]; - if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, " - "size: {}, host address: {:#x}", - (size_t)device_ptr, size_in_byte, - (size_t)arg.container->get_data_ptr()); - // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_data_ptr(), - device_ptr, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy from ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy from ve memory failed"); - throw nec::VEException("VE copy return error"); - } - } - int rt = veo_free_mem(proc.ve_proc, device_ptr); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", - size_in_byte, rt); - PRINT_ERR("[VEProc] free ve memory failed"); - throw nec::VEException("VE free memory return error"); - } - } - } - - void single_task() override { - DEBUG_INFO("[VEKernel] single task: {}", kernel_name); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); - - try { - - vector_class ve_addr_list = copy_in(argp); - DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, - kernel_name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, - ret_val); - copy_out(ve_addr_list); - - } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " - << e.what() << std::endl; - } - - veo_args_free(argp); - } - void parallel_for(const range<1> &r) override { - DEBUG_INFO("[VEKernel] parallel for 1d {} with range: {}", kernel_name, - r.size()); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t)argp); - - try { - vector_class ve_addr_list = copy_in(argp); - DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); - set_arg_for_range(argp, r); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, - kernel_name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, - ret_val); - copy_out(ve_addr_list); - - } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " - << e.what() << std::endl; - } - - veo_args_free(argp); - } - void parallel_for(const range<2> &r) override { - DEBUG_INFO("[VEKernel] parallel_for 2d"); - } - void parallel_for(const range<3> &r) override { - DEBUG_INFO("[VEKernel] parallel_for 3d"); - } - - virtual ~VEKernel() { free_ctx(ctx); } -}; - -} // namespace neosycl::sycl::detail -#endif -#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_queue.hpp b/include/neoSYCL/extensions/nec/ve_queue.hpp deleted file mode 100644 index 5934a1d..0000000 --- a/include/neoSYCL/extensions/nec/ve_queue.hpp +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ -// obsolete -#if 0 -#include "ve_offload.h" -#include "CL/SYCL/nec/ve_task.hpp" - -namespace neosycl::sycl { - -class ve_queue : public queue { -private: - ve_device dev; - nec::VEProc proc; - - nec::VEProc proc_create(const string_class &lib_path, int ve_node) { - struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); - if (!ve_proc) { - throw nec::VEException("[VEProc] create ve proc on node: " + - std::to_string(ve_node) + " failed.."); - } - uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); - DEBUG_INFO("[VEProc] create ve proc: %#x and load lib: %s on node: %d", - (size_t)ve_proc, lib_path.c_str(), ve_node); - return nec::VEProc{ve_proc, handle}; - } - - void free_proc(nec::VEProc proc) { - DEBUG_INFO("[VEProc] release ve proc: %#x", (size_t)proc.ve_proc); - int rt = veo_proc_destroy(proc.ve_proc); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: %#x failed, return code: %d", - (size_t)proc.ve_proc, rt); - PRINT_ERR("[VEProc] release ve proc failed"); - } - } - -public: - ve_queue(const string_class &path = nec::DEFAULT_VE_LIB) - : dev(nec::DEFAULT_VE_NODE), queue() { - proc = proc_create(path, nec::DEFAULT_VE_NODE); - } - - ve_queue(const ve_device &dev, const string_class &path = nec::DEFAULT_VE_LIB) - : dev(dev), queue() { - proc = proc_create(path, dev.get_node_id()); - } - - detail::Task *build_task() override { return new detail::VETask(proc); } - - virtual ~ve_queue() { - wait(); - free_proc(proc); - } -}; - -} // namespace neosycl::sycl -#endif -#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_selector.hpp b/include/neoSYCL/extensions/nec/ve_selector.hpp index fbd0077..d5821a4 100644 --- a/include/neoSYCL/extensions/nec/ve_selector.hpp +++ b/include/neoSYCL/extensions/nec/ve_selector.hpp @@ -1,6 +1,8 @@ #ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ +#include "neoSYCL/extensions/nec/ve_info.hpp" +#include "neoSYCL/extensions/nec/ve_task_handler.hpp" #include "neoSYCL/extensions/nec/ve_device_info.hpp" #include "neoSYCL/sycl/detail/context_info.hpp" #include "neoSYCL/extensions/nec/ve_context_info.hpp" diff --git a/include/neoSYCL/extensions/nec/ve_task.hpp b/include/neoSYCL/extensions/nec/ve_task.hpp deleted file mode 100644 index 02b6536..0000000 --- a/include/neoSYCL/extensions/nec/ve_task.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ -// obsolete -#if 0 -#include "ve_kernel.hpp" - -namespace neosycl::sycl::detail { - -struct VETask : public Task { - nec::VEProc proc; - - VETask(const nec::VEProc &proc) : proc(proc) {} - - bool is_cpu() override { return false; } - - std::shared_ptr get_kernel(string_class name) override { - return std::shared_ptr(new VEKernel(args, name, proc)); - } -}; - -} // namespace neosycl::sycl::detail -#endif -#endif // SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 201cf99..6431d1a 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -1,12 +1,7 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP #define NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP -#include "neoSYCL/extensions/nec/ve_info.hpp" -#include "neoSYCL/sycl/detail/accessor_info.hpp" -#include "ve_offload.h" - namespace neosycl::sycl::extensions::nec { - class task_handler_ve : public detail::task_handler { struct buf_info { container_type buf; @@ -17,7 +12,22 @@ class task_handler_ve : public detail::task_handler { public: task_handler_ve(const VEProc &p, const VEContext &c) : proc_(p), ctx_(c) {} - ~task_handler_ve() { free_mem(); } + ~task_handler_ve() { /* do nothing */ } + + void set_capture(const char *name, void *p, size_t sz) override { + uint64_t devptr = veo_get_sym(proc_.ve_proc, proc_.handle, name); + if (devptr == 0) { + throw exception("ve_get_sym return 0"); + } + + int rt = veo_write_mem(proc_.ve_proc, devptr, p, sz); + if (rt != VEO_COMMAND_OK) { + DEBUG_INFO("setup kernel \"%s\" failed, size: %lu, return code: %d", name, + sz, rt); + PRINT_ERR("setup kernel failed"); + throw exception("setup kernel return error"); + } + } struct veo_args *alloc_veo_args() { struct veo_args *argp = veo_args_alloc(); @@ -28,28 +38,16 @@ class task_handler_ve : public detail::task_handler { return argp; } - struct veo_args *create_ve_args(shared_ptr_class k) { - struct veo_args *argp = alloc_veo_args(); - - for (int i = 0; i < k->args.size(); i++) { - detail::accessor_info acc = k->args[i]; - - void *ve_addr = alloc_mem(acc.container, acc.mode); - uint64_t ve_addr_int = reinterpret_cast(ve_addr); - veo_args_set_i64(argp, i, ve_addr_int); - } - return argp; - } - void single_task(shared_ptr_class k, const std::function &func) override { for (const detail::accessor_info &acc : k->args) { acc.acquire_access(); + alloc_mem(acc.container, acc.mode); } DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); DEBUG_INFO("[VEKernel] single task: %s", k->name.c_str()); try { - struct veo_args *argp = create_ve_args(k); + struct veo_args *argp = alloc_veo_args(); DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); @@ -68,26 +66,29 @@ class task_handler_ve : public detail::task_handler { } } +#if 0 void set_arg_for_range(const vector_class &args, struct veo_args *argp, const range<1> &r) { int index = args.size(); veo_args_set_i64(argp, index, r.size()); veo_args_set_i64(argp, index + 1, 1); } +#endif void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - for (const detail::accessor_info &arg : k->args) { - arg.acquire_access(); + for (const detail::accessor_info &acc : k->args) { + acc.acquire_access(); + alloc_mem(acc.container, acc.mode); } DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), k->name.c_str()); DEBUG_INFO("[VEKernel] parallel task: %s", k->name.c_str()); try { - struct veo_args *argp = create_ve_args(k); + struct veo_args *argp = alloc_veo_args(); DEBUG_INFO("[VEKernel] create ve args: %#x", (size_t)argp); - set_arg_for_range(k->args, argp, r); + //set_arg_for_range(k->args, argp, r); DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, k->name.c_str(), argp); diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index d3c80d8..64746de 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -17,7 +17,6 @@ #include "sycl/device_selector/cpu_selector.hpp" #ifdef BUILD_VE -#include "extensions/nec/ve_task_handler.hpp" #include "extensions/nec/ve_selector.hpp" #endif diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index e98cce5..bead246 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -15,7 +15,8 @@ template class accessor { - + friend class handler; + public: template accessor(buffer &bufferRef, diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index e616642..a445a02 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -201,7 +201,7 @@ class buffer { ctx_.push_back(c); } void push_context(handler h, access::mode m = access::mode::read) { - push_context(h.get_context()); + push_context(h.get_context(), m); } }; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/debug.hpp b/include/neoSYCL/sycl/detail/debug.hpp index 72fc2da..9aa42fb 100644 --- a/include/neoSYCL/sycl/detail/debug.hpp +++ b/include/neoSYCL/sycl/detail/debug.hpp @@ -4,7 +4,7 @@ #ifdef DEBUG #include #define DEBUG_INFO(...) \ - std::cerr << "[DEBUG] " << printf(__VA_ARGS__) << std::endl + std::cerr << "[DEBUG] " << printf(__VA_ARGS__) << "\n"; #else #define DEBUG_INFO(format, ...) #endif diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index da31327..2423dc6 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -34,6 +34,7 @@ class task_handler { access::mode = access::mode::read) = 0; virtual void free_mem(container_type) = 0; virtual void copy_back() = 0; + virtual void set_capture(const char* name, void* p, size_t sz) {} }; class task_handler_cpu : public task_handler { diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 9540611..6392e1a 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -54,6 +54,12 @@ class handler { : bind_device(std::move(dev)), counter(std::move(counter)), kernel(new detail::kernel()), ctx(c) {} + template + void set_capture(void* p, size_t sz){ + const char* name = ("__"+detail::get_kernel_name_from_class()+"_obj__").c_str(); + ctx.get_context_info()->task_handler->set_capture(name,p,sz); + } + template void single_task(KernelType kernelFunc) { kernel->name = detail::get_kernel_name_from_class(); @@ -136,7 +142,7 @@ class handler { template T *get_pointer(sycl::accessor acc) { - return ctx.get_context_info()->get_pointer(acc.data); + return (T*)ctx.get_context_info()->get_pointer(acc.data); } context get_context() { return ctx; } From cb630a6e59cdc3d8dd9003fe2462c97dfdbec302 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 7 May 2022 21:33:55 +0900 Subject: [PATCH 11/90] Lambda for handler::set_capture() --- include/neoSYCL/sycl/handler.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 6392e1a..3d2678c 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -55,9 +55,16 @@ class handler { kernel(new detail::kernel()), ctx(c) {} template - void set_capture(void* p, size_t sz){ + void copy_capture(KernelName* p){ const char* name = ("__"+detail::get_kernel_name_from_class()+"_obj__").c_str(); - ctx.get_context_info()->task_handler->set_capture(name,p,sz); + ctx.get_context_info()->task_handler->set_capture(name,p,sizeof(KernelName)); + } + + template + void set_capture(KernelType kernelFunc){ + if(bind_device.type() != detail::VE) + return; + kernelFunc(); } template From d78157048ae7aa19a7fc11f419fd6609e392a847 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 7 May 2022 21:53:14 +0900 Subject: [PATCH 12/90] Add "override" to some methods --- include/neoSYCL/extensions/nec/ve_task_handler.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 6431d1a..6c752f1 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -137,7 +137,7 @@ class task_handler_ve : public detail::task_handler { } } - void free_mem(container_type d) { + void free_mem(container_type d) override { int index = find_buf(d); if (index < 0) return; @@ -154,7 +154,7 @@ class task_handler_ve : public detail::task_handler { bufs_.erase(bufs_.begin() + index); } - void *alloc_mem(container_type d, access::mode mode = access::mode::read) { + void *alloc_mem(container_type d, access::mode mode = access::mode::read) override { int index = find_buf(d); bool to_be_updated = (mode != access::mode::read); if (index >= 0) { @@ -196,7 +196,7 @@ class task_handler_ve : public detail::task_handler { return reinterpret_cast(bi.ptr); } - void *get_pointer(container_type d) { + void *get_pointer(container_type d) override { int index = find_buf(d); if (index < 0) return nullptr; @@ -225,7 +225,7 @@ class task_handler_ve : public detail::task_handler { } } - void copy_back() { + void copy_back() override { for (int i = 0; i < bufs_.size(); i++) { copy_back(bufs_[i]); } From 2530a61d320970962412696f18e10cd5930eb60a Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Mon, 9 May 2022 02:34:21 +0900 Subject: [PATCH 13/90] Cleanup debug messages --- .../extensions/nec/ve_context_info.hpp | 24 ++- .../extensions/nec/ve_task_handler.hpp | 159 +++++++----------- include/neoSYCL/sycl/accessor.hpp | 4 +- include/neoSYCL/sycl/detail/debug.hpp | 22 ++- include/neoSYCL/sycl/detail/task_handler.hpp | 5 +- include/neoSYCL/sycl/handler.hpp | 9 +- include/neoSYCL/sycl/queue.hpp | 7 +- 7 files changed, 105 insertions(+), 125 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index c187bf0..c9d2ea4 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -11,17 +11,16 @@ class ve_context_info : public detail::context_info { VEContext create_ctx(VEProc proc) { struct veo_thr_ctxt *c = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: %#x", (size_t)c); + DEBUG_INFO("veo_ctxt created: %#x", (size_t)c); return VEContext{c}; } void free_ctx() { - DEBUG_INFO("[VEContext] release ve ctx: %#x", (size_t)ctx.ve_ctx); + DEBUG_INFO("veo_ctxt released: %#x", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: %#x failed, return code: %d", - (size_t)ctx.ve_ctx, rt); - PRINT_ERR("[VEContext] release ve ctx failed"); + PRINT_ERR("veo_context_close() failed: %#x, retval=%d", + (size_t)ctx.ve_ctx, rt); } } @@ -29,24 +28,23 @@ class ve_context_info : public detail::context_info { int ve_node = DEFAULT_VE_NODE) { struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); if (!ve_proc) { - DEBUG_INFO("[VEProc] create ve proc on node: %d failed..", ve_node); - throw ve_exception("[VEProc] create ve proc failed."); + PRINT_ERR("veo_proc_create(%d) failed", ve_node); + throw ve_exception("create_proc() failed"); } + DEBUG_INFO("veo_proc created: %#x", (size_t)ve_proc); + const char* env = getenv(ENV_VE_KERNEL); string_class fn(env?env:lib_path); uint64_t handle = veo_load_library(ve_proc, fn.c_str()); - DEBUG_INFO("[VEProc] create ve proc: %#x and load lib: %s on node: %d", - (size_t)ve_proc, fn.c_str(), ve_node); + DEBUG_INFO("kernel lib loaded: %#x, %s", (size_t)ve_proc, fn.c_str()); return nec::VEProc{ve_proc, handle}; } void free_proc() { - DEBUG_INFO("[VEProc] release ve proc: %#x", (size_t)proc.ve_proc); + DEBUG_INFO("veo_proc released: %#x", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: %#x failed, return code: %d", - (size_t)proc.ve_proc, rt); - PRINT_ERR("[VEProc] release ve proc failed"); + PRINT_ERR("veo_proc_destroy() failed"); } } diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 6c752f1..4bc37fe 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -12,112 +12,58 @@ class task_handler_ve : public detail::task_handler { public: task_handler_ve(const VEProc &p, const VEContext &c) : proc_(p), ctx_(c) {} - ~task_handler_ve() { /* do nothing */ } + ~task_handler_ve() { /* do nothing */ + } void set_capture(const char *name, void *p, size_t sz) override { + DEBUG_INFO("set capture: %s %#x %#x",name, (size_t)proc_.ve_proc, (size_t)proc_.handle); uint64_t devptr = veo_get_sym(proc_.ve_proc, proc_.handle, name); if (devptr == 0) { - throw exception("ve_get_sym return 0"); + PRINT_ERR("veo_get_sym() failed: %s", name); + throw exception("setup_capture() failed"); } int rt = veo_write_mem(proc_.ve_proc, devptr, p, sz); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO("setup kernel \"%s\" failed, size: %lu, return code: %d", name, - sz, rt); - PRINT_ERR("setup kernel failed"); - throw exception("setup kernel return error"); + PRINT_ERR("veo_write_mem() failed: %s", name); + throw exception("setup_capture() failed"); } } struct veo_args *alloc_veo_args() { struct veo_args *argp = veo_args_alloc(); if (!argp) { - throw exception("ve args return nullptr"); + PRINT_ERR("veo_args_alloc() failed"); + throw exception("alloc_veo_args() failed"); } - DEBUG_INFO("[VEKernel] create ve args: %#x", (size_t)argp); return argp; } void single_task(shared_ptr_class k, const std::function &func) override { - for (const detail::accessor_info &acc : k->args) { - acc.acquire_access(); - alloc_mem(acc.container, acc.mode); - } - DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); - DEBUG_INFO("[VEKernel] single task: %s", k->name.c_str()); - try { - struct veo_args *argp = alloc_veo_args(); - DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, - k->name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: %lu, ret val: %lu", id, - ret_val); - // copy_out(ve_addr_list, k, proc); - veo_args_free(argp); - } catch (exception &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " - << e.what() << std::endl; - } - for (const detail::accessor_info &acc : k->args) { - acc.release_access(); - } + DEBUG_INFO("single_task(): %s", k->name.c_str()); + call_kernel_func(k); } -#if 0 - void set_arg_for_range(const vector_class &args, - struct veo_args *argp, const range<1> &r) { - int index = args.size(); - veo_args_set_i64(argp, index, r.size()); - veo_args_set_i64(argp, index + 1, 1); - } -#endif - void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)> &func, id<1> offset) override { - for (const detail::accessor_info &acc : k->args) { - acc.acquire_access(); - alloc_mem(acc.container, acc.mode); - } - DEBUG_INFO("execute parallel<1> %d kernel, name: %s\n", type(), - k->name.c_str()); - DEBUG_INFO("[VEKernel] parallel task: %s", k->name.c_str()); - try { - struct veo_args *argp = alloc_veo_args(); - DEBUG_INFO("[VEKernel] create ve args: %#x", (size_t)argp); - //set_arg_for_range(k->args, argp, r); - DEBUG_INFO("[VEKernel] invoke ve func: %s", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, - k->name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: %lu, ret val: %lu", id, - ret_val); - // copy_out(ve_addr_list, k, proc); - veo_args_free(argp); - } catch (exception &e) { - PRINT_ERR("[VEKernel] kernel invoke failed"); - throw exception("kernel invocation error"); - } - - for (const detail::accessor_info &acc : k->args) { - acc.release_access(); - } + DEBUG_INFO("parallel_for_1d(): %s", k->name.c_str()); + call_kernel_func(k); } void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)> &func, id<2> offset) override { - throw exception("not implemented"); + DEBUG_INFO("parallel_for_2d(): %s", k->name.c_str()); + call_kernel_func(k); } void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)> &func, id<3> offset) override { - throw exception("not implemented"); + DEBUG_INFO("parallel_for_3d(): %s", k->name.c_str()); + call_kernel_func(k); } detail::SUPPORT_PLATFORM_TYPE type() override { return detail::VE; } @@ -146,15 +92,14 @@ class task_handler_ve : public detail::task_handler { int rt = veo_free_mem(proc_.ve_proc, device_ptr); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: %lu, return code: %d", - bufs_[index].buf->get_size(), rt); - PRINT_ERR("[VEProc] free ve memory failed"); - throw exception("VE free memory return error"); + PRINT_ERR("veo_free_mem() failed: return code=%d", rt); + throw exception("free_mem() failed"); } bufs_.erase(bufs_.begin() + index); } - void *alloc_mem(container_type d, access::mode mode = access::mode::read) override { + void *alloc_mem(container_type d, + access::mode mode = access::mode::read) override { int index = find_buf(d); bool to_be_updated = (mode != access::mode::read); if (index >= 0) { @@ -167,29 +112,24 @@ class task_handler_ve : public detail::task_handler { int rt = veo_alloc_mem(proc_.ve_proc, &ve_addr_int, size_in_byte); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: %lu failed, return code: %d", - size_in_byte, rt); - PRINT_ERR("[VEProc] allocate VE memory failed"); - throw exception("VE allocate return error"); + PRINT_ERR("veo_alloc_mem() failed: return code=%d", rt); + throw exception("alloc_mem() failed"); } - DEBUG_INFO("[VEKernel] allocate ve memory, size: %lu, ve address: %#x", - size_in_byte, ve_addr_int); + + DEBUG_INFO("memory alloc: vaddr=%#x, size=%lu", ve_addr_int, size_in_byte); buf_info bi{d, ve_addr_int, to_be_updated}; bufs_.push_back(bi); if (mode != access::mode::discard_write && mode != access::mode::discard_read_write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: " - "%#x, size: %lu, host address: %#x", - (size_t)ve_addr_int, size_in_byte, (size_t)d->get_raw_ptr()); + DEBUG_INFO("memory copy (h2v): " + "vaddr=%#x, haddr=%#x, size=%lu", + (size_t)ve_addr_int, (size_t)d->get_raw_ptr(), size_in_byte); rt = veo_write_mem(proc_.ve_proc, ve_addr_int, d->get_raw_ptr(), size_in_byte); if (rt != VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy to ve memory failed, size: %lu, return code: %d", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy to ve memory failed"); - throw exception("VE copy return error"); + PRINT_ERR("veo_write_mem() failed"); + throw exception("alloc_mem() failed"); } } @@ -207,21 +147,17 @@ class task_handler_ve : public detail::task_handler { if (bi.updated) { size_t size_in_byte = bi.buf->get_size(); uint64_t device_ptr = bi.ptr; - DEBUG_INFO("[VEKernel] copy from ve memory, device address: %#x, " - "size: %lu, host address: %#x", - (size_t)device_ptr, size_in_byte, - (size_t)bi.buf->get_raw_ptr()); + DEBUG_INFO("memory copy (v2h): " + "vaddr=%#x, haddr=%#x, size=%lu", + (size_t)device_ptr, (size_t)bi.buf->get_raw_ptr(), + size_in_byte); // do copy int rt = veo_read_mem(proc_.ve_proc, bi.buf->get_raw_ptr(), device_ptr, size_in_byte); if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO( - "[VEProc] copy from ve memory failed, size: %lu, return code: %d", - size_in_byte, rt); - PRINT_ERR("[VEProc] copy from ve memory failed"); - throw exception("VE copy return error"); + PRINT_ERR("veo_read_mem() failed"); + throw exception("copy_back() failed"); } - bi.updated = false; } } @@ -235,6 +171,29 @@ class task_handler_ve : public detail::task_handler { VEContext ctx_; VEProc proc_; buffer_type bufs_; + + void call_kernel_func(shared_ptr_class k) { + for (const detail::accessor_info &acc : k->args) { + acc.acquire_access(); + alloc_mem(acc.container, acc.mode); + } + DEBUG_INFO("-- KENREL EXEC BEGIN --"); + try { + struct veo_args *argp = alloc_veo_args(); + uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, + k->name.c_str(), argp); + uint64_t ret_val; + veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); + DEBUG_INFO("-- KERNEL EXEC END (ret=%lu) --", ret_val); + // copy_out(ve_addr_list, k, proc); + veo_args_free(argp); + } catch (exception &e) { + PRINT_ERR("kernel execution failed: %s", e.what()); + } + for (const detail::accessor_info &acc : k->args) { + acc.release_access(); + } + } }; } // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index bead246..b8c0926 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -83,7 +83,7 @@ class accessor { (D > 0)>> dataT &operator[](id index) const { size_t index_val = id2index(index); - DEBUG_INFO("[Accessor] access with index: %d", index_val); + DEBUG_INFO("access with index: %d", index_val); return (*data).get(index_val); } @@ -92,7 +92,7 @@ class accessor { typename = std::enable_if_t<(Mode == access::mode::read) && (D > 0)>> dataT operator[](id index) const { size_t index_val = id2index(index); - DEBUG_INFO("[Accessor] read access with index: %d", index_val); + DEBUG_INFO("read access with index: %d", index_val); return (*data).get(index_val); } diff --git a/include/neoSYCL/sycl/detail/debug.hpp b/include/neoSYCL/sycl/detail/debug.hpp index 9aa42fb..387429f 100644 --- a/include/neoSYCL/sycl/detail/debug.hpp +++ b/include/neoSYCL/sycl/detail/debug.hpp @@ -1,15 +1,25 @@ #ifndef SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ #define SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ +#include #ifdef DEBUG -#include -#define DEBUG_INFO(...) \ - std::cerr << "[DEBUG] " << printf(__VA_ARGS__) << "\n"; +#define DEBUG_INFO(...) \ + { \ + std::fprintf(stderr,"[DEBUG] "); \ + std::fprintf(stderr,__VA_ARGS__); \ + std::fprintf(stderr,"\n"); \ + } #else #define DEBUG_INFO(format, ...) #endif -#define PRINT_INFO(...) std::cerr << "[INFO] " << __VA_ARGS__ << std::endl -#define PRINT_ERR(...) std::cerr << "[ERROR] " << __VA_ARGS__ << std::endl - +#define PRINT_ERR(...) \ + { \ + std::fprintf(stderr,"[ERROR] "); \ + std::fprintf(stderr,__VA_ARGS__); \ + std::fprintf(stderr,"\n"); \ + std::fprintf(stderr, \ + "[SOURCE LOC] Line %d in %s\n", __LINE__, __FILE__); \ + } +// std::fprintf(stderr,"[LOCATION]: Line %d in %s\n", __LINE__, __FILE__); #endif // SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index 2423dc6..dadf971 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -46,7 +46,7 @@ class task_handler_cpu : public task_handler { for (const accessor_info &arg : k->args) { arg.acquire_access(); } - DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); + DEBUG_INFO("single_task(): %s\n", k->name.c_str()); func(); for (const accessor_info &arg : k->args) { arg.release_access(); @@ -59,6 +59,7 @@ class task_handler_cpu : public task_handler { for (const accessor_info &arg : k->args) { arg.acquire_access(); } + DEBUG_INFO("parallel_for_1d(): %s\n", k->name.c_str()); for (size_t x = offset.get(0); x < r.get(0); x++) { func(id<1>(x)); } @@ -73,6 +74,7 @@ class task_handler_cpu : public task_handler { for (const accessor_info &arg : k->args) { arg.acquire_access(); } + DEBUG_INFO("parallel_for_2d(): %s\n", k->name.c_str()); for (size_t x = offset.get(0); x < r.get(0); x++) { for (size_t y = offset.get(1); y < r.get(1); y++) { func(id<2>(x, y)); @@ -89,6 +91,7 @@ class task_handler_cpu : public task_handler { for (const accessor_info &arg : k->args) { arg.acquire_access(); } + DEBUG_INFO("parallel_for_3d(): %s\n", k->name.c_str()); for (size_t x = offset.get(0); x < r.get(0); x++) { for (size_t y = offset.get(1); y < r.get(1); y++) { for (size_t z = offset.get(2); z < r.get(2); z++) { diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 3d2678c..56b316a 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -56,8 +56,13 @@ class handler { template void copy_capture(KernelName* p){ - const char* name = ("__"+detail::get_kernel_name_from_class()+"_obj__").c_str(); - ctx.get_context_info()->task_handler->set_capture(name,p,sizeof(KernelName)); + string_class name = detail::get_kernel_name_from_class(); + DEBUG_INFO("kernel class: %s", name.c_str()); + name = "__" + name + "_obj__"; + DEBUG_INFO("kernel object: %s %#x %lu", name.c_str(), (size_t)p, + sizeof(KernelName)); + ctx.get_context_info()->task_handler->set_capture(name.c_str(), p, + sizeof(KernelName)); } template diff --git a/include/neoSYCL/sycl/queue.hpp b/include/neoSYCL/sycl/queue.hpp index 581a5e1..0e081ca 100644 --- a/include/neoSYCL/sycl/queue.hpp +++ b/include/neoSYCL/sycl/queue.hpp @@ -72,7 +72,12 @@ class queue { try { handler command_group_handler(bind_device, counter, ctx); cgf(command_group_handler); - } catch (...) { + } catch (std::exception& e) { + PRINT_ERR("%s",e.what()); + throw; + } + catch (...) { + PRINT_ERR("unknown exception"); throw; } return event(); From 57c492864ce6e32c5dae5fca615b95d85a7e3faf Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Wed, 11 May 2022 20:42:43 +0900 Subject: [PATCH 14/90] ifdef RUTIME_ONLY in scyl.hpp --- include/neoSYCL/sycl.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index 64746de..fd32662 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -11,6 +11,7 @@ #include "sycl/item.hpp" #include "sycl/id.hpp" +#ifndef ___NEOSYCL_KERNEL_RUNTIME_ONLY___ #include "sycl/device_selector.hpp" #include "sycl/platform.hpp" #include "sycl/device.hpp" @@ -31,6 +32,8 @@ #include "sycl/allocator.hpp" #include "sycl/buffer.hpp" +#endif + namespace neosycl { using namespace neosycl::sycl; From 0ea418605691934dead5e936921037ec4e4431f2 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Wed, 11 May 2022 20:43:46 +0900 Subject: [PATCH 15/90] line break at the end of file --- include/neoSYCL/sycl/detail/accessor_info.hpp | 2 +- include/neoSYCL/sycl/detail/context_info.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/neoSYCL/sycl/detail/accessor_info.hpp b/include/neoSYCL/sycl/detail/accessor_info.hpp index 13fe99e..b49703a 100644 --- a/include/neoSYCL/sycl/detail/accessor_info.hpp +++ b/include/neoSYCL/sycl/detail/accessor_info.hpp @@ -41,4 +41,4 @@ struct accessor_info { } // namespace neosycl::sycl::detail -#endif // SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ \ No newline at end of file +#endif // SYCL_INCLUDE_CL_SYCL_ACCESSOR_INFO_HPP_ diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp index 41ef9a1..a20b38c 100644 --- a/include/neoSYCL/sycl/detail/context_info.hpp +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -28,4 +28,4 @@ class cpu_context_info : public context_info { } ~cpu_context_info() = default; }; -} // namespace neosycl::sycl::detail \ No newline at end of file +} // namespace neosycl::sycl::detail From 4a9faad7fac2d4087380cf345b8d614c8b479c35 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Thu, 12 May 2022 15:21:31 +0900 Subject: [PATCH 16/90] set_range and set_offset --- .../extensions/nec/ve_task_handler.hpp | 42 +++ include/neoSYCL/sycl/detail/task_handler.hpp | 204 +++++------ include/neoSYCL/sycl/handler.hpp | 323 +++++++++--------- 3 files changed, 313 insertions(+), 256 deletions(-) diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 4bc37fe..3158b8d 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -30,6 +30,48 @@ class task_handler_ve : public detail::task_handler { } } + void set_range(const char* name, range<1>& d) override { + size_t r[3] = { d[0],1,1 }; + set_range_offset(name, r); + } + void set_range(const char* name, range<2>& d) override { + size_t r[3] = { d[0],d[1],1 }; + set_range_offset(name, r); + } + void set_range(const char* name, range<3>& d) override { + size_t r[3] = { d[0],d[1],d[2] }; + set_range_offset(name, r); + } + + void set_offset(const char* name, id<1>& i) override { + size_t o[3] = { i[0],1,1 }; + set_range_offset(name, o); + } + void set_offset(const char* name, id<2>& i) override { + size_t o[3] = { i[0],i[1],1 }; + set_range_offset(name, o); + } + void set_offset(const char* name, id<3>& i) override { + size_t o[3] = { i[0],i[1], i[2] }; + set_range_offset(name, o); + } + + void set_range_offset (const char* name, size_t r[3]) { + DEBUG_INFO("set range/offset: %s %lu %lu %lu", name, r[0], r[1], r[2]); + uint64_t devptr = veo_get_sym(proc_.ve_proc, proc_.handle, name); + if (devptr == 0) { + PRINT_ERR("veo_get_sym() failed: %s", name); + throw exception("setup_range_offset() failed"); + } + + int rt = veo_write_mem(proc_.ve_proc, devptr, r, sizeof(size_t)*3); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_write_mem() failed: %s", name); + throw exception("setup_range_offset() failed"); + } + } + + struct veo_args *alloc_veo_args() { struct veo_args *argp = veo_args_alloc(); if (!argp) { diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index dadf971..a53fbcf 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -5,114 +5,122 @@ namespace neosycl::sycl::detail { -class task_handler { -protected: - using container_type = std::shared_ptr; - -public: - explicit task_handler() {} - - virtual void single_task(shared_ptr_class k, - const std::function &func) = 0; - - virtual void parallel_for_1d(shared_ptr_class k, range<1> r, - const std::function)> &func, - id<1> offset) = 0; - - virtual void parallel_for_2d(shared_ptr_class k, range<2> r, - const std::function)> &func, - id<2> offset) = 0; - - virtual void parallel_for_3d(shared_ptr_class k, range<3> r, - const std::function)> &func, - id<3> offset) = 0; - - virtual SUPPORT_PLATFORM_TYPE type() = 0; - - virtual void *get_pointer(container_type) = 0; - virtual void *alloc_mem(container_type, - access::mode = access::mode::read) = 0; - virtual void free_mem(container_type) = 0; - virtual void copy_back() = 0; - virtual void set_capture(const char* name, void* p, size_t sz) {} -}; - -class task_handler_cpu : public task_handler { -public: - task_handler_cpu() {} - - void single_task(shared_ptr_class k, - const std::function &func) override { - for (const accessor_info &arg : k->args) { - arg.acquire_access(); - } - DEBUG_INFO("single_task(): %s\n", k->name.c_str()); - func(); - for (const accessor_info &arg : k->args) { - arg.release_access(); - } - } + class task_handler { + protected: + using container_type = std::shared_ptr; - void parallel_for_1d(shared_ptr_class k, range<1> r, - const std::function)> &func, - id<1> offset) override { - for (const accessor_info &arg : k->args) { - arg.acquire_access(); - } - DEBUG_INFO("parallel_for_1d(): %s\n", k->name.c_str()); - for (size_t x = offset.get(0); x < r.get(0); x++) { - func(id<1>(x)); - } - for (const accessor_info &arg : k->args) { - arg.release_access(); - } + public: + explicit task_handler() {} + + virtual void single_task(shared_ptr_class k, + const std::function& func) = 0; + + virtual void parallel_for_1d(shared_ptr_class k, range<1> r, + const std::function)>& func, + id<1> offset) = 0; + + virtual void parallel_for_2d(shared_ptr_class k, range<2> r, + const std::function)>& func, + id<2> offset) = 0; + + virtual void parallel_for_3d(shared_ptr_class k, range<3> r, + const std::function)>& func, + id<3> offset) = 0; + + virtual SUPPORT_PLATFORM_TYPE type() = 0; + + virtual void* get_pointer(container_type) = 0; + virtual void* alloc_mem(container_type, + access::mode = access::mode::read) = 0; + virtual void free_mem(container_type) = 0; + virtual void copy_back() = 0; + virtual void set_capture(const char* name, void* p, size_t sz) {} + + virtual void set_range(const char* name, range<1>& d) {} + virtual void set_range(const char* name, range<2>& d) {} + virtual void set_range(const char* name, range<3>& d) {} + + virtual void set_offset(const char* name, id<1>& i) {} + virtual void set_offset(const char* name, id<2>& i) {} + virtual void set_offset(const char* name, id<3>& i) {} }; - void parallel_for_2d(shared_ptr_class k, range<2> r, - const std::function)> &func, - id<2> offset) override { - for (const accessor_info &arg : k->args) { - arg.acquire_access(); - } - DEBUG_INFO("parallel_for_2d(): %s\n", k->name.c_str()); - for (size_t x = offset.get(0); x < r.get(0); x++) { - for (size_t y = offset.get(1); y < r.get(1); y++) { - func(id<2>(x, y)); + class task_handler_cpu : public task_handler { + public: + task_handler_cpu() {} + + void single_task(shared_ptr_class k, + const std::function& func) override { + for (const accessor_info& arg : k->args) { + arg.acquire_access(); + } + DEBUG_INFO("single_task(): %s\n", k->name.c_str()); + func(); + for (const accessor_info& arg : k->args) { + arg.release_access(); } } - for (const accessor_info &arg : k->args) { - arg.release_access(); - } - }; - void parallel_for_3d(shared_ptr_class k, range<3> r, - const std::function)> &func, - id<3> offset) override { - for (const accessor_info &arg : k->args) { - arg.acquire_access(); - } - DEBUG_INFO("parallel_for_3d(): %s\n", k->name.c_str()); - for (size_t x = offset.get(0); x < r.get(0); x++) { - for (size_t y = offset.get(1); y < r.get(1); y++) { - for (size_t z = offset.get(2); z < r.get(2); z++) { - func(id<3>(x, y, z)); + void parallel_for_1d(shared_ptr_class k, range<1> r, + const std::function)>& func, + id<1> offset) override { + for (const accessor_info& arg : k->args) { + arg.acquire_access(); + } + DEBUG_INFO("parallel_for_1d(): %s\n", k->name.c_str()); + for (size_t x = offset.get(0); x < r.get(0); x++) { + func(id<1>(x)); + } + for (const accessor_info& arg : k->args) { + arg.release_access(); + } + }; + + void parallel_for_2d(shared_ptr_class k, range<2> r, + const std::function)>& func, + id<2> offset) override { + for (const accessor_info& arg : k->args) { + arg.acquire_access(); + } + DEBUG_INFO("parallel_for_2d(): %s\n", k->name.c_str()); + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + func(id<2>(x, y)); } } - } - for (const accessor_info &arg : k->args) { - arg.release_access(); - } - }; + for (const accessor_info& arg : k->args) { + arg.release_access(); + } + }; + + void parallel_for_3d(shared_ptr_class k, range<3> r, + const std::function)>& func, + id<3> offset) override { + for (const accessor_info& arg : k->args) { + arg.acquire_access(); + } + DEBUG_INFO("parallel_for_3d(): %s\n", k->name.c_str()); + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + for (size_t z = offset.get(2); z < r.get(2); z++) { + func(id<3>(x, y, z)); + } + } + } + for (const accessor_info& arg : k->args) { + arg.release_access(); + } + }; - SUPPORT_PLATFORM_TYPE type() override { return CPU; } + SUPPORT_PLATFORM_TYPE type() override { return CPU; } - void *get_pointer(container_type p) override { return p->get_raw_ptr(); } - void *alloc_mem(container_type p, access::mode = access::mode::read) override { - return p->get_raw_ptr(); - } - void free_mem(container_type) override {} - void copy_back() override {} -}; + void* get_pointer(container_type p) override { return p->get_raw_ptr(); } + void* alloc_mem(container_type p, access::mode = access::mode::read) override { + return p->get_raw_ptr(); + } + void free_mem(container_type) override {} + void copy_back() override {} + }; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 56b316a..0db08eb 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -19,165 +19,172 @@ namespace neosycl::sycl { -namespace detail { - -/** - * get func name by create ptr, rely on compiler reflect implementation - * - * @tparam KernelName class - * @return str - */ -template string_class get_kernel_name_from_class() { - KernelName *p; - int status; - char *pc = abi::__cxa_demangle(typeid(p).name(), 0, 0, &status); - string_class in(pc); - free(pc); - std::regex re("([^\\s\\:]+)\\*$"); - std::smatch result; - if (std::regex_search(in, result, re)) { - in = result[1].str(); - } - - return in; -} - -} // namespace detail - -class handler { - using handler_type = shared_ptr_class; - using counter_type = shared_ptr_class; - using kernel_type = shared_ptr_class; - -public: - explicit handler(device dev, counter_type counter, context c) - : bind_device(std::move(dev)), counter(std::move(counter)), - kernel(new detail::kernel()), ctx(c) {} - - template - void copy_capture(KernelName* p){ - string_class name = detail::get_kernel_name_from_class(); - DEBUG_INFO("kernel class: %s", name.c_str()); - name = "__" + name + "_obj__"; - DEBUG_INFO("kernel object: %s %#x %lu", name.c_str(), (size_t)p, - sizeof(KernelName)); - ctx.get_context_info()->task_handler->set_capture(name.c_str(), p, - sizeof(KernelName)); - } - - template - void set_capture(KernelType kernelFunc){ - if(bind_device.type() != detail::VE) - return; - kernelFunc(); - } - - template - void single_task(KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - submit_task([f = kernelFunc, h = task_handler, k = kernel]() { - h->single_task(k, f); - }); - } - - template - void submit_parallel_for(handler_type handler, range<3> numWorkItems, - id<3> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_3d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<2> numWorkItems, - id<2> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_2d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<1> numWorkItems, - id<1> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_1d(k, n, f, o); }); - } - - template - void parallel_for(range numWorkItems, KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - submit_parallel_for(task_handler, numWorkItems, id(), - kernelFunc); - } - - template - void parallel_for(range numWorkItems, - id workItemOffset, KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); - } - - // template - // void parallel_for(nd_range executionRange, KernelType - // kernelFunc); - - template - void parallel_for_work_group(range numWorkGroups, - WorkgroupFunctionType kernelFunc); - - template - void parallel_for_work_group(range numWorkGroups, - range workGroupSize, - WorkgroupFunctionType kernelFunc); - - //----- OpenCL interoperability interface // - template void set_arg(int argIndex, T &&arg) { - kernel->args.insert(argIndex, arg); - } - - template void set_args(Ts &&...args) { - kernel->args.push_back(args...); - } - - kernel_type get_kernel() { return kernel; } - - template - void require(sycl::accessor acc) { - acc.handler_ = this; - kernel->args.push_back(detail::accessor_info(acc, m)); - } - - template - T *get_pointer(sycl::accessor acc) { - return (T*)ctx.get_context_info()->get_pointer(acc.data); - } - - context get_context() { return ctx; } - -private: - kernel_type kernel; - device bind_device; - counter_type counter; - context ctx; - - template void submit_task(Func func) { - counter->incr(); - std::thread t([f = func, c = counter]() { - try { - f(); - } catch (...) { - throw; + namespace detail { + + /** + * get func name by create ptr, rely on compiler reflect implementation + * + * @tparam KernelName class + * @return str + */ + template string_class get_kernel_name_from_class() { + KernelName* p; + int status; + char* pc = abi::__cxa_demangle(typeid(p).name(), 0, 0, &status); + string_class in(pc); + free(pc); + std::regex re("([^\\s\\:]+)\\*$"); + std::smatch result; + if (std::regex_search(in, result, re)) { + in = result[1].str(); } - c->decr(); - }); - t.detach(); - } -}; + + return in; + } + + } // namespace detail + + class handler { + using handler_type = shared_ptr_class; + using counter_type = shared_ptr_class; + using kernel_type = shared_ptr_class; + + public: + explicit handler(device dev, counter_type counter, context c) + : bind_device(std::move(dev)), counter(std::move(counter)), + kernel(new detail::kernel()), ctx(c) {} + + template + void copy_capture(KernelName* p) { + string_class name = detail::get_kernel_name_from_class(); + DEBUG_INFO("kernel class: %s", name.c_str()); + name = "__" + name + "_obj__"; + DEBUG_INFO("kernel object: %s %#x %lu", name.c_str(), (size_t)p, + sizeof(KernelName)); + ctx.get_context_info()->task_handler->set_capture(name.c_str(), p, + sizeof(KernelName)); + } + + template + void set_capture(KernelType kernelFunc) { + if (bind_device.type() != detail::VE) + return; + kernelFunc(); + } + + template + void single_task(KernelType kernelFunc) { + kernel->name = detail::get_kernel_name_from_class(); + handler_type task_handler = ctx.get_context_info()->task_handler; + submit_task([f = kernelFunc, h = task_handler, k = kernel]() { + h->single_task(k, f); + }); + } + + template + void submit_parallel_for(handler_type handler, range<3> numWorkItems, + id<3> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_3d(k, n, f, o); }); + } + + template + void submit_parallel_for(handler_type handler, range<2> numWorkItems, + id<2> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_2d(k, n, f, o); }); + } + + template + void submit_parallel_for(handler_type handler, range<1> numWorkItems, + id<1> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel]() { h->parallel_for_1d(k, n, f, o); }); + } + + template + void parallel_for(range numWorkItems, KernelType kernelFunc) { + kernel->name = detail::get_kernel_name_from_class(); + handler_type task_handler = ctx.get_context_info()->task_handler; + string_class range_obj = "__" + kernel->name + "_range__"; + task_handler->set_range(range_obj.c_str(), numWorkItems); + submit_parallel_for(task_handler, numWorkItems, id(), + kernelFunc); + } + + template + void parallel_for(range numWorkItems, + id workItemOffset, KernelType kernelFunc) { + kernel->name = detail::get_kernel_name_from_class(); + handler_type task_handler = ctx.get_context_info()->task_handler; + string_class range_obj = "__" + kernel->name + "_range__"; + task_handler->set_range(range_obj.c_str(), numWorkItems); + string_class offset_obj = "__" + kernel->name + "_offset__"; + task_handler->set_range(offset_obj.c_str(), workItemOffset); + submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); + } + + // template + // void parallel_for(nd_range executionRange, KernelType + // kernelFunc); + + template + void parallel_for_work_group(range numWorkGroups, + WorkgroupFunctionType kernelFunc); + + template + void parallel_for_work_group(range numWorkGroups, + range workGroupSize, + WorkgroupFunctionType kernelFunc); + + //----- OpenCL interoperability interface // + template void set_arg(int argIndex, T&& arg) { + kernel->args.insert(argIndex, arg); + } + + template void set_args(Ts &&...args) { + kernel->args.push_back(args...); + } + + kernel_type get_kernel() { return kernel; } + + template + void require(sycl::accessor acc) { + acc.handler_ = this; + kernel->args.push_back(detail::accessor_info(acc, m)); + } + + template + T* get_pointer(sycl::accessor acc) { + return (T*)ctx.get_context_info()->get_pointer(acc.data); + } + + context get_context() { return ctx; } + + private: + kernel_type kernel; + device bind_device; + counter_type counter; + context ctx; + + template void submit_task(Func func) { + counter->incr(); + std::thread t([f = func, c = counter]() { + try { + f(); + } + catch (...) { + throw; + } + c->decr(); + }); + t.detach(); + } + }; } // namespace neosycl::sycl From c0496caf0e0174486d6da13551a934629ef82f70 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Tue, 17 May 2022 20:57:15 +0900 Subject: [PATCH 17/90] kernel and kernel_info class --- .clang-format | 22 +- include/neoSYCL/extensions/nec/rt_acc.hpp | 27 ++ .../neoSYCL/extensions/nec/ve_kernel_info.hpp | 24 ++ .../neoSYCL/extensions/nec/ve_selector.hpp | 1 + .../extensions/nec/ve_task_handler.hpp | 164 +++++----- include/neoSYCL/sycl.hpp | 3 + include/neoSYCL/sycl/buffer.hpp | 76 +++-- include/neoSYCL/sycl/context.hpp | 28 +- include/neoSYCL/sycl/detail/context_info.hpp | 43 ++- include/neoSYCL/sycl/detail/kernel_info.hpp | 23 ++ include/neoSYCL/sycl/detail/task_handler.hpp | 79 +++-- include/neoSYCL/sycl/handler.hpp | 304 ++++++++---------- include/neoSYCL/sycl/kernel.hpp | 28 ++ 13 files changed, 496 insertions(+), 326 deletions(-) create mode 100644 include/neoSYCL/extensions/nec/rt_acc.hpp create mode 100644 include/neoSYCL/extensions/nec/ve_kernel_info.hpp create mode 100644 include/neoSYCL/sycl/detail/kernel_info.hpp create mode 100644 include/neoSYCL/sycl/kernel.hpp diff --git a/.clang-format b/.clang-format index d71aab3..5032e04 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,20 @@ -Language: Cpp BasedOnStyle: LLVM -AlignConsecutiveAssignments: true +Language: Cpp +AlignConsecutiveAssignments: Consecutive AlignTrailingComments: true -SortIncludes: false - +AlwaysBreakTemplateDeclarations: MultiLine +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: false + AfterControlStatement: MultiLine + AfterEnum: false + AfterFunction: false + AfterNamespace: false +# AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false +PointerAlignment: Left +SortIncludes: Never diff --git a/include/neoSYCL/extensions/nec/rt_acc.hpp b/include/neoSYCL/extensions/nec/rt_acc.hpp new file mode 100644 index 0000000..088ef91 --- /dev/null +++ b/include/neoSYCL/extensions/nec/rt_acc.hpp @@ -0,0 +1,27 @@ +#pragma once +#include + +namespace neosycl::sycl::rt { + +template struct acc_ { + T* p_; + size_t r_[6]; + + acc_(T* p = nullptr) : p_(p), r_{1, 1, 1, 0, 0, 0} {} + acc_(T* p, neosycl::sycl::range<1> r) : p_(p), r_{r[0], 1, 1, 0, 0, 0} {} + acc_(T* p, neosycl::sycl::range<2> r) : p_(p), r_{r[0], r[1], 1, 0, 0, 0} {} + acc_(T* p, neosycl::sycl::range<3> r) + : p_(p), r_{r[0], r[1], r[2], 0, 0, 0} {} + ~acc_() = default; + + inline T& operator[](size_t i) { return p_[i]; } + inline T& operator[](neosycl::sycl::id<3>& i) { + return p_[(i[2] * r_[1] + i[1]) * r_[0] + i[0]]; + } + inline T& operator[](neosycl::sycl::id<2>& i) { + return p_[i[1] * r_[0] + i[0]]; + } + inline T& operator[](neosycl::sycl::id<1>& i) { return p_[i[0]]; } +}; + +} // namespace neosycl::sycl::rt diff --git a/include/neoSYCL/extensions/nec/ve_kernel_info.hpp b/include/neoSYCL/extensions/nec/ve_kernel_info.hpp new file mode 100644 index 0000000..f3df26c --- /dev/null +++ b/include/neoSYCL/extensions/nec/ve_kernel_info.hpp @@ -0,0 +1,24 @@ +#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_INFO_HPP_ +#define SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_INFO_HPP_ + +namespace neosycl::sycl::extensions::nec { +struct kernel_info_ve : public detail::kernel_info { + uint64_t func_; + uint64_t capt_; + uint64_t rnge_; + + // capture data and size + // size_t size_; + // void* data_; + + kernel_info_ve(const char* c) : detail::kernel_info(c) { + func_ = 0; + capt_ = 0; + rnge_ = 0; + // size_ = 0; + // data_ = nullptr; + } +}; +} // namespace neosycl::sycl::extensions::nec + +#endif diff --git a/include/neoSYCL/extensions/nec/ve_selector.hpp b/include/neoSYCL/extensions/nec/ve_selector.hpp index d5821a4..ca6359e 100644 --- a/include/neoSYCL/extensions/nec/ve_selector.hpp +++ b/include/neoSYCL/extensions/nec/ve_selector.hpp @@ -2,6 +2,7 @@ #define SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ #include "neoSYCL/extensions/nec/ve_info.hpp" +#include "neoSYCL/extensions/nec/ve_kernel_info.hpp" #include "neoSYCL/extensions/nec/ve_task_handler.hpp" #include "neoSYCL/extensions/nec/ve_device_info.hpp" #include "neoSYCL/sycl/detail/context_info.hpp" diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index 3158b8d..d1b768e 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -11,69 +11,78 @@ class task_handler_ve : public detail::task_handler { using buffer_type = std::vector; public: - task_handler_ve(const VEProc &p, const VEContext &c) : proc_(p), ctx_(c) {} - ~task_handler_ve() { /* do nothing */ + task_handler_ve(const VEProc& p, const VEContext& c) : proc_(p), ctx_(c) { + argp_ = alloc_veo_args(); } + ~task_handler_ve() { veo_args_free(argp_); } - void set_capture(const char *name, void *p, size_t sz) override { - DEBUG_INFO("set capture: %s %#x %#x",name, (size_t)proc_.ve_proc, (size_t)proc_.handle); - uint64_t devptr = veo_get_sym(proc_.ve_proc, proc_.handle, name); - if (devptr == 0) { - PRINT_ERR("veo_get_sym() failed: %s", name); - throw exception("setup_capture() failed"); + kernel* create_kernel(const char* s) override { + kernel_info_ve* ki = new kernel_info_ve(s); + + std::string oname = std::string("__") + s + "_obj__"; + + ki->func_ = veo_get_sym(proc_.ve_proc, proc_.handle, s); + ki->capt_ = veo_get_sym(proc_.ve_proc, proc_.handle, oname.c_str()); + + if (ki->func_ == 0 || ki->capt_ == 0) { + PRINT_ERR("veo_get_sym() failed: %s", s); + throw exception("create_kernel() failed"); } + return new kernel(ki); + } + + void set_capture(shared_ptr_class k, void* p, size_t sz) override { + DEBUG_INFO("set capture: %s %#x %#x", k->get_name(), (size_t)proc_.ve_proc, + (size_t)proc_.handle); - int rt = veo_write_mem(proc_.ve_proc, devptr, p, sz); + kernel::info_type ki = k->get_kernel_info(); + shared_ptr_class kiv = + std::dynamic_pointer_cast(ki); + if (kiv == nullptr) { + PRINT_ERR("invalid kernel_info: %#x", ki.get()); + throw exception("set_capture() failed"); + } + + DEBUG_INFO("set capture: %#x %#x %#x", (size_t)proc_.ve_proc, (size_t)p, + sz); + int rt = veo_write_mem(proc_.ve_proc, kiv->capt_, p, sz); if (rt != VEO_COMMAND_OK) { - PRINT_ERR("veo_write_mem() failed: %s", name); + PRINT_ERR("veo_write_mem() failed: %s %d", k->get_name(), rt); throw exception("setup_capture() failed"); } } - void set_range(const char* name, range<1>& d) override { - size_t r[3] = { d[0],1,1 }; - set_range_offset(name, r); - } - void set_range(const char* name, range<2>& d) override { - size_t r[3] = { d[0],d[1],1 }; - set_range_offset(name, r); - } - void set_range(const char* name, range<3>& d) override { - size_t r[3] = { d[0],d[1],d[2] }; - set_range_offset(name, r); - } + void set_range(shared_ptr_class k, size_t r[6]) { + kernel::info_type ki = k->get_kernel_info(); + shared_ptr_class kiv = + std::dynamic_pointer_cast(ki); + if (kiv == nullptr) { + PRINT_ERR("invalid kernel_info: %#x", ki.get()); + throw exception("set_range() failed"); + } + DEBUG_INFO("range : %s %lu %lu %lu", k->get_name(), r[0], r[1], r[2]); + DEBUG_INFO("offset: %s %lu %lu %lu", k->get_name(), r[3], r[4], r[5]); - void set_offset(const char* name, id<1>& i) override { - size_t o[3] = { i[0],1,1 }; - set_range_offset(name, o); - } - void set_offset(const char* name, id<2>& i) override { - size_t o[3] = { i[0],i[1],1 }; - set_range_offset(name, o); - } - void set_offset(const char* name, id<3>& i) override { - size_t o[3] = { i[0],i[1], i[2] }; - set_range_offset(name, o); - } + if (kiv->rnge_ == 0) { + // this is the first call and the pointer is not set yet. + std::string rname = std::string("__") + k->get_name() + "_range__"; - void set_range_offset (const char* name, size_t r[3]) { - DEBUG_INFO("set range/offset: %s %lu %lu %lu", name, r[0], r[1], r[2]); - uint64_t devptr = veo_get_sym(proc_.ve_proc, proc_.handle, name); - if (devptr == 0) { - PRINT_ERR("veo_get_sym() failed: %s", name); - throw exception("setup_range_offset() failed"); + kiv->rnge_ = veo_get_sym(proc_.ve_proc, proc_.handle, rname.c_str()); + if (kiv->rnge_ == 0) { + PRINT_ERR("veo_get_sym() failed: %s", rname.c_str()); + throw exception("set_range() failed"); + } } - int rt = veo_write_mem(proc_.ve_proc, devptr, r, sizeof(size_t)*3); + int rt = veo_write_mem(proc_.ve_proc, kiv->rnge_, r, sizeof(size_t) * 6); if (rt != VEO_COMMAND_OK) { - PRINT_ERR("veo_write_mem() failed: %s", name); - throw exception("setup_range_offset() failed"); + PRINT_ERR("veo_write_mem() failed: %s", k->get_name()); + throw exception("setup_range() failed"); } } - - struct veo_args *alloc_veo_args() { - struct veo_args *argp = veo_args_alloc(); + struct veo_args* alloc_veo_args() { + struct veo_args* argp = veo_args_alloc(); if (!argp) { PRINT_ERR("veo_args_alloc() failed"); throw exception("alloc_veo_args() failed"); @@ -81,30 +90,30 @@ class task_handler_ve : public detail::task_handler { return argp; } - void single_task(shared_ptr_class k, - const std::function &func) override { - DEBUG_INFO("single_task(): %s", k->name.c_str()); + void single_task(shared_ptr_class k, + const std::function& func) override { + DEBUG_INFO("single_task(): %s", k->get_name()); call_kernel_func(k); } - void parallel_for_1d(shared_ptr_class k, range<1> r, - const std::function)> &func, + void parallel_for_1d(shared_ptr_class k, range<1> r, + const std::function)>& func, id<1> offset) override { - DEBUG_INFO("parallel_for_1d(): %s", k->name.c_str()); + DEBUG_INFO("parallel_for_1d(): %s", k->get_name()); call_kernel_func(k); } - void parallel_for_2d(shared_ptr_class k, range<2> r, - const std::function)> &func, + void parallel_for_2d(shared_ptr_class k, range<2> r, + const std::function)>& func, id<2> offset) override { - DEBUG_INFO("parallel_for_2d(): %s", k->name.c_str()); + DEBUG_INFO("parallel_for_2d(): %s", k->get_name()); call_kernel_func(k); } - void parallel_for_3d(shared_ptr_class k, range<3> r, - const std::function)> &func, + void parallel_for_3d(shared_ptr_class k, range<3> r, + const std::function)>& func, id<3> offset) override { - DEBUG_INFO("parallel_for_3d(): %s", k->name.c_str()); + DEBUG_INFO("parallel_for_3d(): %s", k->get_name()); call_kernel_func(k); } @@ -140,13 +149,14 @@ class task_handler_ve : public detail::task_handler { bufs_.erase(bufs_.begin() + index); } - void *alloc_mem(container_type d, + void* alloc_mem(container_type d, access::mode mode = access::mode::read) override { int index = find_buf(d); bool to_be_updated = (mode != access::mode::read); if (index >= 0) { bufs_[index].updated = to_be_updated; - return reinterpret_cast(bufs_[index].ptr); + // return reinterpret_cast(bufs_[index].ptr); + return nullptr; } size_t size_in_byte = d->get_size(); @@ -163,7 +173,8 @@ class task_handler_ve : public detail::task_handler { bufs_.push_back(bi); if (mode != access::mode::discard_write && - mode != access::mode::discard_read_write) { + mode != access::mode::discard_read_write) + { DEBUG_INFO("memory copy (h2v): " "vaddr=%#x, haddr=%#x, size=%lu", (size_t)ve_addr_int, (size_t)d->get_raw_ptr(), size_in_byte); @@ -175,17 +186,17 @@ class task_handler_ve : public detail::task_handler { } } - return reinterpret_cast(bi.ptr); + return reinterpret_cast(bi.ptr); } - void *get_pointer(container_type d) override { + void* get_pointer(container_type d) override { int index = find_buf(d); if (index < 0) return nullptr; - return reinterpret_cast(bufs_[index].ptr); + return reinterpret_cast(bufs_[index].ptr); } - void copy_back(buf_info &bi) { + void copy_back(buf_info& bi) { if (bi.updated) { size_t size_in_byte = bi.buf->get_size(); uint64_t device_ptr = bi.ptr; @@ -213,26 +224,33 @@ class task_handler_ve : public detail::task_handler { VEContext ctx_; VEProc proc_; buffer_type bufs_; + struct veo_args* argp_; - void call_kernel_func(shared_ptr_class k) { - for (const detail::accessor_info &acc : k->args) { + void call_kernel_func(shared_ptr_class k) { + kernel::info_type ki = k->get_kernel_info(); + shared_ptr_class kiv = + std::dynamic_pointer_cast(ki); + if (kiv == nullptr) { + PRINT_ERR("invalid kernel_info: %#x", ki.get()); + throw exception("set_capture() failed"); + } + + for (const detail::accessor_info& acc : k->get_acc()) { acc.acquire_access(); alloc_mem(acc.container, acc.mode); } DEBUG_INFO("-- KENREL EXEC BEGIN --"); try { - struct veo_args *argp = alloc_veo_args(); - uint64_t id = veo_call_async_by_name(ctx_.ve_ctx, proc_.handle, - k->name.c_str(), argp); uint64_t ret_val; + uint64_t id = veo_call_async(ctx_.ve_ctx, kiv->func_, argp_); veo_call_wait_result(ctx_.ve_ctx, id, &ret_val); DEBUG_INFO("-- KERNEL EXEC END (ret=%lu) --", ret_val); // copy_out(ve_addr_list, k, proc); - veo_args_free(argp); - } catch (exception &e) { + } + catch (exception& e) { PRINT_ERR("kernel execution failed: %s", e.what()); } - for (const detail::accessor_info &acc : k->args) { + for (const detail::accessor_info& acc : k->get_acc()) { acc.release_access(); } } diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index fd32662..fa9e2c2 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -31,7 +31,10 @@ // include buffer headers #include "sycl/allocator.hpp" #include "sycl/buffer.hpp" +#endif // ___NEOSYCL_KERNEL_RUNTIME_ONLY___ +#ifdef BUILD_VE +#include "extensions/nec/rt_acc.hpp" #endif namespace neosycl { diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index a445a02..5acba9d 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -14,8 +14,8 @@ class use_host_ptr { }; class use_mutex { public: - use_mutex(mutex_class &mutexRef); - mutex_class *get_mutex_ptr() const; + use_mutex(mutex_class& mutexRef); + mutex_class* get_mutex_ptr() const; }; class context_bound { public: @@ -44,50 +44,50 @@ class buffer { public: using value_type = T; - using reference = value_type &; - using const_reference = const value_type &; + using reference = value_type&; + using const_reference = const value_type&; using allocator_type = AllocatorT; - buffer(const range &bufferRange, - const property_list &propList = {}) + buffer(const range& bufferRange, + const property_list& propList = {}) : buffer(bufferRange, allocator_type(), propList) {} - buffer(const range &bufferRange, AllocatorT allocator, - const property_list &propList = {}) + buffer(const range& bufferRange, AllocatorT allocator, + const property_list& propList = {}) : bufferRange(bufferRange), data(new detail::container::DataContainerND( bufferRange.data, allocator)) {} - buffer(T *hostData, const range &bufferRange, - const property_list &propList = {}) + buffer(T* hostData, const range& bufferRange, + const property_list& propList = {}) : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(T *hostData, const range &bufferRange, - AllocatorT allocator, const property_list &propList = {}) + buffer(T* hostData, const range& bufferRange, + AllocatorT allocator, const property_list& propList = {}) : bufferRange(bufferRange), data(new detail::container::DataContainerND( hostData, bufferRange.data, allocator)) {} - buffer(const T *hostData, const range &bufferRange, - const property_list &propList = {}) + buffer(const T* hostData, const range& bufferRange, + const property_list& propList = {}) : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(const T *hostData, const range &bufferRange, - AllocatorT allocator, const property_list &propList = {}) + buffer(const T* hostData, const range& bufferRange, + AllocatorT allocator, const property_list& propList = {}) : bufferRange(bufferRange), data(new detail::container::DataContainerND( hostData, bufferRange.data)) {} - buffer(const shared_ptr_class &hostData, - const range &bufferRange, AllocatorT allocator, - const property_list &propList = {}) + buffer(const shared_ptr_class& hostData, + const range& bufferRange, AllocatorT allocator, + const property_list& propList = {}) : bufferRange(bufferRange), data(new detail::container::DataContainerND( hostData, bufferRange.data, allocator)) {} - buffer(const shared_ptr_class &hostData, - const range &bufferRange, - const property_list &propList = {}) + buffer(const shared_ptr_class& hostData, + const range& bufferRange, + const property_list& propList = {}) : bufferRange(bufferRange), data(new detail::container::DataContainerND( hostData.get(), bufferRange.data)) {} @@ -95,7 +95,7 @@ class buffer { template > buffer(InputIterator first, InputIterator last, AllocatorT allocator, - const property_list &propList = {}) + const property_list& propList = {}) : bufferRange((last - first) / sizeof(T)), data(new detail::container::DataContainerND( first, detail::container::ArrayND<1>((last - first) / sizeof(T)), @@ -104,13 +104,13 @@ class buffer { template > buffer(InputIterator first, InputIterator last, - const property_list &propList = {}) + const property_list& propList = {}) : bufferRange((last - first) / sizeof(T)), data(new detail::container::DataContainerND( first, detail::container::ArrayND<1>(last - first) / sizeof(T))) {} - buffer(buffer b, const id &baseIndex, - const range &subRange); + buffer(buffer b, const id& baseIndex, + const range& subRange); /* Available only when: dimensions == 1. */ // buffer(cl_mem clMemObject, const context &syclContext, event @@ -129,10 +129,9 @@ class buffer { template accessor - get_access(handler &commandGroupHandler) { + get_access(handler& commandGroupHandler) { push_context(commandGroupHandler.get_context(), mode); - commandGroupHandler.get_kernel()->args.push_back( - detail::accessor_info(data, mode)); + commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); return accessor(*this); } @@ -144,11 +143,10 @@ class buffer { template accessor - get_access(handler &commandGroupHandler, range accessRange, + get_access(handler& commandGroupHandler, range accessRange, id accessOffset = {}) { push_context(commandGroupHandler.get_context(), mode); - commandGroupHandler.get_kernel()->args.push_back( - detail::accessor_info(data, mode)); + commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); return accessor(*this, commandGroupHandler, accessRange, accessOffset); } @@ -171,22 +169,22 @@ class buffer { buffer reinterpret(range reinterpretRange) const; - buffer(const buffer &rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} + buffer(const buffer& rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} - buffer(buffer &&rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} + buffer(buffer&& rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} - buffer &operator=(const buffer &rhs) { + buffer& operator=(const buffer& rhs) { data = rhs.data; bufferRange = rhs.bufferRange; } - buffer &operator=(buffer &&rhs) { + buffer& operator=(buffer&& rhs) { data = rhs.data; bufferRange = rhs.bufferRange; } ~buffer() { - for (auto &it : ctx_) { + for (auto& it : ctx_) { it.get_context_info()->free_mem(data); } } @@ -197,8 +195,8 @@ class buffer { std::vector ctx_; void push_context(context c, access::mode m = access::mode::read) { - c.get_context_info()->alloc_mem(data, m); - ctx_.push_back(c); + if (c.get_context_info()->alloc_mem(data, m) != nullptr) + ctx_.push_back(c); } void push_context(handler h, access::mode m = access::mode::read) { push_context(h.get_context(), m); diff --git a/include/neoSYCL/sycl/context.hpp b/include/neoSYCL/sycl/context.hpp index 7d3fdac..57c2da7 100644 --- a/include/neoSYCL/sycl/context.hpp +++ b/include/neoSYCL/sycl/context.hpp @@ -9,27 +9,27 @@ namespace neosycl::sycl { class context { public: - explicit context(const property_list &propList = {}) { init(device()); } + explicit context(const property_list& propList = {}) { init(device()); } ~context() = default; - context(async_handler asyncHandler, const property_list &propList = {}); + context(async_handler asyncHandler, const property_list& propList = {}); - context(const device &dev, const property_list &propList = {}) { init(dev); } + context(const device& dev, const property_list& propList = {}) { init(dev); } - context(const device &dev, async_handler asyncHandler, - const property_list &propList = {}); + context(const device& dev, async_handler asyncHandler, + const property_list& propList = {}); - context(const platform &plt, const property_list &propList = {}); + context(const platform& plt, const property_list& propList = {}); - context(const platform &plt, async_handler asyncHandler, - const property_list &propList = {}); + context(const platform& plt, async_handler asyncHandler, + const property_list& propList = {}); - context(const vector_class &deviceList, - const property_list &propList = {}); + context(const vector_class& deviceList, + const property_list& propList = {}); - context(const vector_class &deviceList, async_handler asyncHandler, - const property_list &propList = {}); + context(const vector_class& deviceList, async_handler asyncHandler, + const property_list& propList = {}); // context(cl_context clContext, async_handler asyncHandler = {}); @@ -37,10 +37,10 @@ class context { typename info::param_traits::return_type get_info() const; - shared_ptr_class get_context_info() { return ctx_info; } + detail::context_info* get_context_info() { return ctx_info.get(); } private: - void init(const device &dev) { + void init(const device& dev) { ctx_info = shared_ptr_class( dev.device_info->create_context_info()); } diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp index a20b38c..a649952 100644 --- a/include/neoSYCL/sycl/detail/context_info.hpp +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -1,24 +1,63 @@ #pragma once +#include +#include +#include namespace neosycl::sycl::detail { +inline string_class get_kernel_name_from_class(const std::type_info& ti) { + // KernelName* p; + int status; + char* pc = abi::__cxa_demangle(ti.name(), 0, 0, &status); + string_class in(pc); + free(pc); + std::regex re("([^\\s\\:]+)\\*$"); + std::smatch result; + if (std::regex_search(in, result, re)) { + in = result[1].str(); + } + return in; +} + class context_info { using container_type = std::shared_ptr; protected: using handler_type = shared_ptr_class; + using kernel_map = std::map>; + context_info() {} public: virtual ~context_info() = default; - void *get_pointer(container_type c) const { + void* get_pointer(container_type c) const { return task_handler->get_pointer(c); } - void *alloc_mem(container_type c, access::mode m = access::mode::read) const { + void* alloc_mem(container_type c, access::mode m = access::mode::read) const { return task_handler->alloc_mem(c, m); } void free_mem(container_type c) const { task_handler->free_mem(c); } + void set_capture(shared_ptr_class k, void* p, size_t sz) const { + task_handler->set_capture(k, p, sz); + } + + template + shared_ptr_class get_kernel() { + const std::type_info& tinfo = typeid(KernelName*); + + if(kernels_.count(tinfo.hash_code())) + return kernels_.at(tinfo.hash_code()); + + string_class name = get_kernel_name_from_class(tinfo); + DEBUG_INFO("kernel class: %s", name.c_str()); + kernel* k = task_handler->create_kernel(name.c_str()); + shared_ptr_class p(std::move(k)); + kernels_.insert(std::make_pair(tinfo.hash_code(), p)); + return p; + } + handler_type task_handler; + kernel_map kernels_; // all exiting kernels in the context }; class cpu_context_info : public context_info { diff --git a/include/neoSYCL/sycl/detail/kernel_info.hpp b/include/neoSYCL/sycl/detail/kernel_info.hpp new file mode 100644 index 0000000..2746bc5 --- /dev/null +++ b/include/neoSYCL/sycl/detail/kernel_info.hpp @@ -0,0 +1,23 @@ +#ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ +#define SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ + +#include +#include "neoSYCL/sycl/detail/accessor_info.hpp" + +namespace neosycl::sycl::detail { + +struct kernel_info { + std::string name; + + virtual ~kernel_info() = default; +protected: + kernel_info(const char* c) : name(c) {} +}; + +struct kernel_info_cpu : public kernel_info { + kernel_info_cpu(const char* c) : kernel_info(c) {} +}; + +} // namespace neosycl::sycl::detail + +#endif // SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index a53fbcf..b9e08b6 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -1,13 +1,14 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP -#include "neoSYCL/sycl/detail/kernel.hpp" +#include "neoSYCL/sycl/kernel.hpp" namespace neosycl::sycl::detail { class task_handler { protected: - using container_type = std::shared_ptr; + using container_type = shared_ptr_class; + using kernel_info_type = kernel::info_type; public: explicit task_handler() {} @@ -29,20 +30,37 @@ namespace neosycl::sycl::detail { virtual SUPPORT_PLATFORM_TYPE type() = 0; - virtual void* get_pointer(container_type) = 0; + virtual void* get_pointer(container_type) = 0; virtual void* alloc_mem(container_type, - access::mode = access::mode::read) = 0; - virtual void free_mem(container_type) = 0; - virtual void copy_back() = 0; - virtual void set_capture(const char* name, void* p, size_t sz) {} - - virtual void set_range(const char* name, range<1>& d) {} - virtual void set_range(const char* name, range<2>& d) {} - virtual void set_range(const char* name, range<3>& d) {} - - virtual void set_offset(const char* name, id<1>& i) {} - virtual void set_offset(const char* name, id<2>& i) {} - virtual void set_offset(const char* name, id<3>& i) {} + access::mode = access::mode::read) = 0; + virtual void free_mem(container_type) = 0; + virtual void copy_back() = 0; + + virtual void set_capture(shared_ptr_class, void* p, size_t sz) {} + virtual void set_range(shared_ptr_class, size_t r[6]) {} + + template + void set_range(shared_ptr_class k, range r) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; + } + set_range(k, sz); + } + + template + void set_range(shared_ptr_class k, range r, id i) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; + } + for (size_t idx(3); idx != dim + 3; idx++) { + sz[idx] = i[idx]; + } + set_range(k, sz); + } + + virtual kernel* create_kernel(const char*) = 0; }; class task_handler_cpu : public task_handler { @@ -51,12 +69,12 @@ namespace neosycl::sycl::detail { void single_task(shared_ptr_class k, const std::function& func) override { - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.acquire_access(); } - DEBUG_INFO("single_task(): %s\n", k->name.c_str()); + DEBUG_INFO("single_task(): %s\n", k->get_name()); func(); - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.release_access(); } } @@ -64,14 +82,14 @@ namespace neosycl::sycl::detail { void parallel_for_1d(shared_ptr_class k, range<1> r, const std::function)>& func, id<1> offset) override { - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.acquire_access(); } - DEBUG_INFO("parallel_for_1d(): %s\n", k->name.c_str()); + DEBUG_INFO("parallel_for_1d(): %s\n", k->get_name()); for (size_t x = offset.get(0); x < r.get(0); x++) { func(id<1>(x)); } - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.release_access(); } }; @@ -79,16 +97,16 @@ namespace neosycl::sycl::detail { void parallel_for_2d(shared_ptr_class k, range<2> r, const std::function)>& func, id<2> offset) override { - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.acquire_access(); } - DEBUG_INFO("parallel_for_2d(): %s\n", k->name.c_str()); + DEBUG_INFO("parallel_for_2d(): %s\n", k->get_name()); for (size_t x = offset.get(0); x < r.get(0); x++) { for (size_t y = offset.get(1); y < r.get(1); y++) { func(id<2>(x, y)); } } - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.release_access(); } }; @@ -96,10 +114,10 @@ namespace neosycl::sycl::detail { void parallel_for_3d(shared_ptr_class k, range<3> r, const std::function)>& func, id<3> offset) override { - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.acquire_access(); } - DEBUG_INFO("parallel_for_3d(): %s\n", k->name.c_str()); + DEBUG_INFO("parallel_for_3d(): %s\n", k->get_name()); for (size_t x = offset.get(0); x < r.get(0); x++) { for (size_t y = offset.get(1); y < r.get(1); y++) { for (size_t z = offset.get(2); z < r.get(2); z++) { @@ -107,7 +125,7 @@ namespace neosycl::sycl::detail { } } } - for (const accessor_info& arg : k->args) { + for (const accessor_info& arg : k->get_acc()) { arg.release_access(); } }; @@ -116,10 +134,15 @@ namespace neosycl::sycl::detail { void* get_pointer(container_type p) override { return p->get_raw_ptr(); } void* alloc_mem(container_type p, access::mode = access::mode::read) override { - return p->get_raw_ptr(); + return nullptr; + //return p->get_raw_ptr(); } void free_mem(container_type) override {} void copy_back() override {} + + kernel* create_kernel(const char* s) override { + return new kernel(new detail::kernel_info_cpu(s)); + } }; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 0db08eb..5c2849b 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -10,181 +10,153 @@ #include "neoSYCL/sycl/id.hpp" #include "neoSYCL/sycl/allocator.hpp" //#include "neoSYCL/sycl/detail/highlight_func.hpp" -#include "neoSYCL/sycl/detail/kernel.hpp" +#include "neoSYCL/sycl/kernel.hpp" #include "neoSYCL/sycl/detail/task.hpp" #include "neoSYCL/sycl/detail/task_handler.hpp" #include "neoSYCL/sycl/detail/registered_platforms.hpp" #include "neoSYCL/sycl/detail/task_counter.hpp" -#include namespace neosycl::sycl { - namespace detail { - - /** - * get func name by create ptr, rely on compiler reflect implementation - * - * @tparam KernelName class - * @return str - */ - template string_class get_kernel_name_from_class() { - KernelName* p; - int status; - char* pc = abi::__cxa_demangle(typeid(p).name(), 0, 0, &status); - string_class in(pc); - free(pc); - std::regex re("([^\\s\\:]+)\\*$"); - std::smatch result; - if (std::regex_search(in, result, re)) { - in = result[1].str(); - } - - return in; - } - - } // namespace detail - - class handler { - using handler_type = shared_ptr_class; - using counter_type = shared_ptr_class; - using kernel_type = shared_ptr_class; - - public: - explicit handler(device dev, counter_type counter, context c) - : bind_device(std::move(dev)), counter(std::move(counter)), - kernel(new detail::kernel()), ctx(c) {} - - template - void copy_capture(KernelName* p) { - string_class name = detail::get_kernel_name_from_class(); - DEBUG_INFO("kernel class: %s", name.c_str()); - name = "__" + name + "_obj__"; - DEBUG_INFO("kernel object: %s %#x %lu", name.c_str(), (size_t)p, - sizeof(KernelName)); - ctx.get_context_info()->task_handler->set_capture(name.c_str(), p, - sizeof(KernelName)); - } - - template - void set_capture(KernelType kernelFunc) { - if (bind_device.type() != detail::VE) - return; - kernelFunc(); - } - - template - void single_task(KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - submit_task([f = kernelFunc, h = task_handler, k = kernel]() { - h->single_task(k, f); - }); - } - - template - void submit_parallel_for(handler_type handler, range<3> numWorkItems, - id<3> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_3d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<2> numWorkItems, - id<2> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_2d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<1> numWorkItems, - id<1> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel]() { h->parallel_for_1d(k, n, f, o); }); - } - - template - void parallel_for(range numWorkItems, KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - string_class range_obj = "__" + kernel->name + "_range__"; - task_handler->set_range(range_obj.c_str(), numWorkItems); - submit_parallel_for(task_handler, numWorkItems, id(), - kernelFunc); - } - - template - void parallel_for(range numWorkItems, - id workItemOffset, KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - handler_type task_handler = ctx.get_context_info()->task_handler; - string_class range_obj = "__" + kernel->name + "_range__"; +class handler { + using handler_type = shared_ptr_class; + using counter_type = shared_ptr_class; + using kernel_type = shared_ptr_class; + using accessor_list = std::vector; + +public: + explicit handler(device dev, counter_type counter, context c) + : bind_device_(std::move(dev)), counter_(std::move(counter)), + kernel_(nullptr), ctx_(c) {} + + template void copy_capture(KernelName* p) { + detail::context_info* cinfo = ctx_.get_context_info(); + + cinfo->set_capture(kernel_, p, sizeof(KernelName)); + } + + template + void set_kernel(KernelType kernelFunc) { + detail::context_info* cinfo = ctx_.get_context_info(); + + kernel_ = cinfo->get_kernel(); + // kernel_->name = detail::get_kernel_name_from_class(); + if (bind_device_.type() != detail::VE) + return; + kernelFunc(); + } + + template + void single_task(KernelType kernelFunc) { + handler_type task_handler = ctx_.get_context_info()->task_handler; + submit_task([f = kernelFunc, h = task_handler, k = kernel_]() { + h->single_task(k, f); + }); + } + + template + void submit_parallel_for(handler_type handler, range<3> numWorkItems, + id<3> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel_]() { h->parallel_for_3d(k, n, f, o); }); + } + + template + void submit_parallel_for(handler_type handler, range<2> numWorkItems, + id<2> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel_]() { h->parallel_for_2d(k, n, f, o); }); + } + + template + void submit_parallel_for(handler_type handler, range<1> numWorkItems, + id<1> offset, KernelType kernelFunc) { + submit_task([f = kernelFunc, n = numWorkItems, o = offset, + h = std::move(handler), + k = kernel_]() { h->parallel_for_1d(k, n, f, o); }); + } + + template + void parallel_for(range numWorkItems, KernelType kernelFunc) { + handler_type task_handler = ctx_.get_context_info()->task_handler; + + task_handler->set_range(kernel_, numWorkItems); + submit_parallel_for(task_handler, numWorkItems, id(), + kernelFunc); + } + + template + void parallel_for(range numWorkItems, + id workItemOffset, KernelType kernelFunc) { + handler_type task_handler = ctx_.get_context_info()->task_handler; + + task_handler->set_range(kernel_, numWorkItems, workItemOffset); +#if 0 + string_class range_obj = "__" + kernel_->get_name() + "_range__"; task_handler->set_range(range_obj.c_str(), numWorkItems); - string_class offset_obj = "__" + kernel->name + "_offset__"; + string_class offset_obj = "__" + kernel_->get_name() + "_offset__"; task_handler->set_range(offset_obj.c_str(), workItemOffset); - submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); - } - - // template - // void parallel_for(nd_range executionRange, KernelType - // kernelFunc); - - template - void parallel_for_work_group(range numWorkGroups, - WorkgroupFunctionType kernelFunc); - - template - void parallel_for_work_group(range numWorkGroups, - range workGroupSize, - WorkgroupFunctionType kernelFunc); - - //----- OpenCL interoperability interface // - template void set_arg(int argIndex, T&& arg) { - kernel->args.insert(argIndex, arg); - } - - template void set_args(Ts &&...args) { - kernel->args.push_back(args...); - } - - kernel_type get_kernel() { return kernel; } - - template - void require(sycl::accessor acc) { - acc.handler_ = this; - kernel->args.push_back(detail::accessor_info(acc, m)); - } - - template - T* get_pointer(sycl::accessor acc) { - return (T*)ctx.get_context_info()->get_pointer(acc.data); - } - - context get_context() { return ctx; } - - private: - kernel_type kernel; - device bind_device; - counter_type counter; - context ctx; - - template void submit_task(Func func) { - counter->incr(); - std::thread t([f = func, c = counter]() { - try { - f(); - } - catch (...) { - throw; - } - c->decr(); - }); - t.detach(); - } - }; +#endif + submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); + } + + // template + // void parallel_for(nd_range executionRange, KernelType + // kernelFunc); + + template + void parallel_for_work_group(range numWorkGroups, + WorkgroupFunctionType kernelFunc); + + template + void parallel_for_work_group(range numWorkGroups, + range workGroupSize, + WorkgroupFunctionType kernelFunc); + + //----- OpenCL interoperability interface // + template void set_arg(int argIndex, T&& arg); + + template void set_args(Ts&&... args); + + template + void require(sycl::accessor acc) { + acc.handler_ = this; + acc_.push_back(detail::accessor_info(acc, m)); + } + + template + T* get_pointer(sycl::accessor acc) { + return (T*)ctx_.get_context_info()->get_pointer(acc.data); + } + + context get_context() { return ctx_; } + + accessor_list& get_acc_() { return acc_; } + +private: + kernel_type kernel_; + device bind_device_; + counter_type counter_; + context ctx_; + accessor_list acc_; + + template void submit_task(Func func) { + counter_->incr(); + std::thread t([f = func, c = counter_]() { + try { + f(); + } + catch (...) { + throw; + } + c->decr(); + }); + t.detach(); + } +}; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/kernel.hpp b/include/neoSYCL/sycl/kernel.hpp new file mode 100644 index 0000000..d1bf88b --- /dev/null +++ b/include/neoSYCL/sycl/kernel.hpp @@ -0,0 +1,28 @@ +#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_KERNEL_HPP +#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_KERNEL_HPP + +#include "neoSYCL/sycl/detail/kernel_info.hpp" +#include "neoSYCL/sycl/detail/accessor_info.hpp" + +namespace neosycl::sycl { + +class kernel { +public: + using info_type = shared_ptr_class; + using accessor_list = vector_class; + + kernel(detail::kernel_info* info) : acc_(), info_(std::move(info)) {} + + void set_acc(accessor_list& acc) { acc_ = acc; } + accessor_list& get_acc() { return acc_; } + info_type get_kernel_info() { return info_; } + // string_class get_name() { return info_->name; } + const char* get_name() const { return info_->name.c_str(); } + +private: + info_type info_; + accessor_list acc_; +}; + +} // namespace neosycl::sycl +#endif From a18e59537191cbf689d80f7f6586d5bd6f8d7012 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Thu, 19 May 2022 09:40:18 +0900 Subject: [PATCH 18/90] Bug fix in sycl::item --- include/neoSYCL/sycl/item.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/neoSYCL/sycl/item.hpp b/include/neoSYCL/sycl/item.hpp index 36a5cfd..226f2b6 100644 --- a/include/neoSYCL/sycl/item.hpp +++ b/include/neoSYCL/sycl/item.hpp @@ -30,9 +30,9 @@ template struct item { id get_id() const { return id(this); }; - size_t get_id(int dimension) const { return this->index[dimension]; }; + size_t get_id(int dimension) const { return this->data[dimension]; }; - size_t operator[](int dimension) const { return this->index[dimension]; } + size_t operator[](int dimension) const { return this->data[dimension]; } range get_range() const { return this->max_range; } From 6b7d62d2ef03db287864b93ccd38773ce51b2a38 Mon Sep 17 00:00:00 2001 From: Hiroyuki Takizawa Date: Sat, 21 May 2022 18:31:39 +0900 Subject: [PATCH 19/90] Integration with new kout --- .gitignore | 3 +- CMakeLists.txt | 16 +- include/neoSYCL/extensions/nec/rt_acc.hpp | 27 - .../extensions/nec/ve_context_info.hpp | 24 +- include/neoSYCL/extensions/nec/ve_info.hpp | 12 +- .../extensions/nec/ve_task_handler.hpp | 29 +- include/neoSYCL/kout/Kout.hpp | 36 + include/neoSYCL/kout/KoutPrinterHelper.hpp | 24 + include/neoSYCL/kout/KoutVisitor.hpp | 32 + include/neoSYCL/kout/VarDeclFinder.hpp | 35 + include/neoSYCL/sycl.hpp | 6 +- include/neoSYCL/sycl/accessor.hpp | 48 +- include/neoSYCL/sycl/buffer.hpp | 6 +- .../detail/container/data_container_nd.hpp | 74 +- include/neoSYCL/sycl/detail/context_info.hpp | 24 +- include/neoSYCL/sycl/detail/kernel_info.hpp | 11 +- include/neoSYCL/sycl/detail/task_handler.hpp | 170 +- include/neoSYCL/sycl/handler.hpp | 80 +- include/neoSYCL/sycl/kernel.hpp | 2 +- include/neoSYCL/sycl/queue.hpp | 2 +- include/neoSYCL/sycl/runtime/acc.hpp | 18 + kernel_generator/CMakeLists.txt | 38 - kernel_generator/include/exceptions.h | 30 - kernel_generator/include/helpers.h | 59 - kernel_generator/include/kernel.h | 72 - kernel_generator/include/kernel_translator.h | 22 - kernel_generator/include/parallel_task.h | 27 - kernel_generator/include/single_task.h | 36 - .../include/ve_kernel_translator.h | 20 - kernel_generator/src/CMakeLists.txt | 41 - kernel_generator/src/clang_tool.cpp | 185 - kernel_generator/src/kernel.cpp | 74 - kernel_generator/src/parallel_task.cpp | 81 - kernel_generator/src/single_task.cpp | 77 - kernel_generator/src/ve_kernel_translator.cpp | 46 - kernel_generator/third_party/CMakeLists.txt | 2 - .../third_party/fmt/CMakeLists.txt | 2 - .../third_party/fmt/include/fmt/core.h | 1796 -------- .../third_party/fmt/include/fmt/format-inl.h | 1403 ------- .../third_party/fmt/include/fmt/format.h | 3648 ----------------- .../third_party/fmt/src/format.cc | 176 - src/CMakeLists.txt | 94 + src/KoutPrinterHelper.cpp | 58 + src/KoutVisitor.cpp | 383 ++ src/VarDeclFinder.cpp | 91 + src/main.cpp | 73 + 46 files changed, 1137 insertions(+), 8076 deletions(-) delete mode 100644 include/neoSYCL/extensions/nec/rt_acc.hpp create mode 100644 include/neoSYCL/kout/Kout.hpp create mode 100644 include/neoSYCL/kout/KoutPrinterHelper.hpp create mode 100644 include/neoSYCL/kout/KoutVisitor.hpp create mode 100644 include/neoSYCL/kout/VarDeclFinder.hpp create mode 100644 include/neoSYCL/sycl/runtime/acc.hpp delete mode 100644 kernel_generator/CMakeLists.txt delete mode 100644 kernel_generator/include/exceptions.h delete mode 100644 kernel_generator/include/helpers.h delete mode 100644 kernel_generator/include/kernel.h delete mode 100644 kernel_generator/include/kernel_translator.h delete mode 100644 kernel_generator/include/parallel_task.h delete mode 100644 kernel_generator/include/single_task.h delete mode 100644 kernel_generator/include/ve_kernel_translator.h delete mode 100644 kernel_generator/src/CMakeLists.txt delete mode 100644 kernel_generator/src/clang_tool.cpp delete mode 100644 kernel_generator/src/kernel.cpp delete mode 100644 kernel_generator/src/parallel_task.cpp delete mode 100644 kernel_generator/src/single_task.cpp delete mode 100644 kernel_generator/src/ve_kernel_translator.cpp delete mode 100644 kernel_generator/third_party/CMakeLists.txt delete mode 100644 kernel_generator/third_party/fmt/CMakeLists.txt delete mode 100644 kernel_generator/third_party/fmt/include/fmt/core.h delete mode 100644 kernel_generator/third_party/fmt/include/fmt/format-inl.h delete mode 100644 kernel_generator/third_party/fmt/include/fmt/format.h delete mode 100644 kernel_generator/third_party/fmt/src/format.cc create mode 100644 src/CMakeLists.txt create mode 100644 src/KoutPrinterHelper.cpp create mode 100644 src/KoutVisitor.cpp create mode 100644 src/VarDeclFinder.cpp create mode 100644 src/main.cpp diff --git a/.gitignore b/.gitignore index 48cc816..458a1d3 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ # CLion tmp dirs build +build.* .idea cmake-build-debug cmake-build-release @@ -40,4 +41,4 @@ node_modules package-lock.json package.json -.DS_Store \ No newline at end of file +.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index 38bad83..e7e20e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,12 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") -option(BUILD_KERNEL_GENERATOR "build sycl kernel generator" OFF) +option(BUILD_KERNEL_OUTLINER "build sycl kernel outliner" ON) option(BUILD_VE "Enable SX-Aurora support" OFF) -#option(BUILD_TESTING "Enable build tests" OFF) -if (BUILD_KERNEL_GENERATOR) - add_subdirectory(kernel_generator) +if (BUILD_KERNEL_OUTLINER) + add_subdirectory(src) endif () @@ -24,7 +23,7 @@ if (BUILD_VE) message(STATUS "Use veo headers path: ${VEO_HEADER_PATH}") message(STATUS "Use veo libs path: ${VEO_LIB_PATH}") - include_directories(${VEO_HEADER_PATH}) + include_directories(${VEO_HEADER_PATH} ${CMAKE_SOURCE_DIR}/include/neoSYCL/) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${VEO_LIB_PATH} -Wl,-rpath=${VEO_LIB_PATH} -lveo") endif () @@ -32,16 +31,9 @@ endif () if (CMAKE_BUILD_TYPE STREQUAL Debug) message(STATUS "Enable debug mode") ADD_DEFINITIONS(-DDEBUG) - include_directories(third_party/fmt/include) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") endif () include_directories(include) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lpthread") - -# add examples -add_subdirectory(examples) - -# add tests -add_subdirectory(tests) \ No newline at end of file diff --git a/include/neoSYCL/extensions/nec/rt_acc.hpp b/include/neoSYCL/extensions/nec/rt_acc.hpp deleted file mode 100644 index 088ef91..0000000 --- a/include/neoSYCL/extensions/nec/rt_acc.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once -#include - -namespace neosycl::sycl::rt { - -template struct acc_ { - T* p_; - size_t r_[6]; - - acc_(T* p = nullptr) : p_(p), r_{1, 1, 1, 0, 0, 0} {} - acc_(T* p, neosycl::sycl::range<1> r) : p_(p), r_{r[0], 1, 1, 0, 0, 0} {} - acc_(T* p, neosycl::sycl::range<2> r) : p_(p), r_{r[0], r[1], 1, 0, 0, 0} {} - acc_(T* p, neosycl::sycl::range<3> r) - : p_(p), r_{r[0], r[1], r[2], 0, 0, 0} {} - ~acc_() = default; - - inline T& operator[](size_t i) { return p_[i]; } - inline T& operator[](neosycl::sycl::id<3>& i) { - return p_[(i[2] * r_[1] + i[1]) * r_[0] + i[0]]; - } - inline T& operator[](neosycl::sycl::id<2>& i) { - return p_[i[1] * r_[0] + i[0]]; - } - inline T& operator[](neosycl::sycl::id<1>& i) { return p_[i[0]]; } -}; - -} // namespace neosycl::sycl::rt diff --git a/include/neoSYCL/extensions/nec/ve_context_info.hpp b/include/neoSYCL/extensions/nec/ve_context_info.hpp index c9d2ea4..0e4e4b1 100644 --- a/include/neoSYCL/extensions/nec/ve_context_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_context_info.hpp @@ -10,38 +10,42 @@ class ve_context_info : public detail::context_info { VEContext ctx; VEContext create_ctx(VEProc proc) { - struct veo_thr_ctxt *c = veo_context_open(proc.ve_proc); - DEBUG_INFO("veo_ctxt created: %#x", (size_t)c); + struct veo_thr_ctxt* c = veo_context_open(proc.ve_proc); + DEBUG_INFO("veo_ctxt created: %lx", (size_t)c); return VEContext{c}; } void free_ctx() { - DEBUG_INFO("veo_ctxt released: %#x", (size_t)ctx.ve_ctx); + DEBUG_INFO("veo_ctxt released: %lx", (size_t)ctx.ve_ctx); int rt = veo_context_close(ctx.ve_ctx); if (rt != veo_command_state::VEO_COMMAND_OK) { - PRINT_ERR("veo_context_close() failed: %#x, retval=%d", + PRINT_ERR("veo_context_close() failed: %lx, retval=%d", (size_t)ctx.ve_ctx, rt); } } - VEProc create_proc(const string_class &lib_path = DEFAULT_VE_LIB, + VEProc create_proc(const string_class& lib_path = DEFAULT_VE_LIB, int ve_node = DEFAULT_VE_NODE) { - struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); + struct veo_proc_handle* ve_proc = veo_proc_create(ve_node); if (!ve_proc) { PRINT_ERR("veo_proc_create(%d) failed", ve_node); throw ve_exception("create_proc() failed"); } - DEBUG_INFO("veo_proc created: %#x", (size_t)ve_proc); + DEBUG_INFO("veo_proc created: %lx", (size_t)ve_proc); const char* env = getenv(ENV_VE_KERNEL); - string_class fn(env?env:lib_path); + string_class fn(env ? env : lib_path); uint64_t handle = veo_load_library(ve_proc, fn.c_str()); - DEBUG_INFO("kernel lib loaded: %#x, %s", (size_t)ve_proc, fn.c_str()); + if (handle == 0) { + PRINT_ERR("veo_load_library failed: %s", fn.c_str()); + throw ve_exception("create_proc failed"); + } + DEBUG_INFO("kernel lib loaded: %lx, %s", (size_t)ve_proc, fn.c_str()); return nec::VEProc{ve_proc, handle}; } void free_proc() { - DEBUG_INFO("veo_proc released: %#x", (size_t)proc.ve_proc); + DEBUG_INFO("veo_proc released: %lx", (size_t)proc.ve_proc); int rt = veo_proc_destroy(proc.ve_proc); if (rt != veo_command_state::VEO_COMMAND_OK) { PRINT_ERR("veo_proc_destroy() failed"); diff --git a/include/neoSYCL/extensions/nec/ve_info.hpp b/include/neoSYCL/extensions/nec/ve_info.hpp index 8f49a48..88591ee 100644 --- a/include/neoSYCL/extensions/nec/ve_info.hpp +++ b/include/neoSYCL/extensions/nec/ve_info.hpp @@ -6,21 +6,21 @@ namespace neosycl::sycl::extensions::nec { -const int DEFAULT_VE_NODE = -1; -const string_class DEFAULT_VE_LIB = "./kernel.so"; -const char* ENV_VE_KERNEL = "NEOSYCL_VE_KERNEL"; +constexpr int DEFAULT_VE_NODE = -1; +const char* DEFAULT_VE_LIB = "./kernel.so"; +const char* ENV_VE_KERNEL = "NEOSYCL_VE_KERNEL"; struct VEProc { - struct veo_proc_handle *ve_proc; + struct veo_proc_handle* ve_proc; uint64_t handle; }; struct VEContext { - struct veo_thr_ctxt *ve_ctx; + struct veo_thr_ctxt* ve_ctx; }; class ve_exception : public sycl::exception { public: - ve_exception(const string_class &msg) : sycl::exception(msg) {} + ve_exception(const string_class& msg) : sycl::exception(msg) {} }; } // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp index d1b768e..ae6446e 100644 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ b/include/neoSYCL/extensions/nec/ve_task_handler.hpp @@ -16,6 +16,11 @@ class task_handler_ve : public detail::task_handler { } ~task_handler_ve() { veo_args_free(argp_); } + void run(shared_ptr_class k) override { + DEBUG_INFO("run(): %s", k->get_name()); + call_kernel_func(k); + } + kernel* create_kernel(const char* s) override { kernel_info_ve* ki = new kernel_info_ve(s); @@ -32,18 +37,18 @@ class task_handler_ve : public detail::task_handler { } void set_capture(shared_ptr_class k, void* p, size_t sz) override { - DEBUG_INFO("set capture: %s %#x %#x", k->get_name(), (size_t)proc_.ve_proc, + DEBUG_INFO("set capture: %s %lx %lx", k->get_name(), (size_t)proc_.ve_proc, (size_t)proc_.handle); kernel::info_type ki = k->get_kernel_info(); shared_ptr_class kiv = std::dynamic_pointer_cast(ki); if (kiv == nullptr) { - PRINT_ERR("invalid kernel_info: %#x", ki.get()); + PRINT_ERR("invalid kernel_info: %lx", (size_t)ki.get()); throw exception("set_capture() failed"); } - DEBUG_INFO("set capture: %#x %#x %#x", (size_t)proc_.ve_proc, (size_t)p, + DEBUG_INFO("set capture: %lx %lx %lx", (size_t)proc_.ve_proc, (size_t)p, sz); int rt = veo_write_mem(proc_.ve_proc, kiv->capt_, p, sz); if (rt != VEO_COMMAND_OK) { @@ -57,7 +62,7 @@ class task_handler_ve : public detail::task_handler { shared_ptr_class kiv = std::dynamic_pointer_cast(ki); if (kiv == nullptr) { - PRINT_ERR("invalid kernel_info: %#x", ki.get()); + PRINT_ERR("invalid kernel_info: %lx", (size_t)ki.get()); throw exception("set_range() failed"); } DEBUG_INFO("range : %s %lu %lu %lu", k->get_name(), r[0], r[1], r[2]); @@ -90,6 +95,7 @@ class task_handler_ve : public detail::task_handler { return argp; } +#if 0 void single_task(shared_ptr_class k, const std::function& func) override { DEBUG_INFO("single_task(): %s", k->get_name()); @@ -116,11 +122,12 @@ class task_handler_ve : public detail::task_handler { DEBUG_INFO("parallel_for_3d(): %s", k->get_name()); call_kernel_func(k); } +#endif detail::SUPPORT_PLATFORM_TYPE type() override { return detail::VE; } int find_buf(container_type d) { - for (int j = 0; j < bufs_.size(); j++) { + for (size_t j = 0; j < bufs_.size(); j++) { if (d->get_raw_ptr() == bufs_[j].buf->get_raw_ptr()) { return j; } @@ -168,7 +175,7 @@ class task_handler_ve : public detail::task_handler { throw exception("alloc_mem() failed"); } - DEBUG_INFO("memory alloc: vaddr=%#x, size=%lu", ve_addr_int, size_in_byte); + DEBUG_INFO("memory alloc: vaddr=%lx, size=%lu", ve_addr_int, size_in_byte); buf_info bi{d, ve_addr_int, to_be_updated}; bufs_.push_back(bi); @@ -176,7 +183,7 @@ class task_handler_ve : public detail::task_handler { mode != access::mode::discard_read_write) { DEBUG_INFO("memory copy (h2v): " - "vaddr=%#x, haddr=%#x, size=%lu", + "vaddr=%lx, haddr=%lx, size=%lu", (size_t)ve_addr_int, (size_t)d->get_raw_ptr(), size_in_byte); rt = veo_write_mem(proc_.ve_proc, ve_addr_int, d->get_raw_ptr(), size_in_byte); @@ -201,7 +208,7 @@ class task_handler_ve : public detail::task_handler { size_t size_in_byte = bi.buf->get_size(); uint64_t device_ptr = bi.ptr; DEBUG_INFO("memory copy (v2h): " - "vaddr=%#x, haddr=%#x, size=%lu", + "vaddr=%lx, haddr=%lx, size=%lu", (size_t)device_ptr, (size_t)bi.buf->get_raw_ptr(), size_in_byte); // do copy @@ -215,14 +222,14 @@ class task_handler_ve : public detail::task_handler { } void copy_back() override { - for (int i = 0; i < bufs_.size(); i++) { + for (size_t i = 0; i < bufs_.size(); i++) { copy_back(bufs_[i]); } } private: - VEContext ctx_; VEProc proc_; + VEContext ctx_; buffer_type bufs_; struct veo_args* argp_; @@ -231,7 +238,7 @@ class task_handler_ve : public detail::task_handler { shared_ptr_class kiv = std::dynamic_pointer_cast(ki); if (kiv == nullptr) { - PRINT_ERR("invalid kernel_info: %#x", ki.get()); + PRINT_ERR("invalid kernel_info: %lx", (size_t)ki.get()); throw exception("set_capture() failed"); } diff --git a/include/neoSYCL/kout/Kout.hpp b/include/neoSYCL/kout/Kout.hpp new file mode 100644 index 0000000..d19dbca --- /dev/null +++ b/include/neoSYCL/kout/Kout.hpp @@ -0,0 +1,36 @@ +/******************************************************************** +Copyright (c) 2021 Hiroyuki Takizawa + +This software is released under the MIT License, see LICENSE.txt. +**********************************************************************/ +#pragma once +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/Basic/Version.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include +#include +#include +#include +#include + +#define SYCL_ACCESSOR "neosycl::sycl::accessor" +#define SYCL_HANDLER "class neosycl::sycl::handler" +#define SYCL_SINGLE_TASK "single_task" +#define SYCL_PARALLEL_FOR "parallel_for" + +using namespace std; +using namespace llvm; +using namespace clang; +using namespace clang::tooling; diff --git a/include/neoSYCL/kout/KoutPrinterHelper.hpp b/include/neoSYCL/kout/KoutPrinterHelper.hpp new file mode 100644 index 0000000..b02e008 --- /dev/null +++ b/include/neoSYCL/kout/KoutPrinterHelper.hpp @@ -0,0 +1,24 @@ +#pragma once + +class KoutPrinterHelper : public PrinterHelper { + ASTContext& ast_; + +public: + KoutPrinterHelper(ASTContext& a) : ast_(a) {} + + bool Visit(CXXOperatorCallExpr*, llvm::raw_ostream&); + bool Visit(DeclRefExpr*, llvm::raw_ostream&); + + bool handledStmt(Stmt* s, llvm::raw_ostream& os) { + // cerr << s->getStmtClassName() << endl; + auto op = dyn_cast(s); + if (op) + return Visit(op, os); + + auto var = dyn_cast(s); + if (var) + return Visit(var, os); + + return false; + } +}; diff --git a/include/neoSYCL/kout/KoutVisitor.hpp b/include/neoSYCL/kout/KoutVisitor.hpp new file mode 100644 index 0000000..1473112 --- /dev/null +++ b/include/neoSYCL/kout/KoutVisitor.hpp @@ -0,0 +1,32 @@ +#pragma once +#include "Kout.hpp" +#include "KoutPrinterHelper.hpp" +#include "VarDeclFinder.hpp" + +class KoutVisitor : public RecursiveASTVisitor { +public: + KoutVisitor(Rewriter& R, ASTContext& ast) + : TheRewriter(R), kernCode(kcode_), ast_(ast) {} + + bool shouldVisitTemplateInstantiations() { return true; } + + void printAccessor(llvm::raw_string_ostream& st, Decl* d); + void printVar(llvm::raw_string_ostream& st, Decl* d); + void printLoop(llvm::raw_string_ostream& st, CXXMethodDecl* func, Decl* d, + int dim); + + void checkSingleTaskFunc(CXXMemberCallExpr* ce, CXXMethodDecl* callee, + std::string& text); + void checkParallelForFunc(CXXMemberCallExpr* ce, CXXMethodDecl* callee, + std::string& text); + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr* ce); + + string& getDeviceCode() { return kcode_; } + +private: + Rewriter& TheRewriter; + string kcode_; + raw_string_ostream kernCode; + ASTContext& ast_; +}; diff --git a/include/neoSYCL/kout/VarDeclFinder.hpp b/include/neoSYCL/kout/VarDeclFinder.hpp new file mode 100644 index 0000000..e36be4c --- /dev/null +++ b/include/neoSYCL/kout/VarDeclFinder.hpp @@ -0,0 +1,35 @@ +#pragma once +#include "Kout.hpp" + +class VarDeclFinder : public RecursiveASTVisitor { +public: + virtual bool shouldVisitTemplateInstantiations() { return true; } + + bool isInList(std::vector& l, Decl* d) { + for (const auto& item : l) { + if (item == d) { + return true; + } + } + return false; + } + + bool isAccessor(std::string& type_name) { + std::regex re("(" + string(SYCL_ACCESSOR) + ")<.*>"); + std::smatch result; + if (std::regex_search(type_name, result, re)) { + type_name = result[1].str(); + return true; + } + return false; + } + + bool VisitDecl(Decl* d); + bool VisitDeclRefExpr(DeclRefExpr* e); + bool VisitMemberExpr(MemberExpr* e); + + std::vector vlist; + std::vector alist; + std::vector decl_list; + std::vector parm_list; +}; diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index fa9e2c2..fe3475f 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -11,6 +11,8 @@ #include "sycl/item.hpp" #include "sycl/id.hpp" +#include "sycl/runtime/acc.hpp" + #ifndef ___NEOSYCL_KERNEL_RUNTIME_ONLY___ #include "sycl/device_selector.hpp" #include "sycl/platform.hpp" @@ -33,10 +35,6 @@ #include "sycl/buffer.hpp" #endif // ___NEOSYCL_KERNEL_RUNTIME_ONLY___ -#ifdef BUILD_VE -#include "extensions/nec/rt_acc.hpp" -#endif - namespace neosycl { using namespace neosycl::sycl; diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index b8c0926..aeedeed 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -16,37 +16,36 @@ template class accessor { friend class handler; - public: template - accessor(buffer &bufferRef, - const property_list &propList = {}) + accessor(buffer& bufferRef, + const property_list& propList = {}) : data(bufferRef.data), accessRange(bufferRef.get_range()) {} template - accessor(buffer &bufferRef, - range accessRange, const property_list &propList = {}) + accessor(buffer& bufferRef, + range accessRange, const property_list& propList = {}) : data(bufferRef.data), accessRange(accessRange) {} template - accessor(buffer &bufferRef, + accessor(buffer& bufferRef, range accessRange, id accessOffset, - const property_list &propList = {}) + const property_list& propList = {}) : data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) {} template - accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, range accessRange, - const property_list &propList = {}) + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, range accessRange, + const property_list& propList = {}) : data(bufferRef.data), accessRange(accessRange), accessOffset(0) { bufferRef.push_context(commandGroupHandlerRef, accessMode); } template - accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, range accessRange, - id accessOffset, const property_list &propList = {}) + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, range accessRange, + id accessOffset, const property_list& propList = {}) : data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) { bufferRef.push_context(commandGroupHandlerRef, accessMode); @@ -69,7 +68,7 @@ class accessor { std::enable_if_t<((Mode == access::mode::read_write) || (Mode == access::mode::discard_read_write)) && (D == 0)>> - operator dataT &() const; + operator dataT&() const; /* Available only when: (accessMode == access::mode::write || accessMode == * access::mode::read_write || accessMode == access::mode::discard_write || @@ -81,7 +80,7 @@ class accessor { (Mode == access::mode::discard_write) || (Mode == access::mode::discard_read_write)) && (D > 0)>> - dataT &operator[](id index) const { + dataT& operator[](id index) const { size_t index_val = id2index(index); DEBUG_INFO("access with index: %d", index_val); return (*data).get(index_val); @@ -106,7 +105,7 @@ class accessor { (Mode == access::mode::discard_write) || (Mode == access::mode::discard_read_write)) && (D == 1)>> - dataT &operator[](size_t index) const { + dataT& operator[](size_t index) const { return (*data)[index]; } @@ -125,14 +124,14 @@ class accessor { (Mode == access::mode::discard_write) || (Mode == access::mode::discard_read_write)) && (D == 2)>> - dataT *operator[](size_t index) const { + dataT* operator[](size_t index) const { return (*data)[index]; } template < access::mode Mode = accessMode, int D = dimensions, typename = std::enable_if_t<(Mode == access::mode::read) && (D == 2)>> - const dataT *operator[](size_t index) const { + const dataT* operator[](size_t index) const { return (*data)[index]; } @@ -151,7 +150,7 @@ class accessor { template < access::mode Mode = accessMode, int D = dimensions, typename = std::enable_if_t<(Mode == access::mode::read) && (D == 3)>> - const dataT **operator[](size_t index) const { + const dataT** operator[](size_t index) const { return (*data)[index]; } @@ -163,14 +162,14 @@ class accessor { template > - dataT *get_pointer() const { + dataT* get_pointer() const { return data.get(); } template > - void *get_pointer() const; + void* get_pointer() const; ~accessor() = default; @@ -183,9 +182,10 @@ class accessor { size_t x = this->accessRange.get(0); size_t y = this->accessRange.get(1); if (dimensions == 2) { - return x * index[0] + index[1]; - } else if (dimensions == 3) { - return x * index[0] + y * index[1] + index[2]; + return index[0] + x*index[1]; + } + else if (dimensions == 3) { + return index[0] + x * (index[1] + y * index[2]); } return index[0]; } diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index 5acba9d..99c3138 100644 --- a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -131,7 +131,7 @@ class buffer { accessor get_access(handler& commandGroupHandler) { push_context(commandGroupHandler.get_context(), mode); - commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); + //commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); return accessor(*this); } @@ -146,7 +146,7 @@ class buffer { get_access(handler& commandGroupHandler, range accessRange, id accessOffset = {}) { push_context(commandGroupHandler.get_context(), mode); - commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); + //commandGroupHandler.get_acc_().push_back(detail::accessor_info(data, mode)); return accessor(*this, commandGroupHandler, accessRange, accessOffset); } @@ -190,8 +190,8 @@ class buffer { } private: - std::shared_ptr> data; range bufferRange; + std::shared_ptr> data; std::vector ctx_; void push_context(context c, access::mode m = access::mode::read) { diff --git a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp index fbc6ab0..b710d0e 100644 --- a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp +++ b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp @@ -27,49 +27,49 @@ class DataContainerD : public DataContainer { ptr = allocate_ptr.get(); } - DataContainerD(T *data, ArrayND r) + DataContainerD(T* data, ArrayND r) : range(r), ptr(data), allocate_ptr(nullptr) {} - DataContainerD(T *data, ArrayND r, AllocatorT allocatorT) + DataContainerD(T* data, ArrayND r, AllocatorT allocatorT) : ptr(data), alloc(allocatorT), range(r), allocate_ptr(nullptr) {} size_t get_size() override { return sizeof(T) * range.get_liner(); } size_t get_count() override { return range.get_liner(); } - T *get_ptr() const { return ptr; } + T* get_ptr() const { return ptr; } - void *get_raw_ptr() override { return (void *)get_ptr(); } + void* get_raw_ptr() override { return (void*)get_ptr(); } - T *begin() const { return ptr; } + T* begin() const { return ptr; } - T *end() const { return ptr + range.get_liner(); } + T* end() const { return ptr + range.get_liner(); } - T &get(size_t x) const { return ptr[x]; } + T& get(size_t x) const { return ptr[x]; } AllocatorT get_allocator() { return alloc; } ArrayND get_range() const { return range; } - DataContainerD(const DataContainerD &rhs) + DataContainerD(const DataContainerD& rhs) : range(rhs.range), alloc(rhs.alloc) { allocate_ptr = shared_ptr_class(alloc.allocate(range.get_liner())); ptr = allocate_ptr.get(); memcpy(ptr, rhs.ptr, sizeof(T) * range.get_liner()); } - DataContainerD(DataContainerD &&rhs) + DataContainerD(DataContainerD&& rhs) : range(rhs.range), alloc(rhs.alloc), allocate_ptr(rhs.allocate_ptr), ptr(rhs.ptr) {} - DataContainerD &operator=(const DataContainerD &rhs) { + DataContainerD& operator=(const DataContainerD& rhs) { range = rhs.range; alloc = rhs.alloc; ptr = rhs.ptr; allocate_ptr = rhs.allocate_ptr; } - DataContainerD &operator=(DataContainerD &&rhs) { + DataContainerD& operator=(DataContainerD&& rhs) { range = rhs.range; alloc = rhs.alloc; ptr = rhs.ptr; @@ -77,9 +77,9 @@ class DataContainerD : public DataContainer { } private: - ArrayND range; AllocatorT alloc; - T *ptr; + ArrayND range; + T* ptr; shared_ptr_class allocate_ptr; }; @@ -91,48 +91,48 @@ template class DataContainerND : public DataContainerD { public: - DataContainerND(const ArrayND<1> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<1>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<1> &r, AllocatorT alloc) + DataContainerND(const ArrayND<1>& r, AllocatorT alloc) : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<1> &r) + DataContainerND(T* data, const ArrayND<1>& r) : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<1> &r, AllocatorT alloc) + DataContainerND(T* data, const ArrayND<1>& r, AllocatorT alloc) : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) + DataContainerND(const DataContainerD& rhs) : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) + DataContainerND(DataContainerD&& rhs) : DataContainerD(rhs) {} - T &operator[](size_t x) const { return this->get_ptr()[x]; } + T& operator[](size_t x) const { return this->get_ptr()[x]; } }; template class DataContainerND : public DataContainerD { public: - DataContainerND(const ArrayND<2> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<2>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<2> &r, AllocatorT alloc) + DataContainerND(const ArrayND<2>& r, AllocatorT alloc) : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<2> &r) + DataContainerND(T* data, const ArrayND<2>& r) : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<2> &r, AllocatorT alloc) + DataContainerND(T* data, const ArrayND<2>& r, AllocatorT alloc) : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) + DataContainerND(const DataContainerD& rhs) : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) + DataContainerND(DataContainerD&& rhs) : DataContainerD(rhs) {} - T *operator[](size_t i) const { + T* operator[](size_t i) const { size_t x = this->get_range()[0]; return this->get_ptr() + (x * i); } @@ -148,41 +148,41 @@ class DataContainerND template struct AccessProxyND {}; template struct AccessProxyND { - AccessProxyND(const ArrayND<3> &r, T *ptr) : range(r), base_ptr(ptr) {} + AccessProxyND(const ArrayND<3>& r, T* ptr) : range(r), base_ptr(ptr) {} - T *operator[](size_t i) const { + T* operator[](size_t i) const { size_t y = range[1]; return base_ptr + i * y; } ArrayND<3> range; - T *base_ptr; + T* base_ptr; }; template class DataContainerND : public DataContainerD { public: - DataContainerND(const ArrayND<3> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<3>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<3> &r, AllocatorT alloc) + DataContainerND(const ArrayND<3>& r, AllocatorT alloc) : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<3> &r) + DataContainerND(T* data, const ArrayND<3>& r) : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<3> &r, AllocatorT alloc) + DataContainerND(T* data, const ArrayND<3>& r, AllocatorT alloc) : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) + DataContainerND(const DataContainerD& rhs) : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) + DataContainerND(DataContainerD&& rhs) : DataContainerD(rhs) {} AccessProxyND operator[](size_t i) const { size_t x = this->get_range()[0]; - T *base_ptr = this->get_ptr() + i * x; + T* base_ptr = this->get_ptr() + i * x; return AccessProxyND(this->get_range(), base_ptr); } }; diff --git a/include/neoSYCL/sycl/detail/context_info.hpp b/include/neoSYCL/sycl/detail/context_info.hpp index a649952..7254401 100644 --- a/include/neoSYCL/sycl/detail/context_info.hpp +++ b/include/neoSYCL/sycl/detail/context_info.hpp @@ -4,6 +4,9 @@ #include namespace neosycl::sycl::detail { +const char* DEFAULT_LIB = "./kernel.so"; +const char* ENV_KERNEL = "NEOSYCL_KERNEL"; + inline string_class get_kernel_name_from_class(const std::type_info& ti) { // KernelName* p; int status; @@ -41,14 +44,13 @@ class context_info { task_handler->set_capture(k, p, sz); } - template - shared_ptr_class get_kernel() { + template shared_ptr_class get_kernel() { const std::type_info& tinfo = typeid(KernelName*); - if(kernels_.count(tinfo.hash_code())) + if (kernels_.count(tinfo.hash_code())) return kernels_.at(tinfo.hash_code()); - string_class name = get_kernel_name_from_class(tinfo); + string_class name = get_kernel_name_from_class(tinfo); DEBUG_INFO("kernel class: %s", name.c_str()); kernel* k = task_handler->create_kernel(name.c_str()); shared_ptr_class p(std::move(k)); @@ -61,10 +63,20 @@ class context_info { }; class cpu_context_info : public context_info { + void* dll_; + public: cpu_context_info() : context_info() { - task_handler = handler_type(new task_handler_cpu()); + const char* env = getenv(ENV_KERNEL); + string_class fn(env ? env : DEFAULT_LIB); + dll_ = dlopen(fn.c_str(), RTLD_LAZY); + if (!dll_) { + PRINT_ERR("dlopen failed: %s", dlerror()); + throw exception("cpu_context_info() failed"); + } + DEBUG_INFO("kernel lib loaded: %lx, %s", (size_t)dll_, fn.c_str()); + task_handler = handler_type(new task_handler_cpu(dll_)); } - ~cpu_context_info() = default; + ~cpu_context_info() { dlclose(dll_); } }; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/kernel_info.hpp b/include/neoSYCL/sycl/detail/kernel_info.hpp index 2746bc5..21a9e9b 100644 --- a/include/neoSYCL/sycl/detail/kernel_info.hpp +++ b/include/neoSYCL/sycl/detail/kernel_info.hpp @@ -3,6 +3,7 @@ #include #include "neoSYCL/sycl/detail/accessor_info.hpp" +#include namespace neosycl::sycl::detail { @@ -15,7 +16,15 @@ struct kernel_info { }; struct kernel_info_cpu : public kernel_info { - kernel_info_cpu(const char* c) : kernel_info(c) {} + int (*func_)(); + void* capt_; + void* rnge_; + + kernel_info_cpu(const char* c) : kernel_info(c) { + func_ = nullptr; + capt_ = nullptr; + rnge_ = nullptr; + } }; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp index b9e08b6..76dfb4d 100644 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ b/include/neoSYCL/sycl/detail/task_handler.hpp @@ -2,71 +2,62 @@ #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP #include "neoSYCL/sycl/kernel.hpp" +#include namespace neosycl::sycl::detail { - class task_handler { - protected: - using container_type = shared_ptr_class; - using kernel_info_type = kernel::info_type; +class task_handler { +protected: + using container_type = shared_ptr_class; + using kernel_info_type = kernel::info_type; - public: - explicit task_handler() {} +public: + explicit task_handler() {} - virtual void single_task(shared_ptr_class k, - const std::function& func) = 0; + virtual void run(shared_ptr_class k) = 0; - virtual void parallel_for_1d(shared_ptr_class k, range<1> r, - const std::function)>& func, - id<1> offset) = 0; - - virtual void parallel_for_2d(shared_ptr_class k, range<2> r, - const std::function)>& func, - id<2> offset) = 0; - - virtual void parallel_for_3d(shared_ptr_class k, range<3> r, - const std::function)>& func, - id<3> offset) = 0; + virtual SUPPORT_PLATFORM_TYPE type() = 0; - virtual SUPPORT_PLATFORM_TYPE type() = 0; + virtual void* get_pointer(container_type) = 0; + virtual void* alloc_mem(container_type, + access::mode = access::mode::read) = 0; + virtual void free_mem(container_type) = 0; + virtual void copy_back() = 0; - virtual void* get_pointer(container_type) = 0; - virtual void* alloc_mem(container_type, - access::mode = access::mode::read) = 0; - virtual void free_mem(container_type) = 0; - virtual void copy_back() = 0; + virtual void set_capture(shared_ptr_class, void* p, size_t sz) = 0; + virtual void set_range(shared_ptr_class, size_t r[6]) = 0; - virtual void set_capture(shared_ptr_class, void* p, size_t sz) {} - virtual void set_range(shared_ptr_class, size_t r[6]) {} - - template - void set_range(shared_ptr_class k, range r) { - size_t sz[6] = {1, 1, 1, 0, 0, 0}; - for (size_t idx(0); idx != dim; idx++) { - sz[idx] = r[idx]; - } - set_range(k, sz); + template + void set_range(shared_ptr_class k, range r) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; } - - template - void set_range(shared_ptr_class k, range r, id i) { - size_t sz[6] = {1, 1, 1, 0, 0, 0}; - for (size_t idx(0); idx != dim; idx++) { - sz[idx] = r[idx]; - } - for (size_t idx(3); idx != dim + 3; idx++) { - sz[idx] = i[idx]; - } - set_range(k, sz); + set_range(k, sz); + } + + template + void set_range(shared_ptr_class k, range r, id i) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; + } + for (size_t idx(3); idx != dim + 3; idx++) { + sz[idx] = i[idx]; } + set_range(k, sz); + } + + virtual kernel* create_kernel(const char*) = 0; +}; - virtual kernel* create_kernel(const char*) = 0; - }; +class task_handler_cpu : public task_handler { + void* dll_; - class task_handler_cpu : public task_handler { - public: - task_handler_cpu() {} +public: + task_handler_cpu(void* p) : dll_(p) {} +#if 0 void single_task(shared_ptr_class k, const std::function& func) override { for (const accessor_info& arg : k->get_acc()) { @@ -129,21 +120,74 @@ namespace neosycl::sycl::detail { arg.release_access(); } }; - - SUPPORT_PLATFORM_TYPE type() override { return CPU; } - - void* get_pointer(container_type p) override { return p->get_raw_ptr(); } - void* alloc_mem(container_type p, access::mode = access::mode::read) override { - return nullptr; - //return p->get_raw_ptr(); +#endif + + virtual void run(shared_ptr_class k) { + kernel::info_type ki = k->get_kernel_info(); + shared_ptr_class kic = + std::dynamic_pointer_cast(ki); + if (kic == nullptr) { + PRINT_ERR("invalid kernel_info: %lx", (size_t)ki.get()); + throw exception("set_capture() failed"); } - void free_mem(container_type) override {} - void copy_back() override {} - kernel* create_kernel(const char* s) override { - return new kernel(new detail::kernel_info_cpu(s)); + for (const detail::accessor_info& acc : k->get_acc()) { + acc.acquire_access(); + alloc_mem(acc.container, acc.mode); + } + DEBUG_INFO("-- KENREL EXEC BEGIN --"); + try { + int ret_val = kic->func_(); + DEBUG_INFO("-- KERNEL EXEC END (ret=%d) --", ret_val); + // copy_out(ve_addr_list, k, proc); + } + catch (exception& e) { + PRINT_ERR("kernel execution failed: %s", e.what()); + } + for (const detail::accessor_info& acc : k->get_acc()) { + acc.release_access(); + } + } + + SUPPORT_PLATFORM_TYPE type() override { return CPU; } + + void* get_pointer(container_type p) override { return p->get_raw_ptr(); } + void* alloc_mem(container_type p, + access::mode = access::mode::read) override { + return nullptr; + // return p->get_raw_ptr(); + } + void free_mem(container_type) override {} + void copy_back() override {} + + void set_capture(shared_ptr_class k, void* p, size_t sz) { + auto ki = std::dynamic_pointer_cast( + k->get_kernel_info()); + if (ki && ki->capt_) + std::memcpy(ki->capt_, p, sz); + } + + void set_range(shared_ptr_class k, size_t r[6]) { + auto ki = std::dynamic_pointer_cast( + k->get_kernel_info()); + if (ki && ki->rnge_) + std::memcpy(ki->rnge_, r, sizeof(size_t) * 6); + } + + kernel* create_kernel(const char* s) override { + auto inf = new detail::kernel_info_cpu(s); + inf->func_ = reinterpret_cast(dlsym(dll_, s)); + string_class capt = string_class("__") + s + "_obj__"; + string_class rnge = string_class("__") + s + "_range__"; + inf->capt_ = dlsym(dll_, capt.c_str()); + inf->rnge_ = dlsym(dll_, rnge.c_str()); + if (!inf->func_) { + PRINT_ERR("dlsym() failed: %s", s); + throw exception("create_kernel() failed"); } - }; + return new kernel(inf); + } +}; } // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 5c2849b..75c15e7 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -29,77 +29,77 @@ class handler { : bind_device_(std::move(dev)), counter_(std::move(counter)), kernel_(nullptr), ctx_(c) {} - template void copy_capture(KernelName* p) { + template + void run(range r, KernelType kernelFunc) { detail::context_info* cinfo = ctx_.get_context_info(); + handler_type task_handler = cinfo->task_handler; + kernel_ = cinfo->get_kernel(); + kernel_->get_acc().clear(); + task_handler->set_range(kernel_, r); - cinfo->set_capture(kernel_, p, sizeof(KernelName)); + kernelFunc(); + + submit_task([h = task_handler, k = kernel_]() { h->run(k); }); } template - void set_kernel(KernelType kernelFunc) { + void run(KernelType kernelFunc) { detail::context_info* cinfo = ctx_.get_context_info(); + handler_type task_handler = cinfo->task_handler; + kernel_ = cinfo->get_kernel(); + kernel_->get_acc().clear(); - kernel_ = cinfo->get_kernel(); - // kernel_->name = detail::get_kernel_name_from_class(); - if (bind_device_.type() != detail::VE) - return; kernelFunc(); + + submit_task([h = task_handler, k = kernel_]() { h->run(k); }); + } + + template void copy_capture(KernelName* p) { + detail::context_info* cinfo = ctx_.get_context_info(); + + cinfo->set_capture(kernel_, p, sizeof(KernelName)); } template void single_task(KernelType kernelFunc) { + PRINT_ERR("This code is not converted yet.") + abort(); +#if 0 handler_type task_handler = ctx_.get_context_info()->task_handler; submit_task([f = kernelFunc, h = task_handler, k = kernel_]() { h->single_task(k, f); }); - } - - template - void submit_parallel_for(handler_type handler, range<3> numWorkItems, - id<3> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel_]() { h->parallel_for_3d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<2> numWorkItems, - id<2> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel_]() { h->parallel_for_2d(k, n, f, o); }); - } - - template - void submit_parallel_for(handler_type handler, range<1> numWorkItems, - id<1> offset, KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, - h = std::move(handler), - k = kernel_]() { h->parallel_for_1d(k, n, f, o); }); +#endif } template void parallel_for(range numWorkItems, KernelType kernelFunc) { + PRINT_ERR("This code is not converted yet.") + abort(); +#if 0 handler_type task_handler = ctx_.get_context_info()->task_handler; task_handler->set_range(kernel_, numWorkItems); submit_parallel_for(task_handler, numWorkItems, id(), kernelFunc); +#endif } template void parallel_for(range numWorkItems, id workItemOffset, KernelType kernelFunc) { + PRINT_ERR("This code is not converted yet.") + abort(); +#if 0 handler_type task_handler = ctx_.get_context_info()->task_handler; task_handler->set_range(kernel_, numWorkItems, workItemOffset); -#if 0 string_class range_obj = "__" + kernel_->get_name() + "_range__"; task_handler->set_range(range_obj.c_str(), numWorkItems); string_class offset_obj = "__" + kernel_->get_name() + "_offset__"; task_handler->set_range(offset_obj.c_str(), workItemOffset); -#endif submit_parallel_for(task_handler, numWorkItems, workItemOffset, kernelFunc); +#endif } // template @@ -136,10 +136,22 @@ class handler { accessor_list& get_acc_() { return acc_; } + template + neosycl::sycl::rt::acc_ map_(sycl::accessor acc) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + + kernel_->get_acc().push_back(detail::accessor_info(acc.data, m)); + std::memcpy(sz + 0, &acc.get_range()[0], sizeof(size_t) * D); + std::memcpy(sz + 3, &acc.get_offset()[0], sizeof(size_t) * D); + return neosycl::sycl::rt::acc_{ + get_pointer(acc), {sz[0], sz[1], sz[2], sz[3], sz[4], sz[5]}}; + } + private: - kernel_type kernel_; device bind_device_; counter_type counter_; + kernel_type kernel_; context ctx_; accessor_list acc_; diff --git a/include/neoSYCL/sycl/kernel.hpp b/include/neoSYCL/sycl/kernel.hpp index d1bf88b..40b5374 100644 --- a/include/neoSYCL/sycl/kernel.hpp +++ b/include/neoSYCL/sycl/kernel.hpp @@ -20,8 +20,8 @@ class kernel { const char* get_name() const { return info_->name.c_str(); } private: - info_type info_; accessor_list acc_; + info_type info_; }; } // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/queue.hpp b/include/neoSYCL/sycl/queue.hpp index 0e081ca..3ec7f61 100644 --- a/include/neoSYCL/sycl/queue.hpp +++ b/include/neoSYCL/sycl/queue.hpp @@ -95,8 +95,8 @@ class queue { private: device bind_device; - async_handler err_handler; shared_ptr_class counter; + async_handler err_handler; context ctx; }; diff --git a/include/neoSYCL/sycl/runtime/acc.hpp b/include/neoSYCL/sycl/runtime/acc.hpp new file mode 100644 index 0000000..e572b87 --- /dev/null +++ b/include/neoSYCL/sycl/runtime/acc.hpp @@ -0,0 +1,18 @@ +#pragma once +#include + +namespace neosycl::sycl::rt { + +template struct acc_ { + T* p_; + size_t r_[6]; + + inline T& operator[](size_t i) { return p_[i]; } + inline T& operator[](neosycl::sycl::id<3>& i) { + return p_[(i[2] * r_[1] + i[1]) * r_[0] + i[0]]; + } + inline T& operator[](neosycl::sycl::id<2>& i) { return p_[i[1] * r_[0] + i[0]]; } + inline T& operator[](neosycl::sycl::id<1>& i) { return p_[i[0]]; } +}; + +} // namespace neosycl::sycl::rt diff --git a/kernel_generator/CMakeLists.txt b/kernel_generator/CMakeLists.txt deleted file mode 100644 index f6024d7..0000000 --- a/kernel_generator/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ - -# find Clang -find_package(Clang REQUIRED CONFIG) -include_directories(${CLANG_INCLUDE_DIRS}) -add_definitions(${CLANG_DEFINITIONS}) -message(STATUS "Clang_FOUND ${Clang_FOUND}") -message(STATUS "Clang_DIR ${Clang_DIR}") -message(STATUS "Using ClangConfig.cmake in: ${Clang_DIR}") - -# find LLVM -find_package(LLVM REQUIRED CONFIG) - -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -message(STATUS "LLVM_FOUND ${LLVM_FOUND}") -message(STATUS "LLVM_DIR ${LLVM_DIR}") -message(STATUS "LLVM_INCLUDE_DIRS: ${LLVM_INCLUDE_DIRS}") -message(STATUS "LLVM_DEFINITIONS: ${LLVM_DEFINITIONS}") -message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - -add_definitions(${LLVM_DEFINITIONS}) -include_directories(${LLVM_INCLUDE_DIRS}) -list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") - -add_subdirectory(third_party) - -# include all headers here -include_directories( - include - third_party/fmt/include -) - - -if (CMAKE_BUILD_TYPE STREQUAL Debug) - ADD_DEFINITIONS(-DDEBUG) -endif () - -add_subdirectory(src) - diff --git a/kernel_generator/include/exceptions.h b/kernel_generator/include/exceptions.h deleted file mode 100644 index 282c645..0000000 --- a/kernel_generator/include/exceptions.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ -#define CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ - -namespace sycl { - -class BaseException : public std::exception { - private: - std::string message; - - public: - BaseException(const std::string &message) : message(message) {} - - const char *what() const noexcept override { - return this->message.c_str(); - } -}; - -class ClangCastException : public BaseException { - public: - ClangCastException(const std::string &message) : BaseException(message) {} -}; - -class KernelValidateException : public BaseException { - public: - KernelValidateException(const std::string &message) : BaseException(message) {} -}; - -} - -#endif //CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ diff --git a/kernel_generator/include/helpers.h b/kernel_generator/include/helpers.h deleted file mode 100644 index 364c663..0000000 --- a/kernel_generator/include/helpers.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_HELPERS_H_ -#define CUSTOM_SYCL_INCLUDE_HELPERS_H_ - -#include - -#include "clang/AST/AST.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" - -#include "exceptions.h" - -#include -#include - -#ifdef DEBUG -#define PRINT_DEBUG(...) std::cout<< "[DEBUG] "<< fmt::format(__VA_ARGS__) < -std::string decl2str(CompilerInstance &ci, T *d) { - SourceManager &sm = ci.getSourceManager(); - SourceLocation start(d->getBeginLoc()), end(d->getEndLoc()); - SourceLocation e(Lexer::getLocForEndOfToken(end, 0, sm, ci.getLangOpts())); - return std::string(sm.getCharacterData(start), sm.getCharacterData(e) - sm.getCharacterData(start)); -} - -template -T *clang_cast(N any) { - if (isa(any)) { - return cast(any); - } else { - any->dump(); - throw ClangCastException("Clang node cast failed"); - } -} - -} - -#endif //CUSTOM_SYCL_INCLUDE_HELPERS_H_ diff --git a/kernel_generator/include/kernel.h b/kernel_generator/include/kernel.h deleted file mode 100644 index c86912f..0000000 --- a/kernel_generator/include/kernel.h +++ /dev/null @@ -1,72 +0,0 @@ -// -// Created by WhiteBlue on 2020/5/20. -// - -#ifndef CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ -#define CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ - -#include -#include -#include -#include - -#include "clang/AST/AST.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" - -#include "helpers.h" - -#include "fmt/format.h" - -using namespace clang; - -namespace sycl { - -struct KernelArgument { - std::string name; - std::string type; - int dimensions; -}; - -struct KernelInfo { - std::vector params; - std::string kernel_name; - std::string kernel_body; - std::string index_name; - bool parallel; - - KernelInfo(std::vector params, std::string kernel_name, - std::string kernel_body) - : params(std::move(params)), kernel_name(std::move(kernel_name)), - kernel_body(std::move(kernel_body)), parallel(false) {} - - KernelInfo(std::vector params, std::string kernel_name, - std::string kernel_body, std::string index_name) - : params(std::move(params)), kernel_name(std::move(kernel_name)), - kernel_body(std::move(kernel_body)), index_name(std::move(index_name)), - parallel(true) {} -}; - -struct ProgramContext { - std::map kernels; - std::map structs; -}; - -std::vector -analyze_arguments_dependency(CompilerInstance &ci, - const CXXRecordDecl *lambda_func_decl, - ProgramContext &context); - -} // namespace sycl - -#endif // CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ diff --git a/kernel_generator/include/kernel_translator.h b/kernel_generator/include/kernel_translator.h deleted file mode 100644 index 2b480a4..0000000 --- a/kernel_generator/include/kernel_translator.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ -#define CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ - -#include "kernel.h" -#include "fmt/format.h" - -namespace sycl { - -class KernelTranslator { - - public: - virtual std::string body_to_decl_str(const ProgramContext &context, const KernelInfo &info) = 0; - - virtual std::string before_kernel(const ProgramContext &context) = 0; - - virtual std::string after_kernel(const ProgramContext &context) = 0; - -}; - -}; - -#endif //CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ diff --git a/kernel_generator/include/parallel_task.h b/kernel_generator/include/parallel_task.h deleted file mode 100644 index 1840de4..0000000 --- a/kernel_generator/include/parallel_task.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ -#define CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Lex/Preprocessor.h" - -#include "exceptions.h" -#include "kernel.h" - -namespace sycl { - -KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context); - -} - -#endif //CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ diff --git a/kernel_generator/include/single_task.h b/kernel_generator/include/single_task.h deleted file mode 100644 index 74f1b44..0000000 --- a/kernel_generator/include/single_task.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ -#define CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Rewrite/Frontend/FrontendActions.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" -#include "clang/Frontend/FrontendPluginRegistry.h" - -#include "exceptions.h" -#include "helpers.h" -#include "kernel.h" - -using namespace clang; - -namespace sycl { - -KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context); - -} - -#endif //CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ diff --git a/kernel_generator/include/ve_kernel_translator.h b/kernel_generator/include/ve_kernel_translator.h deleted file mode 100644 index 1ee2f22..0000000 --- a/kernel_generator/include/ve_kernel_translator.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ -#define CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ - -#include "kernel_translator.h" - -namespace sycl { - -class VEKernelTranslator : public KernelTranslator { - public: - std::string body_to_decl_str(const ProgramContext &context, const KernelInfo &info) override; - - std::string before_kernel(const ProgramContext &context) override; - - std::string after_kernel(const ProgramContext &context) override; - -}; - -} - -#endif //CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ diff --git a/kernel_generator/src/CMakeLists.txt b/kernel_generator/src/CMakeLists.txt deleted file mode 100644 index 05e85f0..0000000 --- a/kernel_generator/src/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ - -# add executable file -add_executable( - clang_tool - - single_task.cpp - parallel_task.cpp - kernel.cpp - clang_tool.cpp - ve_kernel_translator.cpp -) - -# link clang libraries -target_link_libraries(clang_tool - - clangFrontend - clangSerialization - clangDriver - clangParse - clangSema - clangAnalysis - clangAST - clangBasic - clangEdit - clangLex - clangTooling - clangASTMatchers - clangAnalysis - clangEdit - clangAST - clangLex - clangRewrite - clangToolingCore - - ${llvm_libs} - - fmt - ) - - - diff --git a/kernel_generator/src/clang_tool.cpp b/kernel_generator/src/clang_tool.cpp deleted file mode 100644 index 56b6b3a..0000000 --- a/kernel_generator/src/clang_tool.cpp +++ /dev/null @@ -1,185 +0,0 @@ -#include -#include -#include - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" -#include "fmt/format.h" - -#include "kernel.h" -#include "parallel_task.h" -#include "single_task.h" -#include "ve_kernel_translator.h" - -static llvm::cl::OptionCategory MyToolCategory("Additional options"); -static llvm::cl::opt - OutputNameOption("o", llvm::cl::desc("output filename"), - llvm::cl::value_desc("filename"), - llvm::cl::cat(MyToolCategory)); - -static llvm::cl::extrahelp - CommonHelp(clang::tooling::CommonOptionsParser::HelpMessage); -static llvm::cl::extrahelp MoreHelp("SYCL Kernel generate tool"); - -const static std::string KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME = -"single_task"; - // "HIGHLIGHT_KERNEL_SINGLE_TASK"; -const static std::string KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME = -"parallel_for"; - //"HIGHLIGHT_KERNEL_PARALLEL"; -const static std::string DEFAULT_OUTPUT_NAME = "kernel"; - -using namespace sycl; - -class SYCLVisitor : public clang::RecursiveASTVisitor { -private: - clang::Rewriter &rewriter; - clang::SourceManager &manager; - clang::CompilerInstance &instance; - ProgramContext context; - -public: - SYCLVisitor(clang::CompilerInstance &ci, clang::SourceManager &sm, - clang::Rewriter &re) - : instance(ci), rewriter(re), manager(sm) {} - - virtual bool shouldVisitTemplateInstantiations() { return true; } - - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - // For debugging, dumping the AST nodes will show which nodes are already - // being visited. - Declaration->getDeclName().getUsingDirectiveName(); - - // The return value indicates whether we want the visitation to proceed. - // Return false to stop the traversal of the AST. - return true; - } - - bool VisitStmt(clang::Stmt *s) { - try { - - if (clang::isa(s)) { - clang::CallExpr *call_expr = clang::cast(s); - clang::FunctionDecl *callee = call_expr->getDirectCallee(); - if (callee && callee->getIdentifier()) { - // Get the func which name start with SYCL_PREFIX - if (callee->getName().compare( - KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME) == 0) { - KernelInfo info = - parse_single_task_func(this->instance, callee, context); - if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert( - std::pair(info.kernel_name, info)); - } - } else if (callee->getName().compare( - KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME) == 0) { - KernelInfo info = - parse_parallel_task_func(this->instance, callee, context); - if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert( - std::pair(info.kernel_name, info)); - } - } - } - } - } catch (BaseException e) { - llvm::errs() << e.what() << "\n"; - return true; - } - return true; - } - - ProgramContext get_context() { return context; } -}; - -class SYCLASTConsumer : public clang::ASTConsumer { -private: - std::unique_ptr visitor; - clang::Rewriter rewriter; - clang::SourceManager &manager; - clang::CompilerInstance &instance; - VEKernelTranslator translator; - -public: - explicit SYCLASTConsumer(clang::CompilerInstance &ci) - : instance(ci), manager(ci.getSourceManager()), - visitor(std::make_unique(ci, ci.getSourceManager(), - this->rewriter)) { - this->rewriter.setSourceMgr(ci.getSourceManager(), ci.getLangOpts()); - } - // Retrieve the AST analysis result - virtual void HandleTranslationUnit(clang::ASTContext &ctx) { - visitor->TraverseAST(ctx); - - std::string file_name = DEFAULT_OUTPUT_NAME; - if (OutputNameOption.size() == 1) { - file_name = OutputNameOption.c_str(); - } - - ProgramContext program_context = visitor->get_context(); - - int kernel_count = program_context.kernels.size(); - PRINT_INFO("Found {} kernels", kernel_count); - if (kernel_count == 0) { - return; - } - - // create files - std::ofstream kernel_out; - kernel_out.open(file_name + ".c", std::ios::out); - - // Write include headers here - std::string kernel_code; - kernel_code.append(translator.before_kernel(program_context)) - .append(LINE_BREAK); - - // Output all kernels - auto kernels = program_context.kernels; - for (auto &kernel : kernels) { - std::string kernel_str = - translator.body_to_decl_str(program_context, kernel.second); - kernel_code.append(kernel_str).append(LINE_BREAK); - } - kernel_code.append(translator.after_kernel(program_context)) - .append(LINE_BREAK); - - // write kernel code - kernel_out << kernel_code << std::endl; - - kernel_out.close(); - } -}; - -class SYCLFrontendAction : public clang::PluginASTAction { -public: - virtual std::unique_ptr - CreateASTConsumer(clang::CompilerInstance &ci, llvm::StringRef file) { - return std::make_unique(ci); - } - - bool ParseArgs(const clang::CompilerInstance &ci, - const std::vector &args) { - return true; - } -}; - -int main(int argc, const char **argv) { - llvm::Expected op = - clang::tooling::CommonOptionsParser::create(argc, argv, MyToolCategory, - llvm::cl::OneOrMore); - clang::tooling::ClangTool tool(op->getCompilations(), - op->getSourcePathList()); - return tool.run( - clang::tooling::newFrontendActionFactory().get()); -} \ No newline at end of file diff --git a/kernel_generator/src/kernel.cpp b/kernel_generator/src/kernel.cpp deleted file mode 100644 index f2c59d1..0000000 --- a/kernel_generator/src/kernel.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "kernel.h" - -namespace sycl { - -std::vector -analyze_arguments_dependency(CompilerInstance &ci, - const CXXRecordDecl *lambda_func_decl, - ProgramContext &context) { - std::vector args; - - // we decide the argument order by parent context - for (Decl *d : lambda_func_decl->getParent()->decls()) { - if (isa(d)) { - VarDecl *var = cast(d); - CXXRecordDecl *raw_decl = var->getType()->getAsCXXRecordDecl(); - if (!raw_decl) { - continue; - } - std::string name = var->getIdentifier()->getName().str(); - - ClassTemplateSpecializationDecl *template_decl = - clang_cast(raw_decl); - auto template_args = template_decl->getTemplateArgs().asArray(); - - if (template_args.size() < 4) { - throw KernelValidateException("Accessor should have 4 template args"); - } - - TemplateArgument accessor_type_tmp = template_args[0]; - TemplateArgument dimensions_tmp = template_args[1]; - TemplateArgument mode_tmp = template_args[2]; - TemplateArgument target_tmp = template_args[3]; - - QualType accessor_type = accessor_type_tmp.getAsType(); - int field_dimensions = dimensions_tmp.getAsIntegral().getExtValue(); - std::string field_type = accessor_type.getAsString(); - - if (!accessor_type->isBuiltinType()) { - // not builtin type - CXXRecordDecl *type_decl = accessor_type->getAsCXXRecordDecl(); - if (type_decl) { - if (type_decl->getIdentifier()) { - std::string def_name = type_decl->getIdentifier()->getName().str(); - std::string def_body = decl2str(ci, type_decl); - - if (context.structs.count(def_name) == 0) { - context.structs.insert( - std::pair(def_name, def_body)); - } - -#ifdef DEBUG - std::cout << "Definition name: " << def_name << std::endl; - std::cout << "========= Definition body start =========" - << std::endl; - std::cout << def_body << std::endl; - std::cout << "========= Definition body end =========" - << std::endl; -#endif - } - } - } - - args.push_back(KernelArgument{name, field_type, field_dimensions}); - - PRINT_INFO( - "Found a lambda field decl, Type: {}, Name: {}, Dimensions: {}", - field_type, name, field_dimensions); - } - } - - return args; -}; - -} // namespace sycl diff --git a/kernel_generator/src/parallel_task.cpp b/kernel_generator/src/parallel_task.cpp deleted file mode 100644 index b3f9f75..0000000 --- a/kernel_generator/src/parallel_task.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include "parallel_task.h" -#include - -namespace sycl { - -KernelInfo parse_parallel_task_func(CompilerInstance &ci, - const FunctionDecl *callee, - ProgramContext &context) { - if (callee->getNumParams() != 2) { - throw KernelValidateException("Parallel kernel must have 2 param"); - } - const TemplateArgumentList *template_args = - callee->getTemplateSpecializationArgs(); - if (template_args == nullptr || template_args->size() != 3) { - throw KernelValidateException("Parallel kernel must have 3 template args"); - } - - const TemplateArgument &classname_arg = template_args->get(0); - const TemplateArgument &lambda_func_arg = template_args->get(1); - - if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - if (lambda_func_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - QualType classname_type = classname_arg.getAsType(); - QualType lambda_func_type = lambda_func_arg.getAsType(); - -#if 0 - std::string mangledName; - clang::MangleContext *mangleContext = - ci.getASTContext().createMangleContext(); - llvm::raw_string_ostream ostream(mangledName); - mangleContext->mangleCXXRTTI(classname_type, ostream); - ostream.flush(); - std::string kernelName = mangledName.substr(4, mangledName.size()); -#else - std::string kernelName = classname_type.getAsString(); - std::regex re("([^\\s\\:]+)$"); - std::smatch result; - if(std::regex_search(kernelName, result, re)){ - kernelName = result[1].str(); - } -#endif - - // Get classname here, will be used as kernel func name - std::string classname = - classname_type->getAsRecordDecl()->getDeclName().getAsString(); - - CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - - std::vector kernel_arguments = - analyze_arguments_dependency(ci, lambda_func_decl, context); - - CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); - - if (lambda_decl->getNumParams() != 1) { - throw KernelValidateException("Parallel Kernel should have Index"); - } - - std::string index_name = - lambda_decl->getParamDecl(0)->getIdentifier()->getName().str(); - -#ifdef DEBUG - std::cout << "Parallel kernel name: " << kernelName - << " , index_name: " << index_name << std::endl; - std::cout << "========= Parallel kernel body start =========" << std::endl; - std::cout << func_body << std::endl; - std::cout << "========= Parallel kernel body end =========" << std::endl; -#endif - - KernelInfo info(kernel_arguments, kernelName, func_body, index_name); - - return info; -} - -} // namespace sycl \ No newline at end of file diff --git a/kernel_generator/src/single_task.cpp b/kernel_generator/src/single_task.cpp deleted file mode 100644 index 37b55cb..0000000 --- a/kernel_generator/src/single_task.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "single_task.h" -#include - -namespace sycl { - -KernelInfo parse_single_task_func(CompilerInstance &ci, - const FunctionDecl *callee, - ProgramContext &context) { - if (callee->getNumParams() != 1) { - throw KernelValidateException("Single-task kernel must have 1 param"); - } - const TemplateArgumentList *template_args = - callee->getTemplateSpecializationArgs(); - if (template_args == nullptr || template_args->size() != 2) { - throw KernelValidateException( - "Single-task kernel must have 2 template args"); - } - - const TemplateArgument &classname_arg = template_args->get(0); - const TemplateArgument &lambda_func_arg = template_args->get(1); - - if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - if (lambda_func_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - QualType classname_type = classname_arg.getAsType(); - QualType lambda_func_type = lambda_func_arg.getAsType(); - -#if 0 - // manginling rule could be compiler-depedent. - // so it's better to avoid using it as the kenrel name. - std::string mangledName; - clang::MangleContext *mangleContext = - ci.getASTContext().createMangleContext(); - llvm::raw_string_ostream ostream(mangledName); - mangleContext->mangleCXXRTTI(classname_type, ostream); - ostream.flush(); - std::string kernelName = mangledName.substr(4, mangledName.size()); -#else - std::string kernelName= classname_type.getAsString(); - std::regex re("([^\\s\\:]+)$"); - std::smatch result; - if(std::regex_search(kernelName, result, re)){ - kernelName = result[1].str(); - } -#endif - - // Get classname here, will be used as kernel func name - std::cout << classname_type->getTypeClassName() << std::endl; - std::string classname = - classname_type->getAsRecordDecl()->getDeclName().getAsString(); - - CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - - std::vector kernel_arguments = - analyze_arguments_dependency(ci, lambda_func_decl, context); - - CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); - -#ifdef DEBUG - std::cout << "Single kernel name: " << kernelName << std::endl; - std::cout << "========= Single Kernel body start =========" << std::endl; - std::cout << func_body << std::endl; - std::cout << "========= Single Kernel body end =========" << std::endl; -#endif - - KernelInfo info(kernel_arguments, kernelName, func_body); - - return info; -}; - -} // namespace sycl diff --git a/kernel_generator/src/ve_kernel_translator.cpp b/kernel_generator/src/ve_kernel_translator.cpp deleted file mode 100644 index bc86be9..0000000 --- a/kernel_generator/src/ve_kernel_translator.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include "ve_kernel_translator.h" - -namespace sycl { - -std::string VEKernelTranslator::body_to_decl_str(const ProgramContext &context, - const KernelInfo &info) { - std::string func_params; - - // generate function params - for (const KernelArgument &arg : info.params) { - func_params += fmt::format("{} *{}, ", arg.type, arg.name); - } - - if (info.parallel) { - // generate parallel for kernel here - std::string body = fmt::format("\nfor(int {0}=0;{0}").append(LINE_BREAK); - ret.append("#include ").append(LINE_BREAK); - for (auto &def : context.structs) { - ret.append(def.second).append(";").append(LINE_BREAK); - } - return ret; -} - -std::string VEKernelTranslator::after_kernel(const ProgramContext &context) { - return ""; -} - -} // namespace sycl diff --git a/kernel_generator/third_party/CMakeLists.txt b/kernel_generator/third_party/CMakeLists.txt deleted file mode 100644 index 21ba0d7..0000000 --- a/kernel_generator/third_party/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ - -add_subdirectory(fmt) \ No newline at end of file diff --git a/kernel_generator/third_party/fmt/CMakeLists.txt b/kernel_generator/third_party/fmt/CMakeLists.txt deleted file mode 100644 index f43d5ce..0000000 --- a/kernel_generator/third_party/fmt/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -include_directories(include) -add_library(fmt SHARED src/format.cc) \ No newline at end of file diff --git a/kernel_generator/third_party/fmt/include/fmt/core.h b/kernel_generator/third_party/fmt/include/fmt/core.h deleted file mode 100644 index 0e0824f..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/core.h +++ /dev/null @@ -1,1796 +0,0 @@ -// Formatting library for C++ - the core API -// -// Copyright (c) 2012 - present, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. - -#ifndef FMT_CORE_H_ -#define FMT_CORE_H_ - -#include // std::FILE -#include -#include -#include -#include -#include -#include -#include - -// The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 60201 - -#ifdef __has_feature -# define FMT_HAS_FEATURE(x) __has_feature(x) -#else -# define FMT_HAS_FEATURE(x) 0 -#endif - -#if defined(__has_include) && !defined(__INTELLISENSE__) && \ - !(defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1600) -# define FMT_HAS_INCLUDE(x) __has_include(x) -#else -# define FMT_HAS_INCLUDE(x) 0 -#endif - -#ifdef __has_cpp_attribute -# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define FMT_HAS_CPP_ATTRIBUTE(x) 0 -#endif - -#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ - (__cplusplus >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ - (__cplusplus >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#ifdef __clang__ -# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) -#else -# define FMT_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) -# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define FMT_GCC_VERSION 0 -#endif - -#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) -# define FMT_HAS_GXX_CXX11 FMT_GCC_VERSION -#else -# define FMT_HAS_GXX_CXX11 0 -#endif - -#ifdef __NVCC__ -# define FMT_NVCC __NVCC__ -#else -# define FMT_NVCC 0 -#endif - -#ifdef _MSC_VER -# define FMT_MSC_VER _MSC_VER -#else -# define FMT_MSC_VER 0 -#endif - -// Check if relaxed C++14 constexpr is supported. -// GCC doesn't allow throw in constexpr until version 6 (bug 67371). -#ifndef FMT_USE_CONSTEXPR -# define FMT_USE_CONSTEXPR \ - (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VER >= 1910 || \ - (FMT_GCC_VERSION >= 600 && __cplusplus >= 201402L)) && \ - !FMT_NVCC -#endif -#if FMT_USE_CONSTEXPR -# define FMT_CONSTEXPR constexpr -# define FMT_CONSTEXPR_DECL constexpr -#else -# define FMT_CONSTEXPR inline -# define FMT_CONSTEXPR_DECL -#endif - -#ifndef FMT_OVERRIDE -# if FMT_HAS_FEATURE(cxx_override) || \ - (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 -# define FMT_OVERRIDE override -# else -# define FMT_OVERRIDE -# endif -#endif - -// Check if exceptions are disabled. -#ifndef FMT_EXCEPTIONS -# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ - FMT_MSC_VER && !_HAS_EXCEPTIONS -# define FMT_EXCEPTIONS 0 -# else -# define FMT_EXCEPTIONS 1 -# endif -#endif - -// Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature). -#ifndef FMT_USE_NOEXCEPT -# define FMT_USE_NOEXCEPT 0 -#endif - -#if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \ - (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 -# define FMT_DETECTED_NOEXCEPT noexcept -# define FMT_HAS_CXX11_NOEXCEPT 1 -#else -# define FMT_DETECTED_NOEXCEPT throw() -# define FMT_HAS_CXX11_NOEXCEPT 0 -#endif - -#ifndef FMT_NOEXCEPT -# if FMT_EXCEPTIONS || FMT_HAS_CXX11_NOEXCEPT -# define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT -# else -# define FMT_NOEXCEPT -# endif -#endif - -// [[noreturn]] is disabled on MSVC and NVCC because of bogus unreachable code -// warnings. -#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VER && \ - !FMT_NVCC -# define FMT_NORETURN [[noreturn]] -#else -# define FMT_NORETURN -#endif - -#ifndef FMT_MAYBE_UNUSED -# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) -# define FMT_MAYBE_UNUSED [[maybe_unused]] -# else -# define FMT_MAYBE_UNUSED -# endif -#endif - -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if defined(__GNUC__) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VER -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - -// Workaround broken [[deprecated]] in the Intel, PGI and NVCC compilers. -#if defined(__INTEL_COMPILER) || defined(__PGI) || FMT_NVCC -# define FMT_DEPRECATED_ALIAS -#else -# define FMT_DEPRECATED_ALIAS FMT_DEPRECATED -#endif - -#ifndef FMT_BEGIN_NAMESPACE -# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ - FMT_MSC_VER >= 1900 -# define FMT_INLINE_NAMESPACE inline namespace -# define FMT_END_NAMESPACE \ - } \ - } -# else -# define FMT_INLINE_NAMESPACE namespace -# define FMT_END_NAMESPACE \ - } \ - using namespace v6; \ - } -# endif -# define FMT_BEGIN_NAMESPACE \ - namespace fmt { \ - FMT_INLINE_NAMESPACE v6 { -#endif - -#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# if FMT_MSC_VER -# define FMT_NO_W4275 __pragma(warning(suppress : 4275)) -# else -# define FMT_NO_W4275 -# endif -# define FMT_CLASS_API FMT_NO_W4275 -# ifdef FMT_EXPORT -# define FMT_API __declspec(dllexport) -# elif defined(FMT_SHARED) -# define FMT_API __declspec(dllimport) -# define FMT_EXTERN_TEMPLATE_API FMT_API -# endif -#endif -#ifndef FMT_CLASS_API -# define FMT_CLASS_API -#endif -#ifndef FMT_API -# if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_API __attribute__((visibility("default"))) -# define FMT_EXTERN_TEMPLATE_API FMT_API -# define FMT_INSTANTIATION_DEF_API -# else -# define FMT_API -# endif -#endif -#ifndef FMT_EXTERN_TEMPLATE_API -# define FMT_EXTERN_TEMPLATE_API -#endif -#ifndef FMT_INSTANTIATION_DEF_API -# define FMT_INSTANTIATION_DEF_API FMT_API -#endif - -#ifndef FMT_HEADER_ONLY -# define FMT_EXTERN extern -#else -# define FMT_EXTERN -#endif - -// libc++ supports string_view in pre-c++17. -#if (FMT_HAS_INCLUDE() && \ - (__cplusplus > 201402L || defined(_LIBCPP_VERSION))) || \ - (defined(_MSVC_LANG) && _MSVC_LANG > 201402L && _MSC_VER >= 1910) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && __cplusplus >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - -#ifndef FMT_UNICODE -# define FMT_UNICODE !FMT_MSC_VER -#endif -#if FMT_UNICODE && FMT_MSC_VER -# pragma execution_character_set("utf-8") -#endif - -FMT_BEGIN_NAMESPACE - -// Implementations of enable_if_t and other metafunctions for older systems. -template -using enable_if_t = typename std::enable_if::type; -template -using conditional_t = typename std::conditional::type; -template using bool_constant = std::integral_constant; -template -using remove_reference_t = typename std::remove_reference::type; -template -using remove_const_t = typename std::remove_const::type; -template -using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; -template using type_identity_t = typename type_identity::type; - -struct monostate {}; - -// An enable_if helper to be used in template parameters which results in much -// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed -// to workaround a bug in MSVC 2019 (see #1140 and #1186). -#define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0 - -namespace internal { - -// A helper function to suppress bogus "conditional expression is constant" -// warnings. -template FMT_CONSTEXPR T const_check(T value) { return value; } - -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. -template struct void_t_impl { using type = void; }; - -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, - const char* message); - -#ifndef FMT_ASSERT -# ifdef NDEBUG -// FMT_ASSERT is not empty to avoid -Werror=empty-body. -# define FMT_ASSERT(condition, message) ((void)0) -# else -# define FMT_ASSERT(condition, message) \ - ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ - ? (void)0 \ - : ::fmt::internal::assert_fail(__FILE__, __LINE__, (message))) -# endif -#endif - -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - -#ifdef FMT_USE_INT128 -// Do nothing. -#elif defined(__SIZEOF_INT128__) && !FMT_NVCC -# define FMT_USE_INT128 1 -using int128_t = __int128_t; -using uint128_t = __uint128_t; -#else -# define FMT_USE_INT128 0 -#endif -#if !FMT_USE_INT128 -struct int128_t {}; -struct uint128_t {}; -#endif - -// Casts a nonnegative integer to unsigned. -template -FMT_CONSTEXPR typename std::make_unsigned::type to_unsigned(Int value) { - FMT_ASSERT(value >= 0, "negative value"); - return static_cast::type>(value); -} - -constexpr unsigned char micro[] = "\u00B5"; - -template constexpr bool is_unicode() { - return FMT_UNICODE || sizeof(Char) != 1 || - (sizeof(micro) == 3 && micro[0] == 0xC2 && micro[1] == 0xB5); -} - -#ifdef __cpp_char8_t -using char8_type = char8_t; -#else -enum char8_type : unsigned char {}; -#endif -} // namespace internal - -template -using void_t = typename internal::void_t_impl::type; - -/** - An implementation of ``std::basic_string_view`` for pre-C++17. It provides a - subset of the API. ``fmt::basic_string_view`` is used for format strings even - if ``std::string_view`` is available to prevent issues when a library is - compiled with a different ``-std`` option than the client code (which is not - recommended). - */ -template class basic_string_view { - private: - const Char* data_; - size_t size_; - - public: - using char_type FMT_DEPRECATED_ALIAS = Char; - using value_type = Char; - using iterator = const Char*; - - FMT_CONSTEXPR basic_string_view() FMT_NOEXCEPT : data_(nullptr), size_(0) {} - - /** Constructs a string reference object from a C string and a size. */ - FMT_CONSTEXPR basic_string_view(const Char* s, size_t count) FMT_NOEXCEPT - : data_(s), - size_(count) {} - - /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. - \endrst - */ -#if __cplusplus >= 201703L // C++17's char_traits::length() is constexpr. - FMT_CONSTEXPR -#endif - basic_string_view(const Char* s) - : data_(s), size_(std::char_traits::length(s)) {} - - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) FMT_NOEXCEPT - : data_(s.data()), - size_(s.size()) {} - - template < - typename S, - FMT_ENABLE_IF(std::is_same>::value)> - FMT_CONSTEXPR basic_string_view(S s) FMT_NOEXCEPT : data_(s.data()), - size_(s.size()) {} - - /** Returns a pointer to the string data. */ - FMT_CONSTEXPR const Char* data() const { return data_; } - - /** Returns the string size. */ - FMT_CONSTEXPR size_t size() const { return size_; } - - FMT_CONSTEXPR iterator begin() const { return data_; } - FMT_CONSTEXPR iterator end() const { return data_ + size_; } - - FMT_CONSTEXPR const Char& operator[](size_t pos) const { return data_[pos]; } - - FMT_CONSTEXPR void remove_prefix(size_t n) { - data_ += n; - size_ -= n; - } - - // Lexicographically compare this string reference to other. - int compare(basic_string_view other) const { - size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); - if (result == 0) - result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); - return result; - } - - friend bool operator==(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) == 0; - } - friend bool operator!=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) != 0; - } - friend bool operator<(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) < 0; - } - friend bool operator<=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) <= 0; - } - friend bool operator>(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) > 0; - } - friend bool operator>=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) >= 0; - } -}; - -using string_view = basic_string_view; -using wstring_view = basic_string_view; - -#ifndef __cpp_char8_t -// char8_t is deprecated; use char instead. -using char8_t FMT_DEPRECATED_ALIAS = internal::char8_type; -#endif - -/** Specifies if ``T`` is a character type. Can be specialized by users. */ -template struct is_char : std::false_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; - -/** - \rst - Returns a string view of `s`. In order to add custom string type support to - {fmt} provide an overload of `to_string_view` for it in the same namespace as - the type for the argument-dependent lookup to work. - - **Example**:: - - namespace my_ns { - inline string_view to_string_view(const my_string& s) { - return {s.data(), s.length()}; - } - } - std::string message = fmt::format(my_string("The answer is {}"), 42); - \endrst - */ -template ::value)> -inline basic_string_view to_string_view(const Char* s) { - return s; -} - -template -inline basic_string_view to_string_view( - const std::basic_string& s) { - return s; -} - -template -inline basic_string_view to_string_view(basic_string_view s) { - return s; -} - -template >::value)> -inline basic_string_view to_string_view( - internal::std_string_view s) { - return s; -} - -// A base class for compile-time strings. It is defined in the fmt namespace to -// make formatting functions visible via ADL, e.g. format(fmt("{}"), 42). -struct compile_string {}; - -template -struct is_compile_string : std::is_base_of {}; - -template ::value)> -constexpr basic_string_view to_string_view(const S& s) { - return s; -} - -namespace internal { -void to_string_view(...); -using fmt::v6::to_string_view; - -// Specifies whether S is a string type convertible to fmt::basic_string_view. -// It should be a constexpr function but MSVC 2017 fails to compile it in -// enable_if and MSVC 2015 fails to compile it as an alias template. -template -struct is_string : std::is_class()))> { -}; - -template struct char_t_impl {}; -template struct char_t_impl::value>> { - using result = decltype(to_string_view(std::declval())); - using type = typename result::value_type; -}; - -struct error_handler { - FMT_CONSTEXPR error_handler() = default; - FMT_CONSTEXPR error_handler(const error_handler&) = default; - - // This function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN FMT_API void on_error(const char* message); -}; -} // namespace internal - -/** String's character type. */ -template using char_t = typename internal::char_t_impl::type; - -/** - \rst - Parsing context consisting of a format string range being parsed and an - argument counter for automatic indexing. - - You can use one of the following type aliases for common character types: - - +-----------------------+-------------------------------------+ - | Type | Definition | - +=======================+=====================================+ - | format_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ - | wformat_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ - \endrst - */ -template -class basic_format_parse_context : private ErrorHandler { - private: - basic_string_view format_str_; - int next_arg_id_; - - public: - using char_type = Char; - using iterator = typename basic_string_view::iterator; - - explicit FMT_CONSTEXPR basic_format_parse_context( - basic_string_view format_str, ErrorHandler eh = ErrorHandler()) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {} - - /** - Returns an iterator to the beginning of the format string range being - parsed. - */ - FMT_CONSTEXPR iterator begin() const FMT_NOEXCEPT { - return format_str_.begin(); - } - - /** - Returns an iterator past the end of the format string range being parsed. - */ - FMT_CONSTEXPR iterator end() const FMT_NOEXCEPT { return format_str_.end(); } - - /** Advances the begin iterator to ``it``. */ - FMT_CONSTEXPR void advance_to(iterator it) { - format_str_.remove_prefix(internal::to_unsigned(it - begin())); - } - - /** - Reports an error if using the manual argument indexing; otherwise returns - the next argument index and switches to the automatic indexing. - */ - FMT_CONSTEXPR int next_arg_id() { - if (next_arg_id_ >= 0) return next_arg_id_++; - on_error("cannot switch from manual to automatic argument indexing"); - return 0; - } - - /** - Reports an error if using the automatic argument indexing; otherwise - switches to the manual indexing. - */ - FMT_CONSTEXPR void check_arg_id(int) { - if (next_arg_id_ > 0) - on_error("cannot switch from automatic to manual argument indexing"); - else - next_arg_id_ = -1; - } - - FMT_CONSTEXPR void check_arg_id(basic_string_view) {} - - FMT_CONSTEXPR void on_error(const char* message) { - ErrorHandler::on_error(message); - } - - FMT_CONSTEXPR ErrorHandler error_handler() const { return *this; } -}; - -using format_parse_context = basic_format_parse_context; -using wformat_parse_context = basic_format_parse_context; - -template -using basic_parse_context FMT_DEPRECATED_ALIAS = - basic_format_parse_context; -using parse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; -using wparse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; - -template class basic_format_arg; -template class basic_format_args; - -// A formatter for objects of type T. -template -struct formatter { - // A deleted default constructor indicates a disabled formatter. - formatter() = delete; -}; - -template -struct FMT_DEPRECATED convert_to_int - : bool_constant::value && - std::is_convertible::value> {}; - -// Specifies if T has an enabled formatter specialization. A type can be -// formattable even if it doesn't have a formatter e.g. via a conversion. -template -using has_formatter = - std::is_constructible>; - -namespace internal { - -/** A contiguous memory buffer with an optional growing ability. */ -template class buffer { - private: - T* ptr_; - std::size_t size_; - std::size_t capacity_; - - protected: - // Don't initialize ptr_ since it is not accessed to save a few cycles. - buffer(std::size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {} - - buffer(T* p = nullptr, std::size_t sz = 0, std::size_t cap = 0) FMT_NOEXCEPT - : ptr_(p), - size_(sz), - capacity_(cap) {} - - /** Sets the buffer data and capacity. */ - void set(T* buf_data, std::size_t buf_capacity) FMT_NOEXCEPT { - ptr_ = buf_data; - capacity_ = buf_capacity; - } - - /** Increases the buffer capacity to hold at least *capacity* elements. */ - virtual void grow(std::size_t capacity) = 0; - - public: - using value_type = T; - using const_reference = const T&; - - buffer(const buffer&) = delete; - void operator=(const buffer&) = delete; - virtual ~buffer() = default; - - T* begin() FMT_NOEXCEPT { return ptr_; } - T* end() FMT_NOEXCEPT { return ptr_ + size_; } - - const T* begin() const FMT_NOEXCEPT { return ptr_; } - const T* end() const FMT_NOEXCEPT { return ptr_ + size_; } - - /** Returns the size of this buffer. */ - std::size_t size() const FMT_NOEXCEPT { return size_; } - - /** Returns the capacity of this buffer. */ - std::size_t capacity() const FMT_NOEXCEPT { return capacity_; } - - /** Returns a pointer to the buffer data. */ - T* data() FMT_NOEXCEPT { return ptr_; } - - /** Returns a pointer to the buffer data. */ - const T* data() const FMT_NOEXCEPT { return ptr_; } - - /** - Resizes the buffer. If T is a POD type new elements may not be initialized. - */ - void resize(std::size_t new_size) { - reserve(new_size); - size_ = new_size; - } - - /** Clears this buffer. */ - void clear() { size_ = 0; } - - /** Reserves space to store at least *capacity* elements. */ - void reserve(std::size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); - } - - void push_back(const T& value) { - reserve(size_ + 1); - ptr_[size_++] = value; - } - - /** Appends data to the end of the buffer. */ - template void append(const U* begin, const U* end); - - template T& operator[](I index) { return ptr_[index]; } - template const T& operator[](I index) const { - return ptr_[index]; - } -}; - -// A container-backed buffer. -template -class container_buffer : public buffer { - private: - Container& container_; - - protected: - void grow(std::size_t capacity) FMT_OVERRIDE { - container_.resize(capacity); - this->set(&container_[0], capacity); - } - - public: - explicit container_buffer(Container& c) - : buffer(c.size()), container_(c) {} -}; - -// Extracts a reference to the container from back_insert_iterator. -template -inline Container& get_container(std::back_insert_iterator it) { - using bi_iterator = std::back_insert_iterator; - struct accessor : bi_iterator { - accessor(bi_iterator iter) : bi_iterator(iter) {} - using bi_iterator::container; - }; - return *accessor(it).container; -} - -template -struct fallback_formatter { - fallback_formatter() = delete; -}; - -// Specifies if T has an enabled fallback_formatter specialization. -template -using has_fallback_formatter = - std::is_constructible>; - -template struct named_arg_base; -template struct named_arg; - -enum class type { - none_type, - named_arg_type, - // Integer types should go first, - int_type, - uint_type, - long_long_type, - ulong_long_type, - int128_type, - uint128_type, - bool_type, - char_type, - last_integer_type = char_type, - // followed by floating-point types. - float_type, - double_type, - long_double_type, - last_numeric_type = long_double_type, - cstring_type, - string_type, - pointer_type, - custom_type -}; - -// Maps core type T to the corresponding type enum constant. -template -struct type_constant : std::integral_constant {}; - -#define FMT_TYPE_CONSTANT(Type, constant) \ - template \ - struct type_constant \ - : std::integral_constant {} - -FMT_TYPE_CONSTANT(const named_arg_base&, named_arg_type); -FMT_TYPE_CONSTANT(int, int_type); -FMT_TYPE_CONSTANT(unsigned, uint_type); -FMT_TYPE_CONSTANT(long long, long_long_type); -FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); -FMT_TYPE_CONSTANT(int128_t, int128_type); -FMT_TYPE_CONSTANT(uint128_t, uint128_type); -FMT_TYPE_CONSTANT(bool, bool_type); -FMT_TYPE_CONSTANT(Char, char_type); -FMT_TYPE_CONSTANT(float, float_type); -FMT_TYPE_CONSTANT(double, double_type); -FMT_TYPE_CONSTANT(long double, long_double_type); -FMT_TYPE_CONSTANT(const Char*, cstring_type); -FMT_TYPE_CONSTANT(basic_string_view, string_type); -FMT_TYPE_CONSTANT(const void*, pointer_type); - -FMT_CONSTEXPR bool is_integral_type(type t) { - FMT_ASSERT(t != type::named_arg_type, "invalid argument type"); - return t > type::none_type && t <= type::last_integer_type; -} - -FMT_CONSTEXPR bool is_arithmetic_type(type t) { - FMT_ASSERT(t != type::named_arg_type, "invalid argument type"); - return t > type::none_type && t <= type::last_numeric_type; -} - -template struct string_value { - const Char* data; - std::size_t size; -}; - -template struct custom_value { - using parse_context = basic_format_parse_context; - const void* value; - void (*format)(const void* arg, - typename Context::parse_context_type& parse_ctx, Context& ctx); -}; - -// A formatting argument value. -template class value { - public: - using char_type = typename Context::char_type; - - union { - int int_value; - unsigned uint_value; - long long long_long_value; - unsigned long long ulong_long_value; - int128_t int128_value; - uint128_t uint128_value; - bool bool_value; - char_type char_value; - float float_value; - double double_value; - long double long_double_value; - const void* pointer; - string_value string; - custom_value custom; - const named_arg_base* named_arg; - }; - - FMT_CONSTEXPR value(int val = 0) : int_value(val) {} - FMT_CONSTEXPR value(unsigned val) : uint_value(val) {} - value(long long val) : long_long_value(val) {} - value(unsigned long long val) : ulong_long_value(val) {} - value(int128_t val) : int128_value(val) {} - value(uint128_t val) : uint128_value(val) {} - value(float val) : float_value(val) {} - value(double val) : double_value(val) {} - value(long double val) : long_double_value(val) {} - value(bool val) : bool_value(val) {} - value(char_type val) : char_value(val) {} - value(const char_type* val) { string.data = val; } - value(basic_string_view val) { - string.data = val.data(); - string.size = val.size(); - } - value(const void* val) : pointer(val) {} - - template value(const T& val) { - custom.value = &val; - // Get the formatter type through the context to allow different contexts - // have different extension points, e.g. `formatter` for `format` and - // `printf_formatter` for `printf`. - custom.format = format_custom_arg< - T, conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>>; - } - - value(const named_arg_base& val) { named_arg = &val; } - - private: - // Formats an argument of a custom type, such as a user-defined class. - template - static void format_custom_arg(const void* arg, - typename Context::parse_context_type& parse_ctx, - Context& ctx) { - Formatter f; - parse_ctx.advance_to(f.parse(parse_ctx)); - ctx.advance_to(f.format(*static_cast(arg), ctx)); - } -}; - -template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value); - -// To minimize the number of types we need to deal with, long is translated -// either to int or to long long depending on its size. -enum { long_short = sizeof(long) == sizeof(int) }; -using long_type = conditional_t; -using ulong_type = conditional_t; - -// Maps formatting arguments to core types. -template struct arg_mapper { - using char_type = typename Context::char_type; - - FMT_CONSTEXPR int map(signed char val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned char val) { return val; } - FMT_CONSTEXPR int map(short val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned short val) { return val; } - FMT_CONSTEXPR int map(int val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned val) { return val; } - FMT_CONSTEXPR long_type map(long val) { return val; } - FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; } - FMT_CONSTEXPR long long map(long long val) { return val; } - FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; } - FMT_CONSTEXPR int128_t map(int128_t val) { return val; } - FMT_CONSTEXPR uint128_t map(uint128_t val) { return val; } - FMT_CONSTEXPR bool map(bool val) { return val; } - - template ::value)> - FMT_CONSTEXPR char_type map(T val) { - static_assert( - std::is_same::value || std::is_same::value, - "mixing character types is disallowed"); - return val; - } - - FMT_CONSTEXPR float map(float val) { return val; } - FMT_CONSTEXPR double map(double val) { return val; } - FMT_CONSTEXPR long double map(long double val) { return val; } - - FMT_CONSTEXPR const char_type* map(char_type* val) { return val; } - FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; } - template ::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - static_assert(std::is_same>::value, - "mixing character types is disallowed"); - return to_string_view(val); - } - template , T>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - return basic_string_view(val); - } - template < - typename T, - FMT_ENABLE_IF( - std::is_constructible, T>::value && - !std::is_constructible, T>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - return std_string_view(val); - } - FMT_CONSTEXPR const char* map(const signed char* val) { - static_assert(std::is_same::value, "invalid string type"); - return reinterpret_cast(val); - } - FMT_CONSTEXPR const char* map(const unsigned char* val) { - static_assert(std::is_same::value, "invalid string type"); - return reinterpret_cast(val); - } - - FMT_CONSTEXPR const void* map(void* val) { return val; } - FMT_CONSTEXPR const void* map(const void* val) { return val; } - FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; } - template FMT_CONSTEXPR int map(const T*) { - // Formatting of arbitrary pointers is disallowed. If you want to output - // a pointer cast it to "void *" or "const void *". In particular, this - // forbids formatting of "[const] volatile char *" which is printed as bool - // by iostreams. - static_assert(!sizeof(T), "formatting of non-void pointers is disallowed"); - return 0; - } - - template ::value && - !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR auto map(const T& val) - -> decltype(std::declval().map( - static_cast::type>(val))) { - return map(static_cast::type>(val)); - } - template ::value && !is_char::value && - (has_formatter::value || - has_fallback_formatter::value))> - FMT_CONSTEXPR const T& map(const T& val) { - return val; - } - - template - FMT_CONSTEXPR const named_arg_base& map( - const named_arg& val) { - auto arg = make_arg(val.value); - std::memcpy(val.data, &arg, sizeof(arg)); - return val; - } - - int map(...) { - constexpr bool formattable = sizeof(Context) == 0; - static_assert( - formattable, - "Cannot format argument. To make type T formattable provide a " - "formatter specialization: " - "https://fmt.dev/latest/api.html#formatting-user-defined-types"); - return 0; - } -}; - -// A type constant after applying arg_mapper. -template -using mapped_type_constant = - type_constant().map(std::declval())), - typename Context::char_type>; - -enum { packed_arg_bits = 5 }; -// Maximum number of arguments with packed types. -enum { max_packed_args = 63 / packed_arg_bits }; -enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; - -template class arg_map; -} // namespace internal - -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. -template class basic_format_arg { - private: - internal::value value_; - internal::type type_; - - template - friend FMT_CONSTEXPR basic_format_arg internal::make_arg( - const T& value); - - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - - friend class basic_format_args; - friend class internal::arg_map; - - using char_type = typename Context::char_type; - - public: - class handle { - public: - explicit handle(internal::custom_value custom) : custom_(custom) {} - - void format(typename Context::parse_context_type& parse_ctx, - Context& ctx) const { - custom_.format(custom_.value, parse_ctx, ctx); - } - - private: - internal::custom_value custom_; - }; - - FMT_CONSTEXPR basic_format_arg() : type_(internal::type::none_type) {} - - FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT { - return type_ != internal::type::none_type; - } - - internal::type type() const { return type_; } - - bool is_integral() const { return internal::is_integral_type(type_); } - bool is_arithmetic() const { return internal::is_arithmetic_type(type_); } -}; - -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. - \endrst - */ -template -FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)) { - using char_type = typename Context::char_type; - switch (arg.type_) { - case internal::type::none_type: - break; - case internal::type::named_arg_type: - FMT_ASSERT(false, "invalid argument type"); - break; - case internal::type::int_type: - return vis(arg.value_.int_value); - case internal::type::uint_type: - return vis(arg.value_.uint_value); - case internal::type::long_long_type: - return vis(arg.value_.long_long_value); - case internal::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); -#if FMT_USE_INT128 - case internal::type::int128_type: - return vis(arg.value_.int128_value); - case internal::type::uint128_type: - return vis(arg.value_.uint128_value); -#else - case internal::type::int128_type: - case internal::type::uint128_type: - break; -#endif - case internal::type::bool_type: - return vis(arg.value_.bool_value); - case internal::type::char_type: - return vis(arg.value_.char_value); - case internal::type::float_type: - return vis(arg.value_.float_value); - case internal::type::double_type: - return vis(arg.value_.double_value); - case internal::type::long_double_type: - return vis(arg.value_.long_double_value); - case internal::type::cstring_type: - return vis(arg.value_.string.data); - case internal::type::string_type: - return vis(basic_string_view(arg.value_.string.data, - arg.value_.string.size)); - case internal::type::pointer_type: - return vis(arg.value_.pointer); - case internal::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); -} - -namespace internal { -// A map from argument names to their values for named arguments. -template class arg_map { - private: - using char_type = typename Context::char_type; - - struct entry { - basic_string_view name; - basic_format_arg arg; - }; - - entry* map_; - unsigned size_; - - void push_back(value val) { - const auto& named = *val.named_arg; - map_[size_] = {named.name, named.template deserialize()}; - ++size_; - } - - public: - arg_map(const arg_map&) = delete; - void operator=(const arg_map&) = delete; - arg_map() : map_(nullptr), size_(0) {} - void init(const basic_format_args& args); - ~arg_map() { delete[] map_; } - - basic_format_arg find(basic_string_view name) const { - // The list is unsorted, so just return the first matching name. - for (entry *it = map_, *end = map_ + size_; it != end; ++it) { - if (it->name == name) return it->arg; - } - return {}; - } -}; - -// A type-erased reference to an std::locale to avoid heavy include. -class locale_ref { - private: - const void* locale_; // A type-erased pointer to std::locale. - - public: - locale_ref() : locale_(nullptr) {} - template explicit locale_ref(const Locale& loc); - - explicit operator bool() const FMT_NOEXCEPT { return locale_ != nullptr; } - - template Locale get() const; -}; - -template constexpr unsigned long long encode_types() { return 0; } - -template -constexpr unsigned long long encode_types() { - return static_cast(mapped_type_constant::value) | - (encode_types() << packed_arg_bits); -} - -template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { - basic_format_arg arg; - arg.type_ = mapped_type_constant::value; - arg.value_ = arg_mapper().map(value); - return arg; -} - -// The type template parameter is there to avoid an ODR violation when using -// a fallback formatter in one translation unit and an implicit conversion in -// another (not recommended). -template -inline value make_arg(const T& val) { - return arg_mapper().map(val); -} - -template -inline basic_format_arg make_arg(const T& value) { - return make_arg(value); -} - -template struct is_reference_wrapper : std::false_type {}; - -template -struct is_reference_wrapper> : std::true_type {}; - -class dynamic_arg_list { - // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for - // templates it doesn't complain about inability to deduce single translation - // unit for placing vtable. So storage_node_base is made a fake template. - template struct node { - virtual ~node() = default; - std::unique_ptr> next; - }; - - template struct typed_node : node<> { - T value; - - template - FMT_CONSTEXPR typed_node(const Arg& arg) : value(arg) {} - - template - FMT_CONSTEXPR typed_node(const basic_string_view& arg) - : value(arg.data(), arg.size()) {} - }; - - std::unique_ptr> head_; - - public: - template const T& push(const Arg& arg) { - auto node = std::unique_ptr>(new typed_node(arg)); - auto& value = node->value; - node->next = std::move(head_); - head_ = std::move(node); - return value; - } -}; -} // namespace internal - -// Formatting context. -template class basic_format_context { - public: - /** The character type for the output. */ - using char_type = Char; - - private: - OutputIt out_; - basic_format_args args_; - internal::arg_map map_; - internal::locale_ref loc_; - - public: - using iterator = OutputIt; - using format_arg = basic_format_arg; - using parse_context_type = basic_format_parse_context; - template using formatter_type = formatter; - - basic_format_context(const basic_format_context&) = delete; - void operator=(const basic_format_context&) = delete; - /** - Constructs a ``basic_format_context`` object. References to the arguments are - stored in the object so make sure they have appropriate lifetimes. - */ - basic_format_context(OutputIt out, - basic_format_args ctx_args, - internal::locale_ref loc = internal::locale_ref()) - : out_(out), args_(ctx_args), loc_(loc) {} - - format_arg arg(int id) const { return args_.get(id); } - - // Checks if manual indexing is used and returns the argument with the - // specified name. - format_arg arg(basic_string_view name); - - internal::error_handler error_handler() { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } - - // Returns an iterator to the beginning of the output range. - iterator out() { return out_; } - - // Advances the begin iterator to ``it``. - void advance_to(iterator it) { out_ = it; } - - internal::locale_ref locale() { return loc_; } -}; - -template -using buffer_context = - basic_format_context>, - Char>; -using format_context = buffer_context; -using wformat_context = buffer_context; - -/** - \rst - An array of references to arguments. It can be implicitly converted into - `~fmt::basic_format_args` for passing into type-erased formatting functions - such as `~fmt::vformat`. - \endrst - */ -template -class format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. - : public basic_format_args -#endif -{ - private: - static const size_t num_args = sizeof...(Args); - static const bool is_packed = num_args < internal::max_packed_args; - - using value_type = conditional_t, - basic_format_arg>; - - // If the arguments are not packed, add one more element to mark the end. - value_type data_[num_args + (num_args == 0 ? 1 : 0)]; - - friend class basic_format_args; - - public: - static constexpr unsigned long long types = - is_packed ? internal::encode_types() - : internal::is_unpacked_bit | num_args; - - format_arg_store(const Args&... args) - : -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - basic_format_args(*this), -#endif - data_{internal::make_arg< - is_packed, Context, - internal::mapped_type_constant::value>(args)...} { - } -}; - -/** - \rst - Constructs an `~fmt::format_arg_store` object that contains references to - arguments and can be implicitly converted to `~fmt::format_args`. `Context` - can be omitted in which case it defaults to `~fmt::context`. - See `~fmt::arg` for lifetime considerations. - \endrst - */ -template -inline format_arg_store make_format_args( - const Args&... args) { - return {args...}; -} - -/** - \rst - A dynamic version of `fmt::format_arg_store<>`. - It's equipped with a storage to potentially temporary objects which lifetime - could be shorter than the format arguments object. - - It can be implicitly converted into `~fmt::basic_format_args` for passing - into type-erased formatting functions such as `~fmt::vformat`. - \endrst - */ -template -class dynamic_format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. - : public basic_format_args -#endif -{ - private: - using char_type = typename Context::char_type; - - template struct need_copy { - static constexpr internal::type mapped_type = - internal::mapped_type_constant::value; - - enum { - value = !(internal::is_reference_wrapper::value || - std::is_same>::value || - std::is_same>::value || - (mapped_type != internal::type::cstring_type && - mapped_type != internal::type::string_type && - mapped_type != internal::type::custom_type && - mapped_type != internal::type::named_arg_type)) - }; - }; - - template - using stored_type = conditional_t::value, - std::basic_string, T>; - - // Storage of basic_format_arg must be contiguous. - std::vector> data_; - - // Storage of arguments not fitting into basic_format_arg must grow - // without relocation because items in data_ refer to it. - internal::dynamic_arg_list dynamic_args_; - - friend class basic_format_args; - - unsigned long long get_types() const { - return internal::is_unpacked_bit | data_.size(); - } - - template void emplace_arg(const T& arg) { - data_.emplace_back(internal::make_arg(arg)); - } - - public: - /** - \rst - Adds an argument into the dynamic store for later passing to a formating - function. - - Note that custom types and string types (but not string views!) are copied - into the store with dynamic memory (in addition to resizing vector). - - **Example**:: - - fmt::dynamic_format_arg_store store; - store.push_back(42); - store.push_back("abc"); - store.push_back(1.5f); - std::string result = fmt::vformat("{} and {} and {}", store); - \endrst - */ - template void push_back(const T& arg) { - static_assert( - !std::is_base_of, T>::value, - "named arguments are not supported yet"); - if (internal::const_check(need_copy::value)) - emplace_arg(dynamic_args_.push>(arg)); - else - emplace_arg(arg); - } - - /** - Adds a reference to the argument into the dynamic store for later passing to - a formating function. - */ - template void push_back(std::reference_wrapper arg) { - static_assert( - need_copy::value, - "objects of built-in types and string views are always copied"); - emplace_arg(arg.get()); - } -}; - -/** - \rst - A view of a collection of formatting arguments. To avoid lifetime issues it - should only be used as a parameter type in type-erased functions such as - ``vformat``:: - - void vlog(string_view format_str, format_args args); // OK - format_args args = make_format_args(42); // Error: dangling reference - \endrst - */ -template class basic_format_args { - public: - using size_type = int; - using format_arg = basic_format_arg; - - private: - // To reduce compiled code size per formatting function call, types of first - // max_packed_args arguments are passed in the types_ field. - unsigned long long types_; - union { - // If the number of arguments is less than max_packed_args, the argument - // values are stored in values_, otherwise they are stored in args_. - // This is done to reduce compiled code size as storing larger objects - // may require more code (at least on x86-64) even if the same amount of - // data is actually copied to stack. It saves ~10% on the bloat test. - const internal::value* values_; - const format_arg* args_; - }; - - bool is_packed() const { return (types_ & internal::is_unpacked_bit) == 0; } - - internal::type type(int index) const { - int shift = index * internal::packed_arg_bits; - unsigned int mask = (1 << internal::packed_arg_bits) - 1; - return static_cast((types_ >> shift) & mask); - } - - friend class internal::arg_map; - - void set_data(const internal::value* values) { values_ = values; } - void set_data(const format_arg* args) { args_ = args; } - - format_arg do_get(int index) const { - format_arg arg; - if (!is_packed()) { - auto num_args = max_size(); - if (index < num_args) arg = args_[index]; - return arg; - } - if (index > internal::max_packed_args) return arg; - arg.type_ = type(index); - if (arg.type_ == internal::type::none_type) return arg; - internal::value& val = arg.value_; - val = values_[index]; - return arg; - } - - public: - basic_format_args() : types_(0) {} - - /** - \rst - Constructs a `basic_format_args` object from `~fmt::format_arg_store`. - \endrst - */ - template - basic_format_args(const format_arg_store& store) - : types_(store.types) { - set_data(store.data_); - } - - /** - \rst - Constructs a `basic_format_args` object from - `~fmt::dynamic_format_arg_store`. - \endrst - */ - basic_format_args(const dynamic_format_arg_store& store) - : types_(store.get_types()) { - set_data(store.data_.data()); - } - - /** - \rst - Constructs a `basic_format_args` object from a dynamic set of arguments. - \endrst - */ - basic_format_args(const format_arg* args, int count) - : types_(internal::is_unpacked_bit | internal::to_unsigned(count)) { - set_data(args); - } - - /** Returns the argument at specified index. */ - format_arg get(int index) const { - format_arg arg = do_get(index); - if (arg.type_ == internal::type::named_arg_type) - arg = arg.value_.named_arg->template deserialize(); - return arg; - } - - int max_size() const { - unsigned long long max_packed = internal::max_packed_args; - return static_cast(is_packed() ? max_packed - : types_ & ~internal::is_unpacked_bit); - } -}; - -/** An alias to ``basic_format_args``. */ -// It is a separate type rather than an alias to make symbols readable. -struct format_args : basic_format_args { - template - format_args(Args&&... args) - : basic_format_args(static_cast(args)...) {} -}; -struct wformat_args : basic_format_args { - template - wformat_args(Args&&... args) - : basic_format_args(static_cast(args)...) {} -}; - -template struct is_contiguous : std::false_type {}; - -template -struct is_contiguous> : std::true_type {}; - -template -struct is_contiguous> : std::true_type {}; - -namespace internal { - -template -struct is_contiguous_back_insert_iterator : std::false_type {}; -template -struct is_contiguous_back_insert_iterator> - : is_contiguous {}; - -template struct named_arg_base { - basic_string_view name; - - // Serialized value. - mutable char data[sizeof(basic_format_arg>)]; - - named_arg_base(basic_string_view nm) : name(nm) {} - - template basic_format_arg deserialize() const { - basic_format_arg arg; - std::memcpy(&arg, data, sizeof(basic_format_arg)); - return arg; - } -}; - -struct view {}; - -template -struct named_arg : view, named_arg_base { - const T& value; - - named_arg(basic_string_view name, const T& val) - : named_arg_base(name), value(val) {} -}; - -template ::value)> -inline void check_format_string(const S&) { -#if defined(FMT_ENFORCE_COMPILE_STRING) - static_assert(is_compile_string::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to " - "utilize FMT_STRING() or fmt()."); -#endif -} -template ::value)> -void check_format_string(S); - -template struct bool_pack; -template -using all_true = - std::is_same, bool_pack>; - -template > -inline format_arg_store, remove_reference_t...> -make_args_checked(const S& format_str, - const remove_reference_t&... args) { - static_assert( - all_true<(!std::is_base_of>::value || - !std::is_reference::value)...>::value, - "passing views as lvalues is disallowed"); - check_format_string(format_str); - return {args...}; -} - -template -std::basic_string vformat( - basic_string_view format_str, - basic_format_args>> args); - -template -typename buffer_context::iterator vformat_to( - buffer& buf, basic_string_view format_str, - basic_format_args>> args); - -template ::value)> -inline void vprint_mojibake(std::FILE*, basic_string_view, const Args&) {} - -FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); -#ifndef _WIN32 -inline void vprint_mojibake(std::FILE*, string_view, format_args) {} -#endif -} // namespace internal - -/** - \rst - Returns a named argument to be used in a formatting function. It should only - be used in a call to a formatting function. - - **Example**:: - - fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); - \endrst - */ -template > -inline internal::named_arg arg(const S& name, const T& arg) { - static_assert(internal::is_string::value, ""); - return {name, arg}; -} - -// Disable nested named arguments, e.g. ``arg("a", arg("b", 42))``. -template -void arg(S, internal::named_arg) = delete; - -/** Formats a string and writes the output to ``out``. */ -// GCC 8 and earlier cannot handle std::back_insert_iterator with -// vformat_to(...) overload, so SFINAE on iterator type instead. -template , - FMT_ENABLE_IF( - internal::is_contiguous_back_insert_iterator::value)> -OutputIt vformat_to( - OutputIt out, const S& format_str, - basic_format_args>> args) { - using container = remove_reference_t; - internal::container_buffer buf((internal::get_container(out))); - internal::vformat_to(buf, to_string_view(format_str), args); - return out; -} - -template ::value&& internal::is_string::value)> -inline std::back_insert_iterator format_to( - std::back_insert_iterator out, const S& format_str, - Args&&... args) { - return vformat_to(out, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -template > -inline std::basic_string vformat( - const S& format_str, - basic_format_args>> args) { - return internal::vformat(to_string_view(format_str), args); -} - -/** - \rst - Formats arguments and returns the result as a string. - - **Example**:: - - #include - std::string message = fmt::format("The answer is {}", 42); - \endrst -*/ -// Pass char_t as a default template parameter instead of using -// std::basic_string> to reduce the symbol size. -template > -inline std::basic_string format(const S& format_str, Args&&... args) { - return internal::vformat( - to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -FMT_API void vprint(string_view, format_args); -FMT_API void vprint(std::FILE*, string_view, format_args); - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes the - output to the file ``f``. Strings are assumed to be Unicode-encoded unless the - ``FMT_UNICODE`` macro is set to 0. - - **Example**:: - - fmt::print(stderr, "Don't {}!", "panic"); - \endrst - */ -template > -inline void print(std::FILE* f, const S& format_str, Args&&... args) { - return internal::is_unicode() - ? vprint(f, to_string_view(format_str), - internal::make_args_checked(format_str, args...)) - : internal::vprint_mojibake( - f, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes - the output to ``stdout``. Strings are assumed to be Unicode-encoded unless - the ``FMT_UNICODE`` macro is set to 0. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", 1.23); - \endrst - */ -template > -inline void print(const S& format_str, Args&&... args) { - return internal::is_unicode() - ? vprint(to_string_view(format_str), - internal::make_args_checked(format_str, args...)) - : internal::vprint_mojibake( - stdout, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} -FMT_END_NAMESPACE - -#endif // FMT_CORE_H_ diff --git a/kernel_generator/third_party/fmt/include/fmt/format-inl.h b/kernel_generator/third_party/fmt/include/fmt/format-inl.h deleted file mode 100644 index f632714..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/format-inl.h +++ /dev/null @@ -1,1403 +0,0 @@ -// Formatting library for C++ - implementation -// -// Copyright (c) 2012 - 2016, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. - -#ifndef FMT_FORMAT_INL_H_ -#define FMT_FORMAT_INL_H_ - -#include -#include -#include -#include -#include -#include // for std::memmove -#include - -#include "format.h" -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) -# include -#endif - -#ifdef _WIN32 -# include -# include -#endif - -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable : 4702) // unreachable code -#endif - -// Dummy implementations of strerror_r and strerror_s called if corresponding -// system functions are not available. -inline fmt::internal::null<> strerror_r(int, char*, ...) { return {}; } -inline fmt::internal::null<> strerror_s(char*, std::size_t, ...) { return {}; } - -FMT_BEGIN_NAMESPACE -namespace internal { - -FMT_FUNC void assert_fail(const char* file, int line, const char* message) { - print(stderr, "{}:{}: assertion failed: {}", file, line, message); - std::abort(); -} - -#ifndef _MSC_VER -# define FMT_SNPRINTF snprintf -#else // _MSC_VER -inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { - va_list args; - va_start(args, format); - int result = vsnprintf_s(buffer, size, _TRUNCATE, format, args); - va_end(args); - return result; -} -# define FMT_SNPRINTF fmt_snprintf -#endif // _MSC_VER - -// A portable thread-safe version of strerror. -// Sets buffer to point to a string describing the error code. -// This can be either a pointer to a string stored in buffer, -// or a pointer to some static immutable string. -// Returns one of the following values: -// 0 - success -// ERANGE - buffer is not large enough to store the error message -// other - failure -// Buffer should be at least of size 1. -FMT_FUNC int safe_strerror(int error_code, char*& buffer, - std::size_t buffer_size) FMT_NOEXCEPT { - FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer"); - - class dispatcher { - private: - int error_code_; - char*& buffer_; - std::size_t buffer_size_; - - // A noop assignment operator to avoid bogus warnings. - void operator=(const dispatcher&) {} - - // Handle the result of XSI-compliant version of strerror_r. - int handle(int result) { - // glibc versions before 2.13 return result in errno. - return result == -1 ? errno : result; - } - - // Handle the result of GNU-specific version of strerror_r. - FMT_MAYBE_UNUSED - int handle(char* message) { - // If the buffer is full then the message is probably truncated. - if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1) - return ERANGE; - buffer_ = message; - return 0; - } - - // Handle the case when strerror_r is not available. - FMT_MAYBE_UNUSED - int handle(internal::null<>) { - return fallback(strerror_s(buffer_, buffer_size_, error_code_)); - } - - // Fallback to strerror_s when strerror_r is not available. - FMT_MAYBE_UNUSED - int fallback(int result) { - // If the buffer is full then the message is probably truncated. - return result == 0 && strlen(buffer_) == buffer_size_ - 1 ? ERANGE - : result; - } - -#if !FMT_MSC_VER - // Fallback to strerror if strerror_r and strerror_s are not available. - int fallback(internal::null<>) { - errno = 0; - buffer_ = strerror(error_code_); - return errno; - } -#endif - - public: - dispatcher(int err_code, char*& buf, std::size_t buf_size) - : error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {} - - int run() { return handle(strerror_r(error_code_, buffer_, buffer_size_)); } - }; - return dispatcher(error_code, buffer, buffer_size).run(); -} - -FMT_FUNC void format_error_code(internal::buffer& out, int error_code, - string_view message) FMT_NOEXCEPT { - // Report error code making sure that the output fits into - // inline_buffer_size to avoid dynamic memory allocation and potential - // bad_alloc. - out.resize(0); - static const char SEP[] = ": "; - static const char ERROR_STR[] = "error "; - // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. - std::size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2; - auto abs_value = static_cast>(error_code); - if (internal::is_negative(error_code)) { - abs_value = 0 - abs_value; - ++error_code_size; - } - error_code_size += internal::to_unsigned(internal::count_digits(abs_value)); - internal::writer w(out); - if (message.size() <= inline_buffer_size - error_code_size) { - w.write(message); - w.write(SEP); - } - w.write(ERROR_STR); - w.write(error_code); - assert(out.size() <= inline_buffer_size); -} - -FMT_FUNC void report_error(format_func func, int error_code, - string_view message) FMT_NOEXCEPT { - memory_buffer full_message; - func(full_message, error_code, message); - // Don't use fwrite_fully because the latter may throw. - (void)std::fwrite(full_message.data(), full_message.size(), 1, stderr); - std::fputc('\n', stderr); -} - -// A wrapper around fwrite that throws on error. -FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); - if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); -} -} // namespace internal - -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) -namespace internal { - -template -locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { - static_assert(std::is_same::value, ""); -} - -template Locale locale_ref::get() const { - static_assert(std::is_same::value, ""); - return locale_ ? *static_cast(locale_) : std::locale(); -} - -template FMT_FUNC std::string grouping_impl(locale_ref loc) { - return std::use_facet>(loc.get()).grouping(); -} -template FMT_FUNC Char thousands_sep_impl(locale_ref loc) { - return std::use_facet>(loc.get()) - .thousands_sep(); -} -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { - return std::use_facet>(loc.get()) - .decimal_point(); -} -} // namespace internal -#else -template -FMT_FUNC std::string internal::grouping_impl(locale_ref) { - return "\03"; -} -template -FMT_FUNC Char internal::thousands_sep_impl(locale_ref) { - return FMT_STATIC_THOUSANDS_SEPARATOR; -} -template -FMT_FUNC Char internal::decimal_point_impl(locale_ref) { - return '.'; -} -#endif - -FMT_API FMT_FUNC format_error::~format_error() FMT_NOEXCEPT = default; -FMT_API FMT_FUNC system_error::~system_error() FMT_NOEXCEPT = default; - -FMT_FUNC void system_error::init(int err_code, string_view format_str, - format_args args) { - error_code_ = err_code; - memory_buffer buffer; - format_system_error(buffer, err_code, vformat(format_str, args)); - std::runtime_error& base = *this; - base = std::runtime_error(to_string(buffer)); -} - -namespace internal { - -template <> FMT_FUNC int count_digits<4>(internal::fallback_uintptr n) { - // fallback_uintptr is always stored in little endian. - int i = static_cast(sizeof(void*)) - 1; - while (i > 0 && n.value[i] == 0) --i; - auto char_digits = std::numeric_limits::digits / 4; - return i >= 0 ? i * char_digits + count_digits<4, unsigned>(n.value[i]) : 1; -} - -template -const char basic_data::digits[] = - "0001020304050607080910111213141516171819" - "2021222324252627282930313233343536373839" - "4041424344454647484950515253545556575859" - "6061626364656667686970717273747576777879" - "8081828384858687888990919293949596979899"; - -template -const char basic_data::hex_digits[] = "0123456789abcdef"; - -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 - -template -const uint64_t basic_data::powers_of_10_64[] = { - 1, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -template -const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, - FMT_POWERS_OF_10(1)}; - -template -const uint64_t basic_data::zero_or_powers_of_10_64[] = { - 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. -// These are generated by support/compute-powers.py. -template -const uint64_t basic_data::pow10_significands[] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, -}; - -// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding -// to significands above. -template -const int16_t basic_data::pow10_exponents[] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; - -template -const char basic_data::foreground_color[] = "\x1b[38;2;"; -template -const char basic_data::background_color[] = "\x1b[48;2;"; -template const char basic_data::reset_color[] = "\x1b[0m"; -template const wchar_t basic_data::wreset_color[] = L"\x1b[0m"; -template const char basic_data::signs[] = {0, '-', '+', ' '}; - -template struct bits { - static FMT_CONSTEXPR_DECL const int value = - static_cast(sizeof(T) * std::numeric_limits::digits); -}; - -class fp; -template fp normalize(fp value); - -// Lower (upper) boundary is a value half way between a floating-point value -// and its predecessor (successor). Boundaries have the same exponent as the -// value so only significands are stored. -struct boundaries { - uint64_t lower; - uint64_t upper; -}; - -// A handmade floating-point number f * pow(2, e). -class fp { - private: - using significand_type = uint64_t; - - public: - significand_type f; - int e; - - // All sizes are in bits. - // Subtract 1 to account for an implicit most significant bit in the - // normalized form. - static FMT_CONSTEXPR_DECL const int double_significand_size = - std::numeric_limits::digits - 1; - static FMT_CONSTEXPR_DECL const uint64_t implicit_bit = - 1ULL << double_significand_size; - static FMT_CONSTEXPR_DECL const int significand_size = - bits::value; - - fp() : f(0), e(0) {} - fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {} - - // Constructs fp from an IEEE754 double. It is a template to prevent compile - // errors on platforms where double is not IEEE754. - template explicit fp(Double d) { assign(d); } - - // Assigns d to this and return true iff predecessor is closer than successor. - template - bool assign(Double d) { - // Assume double is in the format [sign][exponent][significand]. - using limits = std::numeric_limits; - const int exponent_size = - bits::value - double_significand_size - 1; // -1 for sign - const uint64_t significand_mask = implicit_bit - 1; - const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask; - const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1; - auto u = bit_cast(d); - f = u & significand_mask; - int biased_e = - static_cast((u & exponent_mask) >> double_significand_size); - // Predecessor is closer if d is a normalized power of 2 (f == 0) other than - // the smallest normalized number (biased_e > 1). - bool is_predecessor_closer = f == 0 && biased_e > 1; - if (biased_e != 0) - f += implicit_bit; - else - biased_e = 1; // Subnormals use biased exponent 1 (min exponent). - e = biased_e - exponent_bias - double_significand_size; - return is_predecessor_closer; - } - - template - bool assign(Double) { - *this = fp(); - return false; - } - - // Assigns d to this together with computing lower and upper boundaries, - // where a boundary is a value half way between the number and its predecessor - // (lower) or successor (upper). The upper boundary is normalized and lower - // has the same exponent but may be not normalized. - template boundaries assign_with_boundaries(Double d) { - bool is_lower_closer = assign(d); - fp lower = - is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1); - // 1 in normalize accounts for the exponent shift above. - fp upper = normalize<1>(fp((f << 1) + 1, e - 1)); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } - - template boundaries assign_float_with_boundaries(Double d) { - assign(d); - constexpr int min_normal_e = std::numeric_limits::min_exponent - - std::numeric_limits::digits; - significand_type half_ulp = 1 << (std::numeric_limits::digits - - std::numeric_limits::digits - 1); - if (min_normal_e > e) half_ulp <<= min_normal_e - e; - fp upper = normalize<0>(fp(f + half_ulp, e)); - fp lower = fp( - f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } -}; - -// Normalizes the value converted from double and multiplied by (1 << SHIFT). -template fp normalize(fp value) { - // Handle subnormals. - const auto shifted_implicit_bit = fp::implicit_bit << SHIFT; - while ((value.f & shifted_implicit_bit) == 0) { - value.f <<= 1; - --value.e; - } - // Subtract 1 to account for hidden bit. - const auto offset = - fp::significand_size - fp::double_significand_size - SHIFT - 1; - value.f <<= offset; - value.e -= offset; - return value; -} - -inline bool operator==(fp x, fp y) { return x.f == y.f && x.e == y.e; } - -// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { -#if FMT_USE_INT128 - auto product = static_cast<__uint128_t>(lhs) * rhs; - auto f = static_cast(product >> 64); - return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; -#else - // Multiply 32-bit parts of significands. - uint64_t mask = (1ULL << 32) - 1; - uint64_t a = lhs >> 32, b = lhs & mask; - uint64_t c = rhs >> 32, d = rhs & mask; - uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; - // Compute mid 64-bit of result and round. - uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); - return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); -#endif -} - -inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } - -// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its -// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. -inline fp get_cached_power(int min_exponent, int& pow10_exponent) { - const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10)) - int index = static_cast( - ((min_exponent + fp::significand_size - 1) * one_over_log2_10 + - ((int64_t(1) << 32) - 1)) // ceil - >> 32 // arithmetic shift - ); - // Decimal exponent of the first (smallest) cached power of 10. - const int first_dec_exp = -348; - // Difference between 2 consecutive decimal exponents in cached powers of 10. - const int dec_exp_step = 8; - index = (index - first_dec_exp - 1) / dec_exp_step + 1; - pow10_exponent = first_dec_exp + index * dec_exp_step; - return {data::pow10_significands[index], data::pow10_exponents[index]}; -} - -// A simple accumulator to hold the sums of terms in bigint::square if uint128_t -// is not available. -struct accumulator { - uint64_t lower; - uint64_t upper; - - accumulator() : lower(0), upper(0) {} - explicit operator uint32_t() const { return static_cast(lower); } - - void operator+=(uint64_t n) { - lower += n; - if (lower < n) ++upper; - } - void operator>>=(int shift) { - assert(shift == 32); - (void)shift; - lower = (upper << 32) | (lower >> 32); - upper >>= 32; - } -}; - -class bigint { - private: - // A bigint is stored as an array of bigits (big digits), with bigit at index - // 0 being the least significant one. - using bigit = uint32_t; - using double_bigit = uint64_t; - enum { bigits_capacity = 32 }; - basic_memory_buffer bigits_; - int exp_; - - bigit operator[](int index) const { return bigits_[to_unsigned(index)]; } - bigit& operator[](int index) { return bigits_[to_unsigned(index)]; } - - static FMT_CONSTEXPR_DECL const int bigit_bits = bits::value; - - friend struct formatter; - - void subtract_bigits(int index, bigit other, bigit& borrow) { - auto result = static_cast((*this)[index]) - other - borrow; - (*this)[index] = static_cast(result); - borrow = static_cast(result >> (bigit_bits * 2 - 1)); - } - - void remove_leading_zeros() { - int num_bigits = static_cast(bigits_.size()) - 1; - while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; - bigits_.resize(to_unsigned(num_bigits + 1)); - } - - // Computes *this -= other assuming aligned bigints and *this >= other. - void subtract_aligned(const bigint& other) { - FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); - FMT_ASSERT(compare(*this, other) >= 0, ""); - bigit borrow = 0; - int i = other.exp_ - exp_; - for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) { - subtract_bigits(i, other.bigits_[j], borrow); - } - while (borrow > 0) subtract_bigits(i, 0, borrow); - remove_leading_zeros(); - } - - void multiply(uint32_t value) { - const double_bigit wide_value = value; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - double_bigit result = bigits_[i] * wide_value + carry; - bigits_[i] = static_cast(result); - carry = static_cast(result >> bigit_bits); - } - if (carry != 0) bigits_.push_back(carry); - } - - void multiply(uint64_t value) { - const bigit mask = ~bigit(0); - const double_bigit lower = value & mask; - const double_bigit upper = value >> bigit_bits; - double_bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - double_bigit result = bigits_[i] * lower + (carry & mask); - carry = - bigits_[i] * upper + (result >> bigit_bits) + (carry >> bigit_bits); - bigits_[i] = static_cast(result); - } - while (carry != 0) { - bigits_.push_back(carry & mask); - carry >>= bigit_bits; - } - } - - public: - bigint() : exp_(0) {} - explicit bigint(uint64_t n) { assign(n); } - ~bigint() { assert(bigits_.capacity() <= bigits_capacity); } - - bigint(const bigint&) = delete; - void operator=(const bigint&) = delete; - - void assign(const bigint& other) { - bigits_.resize(other.bigits_.size()); - auto data = other.bigits_.data(); - std::copy(data, data + other.bigits_.size(), bigits_.data()); - exp_ = other.exp_; - } - - void assign(uint64_t n) { - size_t num_bigits = 0; - do { - bigits_[num_bigits++] = n & ~bigit(0); - n >>= bigit_bits; - } while (n != 0); - bigits_.resize(num_bigits); - exp_ = 0; - } - - int num_bigits() const { return static_cast(bigits_.size()) + exp_; } - - bigint& operator<<=(int shift) { - assert(shift >= 0); - exp_ += shift / bigit_bits; - shift %= bigit_bits; - if (shift == 0) return *this; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - bigit c = bigits_[i] >> (bigit_bits - shift); - bigits_[i] = (bigits_[i] << shift) + carry; - carry = c; - } - if (carry != 0) bigits_.push_back(carry); - return *this; - } - - template bigint& operator*=(Int value) { - FMT_ASSERT(value > 0, ""); - multiply(uint32_or_64_or_128_t(value)); - return *this; - } - - friend int compare(const bigint& lhs, const bigint& rhs) { - int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); - if (num_lhs_bigits != num_rhs_bigits) - return num_lhs_bigits > num_rhs_bigits ? 1 : -1; - int i = static_cast(lhs.bigits_.size()) - 1; - int j = static_cast(rhs.bigits_.size()) - 1; - int end = i - j; - if (end < 0) end = 0; - for (; i >= end; --i, --j) { - bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; - if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; - } - if (i != j) return i > j ? 1 : -1; - return 0; - } - - // Returns compare(lhs1 + lhs2, rhs). - friend int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { - int max_lhs_bigits = (std::max)(lhs1.num_bigits(), lhs2.num_bigits()); - int num_rhs_bigits = rhs.num_bigits(); - if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; - if (max_lhs_bigits > num_rhs_bigits) return 1; - auto get_bigit = [](const bigint& n, int i) -> bigit { - return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; - }; - double_bigit borrow = 0; - int min_exp = (std::min)((std::min)(lhs1.exp_, lhs2.exp_), rhs.exp_); - for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { - double_bigit sum = - static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); - bigit rhs_bigit = get_bigit(rhs, i); - if (sum > rhs_bigit + borrow) return 1; - borrow = rhs_bigit + borrow - sum; - if (borrow > 1) return -1; - borrow <<= bigit_bits; - } - return borrow != 0 ? -1 : 0; - } - - // Assigns pow(10, exp) to this bigint. - void assign_pow10(int exp) { - assert(exp >= 0); - if (exp == 0) return assign(1); - // Find the top bit. - int bitmask = 1; - while (exp >= bitmask) bitmask <<= 1; - bitmask >>= 1; - // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by - // repeated squaring and multiplication. - assign(5); - bitmask >>= 1; - while (bitmask != 0) { - square(); - if ((exp & bitmask) != 0) *this *= 5; - bitmask >>= 1; - } - *this <<= exp; // Multiply by pow(2, exp) by shifting. - } - - void square() { - basic_memory_buffer n(std::move(bigits_)); - int num_bigits = static_cast(bigits_.size()); - int num_result_bigits = 2 * num_bigits; - bigits_.resize(to_unsigned(num_result_bigits)); - using accumulator_t = conditional_t; - auto sum = accumulator_t(); - for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { - // Compute bigit at position bigit_index of the result by adding - // cross-product terms n[i] * n[j] such that i + j == bigit_index. - for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { - // Most terms are multiplied twice which can be optimized in the future. - sum += static_cast(n[i]) * n[j]; - } - (*this)[bigit_index] = static_cast(sum); - sum >>= bits::value; // Compute the carry. - } - // Do the same for the top half. - for (int bigit_index = num_bigits; bigit_index < num_result_bigits; - ++bigit_index) { - for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) - sum += static_cast(n[i++]) * n[j--]; - (*this)[bigit_index] = static_cast(sum); - sum >>= bits::value; - } - --num_result_bigits; - remove_leading_zeros(); - exp_ *= 2; - } - - // Divides this bignum by divisor, assigning the remainder to this and - // returning the quotient. - int divmod_assign(const bigint& divisor) { - FMT_ASSERT(this != &divisor, ""); - if (compare(*this, divisor) < 0) return 0; - int num_bigits = static_cast(bigits_.size()); - FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); - int exp_difference = exp_ - divisor.exp_; - if (exp_difference > 0) { - // Align bigints by adding trailing zeros to simplify subtraction. - bigits_.resize(to_unsigned(num_bigits + exp_difference)); - for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) - bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); - exp_ -= exp_difference; - } - int quotient = 0; - do { - subtract_aligned(divisor); - ++quotient; - } while (compare(*this, divisor) >= 0); - return quotient; - } -}; - -enum class round_direction { unknown, up, down }; - -// Given the divisor (normally a power of 10), the remainder = v % divisor for -// some number v and the error, returns whether v should be rounded up, down, or -// whether the rounding direction can't be determined due to error. -// error should be less than divisor / 2. -inline round_direction get_round_direction(uint64_t divisor, uint64_t remainder, - uint64_t error) { - FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. - FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. - FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. - // Round down if (remainder + error) * 2 <= divisor. - if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) - return round_direction::down; - // Round up if (remainder - error) * 2 >= divisor. - if (remainder >= error && - remainder - error >= divisor - (remainder - error)) { - return round_direction::up; - } - return round_direction::unknown; -} - -namespace digits { -enum result { - more, // Generate more digits. - done, // Done generating digits. - error // Digit generation cancelled due to an error. -}; -} - -// A version of count_digits optimized for grisu_gen_digits. -inline int grisu_count_digits(uint32_t n) { - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -} - -// Generates output using the Grisu digit-gen algorithm. -// error: the size of the region (lower, upper) outside of which numbers -// definitely do not round to value (Delta in Grisu3). -template -FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, - int& exp, Handler& handler) { - const fp one(1ULL << -value.e, value.e); - // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be - // zero because it contains a product of two 64-bit numbers with MSB set (due - // to normalization) - 1, shifted right by at most 60 bits. - auto integral = static_cast(value.f >> -one.e); - FMT_ASSERT(integral != 0, ""); - FMT_ASSERT(integral == value.f >> -one.e, ""); - // The fractional part of scaled value (p2 in Grisu) c = value % one. - uint64_t fractional = value.f & (one.f - 1); - exp = grisu_count_digits(integral); // kappa in Grisu. - // Divide by 10 to prevent overflow. - auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e, - value.f / 10, error * 10, exp); - if (result != digits::more) return result; - // Generate digits for the integral part. This can produce up to 10 digits. - do { - uint32_t digit = 0; - auto divmod_integral = [&](uint32_t divisor) { - digit = integral / divisor; - integral %= divisor; - }; - // This optimization by Milo Yip reduces the number of integer divisions by - // one per iteration. - switch (exp) { - case 10: - divmod_integral(1000000000); - break; - case 9: - divmod_integral(100000000); - break; - case 8: - divmod_integral(10000000); - break; - case 7: - divmod_integral(1000000); - break; - case 6: - divmod_integral(100000); - break; - case 5: - divmod_integral(10000); - break; - case 4: - divmod_integral(1000); - break; - case 3: - divmod_integral(100); - break; - case 2: - divmod_integral(10); - break; - case 1: - digit = integral; - integral = 0; - break; - default: - FMT_ASSERT(false, "invalid number of digits"); - } - --exp; - uint64_t remainder = - (static_cast(integral) << -one.e) + fractional; - result = handler.on_digit(static_cast('0' + digit), - data::powers_of_10_64[exp] << -one.e, remainder, - error, exp, true); - if (result != digits::more) return result; - } while (exp > 0); - // Generate digits for the fractional part. - for (;;) { - fractional *= 10; - error *= 10; - char digit = - static_cast('0' + static_cast(fractional >> -one.e)); - fractional &= one.f - 1; - --exp; - result = handler.on_digit(digit, one.f, fractional, error, exp, false); - if (result != digits::more) return result; - } -} - -// The fixed precision digit handler. -struct fixed_handler { - char* buf; - int size; - int precision; - int exp10; - bool fixed; - - digits::result on_start(uint64_t divisor, uint64_t remainder, uint64_t error, - int& exp) { - // Non-fixed formats require at least one digit and no precision adjustment. - if (!fixed) return digits::more; - // Adjust fixed precision by exponent because it is relative to decimal - // point. - precision += exp + exp10; - // Check if precision is satisfied just by leading zeros, e.g. - // format("{:.2f}", 0.001) gives "0.00" without generating any digits. - if (precision > 0) return digits::more; - if (precision < 0) return digits::done; - auto dir = get_round_direction(divisor, remainder, error); - if (dir == round_direction::unknown) return digits::error; - buf[size++] = dir == round_direction::up ? '1' : '0'; - return digits::done; - } - - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int, bool integral) { - FMT_ASSERT(remainder < divisor, ""); - buf[size++] = digit; - if (size < precision) return digits::more; - if (!integral) { - // Check if error * 2 < divisor with overflow prevention. - // The check is not needed for the integral part because error = 1 - // and divisor > (1 << 32) there. - if (error >= divisor || error >= divisor - error) return digits::error; - } else { - FMT_ASSERT(error == 1 && divisor > 2, ""); - } - auto dir = get_round_direction(divisor, remainder, error); - if (dir != round_direction::up) - return dir == round_direction::down ? digits::done : digits::error; - ++buf[size - 1]; - for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] > '9') { - buf[0] = '1'; - buf[size++] = '0'; - } - return digits::done; - } -}; - -// The shortest representation digit handler. -struct grisu_shortest_handler { - char* buf; - int size; - // Distance between scaled value and upper bound (wp_W in Grisu3). - uint64_t diff; - - digits::result on_start(uint64_t, uint64_t, uint64_t, int&) { - return digits::more; - } - - // Decrement the generated number approaching value from above. - void round(uint64_t d, uint64_t divisor, uint64_t& remainder, - uint64_t error) { - while ( - remainder < d && error - remainder >= divisor && - (remainder + divisor < d || d - remainder >= remainder + divisor - d)) { - --buf[size - 1]; - remainder += divisor; - } - } - - // Implements Grisu's round_weed. - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int exp, bool integral) { - buf[size++] = digit; - if (remainder >= error) return digits::more; - uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp]; - uint64_t up = (diff - 1) * unit; // wp_Wup - round(up, divisor, remainder, error); - uint64_t down = (diff + 1) * unit; // wp_Wdown - if (remainder < down && error - remainder >= divisor && - (remainder + divisor < down || - down - remainder > remainder + divisor - down)) { - return digits::error; - } - return 2 * unit <= remainder && remainder <= error - 4 * unit - ? digits::done - : digits::error; - } -}; - -// Formats value using a variation of the Fixed-Precision Positive -// Floating-Point Printout ((FPP)^2) algorithm by Steele & White: -// https://fmt.dev/p372-steele.pdf. -template -void fallback_format(Double d, buffer& buf, int& exp10) { - bigint numerator; // 2 * R in (FPP)^2. - bigint denominator; // 2 * S in (FPP)^2. - // lower and upper are differences between value and corresponding boundaries. - bigint lower; // (M^- in (FPP)^2). - bigint upper_store; // upper's value if different from lower. - bigint* upper = nullptr; // (M^+ in (FPP)^2). - fp value; - // Shift numerator and denominator by an extra bit or two (if lower boundary - // is closer) to make lower and upper integers. This eliminates multiplication - // by 2 during later computations. - // TODO: handle float - int shift = value.assign(d) ? 2 : 1; - uint64_t significand = value.f << shift; - if (value.e >= 0) { - numerator.assign(significand); - numerator <<= value.e; - lower.assign(1); - lower <<= value.e; - if (shift != 1) { - upper_store.assign(1); - upper_store <<= value.e + 1; - upper = &upper_store; - } - denominator.assign_pow10(exp10); - denominator <<= 1; - } else if (exp10 < 0) { - numerator.assign_pow10(-exp10); - lower.assign(numerator); - if (shift != 1) { - upper_store.assign(numerator); - upper_store <<= 1; - upper = &upper_store; - } - numerator *= significand; - denominator.assign(1); - denominator <<= shift - value.e; - } else { - numerator.assign(significand); - denominator.assign_pow10(exp10); - denominator <<= shift - value.e; - lower.assign(1); - if (shift != 1) { - upper_store.assign(1ULL << 1); - upper = &upper_store; - } - } - if (!upper) upper = &lower; - // Invariant: value == (numerator / denominator) * pow(10, exp10). - bool even = (value.f & 1) == 0; - int num_digits = 0; - char* data = buf.data(); - for (;;) { - int digit = numerator.divmod_assign(denominator); - bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. - // numerator + upper >[=] pow10: - bool high = add_compare(numerator, *upper, denominator) + even > 0; - data[num_digits++] = static_cast('0' + digit); - if (low || high) { - if (!low) { - ++data[num_digits - 1]; - } else if (high) { - int result = add_compare(numerator, numerator, denominator); - // Round half to even. - if (result > 0 || (result == 0 && (digit % 2) != 0)) - ++data[num_digits - 1]; - } - buf.resize(to_unsigned(num_digits)); - exp10 -= num_digits - 1; - return; - } - numerator *= 10; - lower *= 10; - if (upper != &lower) *upper *= 10; - } -} - -// Formats value using the Grisu algorithm -// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf) -// if T is a IEEE754 binary32 or binary64 and snprintf otherwise. -template -int format_float(T value, int precision, float_specs specs, buffer& buf) { - static_assert(!std::is_same::value, ""); - FMT_ASSERT(value >= 0, "value is negative"); - - const bool fixed = specs.format == float_format::fixed; - if (value <= 0) { // <= instead of == to silence a warning. - if (precision <= 0 || !fixed) { - buf.push_back('0'); - return 0; - } - buf.resize(to_unsigned(precision)); - std::uninitialized_fill_n(buf.data(), precision, '0'); - return -precision; - } - - if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf); - - int exp = 0; - const int min_exp = -60; // alpha in Grisu. - int cached_exp10 = 0; // K in Grisu. - if (precision < 0) { - fp fp_value; - auto boundaries = specs.binary32 - ? fp_value.assign_float_with_boundaries(value) - : fp_value.assign_with_boundaries(value); - fp_value = normalize(fp_value); - // Find a cached power of 10 such that multiplying value by it will bring - // the exponent in the range [min_exp, -32]. - const fp cached_pow = get_cached_power( - min_exp - (fp_value.e + fp::significand_size), cached_exp10); - // Multiply value and boundaries by the cached power of 10. - fp_value = fp_value * cached_pow; - boundaries.lower = multiply(boundaries.lower, cached_pow.f); - boundaries.upper = multiply(boundaries.upper, cached_pow.f); - assert(min_exp <= fp_value.e && fp_value.e <= -32); - --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}. - ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}. - // Numbers outside of (lower, upper) definitely do not round to value. - grisu_shortest_handler handler{buf.data(), 0, - boundaries.upper - fp_value.f}; - auto result = - grisu_gen_digits(fp(boundaries.upper, fp_value.e), - boundaries.upper - boundaries.lower, exp, handler); - if (result == digits::error) { - exp += handler.size - cached_exp10 - 1; - fallback_format(value, buf, exp); - return exp; - } - buf.resize(to_unsigned(handler.size)); - } else { - if (precision > 17) return snprintf_float(value, precision, specs, buf); - fp normalized = normalize(fp(value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::significand_size), cached_exp10); - normalized = normalized * cached_pow; - fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) - return snprintf_float(value, precision, specs, buf); - int num_digits = handler.size; - if (!fixed) { - // Remove trailing zeros. - while (num_digits > 0 && buf[num_digits - 1] == '0') { - --num_digits; - ++exp; - } - } - buf.resize(to_unsigned(num_digits)); - } - return exp - cached_exp10; -} - -template -int snprintf_float(T value, int precision, float_specs specs, - buffer& buf) { - // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. - FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); - static_assert(!std::is_same::value, ""); - - // Subtract 1 to account for the difference in precision since we use %e for - // both general and exponent format. - if (specs.format == float_format::general || - specs.format == float_format::exp) - precision = (precision >= 0 ? precision : 6) - 1; - - // Build the format string. - enum { max_format_size = 7 }; // Ths longest format is "%#.*Le". - char format[max_format_size]; - char* format_ptr = format; - *format_ptr++ = '%'; - if (specs.showpoint && specs.format == float_format::hex) *format_ptr++ = '#'; - if (precision >= 0) { - *format_ptr++ = '.'; - *format_ptr++ = '*'; - } - if (std::is_same()) *format_ptr++ = 'L'; - *format_ptr++ = specs.format != float_format::hex - ? (specs.format == float_format::fixed ? 'f' : 'e') - : (specs.upper ? 'A' : 'a'); - *format_ptr = '\0'; - - // Format using snprintf. - auto offset = buf.size(); - for (;;) { - auto begin = buf.data() + offset; - auto capacity = buf.capacity() - offset; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (precision > 100000) - throw std::runtime_error( - "fuzz mode - avoid large allocation inside snprintf"); -#endif - // Suppress the warning about a nonliteral format string. - // Cannot use auto becase of a bug in MinGW (#1532). - int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; - int result = precision >= 0 - ? snprintf_ptr(begin, capacity, format, precision, value) - : snprintf_ptr(begin, capacity, format, value); - if (result < 0) { - buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially. - continue; - } - auto size = to_unsigned(result); - // Size equal to capacity means that the last character was truncated. - if (size >= capacity) { - buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'. - continue; - } - auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; - if (specs.format == float_format::fixed) { - if (precision == 0) { - buf.resize(size); - return 0; - } - // Find and remove the decimal point. - auto end = begin + size, p = end; - do { - --p; - } while (is_digit(*p)); - int fraction_size = static_cast(end - p - 1); - std::memmove(p, p + 1, to_unsigned(fraction_size)); - buf.resize(size - 1); - return -fraction_size; - } - if (specs.format == float_format::hex) { - buf.resize(size + offset); - return 0; - } - // Find and parse the exponent. - auto end = begin + size, exp_pos = end; - do { - --exp_pos; - } while (*exp_pos != 'e'); - char sign = exp_pos[1]; - assert(sign == '+' || sign == '-'); - int exp = 0; - auto p = exp_pos + 2; // Skip 'e' and sign. - do { - assert(is_digit(*p)); - exp = exp * 10 + (*p++ - '0'); - } while (p != end); - if (sign == '-') exp = -exp; - int fraction_size = 0; - if (exp_pos != begin + 1) { - // Remove trailing zeros. - auto fraction_end = exp_pos - 1; - while (*fraction_end == '0') --fraction_end; - // Move the fractional part left to get rid of the decimal point. - fraction_size = static_cast(fraction_end - begin - 1); - std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size)); - } - buf.resize(to_unsigned(fraction_size) + offset + 1); - return exp - fraction_size; - } -} - -// A public domain branchless UTF-8 decoder by Christopher Wellons: -// https://github.com/skeeto/branchless-utf8 -/* Decode the next character, c, from buf, reporting errors in e. - * - * Since this is a branchless decoder, four bytes will be read from the - * buffer regardless of the actual length of the next character. This - * means the buffer _must_ have at least three bytes of zero padding - * following the end of the data stream. - * - * Errors are reported in e, which will be non-zero if the parsed - * character was somehow invalid: invalid byte sequence, non-canonical - * encoding, or a surrogate half. - * - * The function returns a pointer to the next character. When an error - * occurs, this pointer will be a guess that depends on the particular - * error, but it will always advance at least one byte. - */ -FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { - static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; - static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; - static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; - static const int shiftc[] = {0, 18, 12, 6, 0}; - static const int shifte[] = {0, 6, 4, 2, 0}; - - auto s = reinterpret_cast(buf); - int len = lengths[s[0] >> 3]; - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - const char* next = buf + len + !len; - - // Assume a four-byte character and load four bytes. Unused bits are - // shifted out. - *c = uint32_t(s[0] & masks[len]) << 18; - *c |= uint32_t(s[1] & 0x3f) << 12; - *c |= uint32_t(s[2] & 0x3f) << 6; - *c |= uint32_t(s[3] & 0x3f) << 0; - *c >>= shiftc[len]; - - // Accumulate the various error conditions. - *e = (*c < mins[len]) << 6; // non-canonical encoding - *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? - *e |= (*c > 0x10FFFF) << 8; // out of range? - *e |= (s[1] & 0xc0) >> 2; - *e |= (s[2] & 0xc0) >> 4; - *e |= (s[3]) >> 6; - *e ^= 0x2a; // top two bits of each tail byte correct? - *e >>= shifte[len]; - - return next; -} -} // namespace internal - -template <> struct formatter { - format_parse_context::iterator parse(format_parse_context& ctx) { - return ctx.begin(); - } - - format_context::iterator format(const internal::bigint& n, - format_context& ctx) { - auto out = ctx.out(); - bool first = true; - for (auto i = n.bigits_.size(); i > 0; --i) { - auto value = n.bigits_[i - 1u]; - if (first) { - out = format_to(out, "{:x}", value); - first = false; - continue; - } - out = format_to(out, "{:08x}", value); - } - if (n.exp_ > 0) - out = format_to(out, "p{}", n.exp_ * internal::bigint::bigit_bits); - return out; - } -}; - -FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) { - auto transcode = [this](const char* p) { - auto cp = uint32_t(); - auto error = 0; - p = utf8_decode(p, &cp, &error); - if (error != 0) FMT_THROW(std::runtime_error("invalid utf8")); - if (cp <= 0xFFFF) { - buffer_.push_back(static_cast(cp)); - } else { - cp -= 0x10000; - buffer_.push_back(static_cast(0xD800 + (cp >> 10))); - buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); - } - return p; - }; - auto p = s.data(); - const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. - if (s.size() >= block_size) { - for (auto end = p + s.size() - block_size + 1; p < end;) p = transcode(p); - } - if (auto num_chars_left = s.data() + s.size() - p) { - char buf[2 * block_size - 1] = {}; - memcpy(buf, p, to_unsigned(num_chars_left)); - p = buf; - do { - p = transcode(p); - } while (p - buf < num_chars_left); - } - buffer_.push_back(0); -} - -FMT_FUNC void format_system_error(internal::buffer& out, int error_code, - string_view message) FMT_NOEXCEPT { - FMT_TRY { - memory_buffer buf; - buf.resize(inline_buffer_size); - for (;;) { - char* system_message = &buf[0]; - int result = - internal::safe_strerror(error_code, system_message, buf.size()); - if (result == 0) { - internal::writer w(out); - w.write(message); - w.write(": "); - w.write(system_message); - return; - } - if (result != ERANGE) - break; // Can't get error message, report error code instead. - buf.resize(buf.size() * 2); - } - } - FMT_CATCH(...) {} - format_error_code(out, error_code, message); -} - -FMT_FUNC void internal::error_handler::on_error(const char* message) { - FMT_THROW(format_error(message)); -} - -FMT_FUNC void report_system_error(int error_code, - fmt::string_view message) FMT_NOEXCEPT { - report_error(format_system_error, error_code, message); -} - -FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { - memory_buffer buffer; - internal::vformat_to(buffer, format_str, - basic_format_args>(args)); -#ifdef _WIN32 - auto fd = _fileno(f); - if (_isatty(fd)) { - internal::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size())); - auto written = DWORD(); - if (!WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), &written, - nullptr)) { - FMT_THROW(format_error("failed to write to console")); - } - return; - } -#endif - internal::fwrite_fully(buffer.data(), 1, buffer.size(), f); -} - -#ifdef _WIN32 -// Print assuming legacy (non-Unicode) encoding. -FMT_FUNC void internal::vprint_mojibake(std::FILE* f, string_view format_str, - format_args args) { - memory_buffer buffer; - internal::vformat_to(buffer, format_str, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); -} -#endif - -FMT_FUNC void vprint(string_view format_str, format_args args) { - vprint(stdout, format_str, args); -} - -FMT_END_NAMESPACE - -#ifdef _MSC_VER -# pragma warning(pop) -#endif - -#endif // FMT_FORMAT_INL_H_ diff --git a/kernel_generator/third_party/fmt/include/fmt/format.h b/kernel_generator/third_party/fmt/include/fmt/format.h deleted file mode 100644 index 4e96539..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/format.h +++ /dev/null @@ -1,3648 +0,0 @@ -/* - Formatting library for C++ - - Copyright (c) 2012 - present, Victor Zverovich - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - --- Optional exception to the license --- - - As an exception, if, as a result of your compiling your source code, portions - of this Software are embedded into a machine-executable object form of such - source code, you may redistribute such embedded portions in such object form - without including the above copyright and permission notices. - */ - -#ifndef FMT_FORMAT_H_ -#define FMT_FORMAT_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "core.h" - -#ifdef FMT_DEPRECATED_INCLUDE_OS -# include "os.h" -#endif - -#ifdef __INTEL_COMPILER -# define FMT_ICC_VERSION __INTEL_COMPILER -#elif defined(__ICL) -# define FMT_ICC_VERSION __ICL -#else -# define FMT_ICC_VERSION 0 -#endif - -#ifdef __NVCC__ -# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) -#else -# define FMT_CUDA_VERSION 0 -#endif - -#ifdef __has_builtin -# define FMT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define FMT_HAS_BUILTIN(x) 0 -#endif - -#if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_NOINLINE __attribute__((noinline)) -#else -# define FMT_NOINLINE -#endif - -#if __cplusplus == 201103L || __cplusplus == 201402L -# if defined(__clang__) -# define FMT_FALLTHROUGH [[clang::fallthrough]] -# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) -# define FMT_FALLTHROUGH [[gnu::fallthrough]] -# else -# define FMT_FALLTHROUGH -# endif -#elif FMT_HAS_CPP17_ATTRIBUTE(fallthrough) || \ - (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) -# define FMT_FALLTHROUGH [[fallthrough]] -#else -# define FMT_FALLTHROUGH -#endif - -#ifndef FMT_THROW -# if FMT_EXCEPTIONS -# if FMT_MSC_VER || FMT_NVCC -FMT_BEGIN_NAMESPACE -namespace internal { -template inline void do_throw(const Exception& x) { - // Silence unreachable code warnings in MSVC and NVCC because these - // are nearly impossible to fix in a generic code. - volatile bool b = true; - if (b) throw x; -} -} // namespace internal -FMT_END_NAMESPACE -# define FMT_THROW(x) internal::do_throw(x) -# else -# define FMT_THROW(x) throw x -# endif -# else -# define FMT_THROW(x) \ - do { \ - static_cast(sizeof(x)); \ - FMT_ASSERT(false, ""); \ - } while (false) -# endif -#endif - -#if FMT_EXCEPTIONS -# define FMT_TRY try -# define FMT_CATCH(x) catch (x) -#else -# define FMT_TRY if (true) -# define FMT_CATCH(x) if (false) -#endif - -#ifndef FMT_USE_USER_DEFINED_LITERALS -// For Intel and NVIDIA compilers both they and the system gcc/msc support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ - FMT_MSC_VER >= 1900) && \ - (!(FMT_ICC_VERSION || FMT_CUDA_VERSION) || FMT_ICC_VERSION >= 1500 || \ - FMT_CUDA_VERSION >= 700) -# define FMT_USE_USER_DEFINED_LITERALS 1 -# else -# define FMT_USE_USER_DEFINED_LITERALS 0 -# endif -#endif - -#ifndef FMT_USE_UDL_TEMPLATE -// EDG front end based compilers (icc, nvcc) and GCC < 6.4 do not propertly -// support UDL templates and GCC >= 9 warns about them. -# if FMT_USE_USER_DEFINED_LITERALS && FMT_ICC_VERSION == 0 && \ - FMT_CUDA_VERSION == 0 && \ - ((FMT_GCC_VERSION >= 604 && FMT_GCC_VERSION <= 900 && \ - __cplusplus >= 201402L) || \ - FMT_CLANG_VERSION >= 304) -# define FMT_USE_UDL_TEMPLATE 1 -# else -# define FMT_USE_UDL_TEMPLATE 0 -# endif -#endif - -#ifndef FMT_USE_FLOAT -# define FMT_USE_FLOAT 1 -#endif - -#ifndef FMT_USE_DOUBLE -# define FMT_USE_DOUBLE 1 -#endif - -#ifndef FMT_USE_LONG_DOUBLE -# define FMT_USE_LONG_DOUBLE 1 -#endif - -// __builtin_clz is broken in clang with Microsoft CodeGen: -// https://github.com/fmtlib/fmt/issues/519 -#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER -# define FMT_BUILTIN_CLZ(n) __builtin_clz(n) -#endif -#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER -# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) -#endif - -// Some compilers masquerade as both MSVC and GCC-likes or otherwise support -// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the -// MSVC intrinsics if the clz and clzll builtins are not available. -#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) -# include // _BitScanReverse, _BitScanReverse64 - -FMT_BEGIN_NAMESPACE -namespace internal { -// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. -# ifndef __clang__ -# pragma intrinsic(_BitScanReverse) -# endif -inline uint32_t clz(uint32_t x) { - unsigned long r = 0; - _BitScanReverse(&r, x); - - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. -# pragma warning(suppress : 6102) - return 31 - r; -} -# define FMT_BUILTIN_CLZ(n) internal::clz(n) - -# if defined(_WIN64) && !defined(__clang__) -# pragma intrinsic(_BitScanReverse64) -# endif - -inline uint32_t clzll(uint64_t x) { - unsigned long r = 0; -# ifdef _WIN64 - _BitScanReverse64(&r, x); -# else - // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 - (r + 32); - - // Scan the low 32 bits. - _BitScanReverse(&r, static_cast(x)); -# endif - - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. -# pragma warning(suppress : 6102) - return 63 - r; -} -# define FMT_BUILTIN_CLZLL(n) internal::clzll(n) -} // namespace internal -FMT_END_NAMESPACE -#endif - -// Enable the deprecated numeric alignment. -#ifndef FMT_NUMERIC_ALIGN -# define FMT_NUMERIC_ALIGN 1 -#endif - -// Enable the deprecated percent specifier. -#ifndef FMT_DEPRECATED_PERCENT -# define FMT_DEPRECATED_PERCENT 0 -#endif - -FMT_BEGIN_NAMESPACE -namespace internal { - -// An equivalent of `*reinterpret_cast(&source)` that doesn't have -// undefined behavior (e.g. due to type aliasing). -// Example: uint64_t d = bit_cast(2.718); -template -inline Dest bit_cast(const Source& source) { - static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); - Dest dest; - std::memcpy(&dest, &source, sizeof(dest)); - return dest; -} - -inline bool is_big_endian() { - const auto u = 1u; - struct bytes { - char data[sizeof(u)]; - }; - return bit_cast(u).data[0] == 0; -} - -// A fallback implementation of uintptr_t for systems that lack it. -struct fallback_uintptr { - unsigned char value[sizeof(void*)]; - - fallback_uintptr() = default; - explicit fallback_uintptr(const void* p) { - *this = bit_cast(p); - if (is_big_endian()) { - for (size_t i = 0, j = sizeof(void*) - 1; i < j; ++i, --j) - std::swap(value[i], value[j]); - } - } -}; -#ifdef UINTPTR_MAX -using uintptr_t = ::uintptr_t; -inline uintptr_t to_uintptr(const void* p) { return bit_cast(p); } -#else -using uintptr_t = fallback_uintptr; -inline fallback_uintptr to_uintptr(const void* p) { - return fallback_uintptr(p); -} -#endif - -// Returns the largest possible value for type T. Same as -// std::numeric_limits::max() but shorter and not affected by the max macro. -template constexpr T max_value() { - return (std::numeric_limits::max)(); -} -template constexpr int num_bits() { - return std::numeric_limits::digits; -} -template <> constexpr int num_bits() { - return static_cast(sizeof(void*) * - std::numeric_limits::digits); -} - -// An approximation of iterator_t for pre-C++20 systems. -template -using iterator_t = decltype(std::begin(std::declval())); - -// Detect the iterator category of *any* given type in a SFINAE-friendly way. -// Unfortunately, older implementations of std::iterator_traits are not safe -// for use in a SFINAE-context. -template -struct iterator_category : std::false_type {}; - -template struct iterator_category { - using type = std::random_access_iterator_tag; -}; - -template -struct iterator_category> { - using type = typename It::iterator_category; -}; - -// Detect if *any* given type models the OutputIterator concept. -template class is_output_iterator { - // Check for mutability because all iterator categories derived from - // std::input_iterator_tag *may* also meet the requirements of an - // OutputIterator, thereby falling into the category of 'mutable iterators' - // [iterator.requirements.general] clause 4. The compiler reveals this - // property only at the point of *actually dereferencing* the iterator! - template - static decltype(*(std::declval())) test(std::input_iterator_tag); - template static char& test(std::output_iterator_tag); - template static const char& test(...); - - using type = decltype(test(typename iterator_category::type{})); - - public: - enum { value = !std::is_const>::value }; -}; - -// A workaround for std::string not having mutable data() until C++17. -template inline Char* get_data(std::basic_string& s) { - return &s[0]; -} -template -inline typename Container::value_type* get_data(Container& c) { - return c.data(); -} - -#if defined(_SECURE_SCL) && _SECURE_SCL -// Make a checked iterator to avoid MSVC warnings. -template using checked_ptr = stdext::checked_array_iterator; -template checked_ptr make_checked(T* p, std::size_t size) { - return {p, size}; -} -#else -template using checked_ptr = T*; -template inline T* make_checked(T* p, std::size_t) { return p; } -#endif - -template ::value)> -inline checked_ptr reserve( - std::back_insert_iterator& it, std::size_t n) { - Container& c = get_container(it); - std::size_t size = c.size(); - c.resize(size + n); - return make_checked(get_data(c) + size, n); -} - -template -inline Iterator& reserve(Iterator& it, std::size_t) { - return it; -} - -// An output iterator that counts the number of objects written to it and -// discards them. -class counting_iterator { - private: - std::size_t count_; - - public: - using iterator_category = std::output_iterator_tag; - using difference_type = std::ptrdiff_t; - using pointer = void; - using reference = void; - using _Unchecked_type = counting_iterator; // Mark iterator as checked. - - struct value_type { - template void operator=(const T&) {} - }; - - counting_iterator() : count_(0) {} - - std::size_t count() const { return count_; } - - counting_iterator& operator++() { - ++count_; - return *this; - } - - counting_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type operator*() const { return {}; } -}; - -template class truncating_iterator_base { - protected: - OutputIt out_; - std::size_t limit_; - std::size_t count_; - - truncating_iterator_base(OutputIt out, std::size_t limit) - : out_(out), limit_(limit), count_(0) {} - - public: - using iterator_category = std::output_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = void; - using pointer = void; - using reference = void; - using _Unchecked_type = - truncating_iterator_base; // Mark iterator as checked. - - OutputIt base() const { return out_; } - std::size_t count() const { return count_; } -}; - -// An output iterator that truncates the output and counts the number of objects -// written to it. -template ::value_type>::type> -class truncating_iterator; - -template -class truncating_iterator - : public truncating_iterator_base { - mutable typename truncating_iterator_base::value_type blackhole_; - - public: - using value_type = typename truncating_iterator_base::value_type; - - truncating_iterator(OutputIt out, std::size_t limit) - : truncating_iterator_base(out, limit) {} - - truncating_iterator& operator++() { - if (this->count_++ < this->limit_) ++this->out_; - return *this; - } - - truncating_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type& operator*() const { - return this->count_ < this->limit_ ? *this->out_ : blackhole_; - } -}; - -template -class truncating_iterator - : public truncating_iterator_base { - public: - truncating_iterator(OutputIt out, std::size_t limit) - : truncating_iterator_base(out, limit) {} - - template truncating_iterator& operator=(T val) { - if (this->count_++ < this->limit_) *this->out_++ = val; - return *this; - } - - truncating_iterator& operator++() { return *this; } - truncating_iterator& operator++(int) { return *this; } - truncating_iterator& operator*() { return *this; } -}; - -// A range with the specified output iterator and value type. -template -class output_range { - private: - OutputIt it_; - - public: - using value_type = T; - using iterator = OutputIt; - struct sentinel {}; - - explicit output_range(OutputIt it) : it_(it) {} - OutputIt begin() const { return it_; } - sentinel end() const { return {}; } // Sentinel is not used yet. -}; - -template -inline size_t count_code_points(basic_string_view s) { - return s.size(); -} - -// Counts the number of code points in a UTF-8 string. -inline size_t count_code_points(basic_string_view s) { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80) ++num_code_points; - } - return num_code_points; -} - -inline size_t count_code_points(basic_string_view s) { - return count_code_points(basic_string_view( - reinterpret_cast(s.data()), s.size())); -} - -template -inline size_t code_point_index(basic_string_view s, size_t n) { - size_t size = s.size(); - return n < size ? n : size; -} - -// Calculates the index of the nth code point in a UTF-8 string. -inline size_t code_point_index(basic_string_view s, size_t n) { - const char8_type* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) { - return i; - } - } - return s.size(); -} - -inline char8_type to_char8_t(char c) { return static_cast(c); } - -template -using needs_conversion = bool_constant< - std::is_same::value_type, - char>::value && - std::is_same::value>; - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::copy(begin, end, it); -} - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::transform(begin, end, it, to_char8_t); -} - -#ifndef FMT_USE_GRISU -# define FMT_USE_GRISU 1 -#endif - -template constexpr bool use_grisu() { - return FMT_USE_GRISU && std::numeric_limits::is_iec559 && - sizeof(T) <= sizeof(double); -} - -template -template -void buffer::append(const U* begin, const U* end) { - std::size_t new_size = size_ + to_unsigned(end - begin); - reserve(new_size); - std::uninitialized_copy(begin, end, make_checked(ptr_, capacity_) + size_); - size_ = new_size; -} -} // namespace internal - -// A range with an iterator appending to a buffer. -template -class buffer_range : public internal::output_range< - std::back_insert_iterator>, T> { - public: - using iterator = std::back_insert_iterator>; - using internal::output_range::output_range; - buffer_range(internal::buffer& buf) - : internal::output_range(std::back_inserter(buf)) {} -}; - -class FMT_DEPRECATED u8string_view - : public basic_string_view { - public: - u8string_view(const char* s) - : basic_string_view( - reinterpret_cast(s)) {} - u8string_view(const char* s, size_t count) FMT_NOEXCEPT - : basic_string_view( - reinterpret_cast(s), count) {} -}; - -#if FMT_USE_USER_DEFINED_LITERALS -inline namespace literals { -FMT_DEPRECATED inline basic_string_view operator"" _u( - const char* s, std::size_t n) { - return {reinterpret_cast(s), n}; -} -} // namespace literals -#endif - -// The number of characters to store in the basic_memory_buffer object itself -// to avoid dynamic memory allocation. -enum { inline_buffer_size = 500 }; - -/** - \rst - A dynamically growing memory buffer for trivially copyable/constructible types - with the first ``SIZE`` elements stored in the object itself. - - You can use one of the following type aliases for common character types: - - +----------------+------------------------------+ - | Type | Definition | - +================+==============================+ - | memory_buffer | basic_memory_buffer | - +----------------+------------------------------+ - | wmemory_buffer | basic_memory_buffer | - +----------------+------------------------------+ - - **Example**:: - - fmt::memory_buffer out; - format_to(out, "The answer is {}.", 42); - - This will append the following output to the ``out`` object: - - .. code-block:: none - - The answer is 42. - - The output can be converted to an ``std::string`` with ``to_string(out)``. - \endrst - */ -template > -class basic_memory_buffer : private Allocator, public internal::buffer { - private: - T store_[SIZE]; - - // Deallocate memory allocated by the buffer. - void deallocate() { - T* data = this->data(); - if (data != store_) Allocator::deallocate(data, this->capacity()); - } - - protected: - void grow(std::size_t size) FMT_OVERRIDE; - - public: - using value_type = T; - using const_reference = const T&; - - explicit basic_memory_buffer(const Allocator& alloc = Allocator()) - : Allocator(alloc) { - this->set(store_, SIZE); - } - ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } - - private: - // Move data from other to this buffer. - void move(basic_memory_buffer& other) { - Allocator &this_alloc = *this, &other_alloc = other; - this_alloc = std::move(other_alloc); - T* data = other.data(); - std::size_t size = other.size(), capacity = other.capacity(); - if (data == other.store_) { - this->set(store_, capacity); - std::uninitialized_copy(other.store_, other.store_ + size, - internal::make_checked(store_, capacity)); - } else { - this->set(data, capacity); - // Set pointer to the inline array so that delete is not called - // when deallocating. - other.set(other.store_, 0); - } - this->resize(size); - } - - public: - /** - \rst - Constructs a :class:`fmt::basic_memory_buffer` object moving the content - of the other object to it. - \endrst - */ - basic_memory_buffer(basic_memory_buffer&& other) FMT_NOEXCEPT { move(other); } - - /** - \rst - Moves the content of the other ``basic_memory_buffer`` object to this one. - \endrst - */ - basic_memory_buffer& operator=(basic_memory_buffer&& other) FMT_NOEXCEPT { - FMT_ASSERT(this != &other, ""); - deallocate(); - move(other); - return *this; - } - - // Returns a copy of the allocator associated with this buffer. - Allocator get_allocator() const { return *this; } -}; - -template -void basic_memory_buffer::grow(std::size_t size) { -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (size > 1000) throw std::runtime_error("fuzz mode - won't grow that much"); -#endif - std::size_t old_capacity = this->capacity(); - std::size_t new_capacity = old_capacity + old_capacity / 2; - if (size > new_capacity) new_capacity = size; - T* old_data = this->data(); - T* new_data = std::allocator_traits::allocate(*this, new_capacity); - // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy(old_data, old_data + this->size(), - internal::make_checked(new_data, new_capacity)); - this->set(new_data, new_capacity); - // deallocate must not throw according to the standard, but even if it does, - // the buffer already uses the new storage and will deallocate it in - // destructor. - if (old_data != store_) Allocator::deallocate(old_data, old_capacity); -} - -using memory_buffer = basic_memory_buffer; -using wmemory_buffer = basic_memory_buffer; - -/** A formatting error such as invalid format string. */ -FMT_CLASS_API -class FMT_API format_error : public std::runtime_error { - public: - explicit format_error(const char* message) : std::runtime_error(message) {} - explicit format_error(const std::string& message) - : std::runtime_error(message) {} - format_error(const format_error&) = default; - format_error& operator=(const format_error&) = default; - format_error(format_error&&) = default; - format_error& operator=(format_error&&) = default; - ~format_error() FMT_NOEXCEPT FMT_OVERRIDE; -}; - -namespace internal { - -// Returns true if value is negative, false otherwise. -// Same as `value < 0` but doesn't produce warnings if T is an unsigned type. -template ::is_signed)> -FMT_CONSTEXPR bool is_negative(T value) { - return value < 0; -} -template ::is_signed)> -FMT_CONSTEXPR bool is_negative(T) { - return false; -} - -template ::value)> -FMT_CONSTEXPR bool is_supported_floating_point(T) { - return (std::is_same::value && FMT_USE_FLOAT) || - (std::is_same::value && FMT_USE_DOUBLE) || - (std::is_same::value && FMT_USE_LONG_DOUBLE); -} - -// Smallest of uint32_t, uint64_t, uint128_t that is large enough to -// represent all values of T. -template -using uint32_or_64_or_128_t = conditional_t< - std::numeric_limits::digits <= 32, uint32_t, - conditional_t::digits <= 64, uint64_t, uint128_t>>; - -// Static data is placed in this class template for the header-only config. -template struct FMT_EXTERN_TEMPLATE_API basic_data { - static const uint64_t powers_of_10_64[]; - static const uint32_t zero_or_powers_of_10_32[]; - static const uint64_t zero_or_powers_of_10_64[]; - static const uint64_t pow10_significands[]; - static const int16_t pow10_exponents[]; - static const char digits[]; - static const char hex_digits[]; - static const char foreground_color[]; - static const char background_color[]; - static const char reset_color[5]; - static const wchar_t wreset_color[5]; - static const char signs[]; -}; - -FMT_EXTERN template struct basic_data; - -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct data : basic_data<> {}; - -#ifdef FMT_BUILTIN_CLZLL -// Returns the number of decimal digits in n. Leading zeros are not counted -// except for n == 0 in which case count_digits returns 1. -inline int count_digits(uint64_t n) { - // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. - int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_64[t]) + 1; -} -#else -// Fallback version of count_digits used when __builtin_clz is not available. -inline int count_digits(uint64_t n) { - int count = 1; - for (;;) { - // Integer division is slow so do it for a group of four digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - if (n < 10) return count; - if (n < 100) return count + 1; - if (n < 1000) return count + 2; - if (n < 10000) return count + 3; - n /= 10000u; - count += 4; - } -} -#endif - -#if FMT_USE_INT128 -inline int count_digits(uint128_t n) { - int count = 1; - for (;;) { - // Integer division is slow so do it for a group of four digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - if (n < 10) return count; - if (n < 100) return count + 1; - if (n < 1000) return count + 2; - if (n < 10000) return count + 3; - n /= 10000U; - count += 4; - } -} -#endif - -// Counts the number of digits in n. BITS = log2(radix). -template inline int count_digits(UInt n) { - int num_digits = 0; - do { - ++num_digits; - } while ((n >>= BITS) != 0); - return num_digits; -} - -template <> int count_digits<4>(internal::fallback_uintptr n); - -#if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) -#else -# define FMT_ALWAYS_INLINE -#endif - -#ifdef FMT_BUILTIN_CLZ -// Optional version of count_digits for better performance on 32-bit platforms. -inline int count_digits(uint32_t n) { - int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_32[t]) + 1; -} -#endif - -template FMT_API std::string grouping_impl(locale_ref loc); -template inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); -} -template <> inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); -} - -template FMT_API Char thousands_sep_impl(locale_ref loc); -template inline Char thousands_sep(locale_ref loc) { - return Char(thousands_sep_impl(loc)); -} -template <> inline wchar_t thousands_sep(locale_ref loc) { - return thousands_sep_impl(loc); -} - -template FMT_API Char decimal_point_impl(locale_ref loc); -template inline Char decimal_point(locale_ref loc) { - return Char(decimal_point_impl(loc)); -} -template <> inline wchar_t decimal_point(locale_ref loc) { - return decimal_point_impl(loc); -} - -// Formats a decimal unsigned integer value writing into buffer. -// add_thousands_sep is called after writing each char to add a thousands -// separator if necessary. -template -inline Char* format_decimal(Char* buffer, UInt value, int num_digits, - F add_thousands_sep) { - FMT_ASSERT(num_digits >= 0, "invalid digit count"); - buffer += num_digits; - Char* end = buffer; - while (value >= 100) { - // Integer division is slow so do it for a group of two digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - auto index = static_cast((value % 100) * 2); - value /= 100; - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); - add_thousands_sep(buffer); - } - if (value < 10) { - *--buffer = static_cast('0' + value); - return end; - } - auto index = static_cast(value * 2); - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); - return end; -} - -template constexpr int digits10() FMT_NOEXCEPT { - return std::numeric_limits::digits10; -} -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } - -template -inline Iterator format_decimal(Iterator out, UInt value, int num_digits, - F add_thousands_sep) { - FMT_ASSERT(num_digits >= 0, "invalid digit count"); - // Buffer should be large enough to hold all digits (<= digits10 + 1). - enum { max_size = digits10() + 1 }; - Char buffer[2 * max_size]; - auto end = format_decimal(buffer, value, num_digits, add_thousands_sep); - return internal::copy_str(buffer, end, out); -} - -template -inline It format_decimal(It out, UInt value, int num_digits) { - return format_decimal(out, value, num_digits, [](Char*) {}); -} - -template -inline Char* format_uint(Char* buffer, UInt value, int num_digits, - bool upper = false) { - buffer += num_digits; - Char* end = buffer; - do { - const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits; - unsigned digit = (value & ((1 << BASE_BITS) - 1)); - *--buffer = static_cast(BASE_BITS < 4 ? static_cast('0' + digit) - : digits[digit]); - } while ((value >>= BASE_BITS) != 0); - return end; -} - -template -Char* format_uint(Char* buffer, internal::fallback_uintptr n, int num_digits, - bool = false) { - auto char_digits = std::numeric_limits::digits / 4; - int start = (num_digits + char_digits - 1) / char_digits - 1; - if (int start_digits = num_digits % char_digits) { - unsigned value = n.value[start--]; - buffer = format_uint(buffer, value, start_digits); - } - for (; start >= 0; --start) { - unsigned value = n.value[start]; - buffer += char_digits; - auto p = buffer; - for (int i = 0; i < char_digits; ++i) { - unsigned digit = (value & ((1 << BASE_BITS) - 1)); - *--p = static_cast(data::hex_digits[digit]); - value >>= BASE_BITS; - } - } - return buffer; -} - -template -inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { - // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; - format_uint(buffer, value, num_digits, upper); - return internal::copy_str(buffer, buffer + num_digits, out); -} - -// A converter from UTF-8 to UTF-16. -class utf8_to_utf16 { - private: - wmemory_buffer buffer_; - - public: - FMT_API explicit utf8_to_utf16(string_view s); - operator wstring_view() const { return {&buffer_[0], size()}; } - size_t size() const { return buffer_.size() - 1; } - const wchar_t* c_str() const { return &buffer_[0]; } - std::wstring str() const { return {&buffer_[0], size()}; } -}; - -template struct null {}; - -// Workaround an array initialization issue in gcc 4.8. -template struct fill_t { - private: - enum { max_size = 4 }; - Char data_[max_size]; - unsigned char size_; - - public: - FMT_CONSTEXPR void operator=(basic_string_view s) { - auto size = s.size(); - if (size > max_size) { - FMT_THROW(format_error("invalid fill")); - return; - } - for (size_t i = 0; i < size; ++i) data_[i] = s[i]; - size_ = static_cast(size); - } - - size_t size() const { return size_; } - const Char* data() const { return data_; } - - FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; } - FMT_CONSTEXPR const Char& operator[](size_t index) const { - return data_[index]; - } - - static FMT_CONSTEXPR fill_t make() { - auto fill = fill_t(); - fill[0] = Char(' '); - fill.size_ = 1; - return fill; - } -}; -} // namespace internal - -// We cannot use enum classes as bit fields because of a gcc bug -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. -namespace align { -enum type { none, left, right, center, numeric }; -} -using align_t = align::type; - -namespace sign { -enum type { none, minus, plus, space }; -} -using sign_t = sign::type; - -// Format specifiers for built-in and string types. -template struct basic_format_specs { - int width; - int precision; - char type; - align_t align : 4; - sign_t sign : 3; - bool alt : 1; // Alternate form ('#'). - internal::fill_t fill; - - constexpr basic_format_specs() - : width(0), - precision(-1), - type(0), - align(align::none), - sign(sign::none), - alt(false), - fill(internal::fill_t::make()) {} -}; - -using format_specs = basic_format_specs; - -namespace internal { - -// A floating-point presentation format. -enum class float_format : unsigned char { - general, // General: exponent notation or fixed point based on magnitude. - exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. - fixed, // Fixed point with the default precision of 6, e.g. 0.0012. - hex -}; - -struct float_specs { - int precision; - float_format format : 8; - sign_t sign : 8; - bool upper : 1; - bool locale : 1; - bool percent : 1; - bool binary32 : 1; - bool use_grisu : 1; - bool showpoint : 1; -}; - -// Writes the exponent exp in the form "[+-]d{2,3}" to buffer. -template It write_exponent(int exp, It it) { - FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); - if (exp < 0) { - *it++ = static_cast('-'); - exp = -exp; - } else { - *it++ = static_cast('+'); - } - if (exp >= 100) { - const char* top = data::digits + (exp / 100) * 2; - if (exp >= 1000) *it++ = static_cast(top[0]); - *it++ = static_cast(top[1]); - exp %= 100; - } - const char* d = data::digits + exp * 2; - *it++ = static_cast(d[0]); - *it++ = static_cast(d[1]); - return it; -} - -template class float_writer { - private: - // The number is given as v = digits_ * pow(10, exp_). - const char* digits_; - int num_digits_; - int exp_; - size_t size_; - float_specs specs_; - Char decimal_point_; - - template It prettify(It it) const { - // pow(10, full_exp - 1) <= v <= pow(10, full_exp). - int full_exp = num_digits_ + exp_; - if (specs_.format == float_format::exp) { - // Insert a decimal point after the first digit and add an exponent. - *it++ = static_cast(*digits_); - int num_zeros = specs_.precision - num_digits_; - if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_; - it = copy_str(digits_ + 1, digits_ + num_digits_, it); - if (num_zeros > 0 && specs_.showpoint) - it = std::fill_n(it, num_zeros, static_cast('0')); - *it++ = static_cast(specs_.upper ? 'E' : 'e'); - return write_exponent(full_exp - 1, it); - } - if (num_digits_ <= full_exp) { - // 1234e7 -> 12340000000[.0+] - it = copy_str(digits_, digits_ + num_digits_, it); - it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); - if (specs_.showpoint || specs_.precision < 0) { - *it++ = decimal_point_; - int num_zeros = specs_.precision - full_exp; - if (num_zeros <= 0) { - if (specs_.format != float_format::fixed) - *it++ = static_cast('0'); - return it; - } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (num_zeros > 1000) - throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); -#endif - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else if (full_exp > 0) { - // 1234e-2 -> 12.34[0+] - it = copy_str(digits_, digits_ + full_exp, it); - if (!specs_.showpoint) { - // Remove trailing zeros. - int num_digits = num_digits_; - while (num_digits > full_exp && digits_[num_digits - 1] == '0') - --num_digits; - if (num_digits != full_exp) *it++ = decimal_point_; - return copy_str(digits_ + full_exp, digits_ + num_digits, it); - } - *it++ = decimal_point_; - it = copy_str(digits_ + full_exp, digits_ + num_digits_, it); - if (specs_.precision > num_digits_) { - // Add trailing zeros. - int num_zeros = specs_.precision - num_digits_; - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else { - // 1234e-6 -> 0.001234 - *it++ = static_cast('0'); - int num_zeros = -full_exp; - int num_digits = num_digits_; - if (num_digits == 0 && specs_.precision >= 0 && - specs_.precision < num_zeros) { - num_zeros = specs_.precision; - } - // Remove trailing zeros. - if (!specs_.showpoint) - while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; - if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) { - *it++ = decimal_point_; - it = std::fill_n(it, num_zeros, static_cast('0')); - it = copy_str(digits_, digits_ + num_digits, it); - } - } - return it; - } - - public: - float_writer(const char* digits, int num_digits, int exp, float_specs specs, - Char decimal_point) - : digits_(digits), - num_digits_(num_digits), - exp_(exp), - specs_(specs), - decimal_point_(decimal_point) { - int full_exp = num_digits + exp - 1; - int precision = specs.precision > 0 ? specs.precision : 16; - if (specs_.format == float_format::general && - !(full_exp >= -4 && full_exp < precision)) { - specs_.format = float_format::exp; - } - size_ = prettify(counting_iterator()).count(); - size_ += specs.sign ? 1 : 0; - } - - size_t size() const { return size_; } - size_t width() const { return size(); } - - template void operator()(It&& it) { - if (specs_.sign) *it++ = static_cast(data::signs[specs_.sign]); - it = prettify(it); - } -}; - -template -int format_float(T value, int precision, float_specs specs, buffer& buf); - -// Formats a floating-point number with snprintf. -template -int snprintf_float(T value, int precision, float_specs specs, - buffer& buf); - -template T promote_float(T value) { return value; } -inline double promote_float(float value) { return static_cast(value); } - -template -FMT_CONSTEXPR void handle_int_type_spec(char spec, Handler&& handler) { - switch (spec) { - case 0: - case 'd': - handler.on_dec(); - break; - case 'x': - case 'X': - handler.on_hex(); - break; - case 'b': - case 'B': - handler.on_bin(); - break; - case 'o': - handler.on_oct(); - break; - case 'n': - case 'L': - handler.on_num(); - break; - default: - handler.on_error(); - } -} - -template -FMT_CONSTEXPR float_specs parse_float_type_spec( - const basic_format_specs& specs, ErrorHandler&& eh = {}) { - auto result = float_specs(); - result.showpoint = specs.alt; - switch (specs.type) { - case 0: - result.format = float_format::general; - result.showpoint |= specs.precision > 0; - break; - case 'G': - result.upper = true; - FMT_FALLTHROUGH; - case 'g': - result.format = float_format::general; - break; - case 'E': - result.upper = true; - FMT_FALLTHROUGH; - case 'e': - result.format = float_format::exp; - result.showpoint |= specs.precision != 0; - break; - case 'F': - result.upper = true; - FMT_FALLTHROUGH; - case 'f': - result.format = float_format::fixed; - result.showpoint |= specs.precision != 0; - break; -#if FMT_DEPRECATED_PERCENT - case '%': - result.format = float_format::fixed; - result.percent = true; - break; -#endif - case 'A': - result.upper = true; - FMT_FALLTHROUGH; - case 'a': - result.format = float_format::hex; - break; - case 'n': - result.locale = true; - break; - default: - eh.on_error("invalid type specifier"); - break; - } - return result; -} - -template -FMT_CONSTEXPR void handle_char_specs(const basic_format_specs* specs, - Handler&& handler) { - if (!specs) return handler.on_char(); - if (specs->type && specs->type != 'c') return handler.on_int(); - if (specs->align == align::numeric || specs->sign != sign::none || specs->alt) - handler.on_error("invalid format specifier for char"); - handler.on_char(); -} - -template -FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) { - if (spec == 0 || spec == 's') - handler.on_string(); - else if (spec == 'p') - handler.on_pointer(); - else - handler.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 's') eh.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 'p') eh.on_error("invalid type specifier"); -} - -template class int_type_checker : private ErrorHandler { - public: - FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_dec() {} - FMT_CONSTEXPR void on_hex() {} - FMT_CONSTEXPR void on_bin() {} - FMT_CONSTEXPR void on_oct() {} - FMT_CONSTEXPR void on_num() {} - - FMT_CONSTEXPR void on_error() { - ErrorHandler::on_error("invalid type specifier"); - } -}; - -template -class char_specs_checker : public ErrorHandler { - private: - char type_; - - public: - FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh) - : ErrorHandler(eh), type_(type) {} - - FMT_CONSTEXPR void on_int() { - handle_int_type_spec(type_, int_type_checker(*this)); - } - FMT_CONSTEXPR void on_char() {} -}; - -template -class cstring_type_checker : public ErrorHandler { - public: - FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh) - : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_string() {} - FMT_CONSTEXPR void on_pointer() {} -}; - -template -void arg_map::init(const basic_format_args& args) { - if (map_) return; - map_ = new entry[internal::to_unsigned(args.max_size())]; - if (args.is_packed()) { - for (int i = 0;; ++i) { - internal::type arg_type = args.type(i); - if (arg_type == internal::type::none_type) return; - if (arg_type == internal::type::named_arg_type) - push_back(args.values_[i]); - } - } - for (int i = 0, n = args.max_size(); i < n; ++i) { - auto type = args.args_[i].type_; - if (type == internal::type::named_arg_type) push_back(args.args_[i].value_); - } -} - -template struct nonfinite_writer { - sign_t sign; - const char* str; - static constexpr size_t str_size = 3; - - size_t size() const { return str_size + (sign ? 1 : 0); } - size_t width() const { return size(); } - - template void operator()(It&& it) const { - if (sign) *it++ = static_cast(data::signs[sign]); - it = copy_str(str, str + str_size, it); - } -}; - -template -FMT_NOINLINE OutputIt fill(OutputIt it, size_t n, const fill_t& fill) { - auto fill_size = fill.size(); - if (fill_size == 1) return std::fill_n(it, n, fill[0]); - for (size_t i = 0; i < n; ++i) it = std::copy_n(fill.data(), fill_size, it); - return it; -} - -// This template provides operations for formatting and writing data into a -// character range. -template class basic_writer { - public: - using char_type = typename Range::value_type; - using iterator = typename Range::iterator; - using format_specs = basic_format_specs; - - private: - iterator out_; // Output iterator. - locale_ref locale_; - - // Attempts to reserve space for n extra characters in the output range. - // Returns a pointer to the reserved range or a reference to out_. - auto reserve(std::size_t n) -> decltype(internal::reserve(out_, n)) { - return internal::reserve(out_, n); - } - - template struct padded_int_writer { - size_t size_; - string_view prefix; - char_type fill; - std::size_t padding; - F f; - - size_t size() const { return size_; } - size_t width() const { return size_; } - - template void operator()(It&& it) const { - if (prefix.size() != 0) - it = copy_str(prefix.begin(), prefix.end(), it); - it = std::fill_n(it, padding, fill); - f(it); - } - }; - - // Writes an integer in the format - // - // where are written by f(it). - template - void write_int(int num_digits, string_view prefix, format_specs specs, F f) { - std::size_t size = prefix.size() + to_unsigned(num_digits); - char_type fill = specs.fill[0]; - std::size_t padding = 0; - if (specs.align == align::numeric) { - auto unsiged_width = to_unsigned(specs.width); - if (unsiged_width > size) { - padding = unsiged_width - size; - size = unsiged_width; - } - } else if (specs.precision > num_digits) { - size = prefix.size() + to_unsigned(specs.precision); - padding = to_unsigned(specs.precision - num_digits); - fill = static_cast('0'); - } - if (specs.align == align::none) specs.align = align::right; - write_padded(specs, padded_int_writer{size, prefix, fill, padding, f}); - } - - // Writes a decimal integer. - template void write_decimal(Int value) { - auto abs_value = static_cast>(value); - bool negative = is_negative(value); - // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. - if (negative) abs_value = ~abs_value + 1; - int num_digits = count_digits(abs_value); - auto&& it = reserve((negative ? 1 : 0) + static_cast(num_digits)); - if (negative) *it++ = static_cast('-'); - it = format_decimal(it, abs_value, num_digits); - } - - // The handle_int_type_spec handler that writes an integer. - template struct int_writer { - using unsigned_type = uint32_or_64_or_128_t; - - basic_writer& writer; - const Specs& specs; - unsigned_type abs_value; - char prefix[4]; - unsigned prefix_size; - - string_view get_prefix() const { return string_view(prefix, prefix_size); } - - int_writer(basic_writer& w, Int value, const Specs& s) - : writer(w), - specs(s), - abs_value(static_cast(value)), - prefix_size(0) { - if (is_negative(value)) { - prefix[0] = '-'; - ++prefix_size; - abs_value = 0 - abs_value; - } else if (specs.sign != sign::none && specs.sign != sign::minus) { - prefix[0] = specs.sign == sign::plus ? '+' : ' '; - ++prefix_size; - } - } - - struct dec_writer { - unsigned_type abs_value; - int num_digits; - - template void operator()(It&& it) const { - it = internal::format_decimal(it, abs_value, num_digits); - } - }; - - void on_dec() { - int num_digits = count_digits(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - dec_writer{abs_value, num_digits}); - } - - struct hex_writer { - int_writer& self; - int num_digits; - - template void operator()(It&& it) const { - it = format_uint<4, char_type>(it, self.abs_value, num_digits, - self.specs.type != 'x'); - } - }; - - void on_hex() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = specs.type; - } - int num_digits = count_digits<4>(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - hex_writer{*this, num_digits}); - } - - template struct bin_writer { - unsigned_type abs_value; - int num_digits; - - template void operator()(It&& it) const { - it = format_uint(it, abs_value, num_digits); - } - }; - - void on_bin() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = static_cast(specs.type); - } - int num_digits = count_digits<1>(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - bin_writer<1>{abs_value, num_digits}); - } - - void on_oct() { - int num_digits = count_digits<3>(abs_value); - if (specs.alt && specs.precision <= num_digits && abs_value != 0) { - // Octal prefix '0' is counted as a digit, so only add it if precision - // is not greater than the number of digits. - prefix[prefix_size++] = '0'; - } - writer.write_int(num_digits, get_prefix(), specs, - bin_writer<3>{abs_value, num_digits}); - } - - enum { sep_size = 1 }; - - struct num_writer { - unsigned_type abs_value; - int size; - const std::string& groups; - char_type sep; - - template void operator()(It&& it) const { - basic_string_view s(&sep, sep_size); - // Index of a decimal digit with the least significant digit having - // index 0. - int digit_index = 0; - std::string::const_iterator group = groups.cbegin(); - it = format_decimal( - it, abs_value, size, - [this, s, &group, &digit_index](char_type*& buffer) { - if (*group <= 0 || ++digit_index % *group != 0 || - *group == max_value()) - return; - if (group + 1 != groups.cend()) { - digit_index = 0; - ++group; - } - buffer -= s.size(); - std::uninitialized_copy(s.data(), s.data() + s.size(), - make_checked(buffer, s.size())); - }); - } - }; - - void on_num() { - std::string groups = grouping(writer.locale_); - if (groups.empty()) return on_dec(); - auto sep = thousands_sep(writer.locale_); - if (!sep) return on_dec(); - int num_digits = count_digits(abs_value); - int size = num_digits; - std::string::const_iterator group = groups.cbegin(); - while (group != groups.cend() && num_digits > *group && *group > 0 && - *group != max_value()) { - size += sep_size; - num_digits -= *group; - ++group; - } - if (group == groups.cend()) - size += sep_size * ((num_digits - 1) / groups.back()); - writer.write_int(size, get_prefix(), specs, - num_writer{abs_value, size, groups, sep}); - } - - FMT_NORETURN void on_error() { - FMT_THROW(format_error("invalid type specifier")); - } - }; - - template struct str_writer { - const Char* s; - size_t size_; - - size_t size() const { return size_; } - size_t width() const { - return count_code_points(basic_string_view(s, size_)); - } - - template void operator()(It&& it) const { - it = copy_str(s, s + size_, it); - } - }; - - struct bytes_writer { - string_view bytes; - - size_t size() const { return bytes.size(); } - size_t width() const { return bytes.size(); } - - template void operator()(It&& it) const { - const char* data = bytes.data(); - it = copy_str(data, data + size(), it); - } - }; - - template struct pointer_writer { - UIntPtr value; - int num_digits; - - size_t size() const { return to_unsigned(num_digits) + 2; } - size_t width() const { return size(); } - - template void operator()(It&& it) const { - *it++ = static_cast('0'); - *it++ = static_cast('x'); - it = format_uint<4, char_type>(it, value, num_digits); - } - }; - - public: - explicit basic_writer(Range out, locale_ref loc = locale_ref()) - : out_(out.begin()), locale_(loc) {} - - iterator out() const { return out_; } - - // Writes a value in the format - // - // where is written by f(it). - template void write_padded(const format_specs& specs, F&& f) { - // User-perceived width (in code points). - unsigned width = to_unsigned(specs.width); - size_t size = f.size(); // The number of code units. - size_t num_code_points = width != 0 ? f.width() : size; - if (width <= num_code_points) return f(reserve(size)); - size_t padding = width - num_code_points; - size_t fill_size = specs.fill.size(); - auto&& it = reserve(size + padding * fill_size); - if (specs.align == align::right) { - it = fill(it, padding, specs.fill); - f(it); - } else if (specs.align == align::center) { - std::size_t left_padding = padding / 2; - it = fill(it, left_padding, specs.fill); - f(it); - it = fill(it, padding - left_padding, specs.fill); - } else { - f(it); - it = fill(it, padding, specs.fill); - } - } - - void write(int value) { write_decimal(value); } - void write(long value) { write_decimal(value); } - void write(long long value) { write_decimal(value); } - - void write(unsigned value) { write_decimal(value); } - void write(unsigned long value) { write_decimal(value); } - void write(unsigned long long value) { write_decimal(value); } - -#if FMT_USE_INT128 - void write(int128_t value) { write_decimal(value); } - void write(uint128_t value) { write_decimal(value); } -#endif - - template - void write_int(T value, const Spec& spec) { - handle_int_type_spec(spec.type, int_writer(*this, value, spec)); - } - - template ::value)> - void write(T value, format_specs specs = {}) { - if (const_check(!is_supported_floating_point(value))) { - return; - } - float_specs fspecs = parse_float_type_spec(specs); - fspecs.sign = specs.sign; - if (std::signbit(value)) { // value < 0 is false for NaN so use signbit. - fspecs.sign = sign::minus; - value = -value; - } else if (fspecs.sign == sign::minus) { - fspecs.sign = sign::none; - } - - if (!std::isfinite(value)) { - auto str = std::isinf(value) ? (fspecs.upper ? "INF" : "inf") - : (fspecs.upper ? "NAN" : "nan"); - return write_padded(specs, nonfinite_writer{fspecs.sign, str}); - } - - if (specs.align == align::none) { - specs.align = align::right; - } else if (specs.align == align::numeric) { - if (fspecs.sign) { - auto&& it = reserve(1); - *it++ = static_cast(data::signs[fspecs.sign]); - fspecs.sign = sign::none; - if (specs.width != 0) --specs.width; - } - specs.align = align::right; - } - - memory_buffer buffer; - if (fspecs.format == float_format::hex) { - if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]); - snprintf_float(promote_float(value), specs.precision, fspecs, buffer); - write_padded(specs, str_writer{buffer.data(), buffer.size()}); - return; - } - int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6; - if (fspecs.format == float_format::exp) { - if (precision == max_value()) - FMT_THROW(format_error("number is too big")); - else - ++precision; - } - if (const_check(std::is_same())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu(); - if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) value *= 100; - int exp = format_float(promote_float(value), precision, fspecs, buffer); - if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) { - buffer.push_back('%'); - --exp; // Adjust decimal place position. - } - fspecs.precision = precision; - char_type point = fspecs.locale ? decimal_point(locale_) - : static_cast('.'); - write_padded(specs, float_writer(buffer.data(), - static_cast(buffer.size()), - exp, fspecs, point)); - } - - void write(char value) { - auto&& it = reserve(1); - *it++ = value; - } - - template ::value)> - void write(Char value) { - auto&& it = reserve(1); - *it++ = value; - } - - void write(string_view value) { - auto&& it = reserve(value.size()); - it = copy_str(value.begin(), value.end(), it); - } - void write(wstring_view value) { - static_assert(std::is_same::value, ""); - auto&& it = reserve(value.size()); - it = std::copy(value.begin(), value.end(), it); - } - - template - void write(const Char* s, std::size_t size, const format_specs& specs) { - write_padded(specs, str_writer{s, size}); - } - - template - void write(basic_string_view s, const format_specs& specs = {}) { - const Char* data = s.data(); - std::size_t size = s.size(); - if (specs.precision >= 0 && to_unsigned(specs.precision) < size) - size = code_point_index(s, to_unsigned(specs.precision)); - write(data, size, specs); - } - - void write_bytes(string_view bytes, const format_specs& specs) { - write_padded(specs, bytes_writer{bytes}); - } - - template - void write_pointer(UIntPtr value, const format_specs* specs) { - int num_digits = count_digits<4>(value); - auto pw = pointer_writer{value, num_digits}; - if (!specs) return pw(reserve(to_unsigned(num_digits) + 2)); - format_specs specs_copy = *specs; - if (specs_copy.align == align::none) specs_copy.align = align::right; - write_padded(specs_copy, pw); - } -}; - -using writer = basic_writer>; - -template struct is_integral : std::is_integral {}; -template <> struct is_integral : std::true_type {}; -template <> struct is_integral : std::true_type {}; - -template -class arg_formatter_base { - public: - using char_type = typename Range::value_type; - using iterator = typename Range::iterator; - using format_specs = basic_format_specs; - - private: - using writer_type = basic_writer; - writer_type writer_; - format_specs* specs_; - - struct char_writer { - char_type value; - - size_t size() const { return 1; } - size_t width() const { return 1; } - - template void operator()(It&& it) const { *it++ = value; } - }; - - void write_char(char_type value) { - if (specs_) - writer_.write_padded(*specs_, char_writer{value}); - else - writer_.write(value); - } - - void write_pointer(const void* p) { - writer_.write_pointer(internal::to_uintptr(p), specs_); - } - - protected: - writer_type& writer() { return writer_; } - FMT_DEPRECATED format_specs* spec() { return specs_; } - format_specs* specs() { return specs_; } - iterator out() { return writer_.out(); } - - void write(bool value) { - string_view sv(value ? "true" : "false"); - specs_ ? writer_.write(sv, *specs_) : writer_.write(sv); - } - - void write(const char_type* value) { - if (!value) { - FMT_THROW(format_error("string pointer is null")); - } else { - auto length = std::char_traits::length(value); - basic_string_view sv(value, length); - specs_ ? writer_.write(sv, *specs_) : writer_.write(sv); - } - } - - public: - arg_formatter_base(Range r, format_specs* s, locale_ref loc) - : writer_(r, loc), specs_(s) {} - - iterator operator()(monostate) { - FMT_ASSERT(false, "invalid argument type"); - return out(); - } - - template ::value)> - iterator operator()(T value) { - if (specs_) - writer_.write_int(value, *specs_); - else - writer_.write(value); - return out(); - } - - iterator operator()(char_type value) { - internal::handle_char_specs( - specs_, char_spec_handler(*this, static_cast(value))); - return out(); - } - - iterator operator()(bool value) { - if (specs_ && specs_->type) return (*this)(value ? 1 : 0); - write(value != 0); - return out(); - } - - template ::value)> - iterator operator()(T value) { - if (const_check(is_supported_floating_point(value))) - writer_.write(value, specs_ ? *specs_ : format_specs()); - else - FMT_ASSERT(false, "unsupported float argument type"); - return out(); - } - - struct char_spec_handler : ErrorHandler { - arg_formatter_base& formatter; - char_type value; - - char_spec_handler(arg_formatter_base& f, char_type val) - : formatter(f), value(val) {} - - void on_int() { - if (formatter.specs_) - formatter.writer_.write_int(value, *formatter.specs_); - else - formatter.writer_.write(value); - } - void on_char() { formatter.write_char(value); } - }; - - struct cstring_spec_handler : internal::error_handler { - arg_formatter_base& formatter; - const char_type* value; - - cstring_spec_handler(arg_formatter_base& f, const char_type* val) - : formatter(f), value(val) {} - - void on_string() { formatter.write(value); } - void on_pointer() { formatter.write_pointer(value); } - }; - - iterator operator()(const char_type* value) { - if (!specs_) return write(value), out(); - internal::handle_cstring_type_spec(specs_->type, - cstring_spec_handler(*this, value)); - return out(); - } - - iterator operator()(basic_string_view value) { - if (specs_) { - internal::check_string_type_spec(specs_->type, internal::error_handler()); - writer_.write(value, *specs_); - } else { - writer_.write(value); - } - return out(); - } - - iterator operator()(const void* value) { - if (specs_) - check_pointer_type_spec(specs_->type, internal::error_handler()); - write_pointer(value); - return out(); - } -}; - -template FMT_CONSTEXPR bool is_name_start(Char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; -} - -// Parses the range [begin, end) as an unsigned integer. This function assumes -// that the range is non-empty and the first character is a digit. -template -FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end, - ErrorHandler&& eh) { - FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); - unsigned value = 0; - // Convert to unsigned to prevent a warning. - constexpr unsigned max_int = max_value(); - unsigned big = max_int / 10; - do { - // Check for overflow. - if (value > big) { - value = max_int + 1; - break; - } - value = value * 10 + unsigned(*begin - '0'); - ++begin; - } while (begin != end && '0' <= *begin && *begin <= '9'); - if (value > max_int) eh.on_error("number is too big"); - return static_cast(value); -} - -template class custom_formatter { - private: - using char_type = typename Context::char_type; - - basic_format_parse_context& parse_ctx_; - Context& ctx_; - - public: - explicit custom_formatter(basic_format_parse_context& parse_ctx, - Context& ctx) - : parse_ctx_(parse_ctx), ctx_(ctx) {} - - bool operator()(typename basic_format_arg::handle h) const { - h.format(parse_ctx_, ctx_); - return true; - } - - template bool operator()(T) const { return false; } -}; - -template -using is_integer = - bool_constant::value && !std::is_same::value && - !std::is_same::value && - !std::is_same::value>; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { - if (is_negative(value)) handler_.on_error("negative width"); - return static_cast(value); - } - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T) { - handler_.on_error("width is not integer"); - return 0; - } - - private: - ErrorHandler& handler_; -}; - -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { - if (is_negative(value)) handler_.on_error("negative precision"); - return static_cast(value); - } - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T) { - handler_.on_error("precision is not integer"); - return 0; - } - - private: - ErrorHandler& handler_; -}; - -// A format specifier handler that sets fields in basic_format_specs. -template class specs_setter { - public: - explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) - : specs_(specs) {} - - FMT_CONSTEXPR specs_setter(const specs_setter& other) - : specs_(other.specs_) {} - - FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - specs_.fill = fill; - } - FMT_CONSTEXPR void on_plus() { specs_.sign = sign::plus; } - FMT_CONSTEXPR void on_minus() { specs_.sign = sign::minus; } - FMT_CONSTEXPR void on_space() { specs_.sign = sign::space; } - FMT_CONSTEXPR void on_hash() { specs_.alt = true; } - - FMT_CONSTEXPR void on_zero() { - specs_.align = align::numeric; - specs_.fill[0] = Char('0'); - } - - FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } - FMT_CONSTEXPR void on_precision(int precision) { - specs_.precision = precision; - } - FMT_CONSTEXPR void end_precision() {} - - FMT_CONSTEXPR void on_type(Char type) { - specs_.type = static_cast(type); - } - - protected: - basic_format_specs& specs_; -}; - -template class numeric_specs_checker { - public: - FMT_CONSTEXPR numeric_specs_checker(ErrorHandler& eh, internal::type arg_type) - : error_handler_(eh), arg_type_(arg_type) {} - - FMT_CONSTEXPR void require_numeric_argument() { - if (!is_arithmetic_type(arg_type_)) - error_handler_.on_error("format specifier requires numeric argument"); - } - - FMT_CONSTEXPR void check_sign() { - require_numeric_argument(); - if (is_integral_type(arg_type_) && arg_type_ != type::int_type && - arg_type_ != type::long_long_type && arg_type_ != type::char_type) { - error_handler_.on_error("format specifier requires signed argument"); - } - } - - FMT_CONSTEXPR void check_precision() { - if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) - error_handler_.on_error("precision not allowed for this argument type"); - } - - private: - ErrorHandler& error_handler_; - internal::type arg_type_; -}; - -// A format specifier handler that checks if specifiers are consistent with the -// argument type. -template class specs_checker : public Handler { - public: - FMT_CONSTEXPR specs_checker(const Handler& handler, internal::type arg_type) - : Handler(handler), checker_(*this, arg_type) {} - - FMT_CONSTEXPR specs_checker(const specs_checker& other) - : Handler(other), checker_(*this, other.arg_type_) {} - - FMT_CONSTEXPR void on_align(align_t align) { - if (align == align::numeric) checker_.require_numeric_argument(); - Handler::on_align(align); - } - - FMT_CONSTEXPR void on_plus() { - checker_.check_sign(); - Handler::on_plus(); - } - - FMT_CONSTEXPR void on_minus() { - checker_.check_sign(); - Handler::on_minus(); - } - - FMT_CONSTEXPR void on_space() { - checker_.check_sign(); - Handler::on_space(); - } - - FMT_CONSTEXPR void on_hash() { - checker_.require_numeric_argument(); - Handler::on_hash(); - } - - FMT_CONSTEXPR void on_zero() { - checker_.require_numeric_argument(); - Handler::on_zero(); - } - - FMT_CONSTEXPR void end_precision() { checker_.check_precision(); } - - private: - numeric_specs_checker checker_; -}; - -template