diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..120223b --- /dev/null +++ b/.clang-format @@ -0,0 +1,21 @@ +BasedOnStyle: LLVM +Language: Cpp +AlignConsecutiveAssignments: Consecutive +AlignTrailingComments: true +AllowShortFunctionsOnASingleLine: Empty +AlwaysBreakTemplateDeclarations: Yes +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: false + AfterControlStatement: MultiLine + AfterEnum: false + AfterFunction: false + AfterNamespace: false +# AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false +PointerAlignment: Left +SortIncludes: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..37205e0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,103 @@ +name: CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + format: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + - name: Check coding style + run: | + git ls-files '*.cpp' '*.hpp' | xargs clang-format-13 --dry-run --Werror + + build: + strategy: + fail-fast: false + matrix: + os: + - rockylinux/rockylinux:8 + - ubuntu:20.04 + device: + - host + - cpu + - ve + + runs-on: ubuntu-latest + container: + image: ${{ matrix.os }} + + steps: + - name: Install dependencies (Rocky Linux) + run: | + yum install -y git gcc gcc-c++ cmake make clang-devel llvm-devel python3 libffi-devel + if: ${{ startsWith(matrix.os, 'rockylinux') }} + + - name: Install dependencies (Ubuntu) + run: | + apt-get update + apt-get install -y git gcc g++ cmake make pkg-config clang-12 libclang-12-dev llvm-12-dev python3 libffi-dev + env: + DEBIAN_FRONTEND: noninteractive + if: ${{ startsWith(matrix.os, 'ubuntu') }} + + - name: Checkout veo-stubs + uses: actions/checkout@v3 + with: + repository: keichi/veo-stubs + ref: v0.1.0 + path: veo-stubs + submodules: true + if: ${{ matrix.device == 've' }} + + - name: Build and 
install veo-stubs + run: | + cmake -B build-veo-stubs -S veo-stubs -DCMAKE_INSTALL_PREFIX=/opt/nec/ve/veos + cmake --build build-veo-stubs --parallel $(nproc) + cmake --install build-veo-stubs + if: ${{ matrix.device == 've' }} + + - name: Checkout neoSYCL + uses: actions/checkout@v3 + with: + path: neoSYCL + + - name: Configure and install neoSYCL + run: | + cmake -B build-neoSYCL -S neoSYCL + if: ${{ matrix.device == 'host' }} + + - name: Configure and install neoSYCL + run: | + cmake -B build-neoSYCL -DBUILD_KERNEL_OUTLINER=ON -S neoSYCL + if: ${{ matrix.device == 'cpu' }} + + - name: Configure and install neoSYCL + run: | + cmake -B build-neoSYCL -DBUILD_VE=ON -DBUILD_KERNEL_OUTLINER=ON -S neoSYCL + if: ${{ matrix.device == 've' }} + + - name: Build and install neoSYCL + run: | + cmake --build build-neoSYCL --parallel $(nproc) + cmake --install build-neoSYCL + + - name: Checkout veo-sycl-bench + uses: actions/checkout@v3 + with: + repository: Tohoku-University-Takizawa-Lab/veo-sycl-bench + path: veo-sycl-bench + ref: 2670124d4aed0b867c78c27aa9780a21a21aebd1 + + - name: Build and run veo-sycl-bench + run: | + cp neoSYCL/tests/test_veosycl.sh veo-sycl-bench/ + cd veo-sycl-bench + ./test_veosycl.sh ${{ matrix.device }} + env: + DEVICE_COMPILER: g++ diff --git a/.gitignore b/.gitignore index 48cc816..86a22be 100644 --- a/.gitignore +++ b/.gitignore @@ -31,13 +31,18 @@ *.out *.app -# CLion tmp dirs +# CMake +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + build -.idea -cmake-build-debug -cmake-build-release -node_modules -package-lock.json -package.json - -.DS_Store \ No newline at end of file +.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index 278946a..1daed6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,46 +1,37 @@ -cmake_minimum_required(VERSION 3.4) -project(neoSYCL C CXX) +cmake_minimum_required(VERSION 3.10) 
-set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") - -option(BUILD_KERNEL_GENERATOR "build sycl kernel generator" OFF) -option(BUILD_VE "Enable SX-Aurora support" OFF) -#option(BUILD_TESTING "Enable build tests" OFF) +project(neoSYCL LANGUAGES CXX VERSION 0.5.0) +add_library(neoSYCL INTERFACE) +set(CMAKE_CXX_STANDARD 17) -if (BUILD_KERNEL_GENERATOR) - add_subdirectory(kernel_generator) -endif () - +option(BUILD_KERNEL_OUTLINER "Build SYCL kernel outliner" ON) +option(BUILD_VE "Enable SX-Aurora TSUBASA support" OFF) if (BUILD_VE) - ADD_DEFINITIONS(-DBUILD_VE) + add_definitions(-DBUILD_VE) # find VEOS headers and libs - set(VEO_HEADER_PATH "/opt/nec/ve/veos/include" CACHE PATH "veo headers path") - set(VEO_LIB_PATH "/opt/nec/ve/veos/lib64" CACHE PATH "veo libs path") + set(VEO_DIR "/opt/nec/ve/veos" CACHE PATH "VEO installation path") + find_path(VEO_HEADER_PATH ve_offload.h HINTS ${VEO_DIR} PATH_SUFFIXES include) + find_path(VEO_LIB_PATH libveo.so HINTS ${VEO_DIR} PATH_SUFFIXES lib64) + message(STATUS "Enable SX-Aurora support") - message(STATUS "Use veo headers path: ${VEO_HEADER_PATH}") - message(STATUS "Use veo libs path: ${VEO_LIB_PATH}") + message(STATUS "Use VEO headers path: ${VEO_HEADER_PATH}") + message(STATUS "Use VEO libs path: ${VEO_LIB_PATH}") - include_directories(${VEO_HEADER_PATH}) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${VEO_LIB_PATH} -Wl,-rpath=${VEO_LIB_PATH} -lveo") + include_directories(${VEO_HEADER_PATH} include/neoSYCL/) endif () -# debug info -if (CMAKE_BUILD_TYPE STREQUAL Debug) - message(STATUS "Enable debug mode") - ADD_DEFINITIONS(-DDEBUG) - include_directories(third_party/fmt/include) +if (BUILD_KERNEL_OUTLINER) + add_subdirectory(src) endif () include_directories(include) -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lpthread") - -# add examples -add_subdirectory(examples) +target_include_directories( + neoSYCL + INTERFACE $ + $) -# add 
tests -add_subdirectory(tests) \ No newline at end of file +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/neoSYCL DESTINATION include) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/CL DESTINATION include) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e78154c --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 High Performance Computing Laboratory, Tohoku University + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index a586973..f75eb5c 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,59 @@ # neoSYCL -SYCL Implementation for CPU / NEC SX-Aurora TSUBASA (SX-AT) +[![build](https://github.com/Tohoku-University-Takizawa-Lab/neoSYCL/actions/workflows/ci.yml/badge.svg)](https://github.com/Tohoku-University-Takizawa-Lab/neoSYCL/actions/workflows/ci.yml) -## Installation & testing +A SYCL Implementation for CPU and NEC SX-Aurora TSUBASA (SX-AT) -SYCL is a template library, so no real installation is required. 
+## Requirements -For CPU computation, the runtime library is implemented with pure C++ STL, does not rely on any third-party libraries. +- CMake 3.10 or higher +- A C++ compiler supporting C++17 +- Clang and LLVM 12 or higher (including development headers and shared + libraries, required if using the CPU or VE device) +- Python 3 (required if using the CPU or VE device) +- NEC Software Development Kit for Vector Engine (required if using the VE device) -To build examples, use following commands: +We regularly test on Rocky Linux 8 and Ubuntu 20.04 on our GitHub Actions +pipeline. Use the following commands to install all required dependencies +(except the VE SDK) on these distributions: -`cmake ${CMAKE_FILE_DIR}` +- Rocky Linux 8: `dnf install -y git gcc gcc-c++ cmake make clang-devel llvm-devel python3` +- Ubuntu 20.04: `apt install -y git gcc g++ cmake make pkg-config clang-12 libclang-12-dev llvm-12-dev python3` +## Configuration and Installation -## Enable NEC SX-Aurora TSUBASA (SX-AT) support +``` +mkdir build && cd build +cmake .. +make +sudo make install +``` -@TODO Current SX-AURORA new version is under development, please check branch backup. +neoSYCL supports three types of devices: -We also need a `sycl-kernel-generator` to generate kernel codes, which is a module of neoSYCL project. +- Host: Kernels are executed on the host. This device is always available. +- CPU: Kernels are compiled to a shared library and executed + on the host CPU. To use this device, add the `-DBUILD_KERNEL_OUTLINER=ON` + option when invoking CMake. +- VE: Kernels are compiled to a shared library and executed + on the Vector Engine using Vector Engine Offloading. To use this device, add + the `-DBUILD_KERNEL_OUTLINER=ON` and `-DBUILD_VE=ON` options when invoking + CMake. 
-To enable SX-Aurora support, use following commands: +If Clang, LLVM or VEOS are installed to non-standard locations, their paths +should be given via the `-DClang_DIR=`, `-DLLVM_DIR=` and `-DVEO_DIR=` +options, respectively. -`cmake -DBUILD_VE=ON -DClang_DIR=${LLVM_PROJECT_BUILD_DIR}/lib/cmake/clang -DLLVM_DIR=${LLVM_PROJECT_BUILD_DIR}/lib/cmake/llvm -DBUILD_KERNEL_GENERATOR=ON ${CMAKE_FILE_DIR}` +## Usage +When using the CPU or VE device, the SYCL source file needs to be compiled +using the `nsc++` compiler wrapper. The target device can be specified using +the `--device` option. Accepted values are `cpu` or `ve`. + +## Citing + +If you use neoSYCL in your work, please cite the following paper: + +Yinan Ke, Mulya Agung, Hiroyuki Takizawa, "neoSYCL: a SYCL implementation for +SX-Aurora TSUBASA," The International Conference on High Performance Computing +in Asia-Pacific Region (HPC Asia 2021), pp. 50-57, Jan. 2021. diff --git a/cmake/ExtProjectUtils.cmake b/cmake/ExtProjectUtils.cmake deleted file mode 100644 index 80a57ea..0000000 --- a/cmake/ExtProjectUtils.cmake +++ /dev/null @@ -1,50 +0,0 @@ -include(ExternalProject) -include(CMakeParseArguments) - -# -# Function to inject dependency (download from git repo) -# -# Use as ExternalProjectGit( "" "" "" ) -# where -# - url to repository for ex. 
https://github.com/log4cplus/log4cplus.git -# project name will be regexed from url as latest part in our case log4cplus.git -# - tag - tag you want to use -# - destination - where to install your binaries, for example ${CMAKE_BINARY_DIR}/3rdparty -# - -function(ExtProjectGit repourl tag destination) - - message(STATUS "Get external project from: ${repourl} : ${tag}") - - string(REGEX MATCH "([^\\/]+)[.]git$" _name ${repourl}) - message(STATUS "_name = ${_name}") - - set(options) - set(oneValueArgs) - set(multiValueArgs CMAKE_ARGS) - cmake_parse_arguments(ExtProjectGit "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - set(cmake_cli_args -DCMAKE_INSTALL_PREFIX=${destination} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}) - if(CMAKE_TOOLCHAIN_FILE) - get_filename_component(_ft_path ${CMAKE_TOOLCHAIN_FILE} ABSOLUTE) - get_filename_component(_cm_rt_opath ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} ABSOLUTE) - set(cmake_cli_args ${cmake_cli_args} - -DCMAKE_TOOLCHAIN_FILE=${_ft_path} - -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=${_cm_rt_opath}) - endif() - - foreach(cmake_key ${ExtProjectGit_CMAKE_ARGS}) - set(cmake_cli_args ${cmake_key} ${cmake_cli_args}) - endforeach() - - message(STATUS "ARGS for ExternalProject_Add(${name}): ${cmake_cli_args}") - message(STATUS "CMAKE_CXX_FLAGS = ${CMAKE_CXX_FLAGS}") - - ExternalProject_Add(${_name} - GIT_REPOSITORY ${repourl} - GIT_TAG ${tag} - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} ${cmake_cli_args} -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - PREFIX "${destination}" - INSTALL_DIR "${destination}") -endfunction() \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 76c3b54..0000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/examples) - -add_executable(sequential_vector sequential_vector.cpp) - -add_subdirectory(computecpp) diff 
--git a/examples/bfs/CMakeLists.txt b/examples/bfs/CMakeLists.txt deleted file mode 100644 index 4317874..0000000 --- a/examples/bfs/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ - -add_executable( - bfs - bfs.cpp -) - - -add_executable( - bfs_sycl - bfs_sycl.cpp -) - -add_executable( - bfs_veo - bfs_veo.cpp -) - -add_executable( - graph_gen - inputGen/graphgen.cpp -) - diff --git a/examples/bfs/bfs.cpp b/examples/bfs/bfs.cpp deleted file mode 100755 index f6d523e..0000000 --- a/examples/bfs/bfs.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include -#include -#include -#include -#include - -//#define NUM_THREAD 4 -#define OPEN - -FILE *fp; - -//Structure to hold a node information -struct Node { - int starting; - int no_of_edges; -}; - -long get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec * 1000 + tv.tv_usec / 1000; -} - -void BFSGraph(int argc, char **argv); - -void Usage(int argc, char **argv) { - - fprintf(stderr, "Usage: %s \n", argv[0]); - -} -//////////////////////////////////////////////////////////////////////////////// -// Main Program -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - BFSGraph(argc, argv); -} - -//////////////////////////////////////////////////////////////////////////////// -//Apply BFS on a Graph using CUDA -//////////////////////////////////////////////////////////////////////////////// -void BFSGraph(int argc, char **argv) { - int no_of_nodes = 0; - int edge_list_size = 0; - char *input_f; - int num_omp_threads; - - if (argc != 3) { - Usage(argc, argv); - exit(0); - } - - num_omp_threads = atoi(argv[1]); - input_f = argv[2]; - - printf("Reading File\n"); - //Read in Graph from a file - fp = fopen(input_f, "r"); - if (!fp) { - printf("Error Reading graph file\n"); - return; - } - - long start_time = get_time(); - - int source = 0; - - fscanf(fp, "%d", &no_of_nodes); - - // allocate host memory - Node *h_graph_nodes = (Node *) malloc(sizeof(Node) 
* no_of_nodes); - bool *h_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_updating_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_graph_visited = (bool *) malloc(sizeof(bool) * no_of_nodes); - - int start, edgeno; - // initalize the memory - for (unsigned int i = 0; i < no_of_nodes; i++) { - fscanf(fp, "%d %d", &start, &edgeno); - h_graph_nodes[i].starting = start; - h_graph_nodes[i].no_of_edges = edgeno; - h_graph_mask[i] = false; - h_updating_graph_mask[i] = false; - h_graph_visited[i] = false; - } - - //read the source node from the file - fscanf(fp, "%d", &source); - // source=0; //tesing code line - - //set the source node as true in the mask - h_graph_mask[source] = true; - h_graph_visited[source] = true; - - fscanf(fp, "%d", &edge_list_size); - - int id, cost; - int *h_graph_edges = (int *) malloc(sizeof(int) * edge_list_size); - for (int i = 0; i < edge_list_size; i++) { - fscanf(fp, "%d", &id); - fscanf(fp, "%d", &cost); - h_graph_edges[i] = id; - } - - if (fp) - fclose(fp); - - - // allocate mem for the result on host side - int *h_cost = (int *) malloc(sizeof(int) * no_of_nodes); - for (int i = 0; i < no_of_nodes; i++) - h_cost[i] = -1; - h_cost[source] = 0; - - printf("Start traversing the tree\n"); - - long kernel_start_time = get_time(); - - int k = 0; - - bool stop; - do { - //if no thread changes this value then the loop stops - stop = false; - - for (int tid = 0; tid < no_of_nodes; tid++) { - if (h_graph_mask[tid] == true) { - h_graph_mask[tid] = false; - for (int i = h_graph_nodes[tid].starting; i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); - i++) { - int id = h_graph_edges[i]; - if (!h_graph_visited[id]) { - h_cost[id] = h_cost[tid] + 1; - h_updating_graph_mask[id] = true; - } - } - } - } - - for (int tid = 0; tid < no_of_nodes; tid++) { - if (h_updating_graph_mask[tid] == true) { - h_graph_mask[tid] = true; - h_graph_visited[tid] = true; - stop = true; - h_updating_graph_mask[tid] = 
false; - } - } - k++; - } while (stop); - - long end_time = get_time(); - - printf("Total cost: %ld ms\n", end_time - start_time); - printf("Kernel cost: %ld ms\n", end_time - kernel_start_time); - - - //Store the result into a file - FILE * fpo = fopen("result.txt", "w"); - for (int i = 0; i < no_of_nodes; i++) - fprintf(fpo, "%d) cost:%d\n", i, h_cost[i]); - fclose(fpo); - printf("Result stored in result.txt\n"); - - - // cleanup memory - free(h_graph_nodes); - free(h_graph_edges); - free(h_graph_mask); - free(h_updating_graph_mask); - free(h_graph_visited); - free(h_cost); - -} - diff --git a/examples/bfs/bfs_sycl.cpp b/examples/bfs/bfs_sycl.cpp deleted file mode 100755 index 98d1491..0000000 --- a/examples/bfs/bfs_sycl.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -//#define NUM_THREAD 4 -#define OPEN - -using namespace cl::sycl; - -FILE *fp; - -//Structure to hold a node information -struct Node { - int starting; - int no_of_edges; -}; - -long get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec * 1000 + tv.tv_usec / 1000; -} - -void BFSGraph(int argc, char **argv); - -void Usage(int argc, char **argv) { - - fprintf(stderr, "Usage: %s \n", argv[0]); - -} -//////////////////////////////////////////////////////////////////////////////// -// Main Program -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - BFSGraph(argc, argv); -} - -//////////////////////////////////////////////////////////////////////////////// -//Apply BFS on a Graph using CUDA -//////////////////////////////////////////////////////////////////////////////// -void BFSGraph(int argc, char **argv) { - int no_of_nodes = 0; - int edge_list_size = 0; - char *input_f; - int num_omp_threads; - - if (argc != 3) { - Usage(argc, argv); - exit(0); - } - - num_omp_threads = atoi(argv[1]); - input_f = argv[2]; - - printf("Reading File\n"); - //Read in Graph from 
a file - fp = fopen(input_f, "r"); - if (!fp) { - printf("Error Reading graph file\n"); - return; - } - - long start_time = get_time(); - - int source = 0; - - fscanf(fp, "%d", &no_of_nodes); - - // allocate host memory - - Node *h_graph_nodes = (Node *) malloc(sizeof(Node) * no_of_nodes); - bool *h_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_updating_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_graph_visited = (bool *) malloc(sizeof(bool) * no_of_nodes); - - buffer h_graph_nodes_buf(h_graph_nodes, range<1>(no_of_nodes)); - buffer h_graph_mask_buf(h_graph_mask, range<1>(no_of_nodes)); - buffer h_updating_graph_mask_buf(h_updating_graph_mask, range<1>(no_of_nodes)); - buffer h_graph_visited_buf(h_graph_visited, range<1>(no_of_nodes)); - - int start, edgeno; - // initalize the memory - for (unsigned int i = 0; i < no_of_nodes; i++) { - fscanf(fp, "%d %d", &start, &edgeno); - h_graph_nodes[i].starting = start; - h_graph_nodes[i].no_of_edges = edgeno; - h_graph_mask[i] = false; - h_updating_graph_mask[i] = false; - h_graph_visited[i] = false; - } - - //read the source node from the file - fscanf(fp, "%d", &source); - // source=0; //tesing code line - - //set the source node as true in the mask - h_graph_mask[source] = true; - h_graph_visited[source] = true; - - fscanf(fp, "%d", &edge_list_size); - - int id, cost; - int *h_graph_edges = (int *) malloc(sizeof(int) * edge_list_size); - - buffer h_graph_edges_buf(h_graph_edges, range<1>(edge_list_size)); - - for (int i = 0; i < edge_list_size; i++) { - fscanf(fp, "%d", &id); - fscanf(fp, "%d", &cost); - h_graph_edges[i] = id; - } - - if (fp) - fclose(fp); - - - // allocate mem for the result on host side - int *h_cost = (int *) malloc(sizeof(int) * no_of_nodes); - - buffer h_cost_buf(h_cost, range<1>(no_of_nodes)); - - for (int i = 0; i < no_of_nodes; i++) - h_cost[i] = -1; - h_cost[source] = 0; - - printf("Start traversing the tree\n"); - - long kernel_start_time = 
get_time(); - - ve_queue q; - - buffer n_buf(&no_of_nodes, range<1>(1)); - - q.submit([&](handler &cgh) { - auto h_graph_mask_buf_access = h_graph_mask_buf.get_access(cgh); - auto h_graph_visited_buf_access = h_graph_visited_buf.get_access(cgh); - auto h_updating_graph_mask_buf_access = h_updating_graph_mask_buf.get_access(cgh); - auto h_graph_nodes_buf_access = h_graph_nodes_buf.get_access(cgh); - - auto h_graph_edges_buf_access = h_graph_edges_buf.get_access(cgh); - auto h_cost_buf_access = h_cost_buf.get_access(cgh); - - auto no_of_nodes_access = n_buf.get_access(cgh); - - cgh.single_task([=]() { - int k = 0; - bool stop; - do { - //if no thread changes this value then the loop stops - stop = false; - for (int tid = 0; tid < no_of_nodes_access[0]; tid++) { - if (h_graph_mask_buf_access[tid] == true) { - h_graph_mask_buf_access[tid] = false; - for (int i = h_graph_nodes_buf_access[tid].starting; - i < (h_graph_nodes_buf_access[tid].no_of_edges + h_graph_nodes_buf_access[tid].starting); - i++) { - int id = h_graph_edges_buf_access[i]; - if (!h_graph_visited_buf_access[id]) { - h_cost_buf_access[id] = h_cost_buf_access[tid] + 1; - h_updating_graph_mask_buf_access[id] = true; - } - } - } - } - - for (int tid = 0; tid < no_of_nodes_access[0]; tid++) { - if (h_updating_graph_mask_buf_access[tid] == true) { - h_graph_mask_buf_access[tid] = true; - h_graph_visited_buf_access[tid] = true; - stop = true; - h_updating_graph_mask_buf_access[tid] = false; - } - } - k++; - } while (stop); - - }); - - }); - - q.wait(); - - long end_time = get_time(); - - printf("Total cost: %ld ms\n", end_time - start_time); - printf("Kernel cost: %ld ms\n", end_time - kernel_start_time); - - //Store the result into a file - FILE * fpo = fopen("result.txt", "w"); - for (int i = 0; i < no_of_nodes; i++) - fprintf(fpo, "%d) cost:%d\n", i, h_cost[i]); - fclose(fpo); - printf("Result stored in result.txt\n"); - - - // cleanup memory - free(h_graph_nodes); - free(h_graph_edges); - 
free(h_graph_mask); - free(h_updating_graph_mask); - free(h_graph_visited); - free(h_cost); - -} - diff --git a/examples/bfs/bfs_veo.cpp b/examples/bfs/bfs_veo.cpp deleted file mode 100755 index 6b158c9..0000000 --- a/examples/bfs/bfs_veo.cpp +++ /dev/null @@ -1,266 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//#define NUM_THREAD 4 -#define OPEN - - -FILE *fp; - -//Structure to hold a node information -struct Node { - int starting; - int no_of_edges; -}; - -long get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return tv.tv_sec * 1000 + tv.tv_usec / 1000; -} - -void BFSGraph(int argc, char **argv); - -void Usage(int argc, char **argv) { - - fprintf(stderr, "Usage: %s \n", argv[0]); - -} -//////////////////////////////////////////////////////////////////////////////// -// Main Program -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - BFSGraph(argc, argv); -} - -//////////////////////////////////////////////////////////////////////////////// -//Apply BFS on a Graph using CUDA -//////////////////////////////////////////////////////////////////////////////// -void BFSGraph(int argc, char **argv) { - int no_of_nodes = 0; - int edge_list_size = 0; - char *input_f; - int num_omp_threads; - - if (argc != 3) { - Usage(argc, argv); - exit(0); - } - - num_omp_threads = atoi(argv[1]); - input_f = argv[2]; - - printf("Reading File\n"); - //Read in Graph from a file - fp = fopen(input_f, "r"); - if (!fp) { - printf("Error Reading graph file\n"); - return; - } - - struct veo_proc_handle *proc = veo_proc_create(0); - if (proc == NULL) { - perror("veo_proc_create"); - exit(1); - } - uint64_t handle = veo_load_library(proc, "./ve_kernel.so"); - printf("handle = %p\n", (void *) handle); - - long start_time = get_time(); - - // create context - struct veo_thr_ctxt *curr_ctx = veo_context_open(proc); - - int source = 0; - - fscanf(fp, "%d", &no_of_nodes); - - // allocate host 
memory - - Node *h_graph_nodes = (Node *) malloc(sizeof(Node) * no_of_nodes); - bool *h_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_updating_graph_mask = (bool *) malloc(sizeof(bool) * no_of_nodes); - bool *h_graph_visited = (bool *) malloc(sizeof(bool) * no_of_nodes); - - // allocate memory size - uint64_t ve_h_graph_nodes; - int rc = veo_alloc_mem(proc, &ve_h_graph_nodes, sizeof(Node) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - uint64_t ve_h_graph_mask; - rc = veo_alloc_mem(proc, &ve_h_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - uint64_t ve_h_updating_graph_mask; - rc = veo_alloc_mem(proc, &ve_h_updating_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - uint64_t ve_h_graph_visited; - rc = veo_alloc_mem(proc, &ve_h_graph_visited, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - int start, edgeno; - // initalize the memory - for (unsigned int i = 0; i < no_of_nodes; i++) { - fscanf(fp, "%d %d", &start, &edgeno); - h_graph_nodes[i].starting = start; - h_graph_nodes[i].no_of_edges = edgeno; - h_graph_mask[i] = false; - h_updating_graph_mask[i] = false; - h_graph_visited[i] = false; - } - - //read the source node from the file - fscanf(fp, "%d", &source); - // source=0; //tesing code line - - //set the source node as true in the mask - h_graph_mask[source] = true; - h_graph_visited[source] = true; - - fscanf(fp, "%d", &edge_list_size); - - int id, cost; - int *h_graph_edges = (int *) malloc(sizeof(int) * edge_list_size); - - uint64_t ve_h_graph_edges; - rc = veo_alloc_mem(proc, &ve_h_graph_edges, sizeof(int) * edge_list_size); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - for (int i = 0; i < edge_list_size; i++) { - fscanf(fp, 
"%d", &id); - fscanf(fp, "%d", &cost); - h_graph_edges[i] = id; - } - - if (fp) - fclose(fp); - - - // allocate mem for the result on host side - int *h_cost = (int *) malloc(sizeof(int) * no_of_nodes); - - uint64_t ve_h_cost; - rc = veo_alloc_mem(proc, &ve_h_cost, sizeof(int) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("allocate memory return bad code\n"); - } - - for (int i = 0; i < no_of_nodes; i++) - h_cost[i] = -1; - h_cost[source] = 0; - - printf("Start traversing the tree\n"); - - long kernel_start_time = get_time(); - - // do data copy - rc = veo_write_mem(proc, ve_h_graph_nodes, h_graph_nodes, sizeof(Node) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - rc = veo_write_mem(proc, ve_h_graph_mask, h_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - rc = veo_write_mem(proc, ve_h_updating_graph_mask, h_updating_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - rc = veo_write_mem(proc, ve_h_graph_visited, h_graph_visited, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - rc = veo_write_mem(proc, ve_h_graph_edges, h_graph_edges, sizeof(int) * edge_list_size); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - rc = veo_write_mem(proc, ve_h_cost, h_cost, sizeof(int) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("write memory return bad code\n"); - } - - struct veo_args *argp0 = veo_args_alloc(); - veo_args_set_i64(argp0, 0, ve_h_graph_nodes); - veo_args_set_i64(argp0, 1, ve_h_graph_mask); - veo_args_set_i64(argp0, 2, ve_h_updating_graph_mask); - veo_args_set_i64(argp0, 3, ve_h_graph_visited); - veo_args_set_i64(argp0, 4, ve_h_graph_edges); - veo_args_set_i64(argp0, 5, ve_h_cost); - veo_args_set_i64(argp0, 6, no_of_nodes); - - uint64_t sym = veo_get_sym(proc, 
handle, "kernel_func"); - uint64_t pid = veo_call_async(curr_ctx, sym, argp0); - uint64_t retval; - rc = veo_call_wait_result(curr_ctx, pid, &retval); - if (rc != VEO_COMMAND_OK) { - perror("body_force kernel return error\n"); - } - - //copy data back - rc = veo_read_mem(proc, h_graph_mask, ve_h_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("read memory return bad code\n"); - } - - rc = veo_read_mem(proc, h_graph_visited, ve_h_graph_visited, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("read memory return bad code\n"); - } - - rc = veo_read_mem(proc, h_updating_graph_mask, ve_h_updating_graph_mask, sizeof(bool) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("read memory return bad code\n"); - } - - rc = veo_read_mem(proc, h_cost, ve_h_cost, sizeof(int) * no_of_nodes); - if (rc != VEO_COMMAND_OK) { - perror("read memory return bad code\n"); - } - - veo_args_free(argp0); - - veo_context_close(curr_ctx); - - long end_time = get_time(); - - printf("Total cost: %ld ms\n", end_time - start_time); - printf("Kernel cost: %ld ms\n", end_time - kernel_start_time); - - //Store the result into a file - FILE * fpo = fopen("result.txt", "w"); - for (int i = 0; i < no_of_nodes; i++) - fprintf(fpo, "%d) cost:%d\n", i, h_cost[i]); - fclose(fpo); - printf("Result stored in result.txt\n"); - - - // cleanup memory - free(h_graph_nodes); - free(h_graph_edges); - free(h_graph_mask); - free(h_updating_graph_mask); - free(h_graph_visited); - free(h_cost); - -} - diff --git a/examples/bfs/inputGen/Makefile b/examples/bfs/inputGen/Makefile deleted file mode 100644 index cb6dbe9..0000000 --- a/examples/bfs/inputGen/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -FLAGS := -std=c++0x -fopenmp - -graphgen: graphgen.cpp - g++ $(FLAGS) -o $@ $< - -clean: - rm graphgen - diff --git a/examples/bfs/inputGen/gen_dataset.sh b/examples/bfs/inputGen/gen_dataset.sh deleted file mode 100755 index d9ab7f0..0000000 --- 
a/examples/bfs/inputGen/gen_dataset.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -./graph_gen 1024 1k -./graph_gen 2048 2k -./graph_gen 4096 4k -./graph_gen 8192 8k -./graph_gen 16384 16k -./graph_gen 32768 32k -./graph_gen 65536 64k -./graph_gen 131072 128k -./graph_gen 261444 256k -./graph_gen 524288 512k -./graph_gen 1048576 1M -./graph_gen 2097152 2M -./graph_gen 4194304 4M -./graph_gen 8388608 8M -./graph_gen 16777216 16M - - -#!/bin/bash - -EXEC=dsad - -./$EXEC 2 graph1k.txt -./$EXEC 2 graph2k.txt -./$EXEC 2 graph4k.txt -./$EXEC 2 graph8k.txt -./$EXEC 2 graph16k.txt -./$EXEC 2 graph32k.txt -./$EXEC 2 graph64k.txt -./$EXEC 2 graph128k.txt -./$EXEC 2 graph256k.txt -./$EXEC 2 graph512k.txt - -echo "1M\n" -./$EXEC 2 graph1M.txt - -echo "2M\n" -./$EXEC 2 graph2M.txt - -echo "4M\n" -./$EXEC 2 graph4M.txt - -echo "8M\n" -./$EXEC 2 graph8M.txt - -echo "16M\n" -./$EXEC 2 graph16M.txt - - - - diff --git a/examples/bfs/inputGen/graphgen.cpp b/examples/bfs/inputGen/graphgen.cpp deleted file mode 100644 index 76f793d..0000000 --- a/examples/bfs/inputGen/graphgen.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * graphgen.cpp - * by Sam Kauffman - Univeristy of Virginia - * - * This program generates graphs of the format described in GraphFormat.txt - * and SampleGraph.jpg for use with BFS (breadth-first search) in Rodinia. - * - * The graph is not guaranteed to be connected, are there may be multiple edges - * and loops. - * - * Usage: - * graphgen [] - * num = number of nodes - * Output filename is "graph.txt". filename_bit defaults to num. - * - * This program uses the TR1 header . 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -// These names may vary by implementation -//#define LINEAR_CONGRUENTIAL_ENGINE linear_congruential_engine -#define LINEAR_CONGRUENTIAL_ENGINE std::tr1::linear_congruential -//#define UNIFORM_INT_DISTRIBUTION uniform_int_distribution -#define UNIFORM_INT_DISTRIBUTION std::tr1::uniform_int - -using namespace std; - -#define MIN_NODES 20 -#define MAX_NODES ULONG_MAX -#define MIN_EDGES 2 -#define MAX_INIT_EDGES 4 // Nodes will have, on average, 2*MAX_INIT_EDGES edges -#define MIN_WEIGHT 1 -#define MAX_WEIGHT 10 - -typedef unsigned int uint; -typedef unsigned long ulong; - -struct edge; // forward declaration -typedef vector node; -struct edge { - ulong dest; - uint weight; -}; - -int main(int argc, char **argv) { - // Parse command lined - ulong numNodes; - string s; - if (argc < 2) { - cerr << "Error: enter a number of nodes.\n"; - exit(1); - } - numNodes = strtoul(argv[1], NULL, 10); - if (numNodes < MIN_NODES || numNodes > MAX_NODES || argv[1][0] == '-') { - cerr << "Error: Invalid argument: " << argv[1] << "\n"; - exit(1); - } - s = argc > 2 ? 
argv[2] : argv[1]; // filename bit - string filename = "graph" + s + ".txt"; - - cout << "Generating graph with " << numNodes << " nodes...\n"; - node *graph; - graph = new node[numNodes]; - - // Initialize random number generators - // C RNG for numbers of edges and weights - srand(time(NULL)); - // TR1 RNG for choosing edge destinations - LINEAR_CONGRUENTIAL_ENGINE gen(time(NULL)); - UNIFORM_INT_DISTRIBUTION randNode(0, numNodes - 1); - - // Generate graph - uint numEdges; - ulong nodeID; - uint weight; - ulong i; - uint j; - for (i = 0; i < numNodes; i++) { - numEdges = rand() % (MAX_INIT_EDGES - MIN_EDGES + 1) + MIN_EDGES; - for (j = 0; j < numEdges; j++) { - nodeID = randNode(gen); - weight = rand() % (MAX_WEIGHT - MIN_WEIGHT + 1) + MIN_WEIGHT; - graph[i].push_back(edge()); - graph[i].back().dest = nodeID; - graph[i].back().weight = weight; - graph[nodeID].push_back(edge()); - graph[nodeID].back().dest = i; - graph[nodeID].back().weight = weight; - } - } - - // Output - cout << "Writing to file \"" << filename << "\"...\n"; - ofstream outf(filename); - outf << numNodes << "\n"; - ulong totalEdges = 0; - for (uint i = 0; i < numNodes; i++) { - numEdges = graph[i].size(); - outf << totalEdges << " " << numEdges << "\n"; - totalEdges += numEdges; - } - outf << "\n" << randNode(gen) << "\n\n"; - outf << totalEdges << "\n"; - for (ulong i = 0; i < numNodes; i++) - for (uint j = 0; j < graph[i].size(); j++) - outf << graph[i][j].dest << " " << graph[i][j].weight << "\n"; - outf.close(); - - delete[] graph; -} diff --git a/examples/bfs/kernel.c b/examples/bfs/kernel.c deleted file mode 100644 index acca871..0000000 --- a/examples/bfs/kernel.c +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include -#include - -typedef struct { - int starting; - int no_of_edges; -} Node; - -int kernel_func(Node *h_graph_nodes, - bool *h_graph_mask, - bool *h_updating_graph_mask, - bool *h_graph_visited, - int *h_graph_edges, - int *h_cost, - int no_of_nodes) { - bool stop; - do { - stop = 
false; - - for (int tid = 0; tid < no_of_nodes; tid++) { - if (h_graph_mask[tid] == true) { - h_graph_mask[tid] = false; - for (int i = h_graph_nodes[tid].starting; i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); - i++) { - int id = h_graph_edges[i]; - if (!h_graph_visited[id]) { - h_cost[id] = h_cost[tid] + 1; - h_updating_graph_mask[id] = true; - } - } - } - } - - for (int tid = 0; tid < no_of_nodes; tid++) { - if (h_updating_graph_mask[tid] == true) { - h_graph_mask[tid] = true; - h_graph_visited[tid] = true; - stop = true; - h_updating_graph_mask[tid] = false; - } - } - } while (stop); - -} \ No newline at end of file diff --git a/examples/computecpp/CMakeLists.txt b/examples/computecpp/CMakeLists.txt deleted file mode 100644 index d7776c2..0000000 --- a/examples/computecpp/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ - -add_executable(accessors accessors.cpp) diff --git a/examples/computecpp/accessors.cpp b/examples/computecpp/accessors.cpp deleted file mode 100644 index fc2b3a7..0000000 --- a/examples/computecpp/accessors.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/*************************************************************************** - * - * Copyright (C) 2016 Codeplay Software Limited - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * For your convenience, a copy of the License has been included in this - * repository. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * Codeplay's ComputeCpp SDK - * - * accessor.cpp - * - * Description: - * Sample code that illustrates how to make data available on a device - * using accessors in SYCL. - * - **************************************************************************/ - -#include -#include - -class multiply; - -int main() { - using namespace cl::sycl; - /* We define the data to be passed to the device. */ - int data = 5; - - /* The scope we create here defines the lifetime of the buffer object, in SYCL - * the lifetime of the buffer object dictates synchronization using RAII. */ - try { - /* We can also create a queue that uses the default selector in - * the queue's default constructor. */ - queue myQueue; - - /* We define a buffer in order to maintain data across the host and one or - * more devices. We construct this buffer with the address of the data - * defined above and a range specifying a single element. */ - buffer buf(&data, range<1>(1)); - - myQueue.submit([&](handler &cgh) { - /* We define accessors for requiring access to a buffer on the host or on - * a device. Accessors are are like pointers to data we can use in - * kernels to access the data. When constructing the accessor you must - * specify the access target and mode. SYCL also provides the - * get_access() as a buffer member function, which only requires an - * access mode - in this case access::mode::read_write. - * (make_access<>() has a second template argument which defaults - * to access::mode::global) */ - auto ptr = buf.get_access(cgh); - - cgh.single_task([=]() { - /* We use the subscript operator of the accessor constructed above to - * read the value, multiply it by itself and then write it back to the - * accessor again. */ - ptr[0] = ptr[0] * ptr[0]; - }); - }); - - /* queue::wait() will block until kernel execution finishes, - * successfully or otherwise. 
*/ - myQueue.wait(); - - } catch (exception const &e) { - std::cout << "SYCL exception caught: " << e.what() << '\n'; - return 1; - } - - constexpr int expectedResult = 5 * 5; - /* We check that the result is correct. */ - if (data != expectedResult) { - std::cout << "Oops! Something went wrong... " << expectedResult - << " != " << data << '\n'; - return 1; - } - - std::cout << "Hurray! 5 * 5 is " << data << '\n'; - return 0; -} \ No newline at end of file diff --git a/examples/sequential_vector.cpp b/examples/sequential_vector.cpp deleted file mode 100644 index 6013726..0000000 --- a/examples/sequential_vector.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include -#include - -using namespace cl::sycl; - -int main() { - const int N = 3; - using Vector = float[N]; - - Vector a = {1, 2, 3}; - Vector b = {5, 6, 8}; - - float c[N]; - - { // By sticking all the SYCL work in a {} block, we ensure - // all SYCL tasks must complete before exiting the block - - // Create a queue to work on - queue myQueue; - - // Create buffers from a & b vectors with 2 different syntax - buffer A(a, range<1>(N)); - buffer B(b, range<1>(N)); - - // A buffer of N float using the storage of c - buffer C(c, N); - - /* The command group describing all operations needed for the kernel - execution */ - myQueue.submit([&](handler &cgh) { - // In the kernel A and B are read, but C is written - auto ka = A.get_access(cgh); - auto kb = B.get_access(cgh); - auto kc = C.get_access(cgh); - - // Enqueue a single, simple task - cgh.single_task([=]() { - for (size_t i = 0; i != N; i++) { - kc[i] = ka[i] + kb[i]; - } - }); - }); // End of our commands for this queue - - } // End scope, so we wait for the queue to complete - - std::cout << "Result:" << std::endl; - for (size_t i = 0; i != N; i++) - std::cout << c[i] << " "; - std::cout << std::endl; -} diff --git a/include/CL/.DS_Store b/include/CL/.DS_Store deleted file mode 100644 index d216a4a..0000000 Binary files a/include/CL/.DS_Store and /dev/null 
differ diff --git a/include/CL/sycl.hpp b/include/CL/sycl.hpp index f91100f..85fd6f6 100644 --- a/include/CL/sycl.hpp +++ b/include/CL/sycl.hpp @@ -9,5 +9,4 @@ using namespace neosycl::sycl; } -#endif //CUSTOM_SYCL_INCLUDE_SYCL_SYCL_H_ - +#endif // CUSTOM_SYCL_INCLUDE_SYCL_SYCL_H_ diff --git a/include/neoSYCL/extensions/nec/device.hpp b/include/neoSYCL/extensions/nec/device.hpp new file mode 100644 index 0000000..8673ad3 --- /dev/null +++ b/include/neoSYCL/extensions/nec/device.hpp @@ -0,0 +1,36 @@ +#pragma once + +namespace neosycl::sycl::extensions::nec { + +struct device_impl_ve : public detail::device_impl { + device_impl_ve(device d) : detail::device_impl(d) {} + + bool is_host() override { + return false; + } + bool is_cpu() override { + return false; + } + bool is_gpu() override { + return false; + } + bool is_accelerator() override { + return true; + } + const void* get_info(info::device param) const override { + switch (param) { + case info::device::device_type: + default: + PRINT_ERR("device::get_info(%d) not implemented", (int)param); + throw unimplemented(); + } + } + info::device_type type() override { + return info::device_type::accelerator; + } + + virtual detail::program_data* create_program(device d) override { + return new program_data_ve(d); + } +}; +} // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/extensions/nec/kernel.hpp b/include/neoSYCL/extensions/nec/kernel.hpp new file mode 100644 index 0000000..f10577f --- /dev/null +++ b/include/neoSYCL/extensions/nec/kernel.hpp @@ -0,0 +1,23 @@ +#pragma once + +namespace neosycl::sycl::extensions::nec { + +class kernel_data_ve : public detail::kernel_data { +public: + uint64_t func_; + uint64_t capt_; + uint64_t rnge_; + struct veo_args* argp_; + + kernel_data_ve() : kernel_data() { + func_ = 0; + capt_ = 0; + rnge_ = 0; + argp_ = veo_args_alloc(); + } + + ~kernel_data_ve() { + veo_args_free(argp_); + } +}; +} // namespace neosycl::sycl::extensions::nec diff --git 
a/include/neoSYCL/extensions/nec/program.hpp b/include/neoSYCL/extensions/nec/program.hpp new file mode 100644 index 0000000..33d37c7 --- /dev/null +++ b/include/neoSYCL/extensions/nec/program.hpp @@ -0,0 +1,105 @@ +#pragma once + +namespace neosycl::sycl::extensions::nec { +const char* DEFAULT_VE_LIB = "./kernel-ve.so"; +const char* ENV_VE_KERNEL = "NEOSYCL_VE_KERNEL"; +constexpr int DEFAULT_VE_NODE = -1; + +class program_data_ve : public detail::program_data { + veo_util util; + +public: + program_data_ve(device d) : detail::program_data(d), util() {} + + ~program_data_ve() { + util.close(); + } + + bool open() override { + const char* env = getenv(ENV_VE_KERNEL); + string_class fn(env ? env : DEFAULT_VE_LIB); + + if (util.open(fn, DEFAULT_VE_NODE) == false) { + DEBUG_INFO("open() failed: %s", fn.c_str()); + return false; + } + DEBUG_INFO("kernel lib loaded: %s", fn.c_str()); + return true; + } + + bool is_open() override { + return util.is_open(); + } + + void run(kernel k) override { + auto kdv = cast(k); + + try { + DEBUG_INFO("-- KENREL EXEC BEGIN --"); + [[maybe_unused]] int rt = util.call_func(kdv->func_, kdv->argp_); + DEBUG_INFO("-- KERNEL EXEC END (ret=%d) --", rt); + } + catch (exception& e) { + PRINT_ERR("kernel execution failed: %s", e.what()); + } + } + + void* alloc_mem(void* p, size_t s) override { + void* dptr = util.alloc_mem(s); + DEBUG_INFO("memory alloc: daddr=%p, size=%lu", dptr, s); + return dptr; + } + + void free_mem(void* p) override { + util.free_mem(p); + } + + void write_mem(void* d, void* h, size_t s) override { + util.write_mem(d, h, s); + } + + void read_mem(void* h, void* d, size_t s) override { + util.read_mem(h, d, s); + } + + void copy_mem(void* d1, void* d2, size_t s) override { + util.copy_mem(d1, d2, s); + } + + void set_capture(kernel& k, void* p, size_t sz) override { + auto kdv = cast(k); + if (kdv->capt_) + util.write_mem((void*)kdv->capt_, p, sz); + else + throw runtime_error("set_capture() failed"); + } + + void 
set_range(kernel& k, size_t r[6]) override { + auto kdv = cast(k); + if (kdv->rnge_) + util.write_mem((void*)kdv->rnge_, r, sizeof(size_t) * 6); + else + throw runtime_error("set_range() failed"); + } + + kernel_data_ptr create_kernel_data(const char* s) override { + auto data = new kernel_data_ve(); + // data->dll_ = dll_; + string_class capt = string_class("__") + s + "_obj__"; + string_class rnge = string_class("__") + s + "_range__"; + data->func_ = util.get_sym(s); + data->capt_ = util.get_sym(capt.c_str()); + + if (!data->func_ || !data->capt_) { + PRINT_ERR("ve_dlfcn for %s failed", s); + throw exception("create_kernel() failed"); + } + + data->rnge_ = util.get_sym(rnge.c_str()); // this call could fail + + // DEBUG_INFO("prog_data: %p %p %p", data->func_, data->capt_, data->rnge_); + kernel_data_ptr ret(data); + return ret; + } +}; +} // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/extensions/nec/ve_device.hpp b/include/neoSYCL/extensions/nec/ve_device.hpp deleted file mode 100644 index 3b2b1d2..0000000 --- a/include/neoSYCL/extensions/nec/ve_device.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ - -#include "ve_offload.h" -#include "neoSYCL/sycl/device.hpp" - -namespace neosycl::sycl { - -class ve_device : public device { - private: - int node_id; - - public: - ve_device(int node_id) : node_id(node_id) {} - - bool is_host() const override { - return false; - } - bool is_cpu() const override { - return false; - } - bool is_gpu() const override { - return false; - } - bool is_accelerator() const override { - return true; - } - int get_node_id() const { - return node_id; - } - -}; - -} - -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_DEVICE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_info.hpp b/include/neoSYCL/extensions/nec/ve_info.hpp deleted file mode 100644 index d555ee5..0000000 --- a/include/neoSYCL/extensions/nec/ve_info.hpp +++ /dev/null @@ 
-1,22 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ -#define NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ - -#include "ve_offload.h" - -namespace neosycl::sycl::extensions { - -const int DEFAULT_VE_NODE = 0; -const string_class DEFAULT_VE_LIB = "./kernel.so"; - - -struct VEProc { - struct veo_proc_handle *ve_proc; - uint64_t handle; -}; - -struct VEContext { - struct veo_thr_ctxt *ve_ctx; -}; -} - -#endif //NEOSYCL_INCLUDE_CL_SYCL_NEC_VE_INFO_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_kernel.hpp b/include/neoSYCL/extensions/nec/ve_kernel.hpp deleted file mode 100644 index f839a33..0000000 --- a/include/neoSYCL/extensions/nec/ve_kernel.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ - -#include "ve_info.hpp" - -namespace neosycl::sycl::detail { - -struct VEKernel : public Kernel { - nec::VEProc proc; - nec::VEContext ctx; - - nec::VEContext ctx_create(nec::VEProc proc) { - struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t) ctx); - return nec::VEContext{ctx}; - } - - void free_ctx(nec::VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t) ctx.ve_ctx); - int rt = veo_context_close(ctx.ve_ctx); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", (size_t) ctx.ve_ctx, rt); - PRINT_ERR("[VEContext] release ve ctx failed"); - } - } - - struct veo_args *create_ve_args() { - struct veo_args *argp = veo_args_alloc(); - if (!argp) { - throw nec::VEException("ve args return nullptr"); - } - return argp; - } - - VEKernel(const vector_class &args, const string_class &kernel_name, const nec::VEProc &proc) - : Kernel(args, kernel_name), proc(proc) { - ctx = ctx_create(proc); - } - - void set_arg_for_range(struct veo_args *argp, const range<1> &r) { - int index = args.size(); - veo_args_set_i64(argp, index, r.size()); - 
veo_args_set_i64(argp, index + 1, 1); - } - - vector_class copy_in(struct veo_args *argp) { - vector_class ve_addr_list; - - for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; - size_t size_in_byte = arg.container->get_size(); - - uint64_t ve_addr_int; - int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] allocate VE memory failed"); - throw nec::VEException("VE allocate return error"); - } - ve_addr_list.push_back(ve_addr_int); - - DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, - ve_addr_int - ); - - if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) ve_addr_int, - size_in_byte, - (size_t) arg.container->get_data_ptr() - ); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, arg.container->get_data_ptr(), size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy to ve memory failed, size: {}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] copy to ve memory failed"); - throw nec::VEException("VE copy return error"); - } - } - veo_args_set_i64(argp, i, ve_addr_int); - } - return ve_addr_list; - } - - void copy_out(vector_class ve_addr_list) { - for (int i = 0; i < args.size(); i++) { - KernelArg arg = args[i]; - size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = ve_addr_list[i]; - if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) device_ptr, - size_in_byte, - (size_t) arg.container->get_data_ptr() - ); - // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_data_ptr(), device_ptr, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - 
DEBUG_INFO("[VEProc] copy from ve memory failed, size: {}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] copy from ve memory failed"); - throw nec::VEException("VE copy return error"); - } - } - int rt = veo_free_mem(proc.ve_proc, device_ptr); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] free ve memory failed"); - throw nec::VEException("VE free memory return error"); - } - } - } - - void single_task() override { - DEBUG_INFO("[VEKernel] single task: {}", kernel_name); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); - - try { - - vector_class ve_addr_list = copy_in(argp); - DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, kernel_name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - copy_out(ve_addr_list); - - } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; - } - - veo_args_free(argp); - - } - void parallel_for(const range<1> &r) override { - DEBUG_INFO("[VEKernel] parallel for 1d {} with range: {}", kernel_name, r.size()); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); - - try { - vector_class ve_addr_list = copy_in(argp); - DEBUG_INFO("[VEKernel] invoke ve func: {}", kernel_name); - set_arg_for_range(argp, r); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, kernel_name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - copy_out(ve_addr_list); - - } catch (nec::VEException &e) { - std::cerr << "[VEKernel] kernel invoke failed, error 
message: " << e.what() << std::endl; - } - - veo_args_free(argp); - - } - void parallel_for(const range<2> &r) override { - DEBUG_INFO("[VEKernel] parallel_for 2d"); - } - void parallel_for(const range<3> &r) override { - DEBUG_INFO("[VEKernel] parallel_for 3d"); - } - - virtual ~VEKernel() { - free_ctx(ctx); - } - -}; - -} - -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_KERNEL_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_queue.hpp b/include/neoSYCL/extensions/nec/ve_queue.hpp deleted file mode 100644 index d1b04f9..0000000 --- a/include/neoSYCL/extensions/nec/ve_queue.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ - -#include "ve_offload.h" -#include "CL/SYCL/nec/ve_task.hpp" - -namespace neosycl::sycl { - -class ve_queue : public queue { - private: - ve_device dev; - nec::VEProc proc; - - nec::VEProc proc_create(const string_class &lib_path, int ve_node) { - struct veo_proc_handle *ve_proc = veo_proc_create(ve_node); - if (!ve_proc) { - throw nec::VEException("[VEProc] create ve proc on node: " + std::to_string(ve_node) + " failed.."); - } - uint64_t handle = veo_load_library(ve_proc, lib_path.c_str()); - DEBUG_INFO("[VEProc] create ve proc: {:#x} and load lib: {} on node: {}", (size_t) ve_proc, lib_path, ve_node); - return nec::VEProc{ve_proc, handle}; - } - - void free_proc(nec::VEProc proc) { - DEBUG_INFO("[VEProc] release ve proc: {:#x}", (size_t) proc.ve_proc); - int rt = veo_proc_destroy(proc.ve_proc); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] release ve proc: {:#x} failed, return code: {}", (size_t) proc.ve_proc, rt); - PRINT_ERR("[VEProc] release ve proc failed"); - } - } - - public: - ve_queue(const string_class &path = nec::DEFAULT_VE_LIB) : dev(nec::DEFAULT_VE_NODE), queue() { - proc = proc_create(path, nec::DEFAULT_VE_NODE); - } - - ve_queue(const ve_device &dev, const string_class &path = nec::DEFAULT_VE_LIB) : dev(dev), queue() { - proc = 
proc_create(path, dev.get_node_id()); - } - - detail::Task *build_task() override { - return new detail::VETask(proc); - } - - virtual ~ve_queue() { - wait(); - free_proc(proc); - } - -}; - -} - -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_QUEUE_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_selector.hpp b/include/neoSYCL/extensions/nec/ve_selector.hpp index d0b5068..6f08cf8 100644 --- a/include/neoSYCL/extensions/nec/ve_selector.hpp +++ b/include/neoSYCL/extensions/nec/ve_selector.hpp @@ -1,24 +1,38 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ +#pragma once +#include "neoSYCL/extensions/nec/veo_util.hpp" +#include "neoSYCL/extensions/nec/kernel.hpp" +#include "neoSYCL/extensions/nec/program.hpp" +#include "neoSYCL/extensions/nec/device.hpp" namespace neosycl::sycl { class ve_selector : public device_selector { - - public: - int operator()(const device &dev) const override { +public: + virtual int operator()(const device& dev) const override { if (dev.is_accelerator()) { return 1; } - return -1; + return 0; } - device select_device() const override { - return ve_device(); + auto pf = platform::get_default_platform(); + auto devices = pf.get_devices(info::device_type::accelerator); + for (auto& i : devices) { + if (this->operator()(i) > 0) + return i; + } + throw sycl::runtime_error("no available device found"); } - }; +platform platform::register_all_devices() { + // create a platform with the default device at first + initial_platform_builder builder; + platform p(builder.create()); + // register all available devices + builder.add(p); + builder.add(p); + return p; } -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_SELECTOR_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/extensions/nec/ve_task.hpp b/include/neoSYCL/extensions/nec/ve_task.hpp deleted file mode 100644 index 499daae..0000000 --- a/include/neoSYCL/extensions/nec/ve_task.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef 
SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ -#define SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ - -#include "ve_kernel.hpp" - -namespace neosycl::sycl::detail { - -struct VETask : public Task { - nec::VEProc proc; - - VETask(const nec::VEProc &proc) : proc(proc) {} - - bool is_cpu() override { - return false; - } - - std::shared_ptr get_kernel(string_class name) override { - return std::shared_ptr(new VEKernel(args, name, proc)); - } -}; - -} - -#endif //SYCL_INCLUDE_CL_SYCL_NEC_VE_TASK_HPP_ diff --git a/include/neoSYCL/extensions/nec/ve_task_handler.hpp b/include/neoSYCL/extensions/nec/ve_task_handler.hpp deleted file mode 100644 index 5cb49b3..0000000 --- a/include/neoSYCL/extensions/nec/ve_task_handler.hpp +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP - -#include "neoSYCL/extensions/nec/ve_info.hpp" -#include "neoSYCL/sycl/detail/kernel_arg.hpp" -#include "ve_offload.h" - -namespace neosycl::sycl::extensions { - -struct task_handler_ve : public detail::task_handler { - -public: - - task_handler_ve(const VEProc &proc) : proc(proc) { - ctx = ctx_create(proc); - } - - VEContext ctx_create(VEProc proc) { - struct veo_thr_ctxt *ctx = veo_context_open(proc.ve_proc); - DEBUG_INFO("[VEContext] create ve context: {:#x}", (size_t) ctx); - return VEContext{ctx}; - } - - void free_ctx(VEContext ctx) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x}", (size_t) ctx.ve_ctx); - int rt = veo_context_close(ctx.ve_ctx); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEContext] release ve ctx: {:#x} failed, return code: {}", (size_t) ctx.ve_ctx, rt); - PRINT_ERR("[VEContext] release ve ctx failed"); - } - } - - struct veo_args *create_ve_args() { - struct veo_args *argp = veo_args_alloc(); - if (!argp) { - throw exception("ve args return nullptr"); - } - return argp; - } - - vector_class copy_in(struct veo_args *argp, shared_ptr_class k, VEProc proc) { - 
vector_class ve_addr_list; - - for (int i = 0; i < k->args.size(); i++) { - detail::KernelArg arg = k->args[i]; - size_t size_in_byte = arg.container->get_size(); - - uint64_t ve_addr_int; - int rt = veo_alloc_mem(proc.ve_proc, &ve_addr_int, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] allocate VE memory size: {} failed, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] allocate VE memory failed"); - throw exception("VE allocate return error"); - } - ve_addr_list.push_back(ve_addr_int); - - DEBUG_INFO("[VEKernel] allocate ve memory, size: {}, ve address: {:#x}", - size_in_byte, - ve_addr_int - ); - - if (arg.mode != access::mode::write) { - DEBUG_INFO("[VEKernel] do copy to ve memory for arg, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) ve_addr_int, - size_in_byte, - (size_t) arg.container->get_raw_ptr() - ); - rt = veo_write_mem(proc.ve_proc, ve_addr_int, arg.container->get_raw_ptr(), size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy to ve memory failed, size: {}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] copy to ve memory failed"); - throw exception("VE copy return error"); - } - } - veo_args_set_i64(argp, i, ve_addr_int); - } - return ve_addr_list; - } - - void copy_out(vector_class ve_addr_list, shared_ptr_class k, VEProc proc) { - for (int i = 0; i < k->args.size(); i++) { - detail::KernelArg arg = k->args[i]; - size_t size_in_byte = arg.container->get_size(); - uint64_t device_ptr = ve_addr_list[i]; - if (arg.mode != access::mode::read) { - DEBUG_INFO("[VEKernel] copy from ve memory, device address: {:#x}, size: {}, host address: {:#x}", - (size_t) device_ptr, - size_in_byte, - (size_t) arg.container->get_raw_ptr() - ); - // do copy - int rt = veo_read_mem(proc.ve_proc, arg.container->get_raw_ptr(), device_ptr, size_in_byte); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] copy from ve memory failed, size: 
{}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] copy from ve memory failed"); - throw exception("VE copy return error"); - } - } - int rt = veo_free_mem(proc.ve_proc, device_ptr); - if (rt != veo_command_state::VEO_COMMAND_OK) { - DEBUG_INFO("[VEProc] free ve memory failed, size: {}, return code: {}", size_in_byte, rt); - PRINT_ERR("[VEProc] free ve memory failed"); - throw exception("VE free memory return error"); - } - } - } - - void single_task(shared_ptr_class k, const std::function &func) override { - for (const detail::KernelArg &arg:k->args) { - arg.acquire_access(); - } - DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); - - DEBUG_INFO("[VEKernel] single task: {}", k->name.c_str()); - - veo_args *argp = create_ve_args(); - DEBUG_INFO("[VEKernel] create ve args: {:#x}", (size_t) argp); - - try { - - vector_class ve_addr_list = copy_in(argp, k, proc); - DEBUG_INFO("[VEKernel] invoke ve func: {}", k->name.c_str()); - uint64_t id = veo_call_async_by_name(ctx.ve_ctx, proc.handle, k->name.c_str(), argp); - uint64_t ret_val; - veo_call_wait_result(ctx.ve_ctx, id, &ret_val); - DEBUG_INFO("[VEKernel] ve func finished, id: {}, ret val: {}", id, ret_val); - copy_out(ve_addr_list, k, proc); - - } catch (exception &e) { - std::cerr << "[VEKernel] kernel invoke failed, error message: " << e.what() << std::endl; - } - - veo_args_free(argp); - - for (const detail::KernelArg &arg:k->args) { - arg.release_access(); - } - } - - void parallel_for_1d(shared_ptr_class k, - range<1> r, - const std::function)> &func, - id<1> offset) override { - throw exception("not implemented"); - }; - - void parallel_for_2d(shared_ptr_class k, - range<2> r, - const std::function)> &func, - id<2> offset) override { - throw exception("not implemented"); - }; - - void parallel_for_3d(shared_ptr_class k, - range<3> r, - const std::function)> &func, - id<3> offset) override { - throw exception("not implemented"); - }; - - detail::SUPPORT_PLATFORM_TYPE type() 
override { - return detail::SX_AURORA; - } - -private: - VEContext ctx; - VEProc proc; -}; - -} -#endif //NEOSYCL_INCLUDE_NEOSYCL_EXTENSIONS_NEC_VE_TASK_HANDLER_HPP diff --git a/include/neoSYCL/extensions/nec/veo_util.hpp b/include/neoSYCL/extensions/nec/veo_util.hpp new file mode 100644 index 0000000..f29ada7 --- /dev/null +++ b/include/neoSYCL/extensions/nec/veo_util.hpp @@ -0,0 +1,121 @@ +#pragma once +#include "ve_offload.h" + +namespace neosycl::sycl::extensions::nec { + +class veo_util { + struct veo_proc_handle* proc_; + struct veo_thr_ctxt* ctx_; + uint64_t dll_; + +public: + veo_util() : proc_(nullptr), ctx_(nullptr), dll_(0) {} + veo_util(const veo_util& u) = default; + + bool open(const string_class& lib_path, int ve_node) { + proc_ = veo_proc_create(ve_node); + if (!proc_) { + DEBUG_INFO("veo_proc_create(%d) failed", ve_node); + return false; + } + DEBUG_INFO("veo proc created: %lx", (size_t)proc_); + + dll_ = veo_load_library(proc_, lib_path.c_str()); + if (dll_ == 0) { + DEBUG_INFO("veo_load_library failed: %s", lib_path.c_str()); + return false; + } + + ctx_ = veo_context_open(proc_); + DEBUG_INFO("veo_ctxt created: %lx", (size_t)ctx_); + + return true; + } + + void close() { + if (ctx_ == nullptr) + return; + DEBUG_INFO("veo ctxt released: %lx", (size_t)ctx_); + int rt = veo_context_close(ctx_); + if (rt != veo_command_state::VEO_COMMAND_OK) { + PRINT_ERR("veo_context_close() failed (%d)", rt); + throw runtime_error("veo_util::close() failed"); + } + DEBUG_INFO("veo proc released: %lx", (size_t)proc_); + rt = veo_proc_destroy(proc_); + if (rt != veo_command_state::VEO_COMMAND_OK) { + PRINT_ERR("veo_proc_destroy() failed (%d)", rt); + throw runtime_error("veo_util::close() failed"); + } + } + + bool is_open() const { + return ctx_ != nullptr; + } + + uint64_t get_sym(const char* s) { + return veo_get_sym(proc_, dll_, s); + } + + int write_mem(void* dst, const void* src, size_t sz) { + int rt = veo_write_mem(proc_, (uint64_t)dst, src, sz); + if (rt != 
VEO_COMMAND_OK) { + PRINT_ERR("veo_write_mem() failed (%d)", rt); + throw exception("veo_util::write_mem() failed"); + } + return rt; + } + + int read_mem(void* dst, const void* src, size_t sz) { + int rt = veo_read_mem(proc_, dst, (uint64_t)src, sz); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_read_mem() failed (%d)", rt); + throw exception("veo_util::read_mem() failed"); + } + return rt; + } + + int copy_mem(void* dst, const void* src, size_t sz) { + shared_ptr_class tmp(new unsigned char[sz]); + + // the data is copied twice... + int rt = veo_read_mem(proc_, tmp.get(), (uint64_t)src, sz); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_copy_mem() failed (%d)", rt); + throw exception("veo_util::copy_mem() failed"); + } + rt = veo_write_mem(proc_, (uint64_t)dst, tmp.get(), sz); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_copy_mem() failed (%d)", rt); + throw exception("veo_util::copy_mem() failed"); + } + return rt; + } + + void* alloc_mem(size_t sz) { + uint64_t ve_addr_int; + + int rt = veo_alloc_mem(proc_, &ve_addr_int, sz); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_alloc_mem() failed (%d)", rt); + throw exception("veo_util::alloc_mem() failed"); + } + return (void*)ve_addr_int; + } + + int free_mem(void* p) { + return veo_free_mem(proc_, (uint64_t)p); + } + + int call_func(uint64_t funcp, struct veo_args* argp) { + uint64_t ret; + uint64_t id = veo_call_async(ctx_, funcp, argp); + int rt = veo_call_wait_result(ctx_, id, &ret); + if (rt != VEO_COMMAND_OK) { + PRINT_ERR("veo_call_wait_result() failed (%d)", rt); + throw exception("veo_util::call_func() failed"); + } + return ret; + } +}; +} // namespace neosycl::sycl::extensions::nec diff --git a/include/neoSYCL/kout/Kout.hpp b/include/neoSYCL/kout/Kout.hpp new file mode 100644 index 0000000..d19dbca --- /dev/null +++ b/include/neoSYCL/kout/Kout.hpp @@ -0,0 +1,36 @@ +/******************************************************************** +Copyright (c) 2021 Hiroyuki Takizawa + +This software is released 
under the MIT License, see LICENSE.txt. +**********************************************************************/ +#pragma once +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/Basic/Version.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include +#include +#include +#include +#include + +#define SYCL_ACCESSOR "neosycl::sycl::accessor" +#define SYCL_HANDLER "class neosycl::sycl::handler" +#define SYCL_SINGLE_TASK "single_task" +#define SYCL_PARALLEL_FOR "parallel_for" + +using namespace std; +using namespace llvm; +using namespace clang; +using namespace clang::tooling; diff --git a/include/neoSYCL/kout/KoutPrinterHelper.hpp b/include/neoSYCL/kout/KoutPrinterHelper.hpp new file mode 100644 index 0000000..f0e3f05 --- /dev/null +++ b/include/neoSYCL/kout/KoutPrinterHelper.hpp @@ -0,0 +1,29 @@ +#pragma once + +class KoutPrinterHelper : public PrinterHelper { + ASTContext& ast_; + +public: + KoutPrinterHelper(ASTContext& a) : ast_(a) {} + + bool Visit(CXXOperatorCallExpr*, llvm::raw_ostream&); + bool Visit(DeclRefExpr*, llvm::raw_ostream&); + bool Visit(ReturnStmt*, llvm::raw_ostream&); + + bool handledStmt(Stmt* s, llvm::raw_ostream& os) { + // cerr << s->getStmtClassName() << endl; + auto op = dyn_cast(s); + if (op) + return Visit(op, os); + + auto var = dyn_cast(s); + if (var) + return Visit(var, os); + + auto ret = dyn_cast(s); + if (ret) + return Visit(ret, os); + + return false; + } +}; diff --git a/include/neoSYCL/kout/KoutVisitor.hpp b/include/neoSYCL/kout/KoutVisitor.hpp new file mode 
100644 index 0000000..ac6b721 --- /dev/null +++ b/include/neoSYCL/kout/KoutVisitor.hpp @@ -0,0 +1,48 @@ +#pragma once +#include "Kout.hpp" +#include "KoutPrinterHelper.hpp" +#include "VarDeclFinder.hpp" + +class KoutVisitor : public RecursiveASTVisitor { +public: + struct KoutData { + string handler; + string kernel; + string range; + string offset; + string func; + string var; + size_t dim; + PrintingPolicy policy; + KoutPrinterHelper helper; + SourceManager& smgr; + vector vlist; + vector alist; + KoutData(PrintingPolicy p, ASTContext& ast) + : dim(0), policy(p), helper(ast), smgr(ast.getSourceManager()) {} + }; + + KoutVisitor(Rewriter& R, ASTContext& ast) + : TheRewriter(R), kcode_(), kernCode(kcode_), ast_(ast) {} + + bool shouldVisitTemplateInstantiations() { + return true; + } + + void checkCXXMCallExpr(bool is_single, CXXMemberCallExpr* ce, + CXXMethodDecl* callee, std::string& text); + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr* ce); + + string& getDeviceCode() { + return kcode_; + } + + bool VisitTypeAliasDecl(TypeAliasDecl* d); + +private: + Rewriter& TheRewriter; + string kcode_; + raw_string_ostream kernCode; + ASTContext& ast_; +}; diff --git a/include/neoSYCL/kout/VarDeclFinder.hpp b/include/neoSYCL/kout/VarDeclFinder.hpp new file mode 100644 index 0000000..13aac68 --- /dev/null +++ b/include/neoSYCL/kout/VarDeclFinder.hpp @@ -0,0 +1,37 @@ +#pragma once +#include "Kout.hpp" + +class VarDeclFinder : public RecursiveASTVisitor { +public: + virtual bool shouldVisitTemplateInstantiations() { + return true; + } + + bool isInList(std::vector& l, Decl* d) { + for (const auto& item : l) { + if (item == d) { + return true; + } + } + return false; + } + + bool isAccessor(std::string& type_name) { + std::regex re("(" + string(SYCL_ACCESSOR) + ")<.*>"); + std::smatch result; + if (std::regex_search(type_name, result, re)) { + type_name = result[1].str(); + return true; + } + return false; + } + + bool VisitDecl(Decl* d); + bool VisitDeclRefExpr(DeclRefExpr* 
e); + bool VisitMemberExpr(MemberExpr* e); + + std::vector vlist; + std::vector alist; + std::vector decl_list; + std::vector parm_list; +}; diff --git a/include/neoSYCL/sycl.hpp b/include/neoSYCL/sycl.hpp index 47a0448..7e495e7 100644 --- a/include/neoSYCL/sycl.hpp +++ b/include/neoSYCL/sycl.hpp @@ -1,36 +1,51 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_HPP_ -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_HPP_ +#pragma once // debug headers #include "sycl/detail/debug.hpp" -#include "sycl/types.hpp" +#include "sycl/types.hpp" #include "sycl/exception.hpp" - -// include device headers +#include "sycl/op_def.hpp" +#include "sycl/range.hpp" +#include "sycl/item.hpp" #include "sycl/id.hpp" -#include "sycl/device.hpp" +#include "sycl/nd_range.hpp" + +#include "sycl/detail/runtime/acc.hpp" +#include "sycl/detail/builtin/builtin.hpp" -// include device selector headers +#ifndef ___NEOSYCL_KERNEL_RUNTIME_ONLY___ #include "sycl/device_selector.hpp" -#include "sycl/device_selector/cpu_selector.hpp" +#include "sycl/platform.hpp" +#include "sycl/device.hpp" +#include "sycl/context.hpp" +#include "sycl/kernel.hpp" +#include "sycl/program.hpp" +#include "neoSYCL/sycl/detail/kernel.hpp" +#include "neoSYCL/sycl/detail/program.hpp" +#include "neoSYCL/sycl/detail/device.hpp" +#include "neoSYCL/sycl/detail/context.hpp" +#include "neoSYCL/sycl/detail/platform.hpp" -// include platform headers -#include "sycl/platform.hpp" +#include "sycl/device_selector/cpu_selector.hpp" +#ifdef BUILD_VE +#include "extensions/nec/ve_selector.hpp" +#endif -// include buffer headers -#include "sycl/accessor.hpp" +#include "sycl/atomic.hpp" #include "sycl/accessor.hpp" -#include "sycl/buffer.hpp" - -#include "sycl/queue.hpp" #include "sycl/handler.hpp" +#include "sycl/event.hpp" +#include "sycl/queue.hpp" + +// include buffer headers +#include "sycl/allocator.hpp" +#include "sycl/buffer.hpp" +#endif // ___NEOSYCL_KERNEL_RUNTIME_ONLY___ namespace neosycl { using namespace neosycl::sycl; } - -#endif 
//NEOSYCL_INCLUDE_NEOSYCL_SYCL_HPP_ diff --git a/include/neoSYCL/sycl/access.hpp b/include/neoSYCL/sycl/access.hpp index 6a2d6fd..5788cf0 100644 --- a/include/neoSYCL/sycl/access.hpp +++ b/include/neoSYCL/sycl/access.hpp @@ -1,5 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_ACCESS_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_ACCESS_HPP_ +#pragma once namespace neosycl::sycl { @@ -24,10 +23,7 @@ enum class mode { atomic }; -enum placeholder { - false_t, - true_t -}; +enum placeholder { false_t, true_t }; enum class address_space : int { global_space, @@ -36,8 +32,6 @@ enum class address_space : int { private_space }; -} - -} +} // namespace access -#endif //CUSTOM_SYCL_INCLUDE_SYCL_ACCESS_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/accessor.hpp b/include/neoSYCL/sycl/accessor.hpp index 249fe01..edf327f 100644 --- a/include/neoSYCL/sycl/accessor.hpp +++ b/include/neoSYCL/sycl/accessor.hpp @@ -1,65 +1,156 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP - -#include "neoSYCL/sycl/id.hpp" -#include "neoSYCL/sycl/property_list.hpp" -#include "neoSYCL/sycl/handler.hpp" +#pragma once #include "neoSYCL/sycl/detail/container/data_container.hpp" #include "neoSYCL/sycl/detail/container/data_container_nd.hpp" +#include "neoSYCL/sycl/detail/container/buffer_container.hpp" namespace neosycl::sycl { -template +// prototype decls +template class buffer; +class handler; -template +/////////////////////////////////////////////////////////////////////////////// +template class accessor { + friend class handler; public: - template - accessor(buffer &bufferRef, const property_list &propList = {}): - data(bufferRef.data), accessRange(bufferRef.get_range()) {} - - template - accessor(buffer &bufferRef, - range accessRange, - const property_list &propList = {}): - data(bufferRef.data), accessRange(accessRange) {} - - template - accessor(buffer &bufferRef, - range accessRange, - id accessOffset, - const property_list 
&propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) {} - - template - accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, - range accessRange, - const property_list &propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(0) {} - - template - accessor(buffer &bufferRef, - handler &commandGroupHandlerRef, - range accessRange, - id accessOffset, - const property_list &propList = {} - ):data(bufferRef.data), accessRange(accessRange), accessOffset(accessOffset) {} + using value_type = dataT; + using reference = dataT&; + using const_reference = const dataT&; + using container_type = detail::container::BufferContainer; + + accessor(const accessor& rhs) = default; + accessor(accessor&& rhs) = default; + ~accessor() = default; + + accessor& operator=(const accessor& rhs) = default; + accessor& operator=(accessor&& rhs) = default; + + template + friend bool operator==(const accessor& lhs, + const accessor& rhs); + template + friend bool operator!=(const accessor& lhs, + const accessor& rhs); + +#if 0 + /* TODO: accessor of dimensions == 0 is not supported yet */ + + /* Available only when: ((isPlaceholder == access::placeholder::false_t && + accessTarget == access::target::host_buffer) || (isPlaceholder == + access::placeholder::true_t && (accessTarget == access::target::global_buffer + || accessTarget == access::target::constant_buffer))) && dimensions == 0 */ + template > + accessor(buffer& bufferRef, + const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(bufferRef.get_range()), + accessOffset(), device_ptr(nullptr) {} + + /* Available only when: (isPlaceholder == access::placeholder::false_t && + (accessTarget == access::target::global_buffer || accessTarget == + access::target::constant_buffer)) && dimensions == 0 */ + template > + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, const property_list& propList = {}) + : data(bufferRef.get_data()), 
accessRange(bufferRef.get_range()), + accessOffset(), device_ptr(nullptr) { + alloc_(commandGroupHandlerRef); + } +#endif + + /* Available only when: ((isPlaceholder == access::placeholder::false_t && + accessTarget == access::target::host_buffer) || (isPlaceholder == + access::placeholder::true_t && (accessTarget == access::target::global_buffer + || accessTarget == access::target::constant_buffer))) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(bufferRef.get_range()), + accessOffset(), device_ptr(nullptr) {} + + /* Available only when: (isPlaceholder == access::placeholder::false_t && + (accessTarget == access::target::global_buffer || accessTarget == + access::target::constant_buffer)) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(bufferRef.get_range()), + accessOffset(), device_ptr(nullptr) { + alloc_(commandGroupHandlerRef); + } + + /* Available only when: (isPlaceholder == access::placeholder::false_t && + accessTarget == access::target::host_buffer) || (isPlaceholder == + access::placeholder::true_t && (accessTarget == access::target::global_buffer + || accessTarget == access::target::constant_buffer)) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + range accessRange, const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(accessRange), accessOffset(), + device_ptr(nullptr) {} + + /* Available only when: (isPlaceholder == access::placeholder::false_t && + accessTarget == access::target::host_buffer) || (isPlaceholder == + access::placeholder::true_t && (accessTarget == access::target::global_buffer + || accessTarget == access::target::constant_buffer)) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + range accessRange, id accessOffset, + const property_list& 
propList = {}) + : data(bufferRef.get_data()), accessRange(accessRange), + accessOffset(accessOffset), device_ptr(nullptr) {} + + /* Available only when: (isPlaceholder == access::placeholder::false_t && + (accessTarget == access::target::global_buffer || accessTarget == + access::target::constant_buffer)) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, range accessRange, + const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(accessRange), accessOffset(), + device_ptr(nullptr) { + alloc_(commandGroupHandlerRef); + } + /* Available only when: (isPlaceholder == access::placeholder::false_t && + (accessTarget == access::target::global_buffer || accessTarget == + access::target::constant_buffer)) && dimensions > 0 */ + template 0)>> + accessor(buffer& bufferRef, + handler& commandGroupHandlerRef, range accessRange, + id accessOffset, const property_list& propList = {}) + : data(bufferRef.get_data()), accessRange(accessRange), + accessOffset(accessOffset), device_ptr(nullptr) { + alloc_(commandGroupHandlerRef); + } + + /* -- common interface members -- */ + + /* -- property interface members -- */ constexpr bool is_placeholder() const { return isPlaceholder; } size_t get_size() const { - return data.get_size(); + return data->get_size(); } size_t get_count() const { - return data.use_count(); + return data->get_count(); } range get_range() const { @@ -70,109 +161,177 @@ class accessor { return accessOffset; } - /* Available only when: (accessMode == access::mode::read_write || accessMode == access::mode::discard_read_write) && dimensions == 0) */ - template> - operator dataT &() const; - - /* Available only when: (accessMode == access::mode::write || accessMode == access::mode::read_write || accessMode == access::mode::discard_write || accessMode == access::mode::discard_read_write) && dimensions > 0) */ - template 0)>> - dataT &operator[](id index) const { + /* Available only when: 
(accessMode == access::mode::read_write || + * accessMode + * == access::mode::discard_read_write) && dimensions == 0) */ + template > + operator dataT&() const; + + /* Available only when: (accessMode == access::mode::write || accessMode == + * access::mode::read_write || accessMode == access::mode::discard_write || + * accessMode == access::mode::discard_read_write) && dimensions > 0) */ + template 0)>> + dataT& operator[](id index) const { size_t index_val = id2index(index); - DEBUG_INFO("[Accessor] access with index: %d", index_val); + // DEBUG_INFO("access with index: %lu", index_val); return (*data).get(index_val); } - template 0)>> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D > 0)>> dataT operator[](id index) const { size_t index_val = id2index(index); - DEBUG_INFO("[Accessor] read access with index: %d", index_val); + // DEBUG_INFO("read access with index: %lu", index_val); return (*data).get(index_val); } - /* Available only when: (accessMode == access::mode::write || accessMode == access::mode::read_write || accessMode == access::mode::discard_write || accessMode == access::mode::discard_read_write) && dimensions == 1) */ - template> - dataT &operator[](size_t index) const { + /* Available only when: (accessMode == access::mode::write || accessMode == + * access::mode::read_write || accessMode == access::mode::discard_write || + * accessMode == access::mode::discard_read_write) && dimensions == 1) */ + template > + dataT& operator[](size_t index) const { return (*data)[index]; } - template> + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 1)>> dataT operator[](size_t index) const { return (*data)[index]; } /* Available only when: dimensions > 1 */ - template> - dataT *operator[](size_t index) const { + template > + dataT* operator[](size_t index) const { return (*data)[index]; } - 
template> - const dataT *operator[](size_t index) const { + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 2)>> + const dataT* operator[](size_t index) const { return (*data)[index]; } /* Available only when: dimensions > 1 */ - template> + template > detail::container::AccessProxyND operator[](size_t index) const { return (*data)[index]; } - template> - const dataT **operator[](size_t index) const { + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 3)>> + const dataT** operator[](size_t index) const { return (*data)[index]; } - /* Available only when: accessMode == access::mode::read && dimensions == 0 */ - template> + /* Available only when: accessMode == access::mode::read && dimensions == 0 + */ + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::read) && (D == 0)>> operator dataT() const; - ~accessor() = default; + template > + dataT* get_pointer() const { + return data->get_ptr(); + } + + /* Available only when: accessMode == access::mode::atomic && dimensions == + 0 */ + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::atomic) && (D == 0)>> + operator atomic() const; + + /* Available only when: accessMode == access::mode::atomic && dimensions > + 0 */ + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::atomic) && (D > 0)>> + atomic + operator[](id index) const; + + /* Available only when: accessMode == access::mode::atomic && dimensions == + 1 */ + template < + access::mode Mode = accessMode, int D = dimensions, + typename = std::enable_if_t<(Mode == access::mode::atomic) && (D == 1)>> + atomic + operator[](size_t index) const; + + /* Available only when: dimensions > 1 */ + // 
__unspecified__& operator[](size_t index) const; + + template > + void* get_pointer() const; private: - std::shared_ptr> data; + std::shared_ptr data; range accessRange; id accessOffset; + void* device_ptr; size_t id2index(id index) const { size_t x = this->accessRange.get(0); size_t y = this->accessRange.get(1); if (dimensions == 2) { - return x * index[0] + index[1]; - } else if (dimensions == 3) { - return x * index[0] + y * index[1] + index[2]; + return index[0] + x * index[1]; + } + else if (dimensions == 3) { + return index[0] + x * (index[1] + y * index[2]); } return index[0]; } + + void alloc_(handler& h) /* defined in handler.hpp */; }; +template +bool operator==(const accessor& lhs, + const accessor& rhs) { + return lhs.data == rhs.data && lhs.accessRange == rhs.accessRange && + lhs.accessOffset == rhs.accessOffset; } -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_ACCESSOR_HPP +template +bool operator!=(const accessor& lhs, + const accessor& rhs) { + return !(lhs == rhs); +} +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/allocator.hpp b/include/neoSYCL/sycl/allocator.hpp index ebd3c0e..444b78a 100644 --- a/include/neoSYCL/sycl/allocator.hpp +++ b/include/neoSYCL/sycl/allocator.hpp @@ -1,5 +1,4 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_ALLOCATOR_HPP_ -#define SYCL_INCLUDE_CL_SYCL_ALLOCATOR_HPP_ +#pragma once namespace neosycl::sycl { @@ -7,5 +6,3 @@ template using buffer_allocator = std::allocator; } - -#endif //SYCL_INCLUDE_CL_SYCL_ALLOCATOR_HPP_ diff --git a/include/neoSYCL/sycl/atomic.hpp b/include/neoSYCL/sycl/atomic.hpp new file mode 100644 index 0000000..1d5bd31 --- /dev/null +++ b/include/neoSYCL/sycl/atomic.hpp @@ -0,0 +1,12 @@ +#pragma once + +namespace neosycl::sycl { + +template +class atomic { /* TODO: not implemented yet */ +public: + atomic() = default; +}; + +} // namespace neosycl::sycl \ No newline at end of file diff --git a/include/neoSYCL/sycl/buffer.hpp b/include/neoSYCL/sycl/buffer.hpp index 91ba8d8..6f0112a 100644 --- 
a/include/neoSYCL/sycl/buffer.hpp +++ b/include/neoSYCL/sycl/buffer.hpp @@ -1,14 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ - -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/range.hpp" -#include "neoSYCL/sycl/access.hpp" -#include "neoSYCL/sycl/accessor.hpp" -#include "neoSYCL/sycl/allocator.hpp" -#include "neoSYCL/sycl/handler.hpp" -#include "neoSYCL/sycl/context.hpp" -#include "neoSYCL/sycl/property_list.hpp" +#pragma once #include "neoSYCL/sycl/detail/container/data_container.hpp" #include "neoSYCL/sycl/detail/container/data_container_nd.hpp" @@ -22,8 +12,8 @@ class use_host_ptr { }; class use_mutex { public: - use_mutex(mutex_class &mutexRef); - mutex_class *get_mutex_ptr() const; + use_mutex(mutex_class& mutexRef); + mutex_class* get_mutex_ptr() const; }; class context_bound { public: @@ -33,78 +23,96 @@ class context_bound { } // namespace buffer } // namespace property - -template> +/////////////////////////////////////////////////////////////////////////////// +template > class buffer { - friend accessor; - friend accessor; - friend accessor; - friend accessor; - friend accessor; - friend accessor; - public: - using value_type = T; - using reference = value_type &; - using const_reference = const value_type &; - using allocator_type = AllocatorT; - - buffer(const range &bufferRange, const property_list &propList = {}) : - buffer(bufferRange, allocator_type(), propList) {} - - buffer(const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) + using value_type = T; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = AllocatorT; + using container_type = detail::container::BufferContainer; + + buffer(const buffer& rhs) = default; + buffer(buffer&& rhs) = default; + ~buffer() = default; + + buffer& operator=(const buffer& rhs) = default; + buffer& operator=(buffer&& rhs) = default; + + template + friend bool 
operator==(const buffer& lhs, + const buffer& rhs); + template + friend bool operator!=(const buffer& lhs, + const buffer& rhs); + + buffer(const range& bufferRange, + const property_list& propList = {}) + : buffer(bufferRange, allocator_type(), propList) {} + + buffer(const range& bufferRange, AllocatorT allocator, + const property_list& propList = {}) : bufferRange(bufferRange), - data(new detail::container::DataContainerND(bufferRange.data, allocator)) {} + data(new container_type(bufferRange.data, allocator)) {} - buffer(T *hostData, const range &bufferRange, const property_list &propList = {}) : - buffer(hostData, bufferRange, allocator_type(), propList) {} + buffer(T* hostData, const range& bufferRange, + const property_list& propList = {}) + : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(T *hostData, const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data, allocator)) {} + buffer(T* hostData, const range& bufferRange, + AllocatorT allocator, const property_list& propList = {}) + : bufferRange(bufferRange), + data(new container_type(hostData, bufferRange.data, allocator)) {} - buffer(const T *hostData, const range &bufferRange, const property_list &propList = {}) : - buffer(hostData, bufferRange, allocator_type(), propList) {} + buffer(const T* hostData, const range& bufferRange, + const property_list& propList = {}) + : buffer(hostData, bufferRange, allocator_type(), propList) {} - buffer(const T *hostData, - const range &bufferRange, - AllocatorT allocator, - const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data)) {} + buffer(const T* hostData, const range& bufferRange, + AllocatorT allocator, const property_list& propList = {}) + : bufferRange(bufferRange), + data(new container_type(hostData, bufferRange.data)) {} - 
buffer(const shared_ptr_class &hostData, - const range &bufferRange, AllocatorT allocator, const property_list &propList = {}) : - bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData, bufferRange.data, allocator)) {} + buffer(const shared_ptr_class& hostData, + const range& bufferRange, AllocatorT allocator, + const property_list& propList = {}) + : bufferRange(bufferRange), + data(new container_type(hostData, bufferRange.data, allocator)) {} - buffer(const shared_ptr_class &hostData, const range &bufferRange, const property_list &propList = {}) + buffer(const shared_ptr_class& hostData, + const range& bufferRange, + const property_list& propList = {}) : bufferRange(bufferRange), - data(new detail::container::DataContainerND(hostData.get(), bufferRange.data)) {} - - template> - buffer(InputIterator first, - InputIterator last, - AllocatorT allocator, - const property_list &propList = {}) : - bufferRange((last - first) / sizeof(T)), - data(new detail::container::DataContainerND - (first, detail::container::ArrayND<1>((last - first) / sizeof(T)), allocator)) {} - - template> - buffer(InputIterator first, InputIterator last, const property_list &propList = {}) : - bufferRange((last - first) / sizeof(T)), - data(new detail::container::DataContainerND - (first, detail::container::ArrayND<1>(last - first) / sizeof(T))) {} - - buffer(buffer b, const id &baseIndex, - const range &subRange); + data(new container_type(hostData.get(), bufferRange.data)) {} + + template > + buffer(InputIterator first, InputIterator last, AllocatorT allocator, + const property_list& propList = {}) + : bufferRange((last - first) / sizeof(T)), + data(new container_type( + first, detail::container::ArrayND<1>((last - first) / sizeof(T)), + allocator)) {} + + template > + buffer(InputIterator first, InputIterator last, + const property_list& propList = {}) + : bufferRange((last - first) / sizeof(T)), + data(new container_type( + first, detail::container::ArrayND<1>(last 
- first) / sizeof(T))) {} + + buffer(buffer b, const id& baseIndex, + const range& subRange); /* Available only when: dimensions == 1. */ -// buffer(cl_mem clMemObject, const context &syclContext, event availableEvent = {}); + // buffer(cl_mem clMemObject, const context &syclContext, event + // availableEvent = {}); -/* -- common interface members -- */ -/* -- property interface members -- */ + /* -- common interface members -- */ + /* -- property interface members -- */ range get_range() const { return bufferRange; } @@ -121,62 +129,79 @@ class buffer { return AllocatorT(); } - template - accessor get_access(handler &commandGroupHandler) { - commandGroupHandler.get_kernel()->args.push_back(detail::KernelArg(data, mode)); - return accessor(*this); + template + accessor + get_access(handler& commandGroupHandler) { + accessor acc(*this); + commandGroupHandler.alloc_mem_(acc); + return acc; } - template + template accessor get_access() { return accessor(*this); } - template - accessor get_access( - handler &commandGroupHandler, range accessRange, id accessOffset = {}) { - commandGroupHandler.get_kernel()->args.push_back(detail::KernelArg(data, mode)); - return accessor - (*this, commandGroupHandler, accessRange, accessOffset); + template + accessor + get_access(handler& commandGroupHandler, range accessRange, + id accessOffset = {}) { + accessor acc(*this, commandGroupHandler, + accessRange, accessOffset); + commandGroupHandler.alloc_mem_(acc); + return acc; } - template - accessor get_access( - range accessRange, id accessOffset = {}) { - return accessor - (*this, accessRange, accessOffset); + template + accessor + get_access(range accessRange, id accessOffset = {}) { + return accessor( + *this, accessRange, accessOffset); } - template + template void set_final_data(Destination finalData = nullptr); void set_write_back(bool flag = true); - bool is_sub_buffer() const; - - template - buffer reinterpret(range reinterpretRange) const; - - buffer(const buffer &rhs) : 
data(rhs.data), bufferRange(rhs.bufferRange) {} - - buffer(buffer &&rhs) : data(rhs.data), bufferRange(rhs.bufferRange) {} - - buffer &operator=(const buffer &rhs) { - data = rhs.data; - bufferRange = rhs.bufferRange; + /* TODO: sub-buffer is not supported yet */ + bool is_sub_buffer() const { + return false; } - buffer &operator=(buffer &&rhs) { - data = rhs.data; - bufferRange = rhs.bufferRange; + template + buffer + reinterpret(range reinterpretRange) const { + buffer r(*this); + if (ReinterpretDim > get_count()) + throw sycl::invalid_object_error("invalid dimensions"); + for (int i(0); i < ReinterpretDim; i++) { + if (r.bufferRage[i] >= reinterpretRange[i]) + r.bufferRage[i] = reinterpretRange[i]; + else + throw sycl::invalid_object_error("invalid range"); + } + return r; } - ~buffer() = default; + std::shared_ptr get_data() { + return data; + } private: - std::shared_ptr> data; range bufferRange; + std::shared_ptr data; }; + +template +bool operator==(const buffer& lhs, const buffer& rhs) { + return lhs.data == rhs.data; +} +template +bool operator!=(const buffer& lhs, const buffer& rhs) { + return !(lhs == rhs); } -#endif //CUSTOM_SYCL_INCLUDE_SYCL_BUFFER_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/context.hpp b/include/neoSYCL/sycl/context.hpp index 6b2060a..284aea4 100644 --- a/include/neoSYCL/sycl/context.hpp +++ b/include/neoSYCL/sycl/context.hpp @@ -1,40 +1,62 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ - -#include "neoSYCL/sycl/exception.hpp" -#include "neoSYCL/sycl/info/context.hpp" +#pragma once #include "neoSYCL/sycl/property_list.hpp" +#include "neoSYCL/sycl/info/context.hpp" namespace neosycl::sycl { -class context { +namespace detail { +class context_impl; +} +/////////////////////////////////////////////////////////////////////////////// +class context { public: - explicit context(const property_list &propList = {}); + explicit context(const property_list& propList = 
{}) { + init({}); + } - context(async_handler asyncHandler, - const property_list &propList = {}); + ~context() = default; - context(const device &dev, const property_list &propList = {}); + context(async_handler asyncHandler, const property_list& propList = {}); - context(const device &dev, async_handler asyncHandler, const property_list &propList = {}); + context(const device& dev, const property_list& propList = {}) { + init({dev}); + } - context(const platform &plt, const property_list &propList = {}); + context(const device& dev, async_handler asyncHandler, + const property_list& propList = {}); - context(const platform &plt, async_handler asyncHandler, const property_list &propList = {}); + context(const platform& plt, const property_list& propList = {}); - context(const vector_class &deviceList, const property_list &propList = {}); + context(const platform& plt, async_handler asyncHandler, + const property_list& propList = {}); - context(const vector_class &deviceList, - async_handler asyncHandler, const property_list &propList = {}); + context(const vector_class& deviceList, + const property_list& propList = {}); -// context(cl_context clContext, async_handler asyncHandler = {}); + context(const vector_class& deviceList, async_handler asyncHandler, + const property_list& propList = {}); - template - typename info::param_traits::return_type get_info() const; + context(cl_context clContext, async_handler asyncHandler = {}) { + throw unimplemented(); + } -}; + /* -- common interface members -- */ + template + typename info::param_traits::return_type + get_info() const; -} + vector_class get_devices() const; + + // INTERNAL USE ONLY + long use_count_() const { + return impl_.use_count(); + } + +private: + void init(vector_class); + + shared_ptr_class impl_; +}; -#endif //CUSTOM_SYCL_INCLUDE_SYCL_CONTEXT_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/accessor_data.hpp b/include/neoSYCL/sycl/detail/accessor_data.hpp new file mode 
100644 index 0000000..1778585 --- /dev/null +++ b/include/neoSYCL/sycl/detail/accessor_data.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "neoSYCL/sycl/detail/container/data_container.hpp" + +namespace neosycl::sycl::detail { + +struct accessor_data { + using container_ptr = shared_ptr_class; + + accessor_data(container_ptr arg, access::mode mode) + : data(std::move(arg)), mode(mode) {} + + container_ptr data; + access::mode mode; +}; + +} // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/builtin/builtin.hpp b/include/neoSYCL/sycl/detail/builtin/builtin.hpp new file mode 100644 index 0000000..700661e --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/builtin.hpp @@ -0,0 +1,12 @@ +#pragma once + +// includes all builtin functions +#include "neoSYCL/sycl/detail/builtin/op_defs.hpp" +#include "neoSYCL/sycl/detail/builtin/vec.hpp" +#include "neoSYCL/sycl/detail/builtin/math.hpp" +#include "neoSYCL/sycl/detail/builtin/common.hpp" +#include "neoSYCL/sycl/detail/builtin/integer.hpp" +#include "neoSYCL/sycl/detail/builtin/geometric.hpp" +#include "neoSYCL/sycl/detail/builtin/relational.hpp" + +namespace neosycl::sycl {} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/common.hpp b/include/neoSYCL/sycl/detail/builtin/common.hpp new file mode 100644 index 0000000..5169893 --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/common.hpp @@ -0,0 +1,103 @@ +#pragma once + +namespace neosycl::sycl { +namespace detail { +template +T clamp(T x, T y, T z) { + return (x < y ? y : (x > z ? z : x)); +} +template +T degrees(T i) { + return i * 180 / M_PI; +} +template +T mix(T x, T y, T z) { + return (x * z + (1 - z) * y); +} +template +T radians(T i) { + return i * M_PI / 180; +} +template +T step(T e, T x) { + return x < e ? 0 : 1; +} +template +T smoothstep(T e1, T e2, T x) { + return x < e1 ? 0 : (x > e2 ? 1 : (x - e1) / (e2 - e1)); +} +template +T max(T x, T y) { + return x < y ? 
y : x; +} +template +T min(T x, T y) { + return x < y ? x : y; +} +template +T sign(T x) { + return x < 0 ? 1 : 0; +} +} // namespace detail + +DEFINE_GEN_FUNC3(clamp, detail::clamp); +template +vec clamp(vec x, T y, T z) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = clamp(x[i], y, z); + return ret; +} + +DEFINE_GEN_FUNC(degrees, detail::degrees); + +DEFINE_GEN_FUNC2(max, detail::max); +template +vec max(vec x, T y) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = detail::max(x[i], y); + return ret; +} + +DEFINE_GEN_FUNC2(min, detail::min); +template +vec min(vec x, T y) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = detail::min(x[i], y); + return ret; +} + +DEFINE_GEN_FUNC3(mix, detail::mix); +template +vec mix(vec x, vec y, T z) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = detail::mix(x[i], y[i], z); + return ret; +} + +DEFINE_GEN_FUNC(radians, detail::radians); + +DEFINE_GEN_FUNC2(step, detail::step); +template +vec step(T e, vec x) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = detail::step(e, x[i]); + return ret; +} + +DEFINE_GEN_FUNC3(smoothstep, detail::smoothstep); +template +vec smoothstep(T e1, T e2, vec x) { + vec ret; + for (int i(0); i < D; i++) + ret[i] = detail::smoothstep(e1, e2, x[i]); + return ret; +} + +DEFINE_GEN_FUNC(sign, detail::sign); + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/geometric.hpp b/include/neoSYCL/sycl/detail/builtin/geometric.hpp new file mode 100644 index 0000000..cdf561a --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/geometric.hpp @@ -0,0 +1,79 @@ +#pragma once + +namespace neosycl::sycl { +template +vec cross(const vec p0, const vec& p1) { + return vec(p0[1] * p1[2] - p0[2] * p1[1], p0[2] * p1[0] - p0[0] * p1[2], + p0[0] * p1[1] - p0[1] * p1[0], 0); +} +template +vec cross(const vec p0, const vec& p1) { + return vec(p0[1] * p1[2] - p0[2] * p1[1], p0[2] * p1[0] - p0[0] * p1[2], + p0[0] * p1[1] - p0[1] * p1[0]); +} + +template +T dot(const vec p0, 
const vec& p1) { + T r = 0; + for (int i(0); i < D; i++) + r += p0[i] * p1[i]; + return r; +} +template +T distance(const vec p0, const vec& p1) { + T r = 0; + for (int i(0); i < D; i++) { + T diff = p0[i] - p1[i]; + r += diff * diff; + } + return sqrt(r); +} +template +T length(const vec p) { + T r = 0; + for (int i(0); i < D; i++) { + r += p[i] * p[i]; + } + return sqrt(r); +} +template +vec normalize(const vec p) { + vec r = 0; + T len = length(p); + for (int i(0); i < D; i++) { + r = p[i] / len; + } + return r; +} +template +float fast_distance(const vec p0, const vec& p1) { + float r = 0; + for (int i(0); i < D; i++) { + float diff = p0[i] - p1[i]; + r += diff * diff; + } + return std::sqrt(r); +} +template +float fast_length(const vec p) { + float r = 0; + for (int i(0); i < D; i++) { + r += p[i] * p[i]; + } + return std::sqrt(r); +} +template +vec fast_normalize(const vec p) { + vec r = 0; + float sum = 0; + for (int i(0); i < D; i++) { + sum += p[i] * p[i]; + } + float rlen = detail::rsqrt(sum); + for (int i(0); i < D; i++) { + r = p[i] * rlen; + } + return r; +} + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/integer.hpp b/include/neoSYCL/sycl/detail/builtin/integer.hpp new file mode 100644 index 0000000..164128c --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/integer.hpp @@ -0,0 +1,110 @@ +#pragma once +#include +#include +#include "vec.hpp" + +namespace neosycl::sycl { + +namespace detail { +template ::value>> +typename std::make_unsigned::type abs(const T& x) { + return (x < 0 ? 
(-x) : x); +} +template ::value>> +typename std::make_unsigned::type abs_diff(const T& i, const T& j) { + T diff = i - j; + return abs(diff); +} +template ::value>> +T add_sat(const T& i, const T& j) { + if (std::numeric_limits::max() - i <= j) + return std::numeric_limits::max(); + return i + j; +} +template ::value>> +T hadd(const T& i, const T& j) { + T sum = add_sat(i, j); + return (sum >> 1); +} +template ::value>> +T rhadd(const T& i, const T& j) { + T sum = add_sat(1, add_sat(i, j)); + return (sum >> 1); +} +template ::value>> +int clz(const T& x) { + using uT = typename std::make_unsigned::type; + constexpr uT mask = (uT(1) << (sizeof(T) * 8 - 1)); + + uT bits = (uT)x; + int num = 0; + while ((bits & mask) == 0) { + bits <<= 1; + num++; + } + return num; +} +template ::value>> +T mul_hi(const T& i, const T& j) { + using uT = typename std::make_unsigned::type; + constexpr uT mask = ((~uT(0)) << (sizeof(T) * 4)); + return (mask & (i * j)); +} +template ::value>> +T mul_sat(const T& i, const T& j) { + if (std::numeric_limits::max() / i <= j) + return std::numeric_limits::max(); + return i * j; +} +template ::value>> +T mad_hi(const T& i, const T& j, const T& k) { + return mul_hi(i, j) + k; +} +template ::value>> +T mad_sat(const T& i, const T& j, const T& k) { + T prod = mul_sat(i, j); + return add_sat(prod, k); +} +template ::value>> +T rotate(const T& i, const T& j) { + T bits = i; + bits <<= j; + bits |= (i >> (sizeof(T) * 8 - j)); + return bits; +} +template ::value>> +T sub_sat(const T& i, const T& j) { + if (std::numeric_limits::min() + j >= i) + return std::numeric_limits::min(); + return i - j; +} +template ::value>> +T popcount(const T& x) { + int nzb = 0; + T bits = x; + while (bits) { + nzb += (bits & 0x01); + bits >>= 1; + } + return nzb; +} +}; // namespace detail + +DEFINE_UGEN_FUNC(abs, detail::abs); +DEFINE_UGEN_FUNC2(abs_diff, detail::abs_diff); +DEFINE_GEN_FUNC2(add_sat, detail::add_sat); +/* clamp() is defined in math.hpp */ 
+DEFINE_GEN_FUNC2(hadd, detail::hadd); +DEFINE_GEN_FUNC2(rhadd, detail::rhadd); +DEFINE_GEN_FUNC(clz, detail::clz); +DEFINE_GEN_FUNC3(mad_hi, detail::mad_hi); +DEFINE_GEN_FUNC2(mul_hi, detail::mul_hi); +DEFINE_GEN_FUNC3(mad_sat, detail::mad_sat); +/* max() is defined in math.hpp */ +/* min() is defined in math.hpp */ +DEFINE_GEN_FUNC2(rotate, detail::rotate); +DEFINE_GEN_FUNC2(sub_sat, detail::sub_sat); +/* upsample() is not implemented yet */ +DEFINE_GEN_FUNC(popcount, detail::popcount); + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/math.hpp b/include/neoSYCL/sycl/detail/builtin/math.hpp new file mode 100644 index 0000000..cfafd15 --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/math.hpp @@ -0,0 +1,337 @@ +#pragma once +#include +#include +#include "vec.hpp" + +namespace neosycl::sycl { + +namespace detail { +template +T acospi(T i) { + return std::acos(i) / M_PI; +} +template +T asinpi(T i) { + return std::asin(i) / M_PI; +} +template +T atanpi(T i) { + return std::atan(i) / M_PI; +} +template +T atan2pi(T i, T j) { + return std::atan2(i, j) / M_PI; +} +template +T cospi(T i) { + return std::cos(M_PI * i); +} +template +T divide(T i, T j) { + return i / j; +} +template +T exp10(T i) { + return std::exp(i + std::log(10)); +} +template +T mad(T i, T j, T k) { + return i * j + k; +} +template +T maxmag(T i, T j) { + if (fabs(i) > fabs(j)) + return i; + if (fabs(i) < fabs(j)) + return j; + + return fmax(i, j); +} +template +T minmag(T i, T j) { + if (fabs(i) < fabs(j)) + return i; + if (fabs(i) > fabs(j)) + return j; + + return fmin(i, j); +} + +template +T powr(T i, T j) { + return std::pow(i, j); +} +template +T recip(T i) { + return (T)1 / i; +} +template +T rootn(T x, int i) { + return pow(x, (T)1 / i); +} +template +T rsqrt(T i) { + return std::sqrt(-i); +} +template +T sinpi(T i) { + return std::sin(i) / M_PI; +} +template +T tanpi(T i) { + return std::tan(i) / M_PI; +} + +} // namespace detail + +namespace naitive { 
+using detail::divide; +using detail::exp10; +using detail::powr; +using detail::recip; +using detail::rsqrt; +using std::cos; +using std::exp; +using std::exp2; +using std::log; +using std::log10; +using std::log2; +using std::sin; +using std::sqrt; +using std::tan; +}; // namespace naitive + +DEFINE_GEN_FUNC(acos, std::acos); +DEFINE_GEN_FUNC(acosh, std::acosh); +DEFINE_GEN_FUNC(acospi, detail::acospi); +DEFINE_GEN_FUNC(asin, std::asin); +DEFINE_GEN_FUNC(asinh, std::asinh); +DEFINE_GEN_FUNC(asinpi, detail::asinpi); +DEFINE_GEN_FUNC(atan, std::atan); +DEFINE_GEN_FUNC2(atan2, std::atan2); +DEFINE_GEN_FUNC(atanh, std::atanh); +DEFINE_GEN_FUNC(atanpi, detail::atanpi); +DEFINE_GEN_FUNC2(atan2pi, detail::atan2pi); +DEFINE_GEN_FUNC(cbrt, std::cbrt); +DEFINE_GEN_FUNC(ceil, std::ceil); +DEFINE_GEN_FUNC2(copysign, std::copysign); +DEFINE_GEN_FUNC(cos, std::cos); +DEFINE_GEN_FUNC(cosh, std::cosh); +DEFINE_GEN_FUNC(cospi, detail::cospi); +DEFINE_GEN_FUNC2(divide, detail::divide); +DEFINE_GEN_FUNC(erfc, std::erfc); +DEFINE_GEN_FUNC(erf, std::erf); +DEFINE_GEN_FUNC(exp, std::exp); +DEFINE_GEN_FUNC(exp2, std::exp2); +DEFINE_GEN_FUNC(exp10, detail::exp10); +DEFINE_GEN_FUNC(expm1, std::expm1); +DEFINE_GEN_FUNC(fabs, std::fabs); +DEFINE_GEN_FUNC2(fdim, std::fdim); +DEFINE_GEN_FUNC(floor, std::floor); +DEFINE_GEN_FUNC3(fma, std::fma); +DEFINE_GEN_FUNC2(fmax, std::fmax); +template +vec fmax(const vec& x, T y) { + return fmax(x, y); +} +DEFINE_GEN_FUNC2(fmin, std::fmin); +template +vec fmin(const vec& x, T y) { + return fmin(x, y); +} +DEFINE_GEN_FUNC2(fmod, std::fmod); +// DEFINE_GEN_FUNC2(fract, *); +template +T fract(const T& x, T* p) { + *p = std::floor(x); + return x - *p; +} +template +vec fract(const vec& x, T* p) { + vec R; + for (int i(0); i < D; i++) + R[i] = fract(x[i], &p[i]); + return R; +} +// DEFINE_GEN_FUNC2(frexp, std::frexp); +template +T frexp(const T& x, int* p) { + return std::frexp(x, p); +} +template +vec frexp(const vec& x, int* p) { + vec R; + for (int i(0); 
i < D; i++) + R[i] = frexp(x[i], &p[i]); + return R; +} + +DEFINE_GEN_FUNC2(hypot, std::hypot); + +// DEFINE_GEN_FUNC_2T(int, logb, std::logbf); +template +int ilogb(const T& x) { + return std::ilogb(x); +} +template +int logb(const T& x) { + return ilogb(x); +} +template +vec ilogb(const vec& x) { + vec R; + for (int i(0); i < D; i++) + R[i] = ilogb(x[i]); + return R; +} +template +vec logb(const vec& x) { + return ilobg(x); +} + +// DEFINE_GEN_FUNC2(ldexp, *); +template +T ldexp(const T& x, const int& y) { + return x * (1 << y); +} +template +vec ldexp(const vec& x, const int& y) { + vec R; + for (int i(0); i < D; i++) + R[i] = ldexp(x[i], y); + return R; +} + +DEFINE_GEN_FUNC(lgamma, std::lgamma); +// DEFINE_GEN_FUNC2(lgamma_r, *); +template +T lgamma_r(const T& x, int* y) { + vec R; + for (int i(0); i < D; i++) { + R[i] = lgamma(x[i]); + *y = R[i] > 0 ? 0 : 1; + } + return R; +} + +template +T lgamma_r(const vec& x, int* y) { + vec R; + for (int i(0); i < D; i++) + R[i] = lgamma_r(x[i], &y[i]); + return R; +} + +DEFINE_GEN_FUNC(log, std::log); +DEFINE_GEN_FUNC(log2, std::log2); +DEFINE_GEN_FUNC(log10, std::log10); +DEFINE_GEN_FUNC(log1p, std::log1p); +DEFINE_GEN_FUNC(logb, std::logb); +DEFINE_GEN_FUNC3(mad, detail::mad); +DEFINE_GEN_FUNC2(maxmag, detail::maxmag); +DEFINE_GEN_FUNC2(minmag, detail::minmag); +// DEFINE_GEN_FUNC2(modf, *); +template +T modf(const T& x, T* y) { + return std::modf(x, y); +} +template +vec modf(const vec& x, T* y) { + vec R; + for (int i(0); i < D; i++) + R[i] = modf(x[i], &y[i]); + return R; +} + +// DEFINE_GEN_FUNC_2T(uint, nan, std::nanf); +inline float nan(const uint& x) { + return std::numeric_limits::quiet_NaN(); +} +template +vec nan(const vec& x) { + vec R; + for (int i(0); i < D; i++) + R[i] = nan(x[i]); + return R; +} +inline double nan(const ulonglong& x) { + return std::numeric_limits::quiet_NaN(); +} +template +vec nan(const vec& x) { + vec R; + for (int i(0); i < D; i++) + R[i] = nan(x[i]); + return R; +} + 
+DEFINE_GEN_FUNC2(nextafter, std::nextafter); +DEFINE_GEN_FUNC2(pow, std::pow); +template +T pown(T x, int y) { + return pow(x, T((float)y)); +} +template +vec pown(const vec& x, int y) { + vec R; + for (int i(0); i < D; i++) + R[i] = pown(x[i], y); +} +DEFINE_GEN_FUNC2(powr, detail::powr); +DEFINE_GEN_FUNC(recip, detail::recip); +DEFINE_GEN_FUNC2(remainder, std::remainder); +template +T remquo(T x, T y, int* z) { + return std::remquo(x, y, z); +} +template +vec remquo(const vec& x, const vec& y, int* z) { + vec R; + for (int i(0); i < D; i++) + R[i] = remquo(x[i], y[i], &z[i]); + return R; +} +DEFINE_GEN_FUNC(rint, std::rint); + +template +T rootn(T x, int y) { + return detail::rootn(x, y); +} +template +vec rootn(const vec& x, int y) { + vec R; + for (int i(0); i < D; i++) + R[i] = rootn(x[i], y); + return R; +} + +DEFINE_GEN_FUNC(round, std::round); + +DEFINE_GEN_FUNC(rsqrt, detail::rsqrt); + +DEFINE_GEN_FUNC(sin, std::sin); + +template +T sincos(T x, T* y) { + *y = cos(x); + return sin(x); +} +template +vec sincos(const vec& x, T* y) { + vec R; + for (int i(0); i < D; i++) { + y[i] = cos(x[i]); + R[i] = sin(x[i]); + } + return R; +} + +DEFINE_GEN_FUNC(sinh, std::sinh); +DEFINE_GEN_FUNC(sinpi, detail::sinpi); +DEFINE_GEN_FUNC(sqrt, std::sqrt); +DEFINE_GEN_FUNC(tan, std::tan); +DEFINE_GEN_FUNC(tanh, std::tanh); +DEFINE_GEN_FUNC(tanpi, detail::tanpi); +DEFINE_GEN_FUNC(tgamma, std::tgamma); +DEFINE_GEN_FUNC(trunc, std::trunc); +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/op_defs.hpp b/include/neoSYCL/sycl/detail/builtin/op_defs.hpp new file mode 100644 index 0000000..346005e --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/op_defs.hpp @@ -0,0 +1,204 @@ +#pragma once + +namespace neosycl::sycl { +/* + N: the function name + F: scalar function for each element of the output vector +*/ +#define DEFINE_GEN_FUNC(N, F) \ + template \ + T N(const T& i) { \ + return F(i); \ + } \ + template \ + vec N(const vec& x) { \ + vec R; \ + 
for (int i = 0; i < D; i++) \ + R[i] = F(x[i]); \ + return R; \ + } + +#define DEFINE_UGEN_FUNC(N, F) \ + template \ + typename std::make_unsigned::type N(const T& i) { \ + return F(i); \ + } \ + template \ + vec::type, D> N(const vec& x) { \ + vec::type, D> R; \ + for (int i = 0; i < D; i++) \ + R[i] = F(x[i]); \ + return R; \ + } + +#define DEFINE_GEN_FUNC2(N, F) \ + template \ + T N(const T& x, const T& y) { \ + return F(x, y); \ + } \ + template \ + vec N(const vec& x, const vec& y) { \ + vec R; \ + for (int i = 0; i < D; i++) \ + R[i] = F(x[i], y[i]); \ + return R; \ + } + +#define DEFINE_UGEN_FUNC2(N, F) \ + template \ + typename std::make_unsigned::type N(const T& x, const T& y) { \ + return F(x, y); \ + } \ + template \ + vec::type, D> N(const vec& x, \ + const vec& y) { \ + vec::type, D> R; \ + for (int i = 0; i < D; i++) \ + R[i] = F(x[i], y[i]); \ + return R; \ + } + +#define DEFINE_GEN_FUNC3(N, F) \ + template \ + T N(const T& x, const T& y, const T& z) { \ + return F(x, y, z); \ + } \ + template \ + vec N(const vec& x, const vec& y, const vec& z) { \ + vec R; \ + for (int i = 0; i < D; i++) \ + R[i] = F(x[i], y[i], z[i]); \ + return R; \ + } + +/* + OP: operator + T: data type + D: dimensions +*/ +#define FRIEND_VEC_BINARY_OPERATOR(OP) \ + template \ + friend vec operator OP(const vec& lhs, const vec& rhs); \ + template \ + friend vec operator OP(const vec& lhs, const T& rhs); \ + template \ + friend vec operator OP(const T& lhs, const vec& rhs); + +#define DEF_VEC_BINARY_OPERATOR(OP) \ + template \ + vec operator OP(const vec& lhs, const vec& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + r.data_[i] = lhs.data_[i] OP rhs.data_[i]; \ + } \ + return r; \ + } \ + template \ + vec operator OP(const vec& lhs, const T& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + r.data_[i] = lhs.data_[i] OP rhs; \ + } \ + return r; \ + } \ + template \ + vec operator OP(const T& lhs, const vec& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + 
r.data_[i] = rhs.data_[i] OP lhs; \ + } \ + return r; \ + } + +#define FRIEND_VEC_ASSIGN_OPERATOR(OP) \ + template \ + friend vec& operator OP(vec& lhs, const vec& rhs); \ + template \ + friend vec& operator OP(vec& lhs, const T& rhs); + +#define DEF_VEC_ASSIGN_OPERATOR(OP) \ + template \ + vec& operator OP(vec& lhs, const vec& rhs) { \ + for (int i(0); i < D; i++) { \ + lhs.data_[i] OP rhs.data_[i]; \ + } \ + return lhs; \ + } \ + template \ + vec& operator OP(vec& lhs, const T& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + lhs.data_[i] OP rhs; \ + } \ + return lhs; \ + } + +#define FRIEND_VEC_INCDEC_OPERATOR(OP) \ + template \ + friend vec& operator OP(vec& lhs); \ + template \ + friend vec operator OP(vec& lhs, int inc); + +#define DEF_VEC_INCDEC_OPERATOR(OP) \ + template \ + vec& operator OP(vec& lhs) { \ + for (int i(0); i < D; i++) { \ + OP(lhs.data_[i]); \ + } \ + return lhs; \ + } \ + template \ + vec operator OP(vec& lhs, int inc) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + r[i] = (lhs.data_[i]); \ + (lhs.daha_[i]) OP; \ + } \ + return r; \ + } + +#define FRIEND_VEC_BOOLEAN_OPERATOR(OP) \ + template \ + friend vec operator OP(const vec& lhs, const vec& rhs); \ + template \ + friend vec operator OP(const vec& lhs, const T& rhs); \ + template \ + friend vec operator OP(const T& lhs, const vec& rhs); + +#define DEF_VEC_BOOLEAN_OPERATOR(OP) \ + template \ + vec operator OP(const vec& lhs, const vec& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r.data_[i] = RET(lhs.data_[i] OP rhs.data_[i]); \ + return r; \ + } \ + template \ + vec operator OP(const vec& lhs, const T& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r.data_[i] = RET(lhs.data_[i] OP rhs); \ + return r; \ + } \ + template \ + vec operator OP(const T& lhs, const vec& rhs) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r.data_[i] = RET(lhs OP rhs.data_[i]); \ + return r; \ + } + +#define FRIEND_VEC_UNARY_OPERATOR(OP) \ + template \ + friend vec operator OP(const vec& v); 
+ +#define DEF_VEC_UNARY_OPERATOR(OP) \ + template \ + vec operator OP(const vec& v) { \ + vec r; \ + for (int i(0); i < D; i++) { \ + r.data_[i] = operator OP(v.data_[i]); \ + } \ + return r; \ + } +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/relational.hpp b/include/neoSYCL/sycl/detail/builtin/relational.hpp new file mode 100644 index 0000000..6bfbca8 --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/relational.hpp @@ -0,0 +1,67 @@ +#pragma once + +namespace neosycl::sycl { +#include +#include + +#define DEFINE_REL_FUNC(F, Exp) \ + inline std::uint32_t F(float p0, float p1) { \ + return std::uint32_t(Exp); \ + } \ + inline std::uint64_t F(double p0, double p1) { \ + return std::uint64_t(Exp); \ + } \ + template \ + vec F(const vec& p0, const vec& p1) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r[i] = F(p0[i], p1[i]); \ + return r; \ + } \ + template \ + vec F(const vec& p0, \ + const vec& p1) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r[i] = F(p0[i], p1[i]); \ + return r; \ + } + +#define DEFINE_TEST_FUNC(F, Exp) \ + inline std::uint32_t F(float p) { \ + return std::uint32_t(Exp); \ + } \ + inline std::uint64_t F(double p) { \ + return std::uint64_t(Exp); \ + } \ + template \ + vec F(const vec& p) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r[i] = F(p[i]); \ + return r; \ + } \ + template \ + vec F(const vec& p) { \ + vec r; \ + for (int i(0); i < D; i++) \ + r[i] = F(p[i]); \ + return r; \ + } + +DEFINE_REL_FUNC(isequal, p0 == p1); +DEFINE_REL_FUNC(isnoequal, p0 != p1); +DEFINE_REL_FUNC(isgreater, p0 > p1); +DEFINE_REL_FUNC(isgreaterequal, p0 >= p1); +DEFINE_REL_FUNC(isless, p0 < p1); +DEFINE_REL_FUNC(islessequal, p0 <= p1); +DEFINE_REL_FUNC(islessgreater, (p0 < p1) || (p0 > p1)); +DEFINE_TEST_FUNC(isfinite, std::isfinite(p)); +DEFINE_TEST_FUNC(isinf, std::isinf(p)); +DEFINE_TEST_FUNC(isnan, std::isnan(p)); +DEFINE_TEST_FUNC(isnormal, std::isnormal(p)); +DEFINE_REL_FUNC(isordered, isequal(p0, p0) && 
isequal(p1, p1)); +DEFINE_REL_FUNC(isunordered, isnan(p0) || isnan(p1)); +DEFINE_TEST_FUNC(signbit, std::signbit(p)); + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/builtin/vec.hpp b/include/neoSYCL/sycl/detail/builtin/vec.hpp new file mode 100644 index 0000000..0adeaf4 --- /dev/null +++ b/include/neoSYCL/sycl/detail/builtin/vec.hpp @@ -0,0 +1,264 @@ +#pragma once +#include + +namespace neosycl::sycl { + +enum class rounding_mode { automatic, rte, rtz, rtp, rtn }; + +struct elem { + static constexpr int x = 0; + static constexpr int y = 1; + static constexpr int z = 2; + static constexpr int w = 3; + static constexpr int r = 0; + static constexpr int g = 1; + static constexpr int b = 2; + static constexpr int a = 3; + static constexpr int s0 = 0; + static constexpr int s1 = 1; + static constexpr int s2 = 2; + static constexpr int s3 = 3; + static constexpr int s4 = 4; + static constexpr int s5 = 5; + static constexpr int s6 = 6; + static constexpr int s7 = 7; + static constexpr int s8 = 8; + static constexpr int s9 = 9; + static constexpr int sA = 10; + static constexpr int sB = 11; + static constexpr int sC = 12; + static constexpr int sD = 13; + static constexpr int sE = 14; + static constexpr int sF = 15; +}; + +//////////////////////////////////////////////////////////////////////////////// +template +class vec { + FRIEND_VEC_BINARY_OPERATOR(+); + FRIEND_VEC_BINARY_OPERATOR(-); + FRIEND_VEC_BINARY_OPERATOR(*); + FRIEND_VEC_BINARY_OPERATOR(/); + FRIEND_VEC_BINARY_OPERATOR(%); + FRIEND_VEC_ASSIGN_OPERATOR(+=); + FRIEND_VEC_ASSIGN_OPERATOR(-=); + FRIEND_VEC_ASSIGN_OPERATOR(*=); + FRIEND_VEC_ASSIGN_OPERATOR(/=); + FRIEND_VEC_ASSIGN_OPERATOR(%=); + FRIEND_VEC_INCDEC_OPERATOR(++); + FRIEND_VEC_INCDEC_OPERATOR(--); + FRIEND_VEC_BINARY_OPERATOR(&); + FRIEND_VEC_BINARY_OPERATOR(|); + FRIEND_VEC_BINARY_OPERATOR(^); + FRIEND_VEC_ASSIGN_OPERATOR(&=); + FRIEND_VEC_ASSIGN_OPERATOR(|=); + FRIEND_VEC_ASSIGN_OPERATOR(^=); + 
FRIEND_VEC_BOOLEAN_OPERATOR(&&); + FRIEND_VEC_BOOLEAN_OPERATOR(||); + FRIEND_VEC_BINARY_OPERATOR(<<); + FRIEND_VEC_BINARY_OPERATOR(>>); + FRIEND_VEC_ASSIGN_OPERATOR(<<=); + FRIEND_VEC_ASSIGN_OPERATOR(>>=); + FRIEND_VEC_BOOLEAN_OPERATOR(==); + FRIEND_VEC_BOOLEAN_OPERATOR(!=); + FRIEND_VEC_BOOLEAN_OPERATOR(<); + FRIEND_VEC_BOOLEAN_OPERATOR(>); + FRIEND_VEC_BOOLEAN_OPERATOR(<=); + FRIEND_VEC_BOOLEAN_OPERATOR(>=); + FRIEND_VEC_UNARY_OPERATOR(!); + FRIEND_VEC_UNARY_OPERATOR(~); + + static constexpr int size() { + return NumElems; + } + +public: + using element_type = DataT; + using __unspecified__ = void; + using vector_t = __unspecified__; + using __swizzled_vec__ = __unspecified__; + + vec() = default; + vec(const vec&) = default; + vec(vec&& Rhs) = default; + + explicit vec(const DataT& x) { + for (int i(0); i < size(); i++) + data_[i] = x; + } + + template > + vec(const DataT& x) { + for (int i(0); i < size(); i++) + data_[i] = x; + } + template > + vec(const DataT& x, const DataT& y) { + data_[0] = x; + data_[1] = y; + } + template > + vec(const DataT& x, const DataT& y, const DataT& z) { + data_[0] = x; + data_[1] = y; + data_[2] = z; + } + template > + vec(const DataT& x, const DataT& y, const DataT& z, const DataT& w) { + data_[0] = x; + data_[1] = y; + data_[2] = z; + data_[3] = w; + } + + template > + operator DataT() const { + return data_[0]; + } + + size_t get_count() const { + return size(); + } + size_t get_size() const { + return get_count() * sizeof(DataT); + } + + template + vec convert() const { + vec r; + // TODO: rounding mode for casting + for (int i(0); i < size(); i++) { + r[i] = convT(data_[i]); + } + return r; + } + + template + asT as() const; + + template + __swizzled_vec__ swizzle() const; + + // Available only when numElements <= 4. + // XYZW_ACCESS is: x, y, z, w, subject to numElements. + __swizzled_vec__ XYZW_ACCESS() const; + + // Available only numElements == 4. + // RGBA_ACCESS is: r, g, b, a. 
+ __swizzled_vec__ RGBA_ACCESS() const; + + // INDEX_ACCESS is: s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, + // sE, sF, subject to numElements. + __swizzled_vec__ INDEX_ACCESS() const; + +#ifdef SYCL_SIMPLE_SWIZZLES + // Available only when numElements <= 4. + // XYZW_SWIZZLE is all permutations with repetition of: x, y, z, w, subject to + // numElements. + __swizzled_vec__ XYZW_SWIZZLE() const; + + // Available only when numElements == 4. + // RGBA_SWIZZLE is all permutations with repetition of: r, g, b, a. + __swizzled_vec__ RGBA_SWIZZLE() const; +#endif // #ifdef SYCL_SIMPLE_SWIZZLES + + __swizzled_vec__ lo() const; + __swizzled_vec__ hi() const; + __swizzled_vec__ odd() const; + __swizzled_vec__ even() const; + + // load and store member functions + // template + // void load(size_t offset, multi_ptr ptr); + // template + // void store(size_t offset, multi_ptr ptr) const; + + vec& operator=(const vec& Rhs) = default; + + vec& operator=(const DataT& Rhs) { + for (int i = 0; i < size(); i++) + data_[i] = Rhs; + } + + vec& operator+=(const vec& Rhs) { + *this = *this + Rhs; + return *this; + }; + + element_type& x() { + return data_[0]; + } + template 1)>> + element_type& y() { + return data_[1]; + } + template 2)>> + element_type& z() { + return data_[2]; + } + template 3)>> + element_type& w() { + return data_[3]; + } + inline element_type& operator[](size_t i) { + return data_[i]; + } + inline const element_type operator[](size_t i) const { + return data_[i]; + } + +private: + element_type data_[NumElems]; +}; + +DEF_VEC_BINARY_OPERATOR(+); +DEF_VEC_BINARY_OPERATOR(-); +DEF_VEC_BINARY_OPERATOR(*); +DEF_VEC_BINARY_OPERATOR(/); +DEF_VEC_BINARY_OPERATOR(%); +DEF_VEC_ASSIGN_OPERATOR(+=); +DEF_VEC_ASSIGN_OPERATOR(-=); +DEF_VEC_ASSIGN_OPERATOR(*=); +DEF_VEC_ASSIGN_OPERATOR(/=); +DEF_VEC_ASSIGN_OPERATOR(%=); +DEF_VEC_INCDEC_OPERATOR(++); +DEF_VEC_INCDEC_OPERATOR(--); +DEF_VEC_BINARY_OPERATOR(&); +DEF_VEC_BINARY_OPERATOR(|); +DEF_VEC_BINARY_OPERATOR(^); 
+DEF_VEC_ASSIGN_OPERATOR(&=); +DEF_VEC_ASSIGN_OPERATOR(|=); +DEF_VEC_ASSIGN_OPERATOR(^=); +DEF_VEC_BOOLEAN_OPERATOR(&&); +DEF_VEC_BOOLEAN_OPERATOR(||); +DEF_VEC_BINARY_OPERATOR(<<); +DEF_VEC_BINARY_OPERATOR(>>); +DEF_VEC_ASSIGN_OPERATOR(<<=); +DEF_VEC_ASSIGN_OPERATOR(>>=); +DEF_VEC_BOOLEAN_OPERATOR(==); +DEF_VEC_BOOLEAN_OPERATOR(!=); +DEF_VEC_BOOLEAN_OPERATOR(<); +DEF_VEC_BOOLEAN_OPERATOR(>); +DEF_VEC_BOOLEAN_OPERATOR(<=); +DEF_VEC_BOOLEAN_OPERATOR(>=); +DEF_VEC_UNARY_OPERATOR(!); +DEF_VEC_UNARY_OPERATOR(~); + +using float4 = vec; +using float3 = vec; +using float2 = vec; +using double4 = vec; +using double3 = vec; +using double2 = vec; +using int4 = vec; +using int3 = vec; +using int2 = vec; +using uint4 = vec; +using uint3 = vec; +using uint2 = vec; +using ulonglong = unsigned long long; +using ulonglong4 = vec; +using ulonglong3 = vec; +using ulonglong2 = vec; + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/container/array_nd.hpp b/include/neoSYCL/sycl/detail/container/array_nd.hpp index 30e598c..32c39fb 100644 --- a/include/neoSYCL/sycl/detail/container/array_nd.hpp +++ b/include/neoSYCL/sycl/detail/container/array_nd.hpp @@ -1,94 +1,102 @@ #ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ #define NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ -#define DEFINE_ARRAY_ND_OP_CONST(op) \ - friend ArrayND operator op(const ArrayND &lhs, const ArrayND &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = (size_t)(lhs[i] op rhs[i]); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST(op) \ + friend ArrayND operator op(const ArrayND& lhs, \ + const ArrayND& rhs) { \ + ArrayND ret; \ + for (int i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(lhs[i] op rhs[i]); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T(op) \ - friend ArrayND operator op(const ArrayND &lhs, const size_t &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = 
(size_t)(lhs[i] op rhs); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T(op) \ + friend ArrayND operator op(const ArrayND& lhs, \ + const size_t& rhs) { \ + ArrayND ret; \ + for (int i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(lhs[i] op rhs); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_OP(op) \ - friend ArrayND &operator op(ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - lhs[i] = (size_t)(lhs[i] op rhs[i]); \ - } \ - return lhs; \ +#define DEFINE_ARRAY_ND_OP(op) \ + friend ArrayND& operator op(ArrayND& lhs, \ + const ArrayND& rhs) { \ + for (int i = 0; i < dimensions; ++i) { \ + lhs[i] = (size_t)(lhs[i] op rhs[i]); \ + } \ + return lhs; \ }; -#define DEFINE_ARRAY_ND_OP_SIZE_T(op) \ - friend ArrayND &operator op(ArrayND &lhs, const size_t &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - lhs[i] = (size_t)(lhs[i] op rhs); \ - } \ - return lhs; \ +#define DEFINE_ARRAY_ND_OP_SIZE_T(op) \ + friend ArrayND& operator op(ArrayND& lhs, \ + const size_t& rhs) { \ + for (int i = 0; i < dimensions; ++i) { \ + lhs[i] = (size_t)(lhs[i] op rhs); \ + } \ + return lhs; \ }; -#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T_LEFT(op) \ - friend ArrayND operator op(const size_t &lhs, ArrayND &rhs) { \ - ArrayND ret; \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - ret[i] = (size_t)(rhs[i] op lhs); \ - } \ - return ret; \ +#define DEFINE_ARRAY_ND_OP_CONST_SIZE_T_LEFT(op) \ + friend ArrayND operator op(const size_t& lhs, \ + ArrayND& rhs) { \ + ArrayND ret; \ + for (int i = 0; i < dimensions; ++i) { \ + ret[i] = (size_t)(rhs[i] op lhs); \ + } \ + return ret; \ }; -#define DEFINE_ARRAY_ND_COMMON_BY_VALUE_SEMANTICS() \ -friend bool operator==(const ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < dimensions; ++i) { \ - if(lhs[i]!=rhs[i]){ \ - return false; \ - } \ - } \ - return true; \ -} \ -friend bool operator!=(const ArrayND &lhs, const ArrayND &rhs) { \ - for(std::size_t i = 0; i < 
dimensions; ++i) { \ - if(lhs[i]!=rhs[i]){ \ - return true; \ - } \ - } \ - return false; \ -} +#define DEFINE_ARRAY_ND_COMMON_BY_VALUE_SEMANTICS() \ + friend bool operator==(const ArrayND& lhs, \ + const ArrayND& rhs) { \ + for (int i = 0; i < dimensions; ++i) { \ + if (lhs[i] != rhs[i]) { \ + return false; \ + } \ + } \ + return true; \ + } \ + friend bool operator!=(const ArrayND& lhs, \ + const ArrayND& rhs) { \ + for (int i = 0; i < dimensions; ++i) { \ + if (lhs[i] != rhs[i]) { \ + return true; \ + } \ + } \ + return false; \ + } namespace neosycl::sycl::detail::container { -template +template struct ArrayND { ArrayND() : data{} {} - template> + template > ArrayND(size_t dim0) : data{dim0} {} - template> + template > ArrayND(size_t dim0, size_t dim1) : data{dim0, dim1} {} - template> + template > ArrayND(size_t dim0, size_t dim1, size_t dim2) : data{dim0, dim1, dim2} {} size_t operator[](int dimension) const { return data[dimension]; } - size_t &operator[](int dimension) { + size_t& operator[](int dimension) { return data[dimension]; } size_t get_liner() const { if (dimensions == 2) { return data[0] * data[1]; - } else if (dimensions == 3) { + } + else if (dimensions == 3) { return data[0] * data[1] * data[2]; } return data[0]; @@ -178,6 +186,6 @@ struct ArrayND { size_t data[dimensions]; }; -} +} // namespace neosycl::sycl::detail::container -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_BUFFER_ARRAY_ND_HPP_ diff --git a/include/neoSYCL/sycl/detail/container/buffer_container.hpp b/include/neoSYCL/sycl/detail/container/buffer_container.hpp new file mode 100644 index 0000000..036a7da --- /dev/null +++ b/include/neoSYCL/sycl/detail/container/buffer_container.hpp @@ -0,0 +1,44 @@ +#pragma once +#include +#include "neoSYCL/sycl/access.hpp" + +namespace neosycl::sycl { + +namespace detail { +class program_data; +}; + +namespace detail::container { +struct device_ptr_type { + void* ptr; + access::mode mode; +}; + 
+template > +class BufferContainer : public DataContainerND { +public: + BufferContainer(const ArrayND& r) + : DataContainerND(r) {} + + BufferContainer(const ArrayND& r, AllocatorT alloc) + : DataContainerND(r, alloc) {} + + BufferContainer(T* data, const ArrayND& r) + : DataContainerND(data, r) {} + + BufferContainer(T* data, const ArrayND& r, AllocatorT alloc) + : DataContainerND(data, r, alloc) {} + + BufferContainer(const DataContainerD& rhs) + : DataContainerND(rhs) {} + + BufferContainer(DataContainerD&& rhs) + : DataContainerND(rhs) {} + + ~BufferContainer(); + + /* {program_data*, device_ptr} */ + std::map, device_ptr_type> map; +}; +} // namespace detail::container +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/container/data_container.hpp b/include/neoSYCL/sycl/detail/container/data_container.hpp index d5a936a..82104ef 100644 --- a/include/neoSYCL/sycl/detail/container/data_container.hpp +++ b/include/neoSYCL/sycl/detail/container/data_container.hpp @@ -13,7 +13,6 @@ class DataContainer { mutable std::shared_mutex mtx; public: - void lock_read() const { mtx.lock_shared(); } @@ -30,14 +29,13 @@ class DataContainer { mtx.unlock(); } - virtual void *get_raw_ptr() = 0; + virtual void* get_raw_ptr() = 0; virtual size_t get_size() = 0; virtual size_t get_count() = 0; - }; -} +} // namespace neosycl::sycl::detail::container -#endif //SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_HPP_ diff --git a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp index bd55e20..e485a05 100644 --- a/include/neoSYCL/sycl/detail/container/data_container_nd.hpp +++ b/include/neoSYCL/sycl/detail/container/data_container_nd.hpp @@ -2,6 +2,7 @@ #define SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_ND_HPP_ #include +#include #include "neoSYCL/sycl/detail/container/array_nd.hpp" namespace neosycl::sycl::detail::container { @@ -13,23 +14,25 
@@ namespace neosycl::sycl::detail::container { * @tparam dimensions Buffer dimensions * @tparam AllocatorT Buffer Allocator */ -template +template class DataContainerD : public DataContainer { public: - explicit DataContainerD(ArrayND r) : range(r) { + explicit DataContainerD(ArrayND r) : range(r) { allocate_ptr = shared_ptr_class(alloc.allocate(r.get_liner())); - ptr = allocate_ptr.get(); + ptr = allocate_ptr.get(); } - DataContainerD(ArrayND r, AllocatorT allocatorT) : alloc(allocatorT), range(r) { + DataContainerD(ArrayND r, AllocatorT allocatorT) + : alloc(allocatorT), range(r) { allocate_ptr = shared_ptr_class(alloc.allocate(r.get_liner())); - ptr = allocate_ptr.get(); + ptr = allocate_ptr.get(); } - DataContainerD(T *data, ArrayND r) : range(r), ptr(data), allocate_ptr(nullptr) {} + DataContainerD(T* data, ArrayND r) + : range(r), ptr(data), allocate_ptr(nullptr) {} - DataContainerD(T *data, ArrayND r, AllocatorT allocatorT) : - ptr(data), alloc(allocatorT), range(r), allocate_ptr(nullptr) {} + DataContainerD(T* data, ArrayND r, AllocatorT allocatorT) + : alloc(allocatorT), range(r), ptr(data), allocate_ptr(nullptr) {} size_t get_size() override { return sizeof(T) * range.get_liner(); @@ -39,23 +42,23 @@ class DataContainerD : public DataContainer { return range.get_liner(); } - T *get_ptr() const { + T* get_ptr() const { return ptr; } - void *get_raw_ptr() override { - return (void *) get_ptr(); + void* get_raw_ptr() override { + return (void*)get_ptr(); } - T *begin() const { + T* begin() const { return ptr; } - T *end() const { + T* end() const { return ptr + range.get_liner(); } - T &get(size_t x) const { + T& get(size_t x) const { return ptr[x]; } @@ -63,94 +66,93 @@ class DataContainerD : public DataContainer { return alloc; } - ArrayND get_range() const { + ArrayND get_range() const { return range; } - DataContainerD(const DataContainerD &rhs) : - range(rhs.range), - alloc(rhs.alloc) { + DataContainerD(const DataContainerD& rhs) + : alloc(rhs.alloc), 
range(rhs.range) { allocate_ptr = shared_ptr_class(alloc.allocate(range.get_liner())); - ptr = allocate_ptr.get(); - memcpy(ptr, rhs.ptr, sizeof(T) * range.get_liner()); + ptr = allocate_ptr.get(); + std::memcpy(ptr, rhs.ptr, sizeof(T) * range.get_liner()); } - DataContainerD(DataContainerD &&rhs) : - range(rhs.range), - alloc(rhs.alloc), - allocate_ptr(rhs.allocate_ptr), - ptr(rhs.ptr) {} + DataContainerD(DataContainerD&& rhs) + : alloc(rhs.alloc), range(rhs.range), ptr(rhs.ptr), + allocate_ptr(rhs.allocate_ptr) {} - DataContainerD &operator=(const DataContainerD &rhs) { - range = rhs.range; - alloc = rhs.alloc; - ptr = rhs.ptr; + DataContainerD& operator=(const DataContainerD& rhs) { + range = rhs.range; + alloc = rhs.alloc; + ptr = rhs.ptr; allocate_ptr = rhs.allocate_ptr; } - DataContainerD &operator=(DataContainerD &&rhs) { - range = rhs.range; - alloc = rhs.alloc; - ptr = rhs.ptr; + DataContainerD& operator=(DataContainerD&& rhs) { + range = rhs.range; + alloc = rhs.alloc; + ptr = rhs.ptr; allocate_ptr = rhs.allocate_ptr; } private: - ArrayND range; AllocatorT alloc; - T *ptr; - shared_ptr_class allocate_ptr; + ArrayND range; + T* ptr; + shared_ptr_class allocate_ptr; }; -template> +template > class DataContainerND {}; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: - DataContainerND(const ArrayND<1> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<1>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<1> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<1>& r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<1> &r) : - DataContainerD(data, r) {} + DataContainerND(T* data, const ArrayND<1>& r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<1> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T* data, const ArrayND<1>& r, AllocatorT 
alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD& rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD&& rhs) + : DataContainerD(rhs) {} - T &operator[](size_t x) const { + T& operator[](size_t x) const { return this->get_ptr()[x]; } }; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: - DataContainerND(const ArrayND<2> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<2>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<2> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<2>& r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<2> &r) : - DataContainerD(data, r) {} + DataContainerND(T* data, const ArrayND<2>& r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<2> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T* data, const ArrayND<2>& r, AllocatorT alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD& rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD&& rhs) + : DataContainerD(rhs) {} - T *operator[](size_t i) const { + T* operator[](size_t i) const { size_t x = this->get_range()[0]; return this->get_ptr() + (x * i); } @@ -163,49 +165,50 @@ class DataContainerND : public DataContainerD +template struct AccessProxyND {}; -template +template struct AccessProxyND { - AccessProxyND(const ArrayND<3> &r, T *ptr) : range(r), base_ptr(ptr) {} + AccessProxyND(const ArrayND<3>& r, T* ptr) : range(r), base_ptr(ptr) {} - T *operator[](size_t i) const { + T* operator[](size_t i) const { 
size_t y = range[1]; return base_ptr + i * y; } ArrayND<3> range; - T *base_ptr; + T* base_ptr; }; -template -class DataContainerND : public DataContainerD { +template +class DataContainerND + : public DataContainerD { public: - DataContainerND(const ArrayND<3> &r) : DataContainerD(r) {} + DataContainerND(const ArrayND<3>& r) : DataContainerD(r) {} - DataContainerND(const ArrayND<3> &r, AllocatorT alloc) : - DataContainerD(r, alloc) {} + DataContainerND(const ArrayND<3>& r, AllocatorT alloc) + : DataContainerD(r, alloc) {} - DataContainerND(T *data, const ArrayND<3> &r) : - DataContainerD(data, r) {} + DataContainerND(T* data, const ArrayND<3>& r) + : DataContainerD(data, r) {} - DataContainerND(T *data, const ArrayND<3> &r, AllocatorT alloc) : - DataContainerD(data, r, alloc) {} + DataContainerND(T* data, const ArrayND<3>& r, AllocatorT alloc) + : DataContainerD(data, r, alloc) {} - DataContainerND(const DataContainerD &rhs) : - DataContainerD(rhs) {} + DataContainerND(const DataContainerD& rhs) + : DataContainerD(rhs) {} - DataContainerND(DataContainerD &&rhs) : - DataContainerD(rhs) {} + DataContainerND(DataContainerD&& rhs) + : DataContainerD(rhs) {} AccessProxyND operator[](size_t i) const { - size_t x = this->get_range()[0]; - T *base_ptr = this->get_ptr() + i * x; + size_t x = this->get_range()[0]; + T* base_ptr = this->get_ptr() + i * x; return AccessProxyND(this->get_range(), base_ptr); } }; -} +} // namespace neosycl::sycl::detail::container -#endif //SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_ND_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_BUFFER_DATA_CONTAINER_ND_HPP_ diff --git a/include/neoSYCL/sycl/detail/context.hpp b/include/neoSYCL/sycl/detail/context.hpp new file mode 100644 index 0000000..6b024b0 --- /dev/null +++ b/include/neoSYCL/sycl/detail/context.hpp @@ -0,0 +1,67 @@ +#pragma once + +namespace neosycl::sycl { +namespace detail { + +template +typename info::param_traits::return_type +get_info(context_impl*); + 
+/////////////////////////////////////////////////////////////////////////////// +class context_impl { +public: + context_impl(context c) : ctx_(c) {} + ~context_impl() = default; + + context ctx_; + vector_class dev_; + + template + typename info::param_traits::return_type get_info() { + return get_info(this); + } +}; + +template <> +typename info::param_traits::return_type +get_info(context_impl* c) { + return c->ctx_.use_count_(); +} +template <> +typename info::param_traits::return_type +get_info(context_impl* c) { + return c->dev_[0].get_platform(); +} +template <> +typename info::param_traits::return_type +get_info(context_impl* c) { + return c->dev_; +} +} // namespace detail + +vector_class context::get_devices() const { + return impl_->dev_; +} + +void context::init(vector_class dev) { + impl_ = nullptr; + if (dev.size() == 0) { + DEBUG_INFO("context created with no device"); + return; + } + impl_ = + shared_ptr_class(new detail::context_impl(*this)); + for (auto& d : dev) + if (d.get_impl() == nullptr) + throw invalid_object_error("null device object found"); + impl_->dev_ = dev; +} + +template +typename info::param_traits::return_type +context::get_info() const { + return impl_->get_info(); +} + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/cpu/device.hpp b/include/neoSYCL/sycl/detail/cpu/device.hpp new file mode 100644 index 0000000..1bc4950 --- /dev/null +++ b/include/neoSYCL/sycl/detail/cpu/device.hpp @@ -0,0 +1,37 @@ +#pragma once + +namespace neosycl::sycl::detail { + +struct device_impl_cpu : public device_impl { + device_impl_cpu(device d) : device_impl(d) {} + + bool is_host() override { + return false; + } + bool is_cpu() override { + return true; + } + bool is_gpu() override { + return false; + } + bool is_accelerator() override { + return false; + } + const void* get_info(info::device param) const override { + switch (param) { + case info::device::device_type: + default: + PRINT_ERR("device::get_info(%d) not 
implemented", (int)param); + throw unimplemented(); + } + } + info::device_type type() override { + return info::device_type::cpu; + } + + virtual program_data* create_program(device d) override { + return new program_data_cpu(d); + } +}; + +} // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/cpu/kernel.hpp b/include/neoSYCL/sycl/detail/cpu/kernel.hpp new file mode 100644 index 0000000..2fce674 --- /dev/null +++ b/include/neoSYCL/sycl/detail/cpu/kernel.hpp @@ -0,0 +1,20 @@ +#pragma once + +namespace neosycl::sycl::detail { + +class kernel_data_cpu : public kernel_data { +public: + // void* dll_; + int (*func_)(); + void* capt_; + void* rnge_; + + kernel_data_cpu() : kernel_data() { + // dll_ = nullptr; + func_ = nullptr; + capt_ = nullptr; + rnge_ = nullptr; + } +}; + +} // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/cpu/program.hpp b/include/neoSYCL/sycl/detail/cpu/program.hpp new file mode 100644 index 0000000..e055dd0 --- /dev/null +++ b/include/neoSYCL/sycl/detail/cpu/program.hpp @@ -0,0 +1,109 @@ +#pragma once + +namespace neosycl::sycl::detail { + +class program_data_cpu : public program_data { + void* dll_; + +public: + friend class handler; + program_data_cpu(device d) : program_data(d), dll_(nullptr) {} + + ~program_data_cpu() { + // FIXME Here, we intentionally do not unload the kernel library because + // unloading libgomp.so causes a segfault. + // c.f. https://github.com/libocca/occa/issues/208 + // if (dll_) + // dlclose(dll_); + } + + bool open() override { + const char* env = getenv(ENV_KERNEL); + string_class fn(env ? 
env : DEFAULT_LIB); + dll_ = dlopen(fn.c_str(), RTLD_LAZY); + if (!dll_) { + DEBUG_INFO("dlopen failed: %s", dlerror()); + return false; + } + DEBUG_INFO("kernel lib loaded: %lx, %s", (size_t)dll_, fn.c_str()); + return true; + } + + bool is_open() override { + return dll_ != nullptr; + } + + void run(kernel k) override { + auto kdc = cast(k); + + try { + DEBUG_INFO("-- KENREL EXEC BEGIN --"); + [[maybe_unused]] int ret_val = kdc->func_(); + DEBUG_INFO("-- KERNEL EXEC END (ret=%d) --", ret_val); + } + catch (exception& e) { + PRINT_ERR("kernel execution failed: %s", e.what()); + throw; + } + } + + void* alloc_mem(void* p, size_t s) override { + void* ret = malloc(s); + return ret; + } + + void free_mem(void* p) override { + if (p) + free(p); + } + + void write_mem(void* d, void* h, size_t s) override { + std::memcpy(d, h, s); + } + + void read_mem(void* h, void* d, size_t s) override { + std::memcpy(h, d, s); + } + + void copy_mem(void* d1, void* d2, size_t s) override { + std::memcpy(d1, d2, s); + } + + void set_capture(kernel& k, void* p, size_t sz) override { + auto kdc = cast(k); + if (kdc->capt_) + std::memcpy(kdc->capt_, p, sz); + else + throw runtime_error("set_capture() failed"); + } + + void set_range(kernel& k, size_t r[6]) override { + auto kdc = cast(k); + if (kdc->rnge_) + std::memcpy(kdc->rnge_, r, sizeof(size_t) * 6); + else + throw runtime_error("set_range() failed"); + } + + kernel_data_ptr create_kernel_data(const char* s) override { + auto data = new kernel_data_cpu(); + // data->dll_ = dll_; + string_class capt = string_class("__") + s + "_obj__"; + string_class rnge = string_class("__") + s + "_range__"; + auto f = dlsym(dll_, s); + data->func_ = reinterpret_cast(f); + data->capt_ = dlsym(dll_, capt.c_str()); + + if (!data->func_ || !data->capt_) { + PRINT_ERR("dlsym() for %s failed: %s", s, dlerror()); + throw exception("create_kernel() failed"); + } + + data->rnge_ = dlsym(dll_, rnge.c_str()); // this call could fail + dlerror(); // reset 
dlerror + + kernel_data_ptr ret(data); + return ret; + } +}; +} // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/debug.hpp b/include/neoSYCL/sycl/detail/debug.hpp index 9c1dbe7..85e520a 100644 --- a/include/neoSYCL/sycl/detail/debug.hpp +++ b/include/neoSYCL/sycl/detail/debug.hpp @@ -1,15 +1,31 @@ #ifndef SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ #define SYCL_INCLUDE_CL_SYCL_DETAIL_DEBUG_HPP_ - +#include #ifdef DEBUG -#include -#define DEBUG_INFO(...) std::cout<< "[DEBUG] "<< printf(__VA_ARGS__) < + +namespace neosycl::sycl { + +namespace detail { + +struct device_impl { + device dev_; + device_impl(device d) : dev_(d) {} + virtual ~device_impl() = default; + + virtual bool is_host() = 0; + virtual bool is_cpu() = 0; + virtual bool is_gpu() = 0; + virtual bool is_accelerator() = 0; + virtual const void* get_info(info::device) const = 0; + virtual info::device_type type() = 0; + virtual program_data* create_program(device) = 0; + + template + typename info::param_traits::return_type get_info() { + typename info::param_traits::return_type ret = + *(typename info::param_traits::return_type*) + get_info(param); + return ret; + } +}; + +struct device_impl_host : public device_impl { + device_impl_host(device d) : device_impl(d) {} + + bool is_host() override { + return true; + } + bool is_cpu() override { + return false; + } + bool is_gpu() override { + return false; + } + bool is_accelerator() override { + return false; + } + + const void* get_info(info::device param) const override { + switch (param) { + case info::device::device_type: + default: + PRINT_ERR("device::get_info(%d) not implemented", (int)param); + throw unimplemented(); + } + } + info::device_type type() override { + return info::device_type::host; + } + virtual program_data* create_program(device d) override { + return new program_data_host(d); + } +}; + +using default_device_impl = device_impl_host; + +} // namespace detail + +device device::get_default_device() { +#if 
defined(USE_VE) && defined(BUILD_VE) + return platform::get_default_platform().get_devices()[2]; +#else +#if defined(USE_CPU) + return platform::get_default_platform().get_devices()[1]; +#else + return platform::get_default_platform().get_devices()[0]; +#endif +#endif +} + +bool device::is_host() const { + return impl_->is_host(); +} + +bool device::is_cpu() const { + return impl_->is_cpu(); +} + +bool device::is_gpu() const { + return impl_->is_gpu(); +} + +bool device::is_accelerator() const { + return impl_->is_accelerator(); +} + +info::device_type device::type() const { + return impl_->type(); +} + +detail::program_data* device::create_program() const { + return impl_->create_program(*this); +} + +template +typename info::param_traits::return_type +device::get_info() const { + return impl_->get_info(); +} + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/device_info.hpp b/include/neoSYCL/sycl/detail/device_info.hpp deleted file mode 100644 index 43d3753..0000000 --- a/include/neoSYCL/sycl/detail/device_info.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_INFO_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_INFO_HPP - -#include "neoSYCL/sycl/detail/device_type.hpp" - -namespace neosycl::sycl::detail { - -struct device_info { - virtual bool is_host() = 0; - - virtual bool is_cpu() = 0; - - virtual bool is_gpu() = 0; - - virtual bool is_accelerator() = 0; - - virtual SUPPORT_PLATFORM_TYPE type() = 0; - -}; - -struct cpu_device_info : public device_info { - bool is_host() override { - return true; - } - - bool is_cpu() override { - return true; - } - bool is_gpu() override { - return false; - } - bool is_accelerator() override { - return false; - } - - SUPPORT_PLATFORM_TYPE type() override { - return SUPPORT_PLATFORM_TYPE::CPU; - } -}; - -using default_device_info = cpu_device_info; - -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_INFO_HPP diff --git 
a/include/neoSYCL/sycl/detail/device_type.hpp b/include/neoSYCL/sycl/detail/device_type.hpp deleted file mode 100644 index fb02ec5..0000000 --- a/include/neoSYCL/sycl/detail/device_type.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_TYPE_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_TYPE_HPP - -namespace neosycl::sycl::detail { - -enum SUPPORT_PLATFORM_TYPE : int { - CPU, - SX_AURORA -}; - -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_DEVICE_TYPE_HPP diff --git a/include/neoSYCL/sycl/detail/handler.hpp b/include/neoSYCL/sycl/detail/handler.hpp new file mode 100644 index 0000000..722079a --- /dev/null +++ b/include/neoSYCL/sycl/detail/handler.hpp @@ -0,0 +1,62 @@ +#pragma once + +namespace neosycl::sycl::detail { + +inline void single_task(const std::function& func) { + func(); +} + +inline void parallel_for(range<1> r, const std::function)>& func, + id<1> offset, id<1>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + func(id<1>(x)); + } +} + +inline void parallel_for(range<1> r, const std::function)>& func, + id<1> offset, item<1>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + func(item<1>(r, {x}, {offset[0]})); + } +} + +inline void parallel_for(range<2> r, const std::function)>& func, + id<2> offset, id<2>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + func(id<2>(x, y)); + } + } +} + +inline void parallel_for(range<2> r, const std::function)>& func, + id<2> offset, item<2>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + func(item<2>(r, {x, y}, {offset[0], offset[1]})); + } + } +} + +inline void parallel_for(range<3> r, const std::function)>& func, + id<3> offset, id<3>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + for (size_t z = offset.get(2); z < r.get(2); z++) { + func(id<3>(x, y, z)); 
+ } + } + } +} + +inline void parallel_for(range<3> r, const std::function)>& func, + id<3> offset, item<3>*) { + for (size_t x = offset.get(0); x < r.get(0); x++) { + for (size_t y = offset.get(1); y < r.get(1); y++) { + for (size_t z = offset.get(2); z < r.get(2); z++) { + func(item<3>(r, {x, y, z}, {offset[0], offset[1], offset[2]})); + } + } + } +} +} // namespace neosycl::sycl::detail diff --git a/include/neoSYCL/sycl/detail/highlight_func.hpp b/include/neoSYCL/sycl/detail/highlight_func.hpp deleted file mode 100644 index 3376c16..0000000 --- a/include/neoSYCL/sycl/detail/highlight_func.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ -#define SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ - -namespace neosycl::sycl::detail { - -template -void HIGHLIGHT_KERNEL_PARALLEL(Kernel k, const range &num_work_items) { -} - -template -void HIGHLIGHT_KERNEL_SINGLE_TASK(Kernel k) {} - -} - -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_HIGHLIGHT_FUNC_HPP_ diff --git a/include/neoSYCL/sycl/detail/kernel.hpp b/include/neoSYCL/sycl/detail/kernel.hpp index cda8276..3e1ff5e 100644 --- a/include/neoSYCL/sycl/detail/kernel.hpp +++ b/include/neoSYCL/sycl/detail/kernel.hpp @@ -1,16 +1,72 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ -#define SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ - +#pragma once #include -#include "neoSYCL/sycl/detail/kernel_arg.hpp" +#include +#include + +namespace neosycl::sycl { + +namespace detail { + +struct kernel_data { + kernel_data() {} + virtual ~kernel_data() {} +}; -namespace neosycl::sycl::detail { +struct kernel_data_host : public kernel_data { + kernel_data_host() : kernel_data() {} + virtual ~kernel_data_host() {} +}; + +class kernel_impl { + // friend class program_impl; + friend class kernel; + +public: + kernel_impl(string_class n, program p) : name(n), prog(std::move(p)) {} + virtual ~kernel_impl() = default; -struct kernel { - vector_class args; string_class name; + program prog; +#ifndef 
DISABLE_MULTI_DEVICE_SUPPORT + using kernel_data_map = + std::map>; + kernel_data_map map; +#else + shared_ptr_class data; +#endif }; +} // namespace detail + +kernel::kernel() : impl_(nullptr) {} + +kernel::kernel(string_class name, program prog) : impl_(nullptr) { + impl_ = std::move(shared_ptr_class( + new detail::kernel_impl(name, prog))); +} + +bool kernel::is_host() const { + return get_program().is_host(); +} + +context kernel::get_context() const { + return get_program().get_context(); +} + +program kernel::get_program() const { + return impl_->prog; +} + +shared_ptr_class kernel::get_kernel_data(device d) { +#ifndef DISABLE_MULTI_DEVICE_SUPPORT + return impl_->map.at(d.type()); +#else + return impl_->data; +#endif +} + +const char* kernel::get_name() const { + return impl_->name.c_str(); } -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/kernel_arg.hpp b/include/neoSYCL/sycl/detail/kernel_arg.hpp deleted file mode 100644 index 693d57d..0000000 --- a/include/neoSYCL/sycl/detail/kernel_arg.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ -#define SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ - -#include "neoSYCL/sycl/detail/container/data_container.hpp" -#include "neoSYCL/sycl/access.hpp" - -namespace neosycl::sycl::detail { - -struct KernelArg { - KernelArg(std::shared_ptr arg, access::mode mode) : - container(std::move(arg)), mode(mode) {} - - void acquire_access() const { - switch (mode) { - case access::mode::read: { - container->lock_read(); - break; - } - default: { - container->lock_write(); - } - } - } - - void release_access() const { - switch (mode) { - case access::mode::read: { - container->unlock_read(); - break; - } - default: { - container->unlock_write(); - } - } - } - - std::shared_ptr container; - access::mode mode; -}; - -} - -#endif //SYCL_INCLUDE_CL_SYCL_KERNEL_KERNEL_ARG_HPP_ diff --git a/include/neoSYCL/sycl/detail/platform.hpp 
b/include/neoSYCL/sycl/detail/platform.hpp new file mode 100644 index 0000000..77194a3 --- /dev/null +++ b/include/neoSYCL/sycl/detail/platform.hpp @@ -0,0 +1,129 @@ +#pragma once + +#include "neoSYCL/sycl/detail/cpu/kernel.hpp" +#include "neoSYCL/sycl/detail/cpu/program.hpp" +#include "neoSYCL/sycl/detail/cpu/device.hpp" + +namespace neosycl::sycl { + +namespace detail { +class platform_impl { +public: + platform_impl(device d) { + dev_.push_back(d); + } + + virtual bool is_host() = 0; + + virtual bool has_extension(const string_class& extension) = 0; + + vector_class list_devices() { + return dev_; + } + + vector_class dev_; +}; + +class host_platform_impl : public platform_impl { +public: + host_platform_impl(device d) : platform_impl(d) {} + + bool is_host() override { + return true; + } + bool has_extension(const string_class& extension) override { + return false; + } +}; + +using default_platform_impl = host_platform_impl; + +} // namespace detail + +vector_class platform::REGISTERED = { + platform::register_all_devices()}; + +platform platform::get_default_platform() { + return platform::REGISTERED[0]; +} + +vector_class platform::get_platforms() { + return platform::REGISTERED; +} + +vector_class platform::get_devices(info::device_type t) const { + vector_class ret; + for (const device& dev : impl_->list_devices()) { + if (t == info::device_type::all || t == dev.type()) + ret.push_back(dev); + } + return ret; +} + +platform::platform(const device_selector& deviceSelector) { + device d = deviceSelector.select_device(); + impl_ = shared_ptr_class( + new detail::host_platform_impl(d)); + for (auto& dev : impl_->dev_) + dev.set_platform(*this); +} + +platform::platform(detail::platform_impl* impl) : impl_(impl) { + /* do nothing if impl == null */ + if (impl) { + for (auto& dev : impl_->dev_) + dev.set_platform(*this); + } +} + +bool platform::has_extension(const string_class& extension) const { + return impl_->has_extension(extension); +} + +bool 
platform::is_host() const { + return impl_->is_host(); +} + +/* this class accesses private members of platform and device */ +class initial_platform_builder { +public: + template + device get() { + device dummy(nullptr); + device d(new T(dummy)); + d.get_impl()->dev_ = d; + return d; + } + + template + void add(platform& p) { + device d = get(); + d.set_platform(p); + p.impl_->dev_.push_back(d); + } + + platform create() { + device d = get(); + platform p(new detail::default_platform_impl(d)); + d.set_platform(p); + return p; + } +}; + +#ifndef BUILD_VE +// See also extensions/nec/ve_selector.hpp +platform platform::register_all_devices() { + // create a platform with the default device at first + initial_platform_builder builder; + platform p(builder.create()); + // register all available devices +#ifdef USE_CPU + builder.add(p); +#else + builder.add(p); +#endif + return p; +} +#endif + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/platform_info.hpp b/include/neoSYCL/sycl/detail/platform_info.hpp deleted file mode 100644 index 263dcbd..0000000 --- a/include/neoSYCL/sycl/detail/platform_info.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_PLATFORM_INFO_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_PLATFORM_INFO_HPP - -#include "neoSYCL/sycl/detail/device_info.hpp" -#include "neoSYCL/sycl/detail/device_type.hpp" - -namespace neosycl::sycl::detail { - -struct platform_info { - - virtual bool is_host() = 0; - - virtual SUPPORT_PLATFORM_TYPE type() = 0; - - virtual bool has_extension(const string_class &extension) = 0; - - virtual vector_class> list_devices() = 0; -}; - -struct cpu_platform_info : public platform_info { - - bool is_host() override { - return true; - } - - SUPPORT_PLATFORM_TYPE type() override { - return SUPPORT_PLATFORM_TYPE::CPU; - } - - bool has_extension(const string_class &extension) override { - return false; - } - - vector_class> list_devices() override { - return {shared_ptr_class(new 
cpu_device_info())}; - } -}; - -using default_platform_info = cpu_platform_info; - -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_PLATFORM_INFO_HPP diff --git a/include/neoSYCL/sycl/detail/program.hpp b/include/neoSYCL/sycl/detail/program.hpp new file mode 100644 index 0000000..1ef55b8 --- /dev/null +++ b/include/neoSYCL/sycl/detail/program.hpp @@ -0,0 +1,332 @@ +#pragma once +#include +#include +#include +#include "neoSYCL/sycl/detail/container/data_container.hpp" +#include "neoSYCL/sycl/detail/container/data_container_nd.hpp" +#include "neoSYCL/sycl/detail/container/buffer_container.hpp" + +namespace neosycl::sycl { + +class handler; + +namespace detail::container { +class CopybackProxy; +} + +namespace detail { +const char* DEFAULT_LIB = "./kernel.so"; +const char* ENV_KERNEL = "NEOSYCL_KERNEL"; + +/* convet a type to its name in string */ +inline string_class get_kernel_name_from_class(const std::type_info& ti) { + int status; + char* pc = abi::__cxa_demangle(ti.name(), 0, 0, &status); + string_class in(pc); + free(pc); + std::regex re("([^\\s\\:]+)\\*$"); + std::smatch result; + if (std::regex_search(in, result, re)) { + in = result[1].str(); + } + return in; +} + +/////////////////////////////////////////////////////////////////////////////// +// internal program state on each device and interface to handle the device +class program_data { +public: + friend class neosycl::sycl::handler; + friend class container::CopybackProxy; + + using kernel_data_ptr = shared_ptr_class; + // don't copy the instance + program_data(const program_data& rhs) = delete; + program_data(program_data&& rhs) = delete; + program_data& operator=(const program_data& rhs) = delete; + program_data& operator=(program_data&& rhs) = delete; + + program_data(device d) : dev_(d) {} + virtual ~program_data() {} + + device& get_device() { + return dev_; + } + + virtual bool open() = 0; + virtual bool is_open() = 0; + virtual void run(kernel k) = 0; + virtual kernel_data_ptr 
create_kernel_data(const char* s) = 0; + +protected: + device dev_; + + virtual void* alloc_mem(void*, size_t) = 0; + virtual void free_mem(void*) = 0; + virtual void write_mem(void*, void*, size_t) = 0; + virtual void read_mem(void*, void*, size_t) = 0; + virtual void copy_mem(void*, void*, size_t) = 0; + virtual void set_capture(kernel&, void* p, size_t sz) = 0; + virtual void set_range(kernel&, size_t r[6]) = 0; + + template + void set_range(kernel& k, range r) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; + } + set_range(k, sz); + } + + template + void set_range(kernel& k, range r, id i) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + for (size_t idx(0); idx != dim; idx++) { + sz[idx] = r[idx]; + } + for (size_t idx(3); idx != dim + 3; idx++) { + sz[idx] = i[idx]; + } + set_range(k, sz); + } + + template + auto cast(kernel k) { + auto kd = k.get_kernel_data(get_device()); + auto kdc = std::dynamic_pointer_cast(kd); + if (kdc.use_count() == 0) { + PRINT_ERR("invalid kernel_data: %lx", (size_t)kd.get()); + throw runtime_error("program_data::cast() failed"); + } + return kdc; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// implementation of program class interface +class program_impl { + friend class sycl::program; + using kernel_data_ptr = program_data::kernel_data_ptr; + using kernel_hash_map = std::map; + using kernel_name_map = std::map; + + program_impl(program p, context c, vector_class d) + : prog_(p), ctx_(c) { + init(d); + } + + void init(const vector_class& d) { + if (d.size() == 0) + throw runtime_error("contenxt with no available device"); + if (d.size() > 2) { + DEBUG_INFO("one context with host and multiple devices (experimental)"); + } + for (const auto& dev : d) { + program_data* p = dev.create_program(); + if (p && p->open()) + data_.push_back(shared_ptr_class(p)); + else if (p) + delete p; + } + if (data_.size() == 0) + throw runtime_error("program 
initialization failed"); + } + +public: + ~program_impl() = default; + + bool is_host() const { + // true if only the host is available. + return data_.size() < 2; + } + + bool has_kernel(string_class name) { + return name_map_.count(name); + } + + template + bool has_kernel() { + const std::type_info& tinfo = typeid(KernelName*); + if (hash_map_.count(tinfo.hash_code())) + return true; + + string_class name = get_kernel_name_from_class(tinfo); + return has_kernel(name); + } + + kernel get_kernel(string_class name) { + if (name_map_.count(name)) { + DEBUG_INFO("kernel found: %s", name.c_str()); + return name_map_.at(name); + } + + kernel k(name.c_str(), prog_); +#ifndef DISABLE_MULTI_DEVICE_SUPPORT + for (auto& d : data_) { + auto kd = d->create_kernel_data(name.c_str()); + k.get_impl()->map.insert(std::make_pair(d->get_device().type(), kd)); + } +#else + auto kd = data_[0]->create_kernel_data(name.c_str()); + k.get_impl()->data = std::move(kd); +#endif + name_map_.insert(std::make_pair(name, k)); + return k; + } + + template + kernel get_kernel() { + const std::type_info& tinfo = typeid(KernelName*); + if (hash_map_.count(tinfo.hash_code())) { + DEBUG_INFO("kernel found: hash=%lu", tinfo.hash_code()); + return hash_map_.at(tinfo.hash_code()); + } + + string_class name = get_kernel_name_from_class(tinfo); + DEBUG_INFO("kernel class: %s", name.c_str()); + + kernel k = std::move(get_kernel(name)); + hash_map_.insert(std::make_pair(tinfo.hash_code(), k)); + return k; + } + + context get_context() const { + return ctx_; + } + +private: + program_state state_; + program prog_; + context ctx_; + vector_class> data_; + kernel_hash_map hash_map_; // quickly accessible + kernel_name_map name_map_; // all exiting kernels +}; + +class program_data_host : public program_data { +public: + program_data_host(device d) : program_data(d) {} + + ~program_data_host() {} + + bool open() override { + return true; + } + + bool is_open() override { + return true; + } + + void run(kernel 
k) override {} + + void* alloc_mem(void* p, size_t s) override { + return nullptr; + } + + void free_mem(void* p) override {} + + void write_mem(void* d, void* h, size_t s) override {} + + void read_mem(void* h, void* d, size_t s) override {} + + void copy_mem(void* h, void* d, size_t s) override {} + + void set_capture(kernel& k, void* p, size_t sz) override {} + + void set_range(kernel& k, size_t r[6]) override {} + + kernel_data_ptr create_kernel_data(const char* s) override { + auto data = new kernel_data_host(); + kernel_data_ptr ret(data); + return ret; + } +}; +} // namespace detail + +void program::init_(context c, vector_class d) { + impl_ = shared_ptr_class( + new detail::program_impl(*this, c, d)); +} + +bool program::is_host() const { + return impl_->is_host(); +} + +template +bool program::has_kernel() const { + return impl_->has_kernel(); +} + +bool program::has_kernel(string_class kernelName) const { + return impl_->has_kernel(kernelName); +} + +kernel program::get_kernel(string_class kernelName) const { + return impl_->get_kernel(kernelName); +} + +template +kernel program::get_kernel() const { + return impl_->get_kernel(); +} + +context program::get_context() const { + return impl_->get_context(); +} + +vector_class program::get_devices() const { + vector_class ret; + for (const auto& dat : impl_->data_) + ret.push_back(dat->get_device()); + return ret; +} + +program_state program::get_state() const { + return impl_->state_; +} + +shared_ptr_class program::get_data(device dev) const { + for (auto& d : impl_->data_) + if (dev == d->get_device()) + return d; + return nullptr; +} + +namespace detail::container { + +/////////////////////////////////////////////////////////////////////////////// +// Copyback device data to host memory at buffer object destruction +class CopybackProxy { + // buffer must be copied back even after a queue/program is destoryed. + // so shared_ptr is kept in the map to do it. 
+public: + template > + void operator()(BufferContainer& buf, + shared_ptr_class p, bool free_mem_flag) { + DEBUG_INFO("memory copy back : device type = %d", + (int)p->get_device().type()); + + if (p->get_device().is_host()) + return; + if (buf.map.count(p)) { + auto [devp, mode] = buf.map.at(p); + if (mode != access::mode::read) + p->read_mem(buf.get_raw_ptr(), devp, buf.get_size()); + // buf.map.erase(p); + if (free_mem_flag) + p->free_mem(devp); + } + } +}; + +template +BufferContainer::~BufferContainer() { + CopybackProxy proxy; + DEBUG_INFO("buffer destruction: %lu device(s)", map.size()); + for (auto& d : map) { + proxy(*this, d.first, true); + } +} +} // namespace detail::container + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/detail/registered_platforms.hpp b/include/neoSYCL/sycl/detail/registered_platforms.hpp deleted file mode 100644 index 820db95..0000000 --- a/include/neoSYCL/sycl/detail/registered_platforms.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H - -#include "neoSYCL/sycl/detail/platform_info.hpp" -#include "neoSYCL/sycl/detail/task_handler.hpp" -#include - -#ifdef BUILD_VE -#include "neoSYCL/extensions/nec/ve_task_handler.hpp" -#endif - -namespace neosycl::sycl::detail { - -static shared_ptr_class REGISTERED_PLATFORMS[] = { - shared_ptr_class(new default_platform_info()) -}; - -static std::map> PLATFORM_HANDLER_MAP = { - {SUPPORT_PLATFORM_TYPE::CPU, shared_ptr_class(new task_handler_cpu())}, - -#ifdef DBUILD_VE - {SUPPORT_PLATFORM_TYPE::SX_AURORA, shared_ptr_class(new task_handler_cpu())} -#endif -}; - -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_REGISTERED_PLATFORMS_H diff --git a/include/neoSYCL/sycl/detail/runtime/acc.hpp b/include/neoSYCL/sycl/detail/runtime/acc.hpp new file mode 100644 index 0000000..4479eb9 --- /dev/null +++ b/include/neoSYCL/sycl/detail/runtime/acc.hpp @@ -0,0 +1,51 @@ 
+#pragma once + +namespace neosycl::sycl::rt { + +using namespace neosycl::sycl; + +template +struct acc_ { + T* p_; + size_t r_[6]; + + inline T& operator[](size_t i) { + return p_[i]; + } + inline T& operator[](const id<3> i) { + return p_[(i[0] * r_[1] + i[1]) * r_[2] + i[2]]; + } + inline T& operator[](const id<2> i) { + return p_[i[0] * r_[1] + i[1]]; + } + inline T& operator[](const id<1> i) { + return p_[i[0]]; + } + + template + T& operator[](item& i) { + return this->operator[](i.get_id()); + } + + T* get() const { + return p_; + } +}; + +inline item<1> id2item(size_t s[6], size_t i0) { + return item<1>(range<1>{s[0]}, detail::container::ArrayND<1>{i0}, + detail::container::ArrayND<1>{s[3]}); +} + +inline item<2> id2item(size_t s[6], size_t i0, size_t i1) { + return item<2>(range<2>{s[0], s[1]}, detail::container::ArrayND<2>{i0, i1}, + detail::container::ArrayND<2>{s[3], s[4]}); +} + +inline item<3> id2item(size_t s[6], size_t i0, size_t i1, size_t i2) { + return item<3>(range<3>{s[0], s[1], s[2]}, + detail::container::ArrayND<3>{i0, i1, i2}, + detail::container::ArrayND<3>{s[3], s[4], s[5]}); +} + +} // namespace neosycl::sycl::rt diff --git a/include/neoSYCL/sycl/detail/task.hpp b/include/neoSYCL/sycl/detail/task.hpp index caf223e..24354a5 100644 --- a/include/neoSYCL/sycl/detail/task.hpp +++ b/include/neoSYCL/sycl/detail/task.hpp @@ -24,9 +24,8 @@ struct task { std::unique_lock ul{lock}; cond.wait(ul, [&] { return !waiting; }); } - }; -} +} // namespace neosycl::sycl::detail -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_TASK_HPP +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_DETAIL_TASK_HPP diff --git a/include/neoSYCL/sycl/detail/task_counter.hpp b/include/neoSYCL/sycl/detail/task_counter.hpp index 4d469b9..51d666e 100644 --- a/include/neoSYCL/sycl/detail/task_counter.hpp +++ b/include/neoSYCL/sycl/detail/task_counter.hpp @@ -16,7 +16,6 @@ class task_counter { std::mutex lock; public: - task_counter() : counter(0) {} void incr() { @@ -35,9 +34,8 @@ class 
task_counter { std::unique_lock ul{lock}; cond.wait(ul, [&] { return counter == 0; }); } - }; -} +} // namespace neosycl::sycl::detail -#endif //SYCL_INCLUDE_CL_SYCL_QUEUE_QUEUE_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_QUEUE_QUEUE_HPP_ diff --git a/include/neoSYCL/sycl/detail/task_handler.hpp b/include/neoSYCL/sycl/detail/task_handler.hpp deleted file mode 100644 index c05be04..0000000 --- a/include/neoSYCL/sycl/detail/task_handler.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP - -#include "neoSYCL/sycl/detail/kernel.hpp" - -namespace neosycl::sycl::detail { - -struct task_handler { - - virtual void single_task(shared_ptr_class k, const std::function &func) = 0; - - virtual void parallel_for_1d(shared_ptr_class k, - range<1> r, - const std::function)> &func, - id<1> offset) = 0; - - virtual void parallel_for_2d(shared_ptr_class k, - range<2> r, - const std::function)> &func, - id<2> offset) = 0; - - virtual void parallel_for_3d(shared_ptr_class k, - range<3> r, - const std::function)> &func, - id<3> offset) = 0; - - virtual SUPPORT_PLATFORM_TYPE type() = 0; - -}; - -struct task_handler_cpu : public task_handler { - - void single_task(shared_ptr_class k, const std::function &func) override { - for (const KernelArg &arg:k->args) { - arg.acquire_access(); - } - DEBUG_INFO("execute single %d kernel, name: %s\n", type(), k->name.c_str()); - func(); - for (const KernelArg &arg:k->args) { - arg.release_access(); - } - } - - void parallel_for_1d(shared_ptr_class k, - range<1> r, - const std::function)> &func, - id<1> offset) override { - for (const KernelArg &arg:k->args) { - arg.acquire_access(); - } - for (size_t x = offset.get(0); x < r.get(0); x++) { - func(id<1>(x)); - } - for (const KernelArg &arg:k->args) { - arg.release_access(); - } - }; - - void parallel_for_2d(shared_ptr_class k, - range<2> r, - const std::function)> &func, - id<2> offset) override { - for (const 
KernelArg &arg:k->args) { - arg.acquire_access(); - } - for (size_t x = offset.get(0); x < r.get(0); x++) { - for (size_t y = offset.get(1); y < r.get(1); y++) { - func(id<2>(x, y)); - } - } - for (const KernelArg &arg:k->args) { - arg.release_access(); - } - }; - - void parallel_for_3d(shared_ptr_class k, - range<3> r, - const std::function)> &func, - id<3> offset) override { - for (const KernelArg &arg:k->args) { - arg.acquire_access(); - } - for (size_t x = offset.get(0); x < r.get(0); x++) { - for (size_t y = offset.get(1); y < r.get(1); y++) { - for (size_t z = offset.get(2); z < r.get(2); z++) { - func(id<3>(x, y, z)); - } - } - } - for (const KernelArg &arg:k->args) { - arg.release_access(); - } - }; - - SUPPORT_PLATFORM_TYPE type() override { - return CPU; - } - -}; - -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_TASK_HANDLER_HPP diff --git a/include/neoSYCL/sycl/device.hpp b/include/neoSYCL/sycl/device.hpp index 9197dfe..ae17629 100644 --- a/include/neoSYCL/sycl/device.hpp +++ b/include/neoSYCL/sycl/device.hpp @@ -1,92 +1,120 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ - -#include "neoSYCL/sycl/exception.hpp" -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/platform.hpp" -#include "neoSYCL/sycl/info/device_type.hpp" -#include "neoSYCL/sycl/info/device.hpp" -#include "neoSYCL/sycl/info/param_traits.hpp" -#include "neoSYCL/sycl/detail/device_info.hpp" +#pragma once namespace neosycl::sycl { +namespace detail { +class device_impl; +class program_data; +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////// class device { friend class handler; + friend class context; + friend class platform; + friend class initial_platform_builder; public: - device() : device_info(new detail::default_device_info()) {}; - - device(const shared_ptr_class &info) : device_info(info) {} + device(const device& rhs) = default; + device(device&& rhs) = default; + ~device() = 
default; -// explicit device(cl_device_id deviceId); + device& operator=(const device& rhs) = default; + device& operator=(device&& rhs) = default; - explicit device(const device_selector &deviceSelector) {}; + friend bool operator==(const device& lhs, const device& rhs); + friend bool operator!=(const device& lhs, const device& rhs); + friend bool operator<(const device& lhs, const device& rhs); - /* -- common interface members -- */ -// cl_device_id get() const; - - bool is_host() const { - return device_info->is_host(); + explicit device() : impl_(nullptr), plt_() { + *this = device::get_default_device(); } - bool is_cpu() const { - return device_info->is_cpu(); + explicit device(cl_device_id deviceId) { + throw feature_not_supported("OpenCL interop not supported."); } - bool is_gpu() const { - return device_info->is_gpu(); + explicit device(const device_selector& deviceSelector) + : impl_(nullptr), plt_() { + *this = deviceSelector.select_device(); } - bool is_accelerator() const { - return device_info->is_accelerator(); + /* -- common interface members -- */ + // cl_device_id get() const; + + bool is_host() const; + + bool is_cpu() const; + + bool is_gpu() const; + + bool is_accelerator() const; + + platform get_platform() const { + return plt_; } - platform get_platform() const; - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type + get_info() const; - bool has_extension(const string_class &extension) const; + bool has_extension(const string_class& extension) const; -// Available only when prop == info::partition_property::partition_equally - template - vector_class create_sub_devices(size_t nbSubDev) const; + // Available only when prop == info::partition_property::partition_equally + template + vector_class create_sub_devices(size_t nbSubDev) const { + throw unimplemented(); + } -// Available only when prop == info::partition_property::partition_by_counts - template - vector_class 
create_sub_devices(const vector_class &counts) const; + // Available only when prop == info::partition_property::partition_by_counts + template + vector_class + create_sub_devices(const vector_class& counts) const { + throw unimplemented(); + } -// Available only when prop == info::partition_property::partition_by_affinity_domain -// template -// vector_class create_sub_devices(info::affinity_domain affinityDomain) const; +#if 0 + // Available only when prop == + // info::partition_property::partition_by_affinity_domain + template + vector_class + create_sub_devices(info::affinity_domain affinityDomain) const; +#endif + + static device get_default_device(); + + // INTERNAL USE ONLY + info::device_type type() const; + detail::program_data* create_program() const; + shared_ptr_class get_impl() const { + return impl_; + } - static vector_class get_devices( - info::device_type deviceType = info::device_type::all) { - vector_class ret; - for (const platform &info: platform::get_platforms()) { - for (const device &dev:info.get_devices()) { - ret.push_back(dev); - } +private: + // INTERNAL USE ONLY + void set_platform(platform p) { + plt_ = p; + } + explicit device(detail::device_impl* impl, platform* p = nullptr) + : impl_(impl), plt_() { + if (impl == nullptr) { + DEBUG_INFO("empty device created"); } - return ret; + else if (p != nullptr) + plt_ = *p; } -private: - shared_ptr_class device_info; + shared_ptr_class impl_; + platform plt_; }; -device device_selector::select_device() const { - return device(); +bool operator==(const device& lhs, const device& rhs) { + return (lhs.impl_ == rhs.impl_); } - -vector_class platform::get_devices(info::device_type) const { - vector_class ret; - for (shared_ptr_class info:platform_info->list_devices()) { - ret.push_back(device(info)); - } - return ret; +bool operator!=(const device& lhs, const device& rhs) { + return !(lhs == rhs); } - +bool operator<(const device& lhs, const device& rhs) { + return (lhs.impl_ < rhs.impl_); } - 
-#endif //CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/device_selector.hpp b/include/neoSYCL/sycl/device_selector.hpp index 1a7bc1e..5009ceb 100644 --- a/include/neoSYCL/sycl/device_selector.hpp +++ b/include/neoSYCL/sycl/device_selector.hpp @@ -1,8 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ - -#include "neoSYCL/sycl/device.hpp" -#include "neoSYCL/sycl/detail/platform_info.hpp" +#pragma once namespace neosycl::sycl { @@ -10,26 +6,17 @@ class device; class device_selector { public: - device_selector() = default; - device_selector(const device_selector &rhs) = default; + device_selector(const device_selector& rhs) = default; - device_selector &operator=(const device_selector &rhs) = default; + device_selector& operator=(const device_selector& rhs) = default; virtual ~device_selector() = default; - // defined in device.hpp - device select_device() const; - - virtual int operator()(const device &device) const = 0; - - virtual shared_ptr_class get_platform_info() const { - return shared_ptr_class(new detail::cpu_platform_info); - } + virtual device select_device() const = 0; + virtual int operator()(const device& device) const = 0; }; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_DEVICE_SELECTOR_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp index b8ac52f..6cfd724 100644 --- a/include/neoSYCL/sycl/device_selector/cpu_selector.hpp +++ b/include/neoSYCL/sycl/device_selector/cpu_selector.hpp @@ -1,27 +1,32 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ -#define SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ +#pragma once +#include "neoSYCL/sycl/detail/cpu/kernel.hpp" +#include "neoSYCL/sycl/detail/cpu/program.hpp" +#include "neoSYCL/sycl/detail/cpu/device.hpp" namespace neosycl::sycl { class cpu_selector : public 
device_selector { public: - int operator()(const device &dev) const override { + virtual int operator()(const device& dev) const override { if (dev.is_cpu()) { - return true; + return 1; } - return false; + return 0; } - device select_device() const { - return device(shared_ptr_class(new detail::cpu_device_info())); + device select_device() const override { + auto pf = platform::get_default_platform(); + auto devices = pf.get_devices(info::device_type::cpu); + for (auto& i : devices) { + if (this->operator()(i) > 0) + return i; + } + throw sycl::runtime_error("no available device found"); } - }; using default_selector = cpu_selector; -using host_selector = cpu_selector; - -} +using host_selector = cpu_selector; -#endif //SYCL_INCLUDE_CL_SYCL_DEVICE_SELECTOR_CPU_SELECTOR_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/event.hpp b/include/neoSYCL/sycl/event.hpp index 7a78f5c..dd12809 100644 --- a/include/neoSYCL/sycl/event.hpp +++ b/include/neoSYCL/sycl/event.hpp @@ -1,8 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_EVENT_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_EVENT_HPP_ - -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/exception.hpp" +#pragma once namespace neosycl::sycl { @@ -12,7 +8,7 @@ class event { ~event() {} - vector_class get_wait_list() { + vector_class get_wait_list() { throw unimplemented(); } @@ -20,7 +16,7 @@ class event { throw unimplemented(); } - static void wait(const vector_class &eventList) { + static void wait(const vector_class& eventList) { throw unimplemented(); } @@ -28,12 +24,9 @@ class event { throw unimplemented(); } - static void wait_and_throw(const vector_class &eventList) { + static void wait_and_throw(const vector_class& eventList) { throw unimplemented(); } - }; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_EVENT_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/exception.hpp b/include/neoSYCL/sycl/exception.hpp index c0d1f3f..e0725c2 100644 --- a/include/neoSYCL/sycl/exception.hpp +++ 
b/include/neoSYCL/sycl/exception.hpp @@ -1,8 +1,5 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_EXCEPTION_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_EXCEPTION_HPP_ - +#pragma once #include -#include namespace neosycl::sycl { @@ -10,10 +7,9 @@ class context; class exception : public std::exception { public: + exception(const string_class& message) : error_msg(message) {} - exception(const string_class &message) : error_msg(message) {} - - const char *what() const noexcept override { + const char* what() const noexcept override { return error_msg.c_str(); } @@ -23,11 +19,10 @@ class exception : public std::exception { private: string_class error_msg; - }; using exception_list = vector_class; -using async_handler = function_class; +using async_handler = function_class; class runtime_error : public exception { using exception::exception; @@ -77,6 +72,4 @@ class unimplemented : public exception { unimplemented() : exception("not implemented") {} }; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_EXCEPTION_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/extensions.hpp b/include/neoSYCL/sycl/extensions.hpp index 842d32b..7735196 100644 --- a/include/neoSYCL/sycl/extensions.hpp +++ b/include/neoSYCL/sycl/extensions.hpp @@ -1,10 +1,3 @@ -#ifndef NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ -#define NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ +#pragma once -namespace neosycl::sycl { - - - -} - -#endif //NEOSYCL_INCLUDE_CL_SYCL_EXTENSIONS_HPP_ +namespace neosycl::sycl {} diff --git a/include/neoSYCL/sycl/handler.hpp b/include/neoSYCL/sycl/handler.hpp index 0b8f008..8db89c6 100644 --- a/include/neoSYCL/sycl/handler.hpp +++ b/include/neoSYCL/sycl/handler.hpp @@ -1,147 +1,320 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_HANDLER_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_HANDLER_HPP_ - -#include -#include - -#include "neoSYCL/sycl/nd_range.hpp" -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/event.hpp" -#include "neoSYCL/sycl/id.hpp" -#include "neoSYCL/sycl/allocator.hpp" -#include 
"neoSYCL/sycl/detail/highlight_func.hpp" -#include "neoSYCL/sycl/detail/kernel.hpp" -#include "neoSYCL/sycl/detail/task.hpp" -#include "neoSYCL/sycl/detail/task_handler.hpp" -#include "neoSYCL/sycl/detail/registered_platforms.hpp" +#pragma once #include "neoSYCL/sycl/detail/task_counter.hpp" +#include "neoSYCL/sycl/detail/handler.hpp" +#include "neoSYCL/sycl/detail/accessor_data.hpp" namespace neosycl::sycl { -namespace detail { - -/** - * get func name by create ptr, rely on compiler reflect implementation - * - * @tparam KernelName class - * @return str - */ -template -string_class get_kernel_name_from_class() { - KernelName *p; - string_class in = typeid(p).name(); - return in; -} +/////////////////////////////////////////////////////////////////////////////// +class handler { + using counter_type = shared_ptr_class; + using handler_type = shared_ptr_class; -} + friend class queue; + + explicit handler(device d, program p, counter_type counter) + : dev_(std::move(d)), prog_(std::move(p)), cntr_(std::move(counter)), + hndl_(prog_.get_data(dev_)) {} + + ~handler() { + for (size_t i(0); i < acc_.size(); i++) { + // DEBUG_INFO("memory unlock: %p", acc_[i].data.get()); + if (acc_[i].mode != access::mode::read) + acc_[i].data->unlock_write(); + else + acc_[i].data->unlock_read(); + } + } -class handler { public: - explicit handler(device dev, - shared_ptr_class counter) : - bind_device(std::move(dev)), - counter(std::move(counter)), - kernel(new detail::kernel()) {} + template + void run(range kernelRange, id kernelOffset, + KernelType kernelFunc) { + kernel k = prog_.get_kernel(); + hndl_->set_range(k, kernelRange, kernelOffset); - template + kernelFunc(k); + // DEBUG_INFO("kernel %s %p %lu", k.get_name(), ptr, sz); + // hndl_->set_capture(k, ptr, sz); + hndl_->run(k); + } + + template + void run(range kernelRange, KernelType kernelFunc) { + kernel k = prog_.get_kernel(); + hndl_->set_range(k, kernelRange); + + kernelFunc(k); + // DEBUG_INFO("kernel %s %p %lu", 
k.get_name(), ptr, sz); + // hndl_->set_capture(k, ptr, sz); + hndl_->run(k); + } + + template + void run(KernelType kernelFunc) { + kernel k = prog_.get_kernel(); + + kernelFunc(k); + // DEBUG_INFO("kernel %s %p %lu", k.get_name(), ptr, sz); + // hndl_->set_capture(k, ptr, sz); + hndl_->run(k); + } + + template void single_task(KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_task([f = kernelFunc, h = handler, k = kernel]() { - h->single_task(k, f); - }); - } - - template - void submit_parallel_for(shared_ptr_class handler, - range<3> numWorkItems, - id<3> offset, - KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_3d(k, n, f, o); - }); - } - - template - void submit_parallel_for(shared_ptr_class handler, - range<2> numWorkItems, - id<2> offset, - KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_2d(k, n, f, o); - }); - } - - template - void submit_parallel_for(shared_ptr_class handler, - range<1> numWorkItems, - id<1> offset, - KernelType kernelFunc) { - submit_task([f = kernelFunc, n = numWorkItems, o = offset, h = std::move(handler), k = kernel]() { - h->parallel_for_1d(k, n, f, o); - }); - } - - template + if (!dev_.is_host()) + return; + detail::single_task(kernelFunc); + } + + template void parallel_for(range numWorkItems, KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_parallel_for(handler, numWorkItems, id(), kernelFunc); + if (!dev_.is_host()) + return; + detail::parallel_for(numWorkItems, kernelFunc, id{}, + get_index_type(kernelFunc)); } - template - void parallel_for(range numWorkItems, id workItemOffset, 
KernelType kernelFunc) { - kernel->name = detail::get_kernel_name_from_class(); - shared_ptr_class handler = detail::PLATFORM_HANDLER_MAP[bind_device.device_info->type()]; - submit_parallel_for(handler, numWorkItems, workItemOffset, kernelFunc); + template + void parallel_for(range numWorkItems, + id workItemOffset, KernelType kernelFunc) { + if (!dev_.is_host()) + return; + detail::parallel_for(numWorkItems, kernelFunc, workItemOffset, + get_index_type(kernelFunc)); } -// template -// void parallel_for(nd_range executionRange, KernelType kernelFunc); + // experimental impl just for translation testing + template + void parallel_for(nd_range executionRange, + KernelType kernelFunc) { + PRINT_ERR("Not implemented yet.") + throw unimplemented(); + } - template - void parallel_for_work_group(range numWorkGroups, WorkgroupFunctionType kernelFunc); + template + void parallel_for_work_group(range numWorkGroups, + WorkgroupFunctionType kernelFunc) { + PRINT_ERR("Not implemented yet.") + throw unimplemented(); + } - template + template void parallel_for_work_group(range numWorkGroups, range workGroupSize, - WorkgroupFunctionType kernelFunc); + WorkgroupFunctionType kernelFunc) { + PRINT_ERR("Not implemented yet.") + throw unimplemented(); + ; + } //----- OpenCL interoperability interface // - template - void set_arg(int argIndex, T &&arg) { - kernel->args.insert(argIndex, arg); + template + void set_arg(int argIndex, T&& arg); + + template + void set_args(Ts&&... args); + + template + void require(sycl::accessor acc) { + /* allocate and lock a device buffer for this placeholder */ + this->alloc_mem_(acc); } - template - void set_args(Ts &&... 
args) { - kernel->args.push_back(args...); + //------ Explicit memory operation APIs // + template + void copy(accessor src, + shared_ptr_class dst) { + DEBUG_INFO("handler::copy(%p, %p, %lu)", get_ptr_(src), dst.get(), + src.data->get_size()); + hndl_->read_mem((void*)dst.get(), get_ptr_(src), src.data->get_size()); } - shared_ptr_class get_kernel() { - return kernel; + template + void copy(shared_ptr_class src, + accessor dst) { + DEBUG_INFO("handler::copy(%p, %p, %lu)", src.get(), get_ptr_(dst), + dst.data->get_size()); + hndl_->write_mem(get_ptr_(dst), (void*)src.get(), dst.data->get_size()); } -private: - shared_ptr_class kernel; - device bind_device; - shared_ptr_class counter; - - template - void submit_task(Func func) { - counter->incr(); - std::thread t([f = func, c = counter]() { - try { - f(); - } catch (...) { - throw; + template + void copy(accessor src, + T_dst* dst) { + DEBUG_INFO("handler::copy(%p, %p, %lu)", get_ptr_(src), dst, + src.data->get_size()); + hndl_->read_mem((void*)dst, get_ptr_(src), src.data->get_size()); + } + + template + void copy(const T_src* src, + accessor dst) { + DEBUG_INFO("handler::copy(%p, %p, %lu)", src, get_ptr_(dst), + dst.data->get_size()); + hndl_->write_mem(get_ptr_(dst), (void*)src, dst.data->get_size()); + } + + template + void + copy(accessor src, + accessor dst) { + DEBUG_INFO("handler::copy(%p, %p, %lu)", get_ptr_(src), get_ptr_(dst), + dst.data->get_size()); + hndl_->copy_mem(get_ptr_(dst), get_ptr_(src), dst.data->get_size()); + } + + template + void update_host(accessor acc) { + DEBUG_INFO("handler::update_host(%p)", get_ptr_(acc)); + hndl_->read_mem(acc.data->get_raw_ptr(), (void*)get_ptr_(acc), + acc.data->get_size()); + } + + template + void fill(accessor dst, const T& src) { + DEBUG_INFO("handler::fill(%p)", get_ptr_(dst)); + shared_ptr_class tmp(new T[dst.get_count()]); + std::fill_n((T*)tmp.get(), dst.get_count(), src); + hndl_->write_mem((void*)get_ptr_(dst), (void*)tmp.get(), + dst.data->get_size()); + 
} + + //----- INTERNAL USE ONLY --------------- // + template + T* get_ptr_(sycl::accessor acc) { + using container_type = typename accessor::container_type; + shared_ptr_class buf = acc.data; + + DEBUG_INFO("is_host %d daddr %p haddr %p %d %p", (int)dev_.is_host(), + acc.device_ptr, buf->get_raw_ptr(), (int)buf->map.count(hndl_), + buf->map.at(hndl_).ptr); + if (acc.device_ptr) + return (T*)acc.device_ptr; + else if (dev_.is_host()) + return (T*)buf->get_raw_ptr(); + else if (buf->map.count(hndl_)) + return (T*)buf->map.at(hndl_).ptr; + throw runtime_error("invalid BufferContainer object"); + } + + template + void* alloc_mem_(accessor& acc) { + using container_type = typename accessor::container_type; + using device_ptr_type = detail::container::device_ptr_type; + shared_ptr_class buf = acc.data; + + // allocate a device memory chunk if not allocated yet + int count = 0; + if (dev_.is_host() == false) { + count = buf->map.count(hndl_); + if (count == 1) + // multiple accessors would use the same buffer + acc.device_ptr = buf->map.at(hndl_).ptr; + else if (count == 0) { + void* dp = hndl_->alloc_mem(buf->get_raw_ptr(), buf->get_size()); + device_ptr_type cdp = {dp, m}; + buf->map.insert(std::make_pair(hndl_, cdp)); + acc.device_ptr = dp; + + if (m != access::mode::discard_write && + m != access::mode::discard_read_write) { + DEBUG_INFO("memory copy (h2d): " + "daddr=%p, haddr=%p, size=%lu", + acc.device_ptr, buf->get_raw_ptr(), buf->get_size()); + hndl_->write_mem(acc.device_ptr, buf->get_raw_ptr(), buf->get_size()); + } } - c->decr(); - }); - t.detach(); + else + throw runtime_error("invalid BufferContainer object"); + // DEBUG_INFO("device ptr %p", acc.device_ptr); + } + + // check if the buffer is already locked + size_t i = 0; + if (count > 0) { + // (count > 0) does not mean it is already locked by this handler. + // so let's check if it has been locked so far. 
+ for (i = 0; i < acc_.size(); i++) + if (acc_[i].data.get() == acc.data.get()) { + if (acc_[i].mode == access::mode::read && m != access::mode::read) { + // not sure if this is a thread-safe way... + acc_[i].data->unlock_read(); + acc_[i].data->lock_write(); + acc_[i].mode = m; // this is used for unlocking + buf->map.at(hndl_).mode = m; // this is used for buffer copy back + } + break; + } + } + // lock the buffer because it's not locked by this handler + if (i == acc_.size()) { + // DEBUG_INFO("memory lock: %p", acc.data.get()); + acc_.push_back(detail::accessor_data(acc.data, m)); + if (m != access::mode::read) + acc.data->lock_write(); + else + acc.data->lock_read(); + } + + if (dev_.is_host()) + return acc.data->get_raw_ptr(); + return acc.device_ptr; + } + + template + neosycl::sycl::rt::acc_ map_(sycl::accessor acc) { + size_t sz[6] = {1, 1, 1, 0, 0, 0}; + std::memcpy(sz + 0, &acc.get_range()[0], sizeof(size_t) * D); + std::memcpy(sz + 3, &acc.get_offset()[0], sizeof(size_t) * D); + return neosycl::sycl::rt::acc_{ + get_ptr_(acc), {sz[0], sz[1], sz[2], sz[3], sz[4], sz[5]}}; + } + + void set_capture_(kernel k, void* p, size_t sz) { + hndl_->set_capture(k, p, sz); + } + +private: + device dev_; + program prog_; + counter_type cntr_; + handler_type hndl_; + vector_class acc_; + + template + auto index_type_ptr(retT (F::*)(argT)) { + return (argT*)nullptr; + } + + template + auto index_type_ptr_(retT (F::*)(argT) const) { + return (argT*)nullptr; + } + + template + auto get_index_type(const KernelType&) { + return index_type_ptr_(&KernelType::operator()); } }; +// called by accessor::accessor() +template +void accessor::alloc_(handler& h) { + h.alloc_mem_(*this); } - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_HANDLER_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/id.hpp b/include/neoSYCL/sycl/id.hpp index c622412..68991b1 100644 --- a/include/neoSYCL/sycl/id.hpp +++ b/include/neoSYCL/sycl/id.hpp @@ -1,35 +1,29 @@ -#ifndef 
SYCL_INCLUDE_CL_SYCL_ID_HPP_ -#define SYCL_INCLUDE_CL_SYCL_ID_HPP_ - -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/range.hpp" -#include "neoSYCL/sycl/item.hpp" -#include "neoSYCL/sycl/op_def.hpp" +#pragma once #include "neoSYCL/sycl/detail/container/array_nd.hpp" namespace neosycl::sycl { -template +template struct id { id() = default; - template> + template > id(size_t dim0) : data{dim0} {} - template> + template > id(size_t dim0, size_t dim1) : data{dim0, dim1} {} - template> + template > id(size_t dim0, size_t dim1, size_t dim2) : data{dim0, dim1, dim2} {} - id(const range &range) { - for (size_t i = 0; i < dimensions; i++) { + id(const range& range) { + for (int i = 0; i < dimensions; i++) { this->data[i] = range.get(i); } } - id(const item &item) { - for (size_t i = 0; i < dimensions; i++) { + id(const item& item) { + for (int i = 0; i < dimensions; i++) { this->data[i] = item[i]; } } @@ -38,7 +32,7 @@ struct id { return data[dimension]; } - size_t &operator[](int dimension) { + size_t& operator[](int dimension) { return data[dimension]; } @@ -46,7 +40,6 @@ struct id { return data[dimension]; } - // Where OP is: +, -, *, /, %, <<, >>, &, |, ˆ, &&, ||, <, >, <=, >=. 
DEFINE_OP_CONST(id, +); DEFINE_OP_CONST(id, -); @@ -130,6 +123,4 @@ struct id { detail::container::ArrayND data; }; -} - -#endif //SYCL_INCLUDE_CL_SYCL_ID_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/info/context.hpp b/include/neoSYCL/sycl/info/context.hpp index 51a5768..e7ba6cd 100644 --- a/include/neoSYCL/sycl/info/context.hpp +++ b/include/neoSYCL/sycl/info/context.hpp @@ -1,16 +1,15 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_INFO_CONTEXT_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_INFO_CONTEXT_HPP_ +#pragma once namespace neosycl::sycl::info { using gl_context_interop = bool; -enum class context : int { - reference_count, - num_devices, - gl_interop -}; +// enum class context : int { reference_count, num_devices, gl_interop }; +enum class context : int { reference_count, platform, devices }; -} +DEF_INFO_TYPE_TRAIT(info::context, info::context::reference_count, long); +DEF_INFO_TYPE_TRAIT(info::context, info::context::platform, sycl::platform); +DEF_INFO_TYPE_TRAIT(info::context, info::context::devices, + vector_class); -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_CONTEXT_HPP_ +} // namespace neosycl::sycl::info diff --git a/include/neoSYCL/sycl/info/device.hpp b/include/neoSYCL/sycl/info/device.hpp index 6aa686a..8ba6b51 100644 --- a/include/neoSYCL/sycl/info/device.hpp +++ b/include/neoSYCL/sycl/info/device.hpp @@ -1,5 +1,4 @@ -#ifndef NEOSYCL_INCLUDE_CL_SYCL_INFO_DEVICE_HPP_ -#define NEOSYCL_INCLUDE_CL_SYCL_INFO_DEVICE_HPP_ +#pragma once namespace neosycl::sycl::info { @@ -87,6 +86,4 @@ enum class partition_property : int { partition_by_affinity_domain }; -} - -#endif //NEOSYCL_INCLUDE_CL_SYCL_INFO_DEVICE_HPP_ +} // namespace neosycl::sycl::info diff --git a/include/neoSYCL/sycl/info/device_type.hpp b/include/neoSYCL/sycl/info/device_type.hpp index 380dada..150ff05 100644 --- a/include/neoSYCL/sycl/info/device_type.hpp +++ b/include/neoSYCL/sycl/info/device_type.hpp @@ -1,5 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_INFO_DEVICE_TYPE_HPP_ -#define 
CUSTOM_SYCL_INCLUDE_SYCL_INFO_DEVICE_TYPE_HPP_ +#pragma once namespace neosycl::sycl::info { @@ -13,7 +12,4 @@ enum class device_type : unsigned int { all }; - } - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_DEVICE_TYPE_HPP_ diff --git a/include/neoSYCL/sycl/info/kernel.hpp b/include/neoSYCL/sycl/info/kernel.hpp new file mode 100644 index 0000000..abd2ba7 --- /dev/null +++ b/include/neoSYCL/sycl/info/kernel.hpp @@ -0,0 +1,24 @@ +#pragma once + +namespace neosycl::sycl { + +namespace info { + +enum class kernel : int { + function_name, + num_args, + context, + program, + reference_count, + attributes +}; + +enum kernel_work_group : int { + global_work_size, + work_group_size, + compile_work_group_size, + preferred_work_group_size_multiple, + private_mem_size +}; +} // namespace info +} // namespace neosycl::sycl \ No newline at end of file diff --git a/include/neoSYCL/sycl/info/param_traits.hpp b/include/neoSYCL/sycl/info/param_traits.hpp index 1a8b756..8b30e99 100644 --- a/include/neoSYCL/sycl/info/param_traits.hpp +++ b/include/neoSYCL/sycl/info/param_traits.hpp @@ -1,26 +1,32 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_INFO_PARAM_TRAITS_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_INFO_PARAM_TRAITS_HPP_ - -#include "neoSYCL/sycl/info/platform.hpp" -#include "neoSYCL/sycl/info/device.hpp" +#pragma once namespace neosycl::sycl::info { -template -class param_traits { - using return_type = T; +template +struct param_traits { + using return_type = int; + static const return_type value; }; -template -class param_traits { - using type = string_class; -}; +#define DEF_STR_INFO_TRAIT(Info, Param, Value) \ + template <> \ + struct param_traits { \ + using return_type = string_class; \ + static constexpr char value[] = Value; \ + }; \ + // const char param_traits::value[] = Value; -template -class param_traits { - using type = string_class; -}; +#define DEF_INFO_TRAIT(Info, Param, Type, Value) \ + template <> \ + struct param_traits { \ + using return_type = Type; \ + inline static 
constexpr Type value = Type(Value); \ + }; -} +#define DEF_INFO_TYPE_TRAIT(Info, Param, Type) \ + template <> \ + struct param_traits { \ + using return_type = Type; \ + }; -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_PARAM_TRAITS_HPP_ +} // namespace neosycl::sycl::info diff --git a/include/neoSYCL/sycl/info/platform.hpp b/include/neoSYCL/sycl/info/platform.hpp index a29d143..16c7acf 100644 --- a/include/neoSYCL/sycl/info/platform.hpp +++ b/include/neoSYCL/sycl/info/platform.hpp @@ -1,5 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_INFO_PLATFORM_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_INFO_PLATFORM_HPP_ +#pragma once namespace neosycl::sycl::info { @@ -21,10 +20,10 @@ enum class platform : unsigned int { */ version, /** Returns the name of the platform (as a string_class) - */ + */ name, /** Returns the string provided by the platform vendor (as a string_class) - */ + */ vendor, /** Returns a space-separated list of extension names supported by the platform (as a string_class) @@ -32,6 +31,11 @@ enum class platform : unsigned int { extensions }; -} +DEF_STR_INFO_TRAIT(info::platform, info::platform::profile, "FULL PROFILE"); +DEF_STR_INFO_TRAIT(info::platform, info::platform::version, "0.1.0"); +DEF_STR_INFO_TRAIT(info::platform, info::platform::name, "neoSYCL"); +DEF_STR_INFO_TRAIT(info::platform, info::platform::vendor, + "Tohoku University HPC Lab"); +DEF_STR_INFO_TRAIT(info::platform, info::platform::extensions, ""); -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_PLATFORM_HPP_ +} // namespace neosycl::sycl::info diff --git a/include/neoSYCL/sycl/info/program.hpp b/include/neoSYCL/sycl/info/program.hpp index 8c43946..9ed5488 100644 --- a/include/neoSYCL/sycl/info/program.hpp +++ b/include/neoSYCL/sycl/info/program.hpp @@ -5,14 +5,9 @@ namespace neosycl::sycl { namespace info { -enum class program : int { - reference_count, - context, - devices -}; +enum class program : int { reference_count, context, devices }; - -} } +} // namespace neosycl::sycl -#endif 
//NEOSYCL_INCLUDE_NEOSYCL_SYCL_INFO_PROGRAM_HPP_ +#endif // NEOSYCL_INCLUDE_NEOSYCL_SYCL_INFO_PROGRAM_HPP_ diff --git a/include/neoSYCL/sycl/info/queue.hpp b/include/neoSYCL/sycl/info/queue.hpp index 5848e2e..6523d04 100644 --- a/include/neoSYCL/sycl/info/queue.hpp +++ b/include/neoSYCL/sycl/info/queue.hpp @@ -5,14 +5,8 @@ namespace neosycl::sycl::info { using queue_profiling = bool; +enum class queue : int { context, device, reference_count, properties }; -enum class queue : int { - context, - device, - reference_count, - properties -}; +} // namespace neosycl::sycl::info -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_INFO_QUEUE_HPP_ +#endif // CUSTOM_SYCL_INCLUDE_SYCL_INFO_QUEUE_HPP_ diff --git a/include/neoSYCL/sycl/item.hpp b/include/neoSYCL/sycl/item.hpp index 4f81f92..a5d0e08 100644 --- a/include/neoSYCL/sycl/item.hpp +++ b/include/neoSYCL/sycl/item.hpp @@ -1,67 +1,74 @@ -#ifndef SYCL_INCLUDE_CL_SYCL_ITEM_HPP_ -#define SYCL_INCLUDE_CL_SYCL_ITEM_HPP_ - +#pragma once #include "neoSYCL/sycl/detail/container/array_nd.hpp" -#define DEFINE_ITEM_BY_VALUE_OP(cls) \ -friend bool operator ==(const cls &lhs, const cls &rhs) { \ - return (lhs.data == rhs.data) && (lhs.max_range == rhs.max_range) && (lhs.offset == rhs.offset); \ -} \ -friend bool operator !=(const cls &lhs, const cls &rhs) { \ - return (lhs.data != rhs.data) || (lhs.max_range != rhs.max_range) || (lhs.offset != rhs.offset); \ -} +#define DEFINE_ITEM_BY_VALUE_OP(cls) \ + friend bool operator==(const cls& lhs, \ + const cls& rhs) { \ + return (lhs.data == rhs.data) && (lhs.max_range == rhs.max_range) && \ + (lhs.offset == rhs.offset); \ + } \ + friend bool operator!=(const cls& lhs, \ + const cls& rhs) { \ + return (lhs.data != rhs.data) || (lhs.max_range != rhs.max_range) || \ + (lhs.offset != rhs.offset); \ + } namespace neosycl::sycl { -template +template struct id; -template +template struct item { item() = delete; - template> - item(const range &r, - const detail::container::ArrayND &index, - const 
detail::container::ArrayND &offsets) - : max_range(r), data{index}, offset{offsets} { - } + template + item(const range& r, + const detail::container::ArrayND& index, + const detail::container::ArrayND& offsets) + : max_range(r), data{index}, offset{offsets} {} id get_id() const { - return id(this); + return id(*this); }; size_t get_id(int dimension) const { - return this->index[dimension]; + return this->data[dimension]; }; size_t operator[](int dimension) const { - return this->index[dimension]; + return this->data[dimension]; } - range get_range() const { + range get_range() const { return this->max_range; } // only available if with_offset is true - template> + template > id get_offset() const { return this->get_id(); } // only available if with_offset is false - template> + template > operator item() const { return item(this->max_range, this->data, this->data); } DEFINE_ITEM_BY_VALUE_OP(item); - range max_range; - detail::container::ArrayND offset; - detail::container::ArrayND data; + range max_range; + detail::container::ArrayND data; + detail::container::ArrayND offset; }; -} +// experimental impl just for testing +template +struct nd_item : public neosycl::sycl::item { + size_t get_global_linear_id() { + return 0; + } +}; -#endif //SYCL_INCLUDE_CL_SYCL_ITEM_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/kernel.hpp b/include/neoSYCL/sycl/kernel.hpp new file mode 100644 index 0000000..2da2a88 --- /dev/null +++ b/include/neoSYCL/sycl/kernel.hpp @@ -0,0 +1,56 @@ +#pragma once +#include "neoSYCL/sycl/access.hpp" +#include "neoSYCL/sycl/info/kernel.hpp" + +namespace neosycl::sycl { + +namespace detail { +class kernel_impl; +class kernel_data; +} // namespace detail + +class program; +class handler; + +/////////////////////////////////////////////////////////////////////////////// +class kernel { + friend class handler; + + explicit kernel(); + +public: + kernel(string_class name, program prog); + kernel(const kernel& k) : impl_(k.impl_) {} + 
~kernel() = default; + + /* -- common interface members -- */ + cl_kernel get() const { + return 0; + } + + bool is_host() const; + + context get_context() const; + + program get_program() const; + + template + typename info::param_traits::return_type + get_info() const; + + template + typename info::param_traits::return_type + get_work_group_info(const device& dev) const; + + // INTERNAL USE ONLY: for debugging + const char* get_name() const; + shared_ptr_class get_kernel_data(device); + shared_ptr_class get_impl() { + return impl_; + } + +private: + shared_ptr_class impl_; +}; + +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/nd_range.hpp b/include/neoSYCL/sycl/nd_range.hpp index aaa621b..6886636 100644 --- a/include/neoSYCL/sycl/nd_range.hpp +++ b/include/neoSYCL/sycl/nd_range.hpp @@ -1,34 +1,30 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_ND_RANGE_HPP_ -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_ND_RANGE_HPP_ - -#include "neoSYCL/sycl/id.hpp" +#pragma once namespace neosycl::sycl { -template +template struct nd_range { - nd_range(range globalSize, range localSize, id offset = id()) : - global_range(globalSize), local_range(localSize), offset(offset) {} + nd_range(range globalSize, range localSize, + id offset = id()) + : global_range(globalSize), local_range(localSize), offset(offset) {} - range get_global_range() const { + range get_global_range() const { return global_range; } - range get_local_range() const { + range get_local_range() const { return local_range; } - range get_group_range(); + range get_group_range(); - id get_offset() const { + id get_offset() const { return offset; } - range global_range; - range local_range; - id offset; + range global_range; + range local_range; + id offset; }; -} - -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_ND_RANGE_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/op_def.hpp b/include/neoSYCL/sycl/op_def.hpp index e1cdd45..1685365 100644 --- a/include/neoSYCL/sycl/op_def.hpp +++ 
b/include/neoSYCL/sycl/op_def.hpp @@ -1,44 +1,49 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP - -#define DEFINE_OP_CONST(cls, op) \ - friend cls operator op(const cls &lhs, const cls &rhs) { \ - cls ret; \ - ret.data = lhs.data op rhs.data; \ - return ret; \ +#pragma once + +#define DEFINE_OP_CONST(cls, op) \ + friend cls operator op(const cls& lhs, \ + const cls& rhs) { \ + cls ret; \ + ret.data = lhs.data op rhs.data; \ + return ret; \ }; -#define DEFINE_OP_CONST_SIZE_T(cls, op) \ - friend cls operator op(const cls &lhs, const size_t &rhs) { \ - cls ret; \ - ret.data = lhs.data op rhs; \ - return ret; \ +#define DEFINE_OP_CONST_SIZE_T(cls, op) \ + friend cls operator op(const cls& lhs, \ + const size_t& rhs) { \ + cls ret; \ + ret.data = lhs.data op rhs; \ + return ret; \ }; -#define DEFINE_OP(cls, op) \ - friend cls &operator op(cls &lhs, const cls &rhs) { \ - lhs.data = lhs.data op rhs.data; \ - return lhs; \ +#define DEFINE_OP(cls, op) \ + friend cls& operator op(cls& lhs, \ + const cls& rhs) { \ + lhs.data = lhs.data op rhs.data; \ + return lhs; \ }; -#define DEFINE_OP_SIZE_T(cls, op) \ - friend cls &operator op(cls &lhs, const size_t &rhs) { \ - lhs.data = lhs.data op rhs; \ - return lhs; \ +#define DEFINE_OP_SIZE_T(cls, op) \ + friend cls& operator op(cls& lhs, \ + const size_t& rhs) { \ + lhs.data = lhs.data op rhs; \ + return lhs; \ }; -#define DEFINE_OP_CONST_SIZE_T_LEFT(cls, op) \ - friend cls operator op(const size_t &lhs, cls &rhs) { \ - cls ret; \ - ret.data = rhs.data op lhs; \ - return ret; \ +#define DEFINE_OP_CONST_SIZE_T_LEFT(cls, op) \ + friend cls operator op(const size_t& lhs, \ + cls& rhs) { \ + cls ret; \ + ret.data = rhs.data op lhs; \ + return ret; \ }; -#define DEFINE_COMMON_BY_VALUE_OP(cls, op) \ -friend bool operator op(const cls &lhs, const cls &rhs) { \ - return lhs.data op rhs.data; \ -} - -#define DEFINE_COMMON_BY_VALUE_SEMANTICS(cls) DEFINE_COMMON_BY_VALUE_OP(cls,==); 
DEFINE_COMMON_BY_VALUE_OP(cls,!=) +#define DEFINE_COMMON_BY_VALUE_OP(cls, op) \ + friend bool operator op(const cls& lhs, \ + const cls& rhs) { \ + return lhs.data op rhs.data; \ + } -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_OP_DEF_HPP +#define DEFINE_COMMON_BY_VALUE_SEMANTICS(cls) \ + DEFINE_COMMON_BY_VALUE_OP(cls, ==); \ + DEFINE_COMMON_BY_VALUE_OP(cls, !=) diff --git a/include/neoSYCL/sycl/platform.hpp b/include/neoSYCL/sycl/platform.hpp index d73b69b..cdf9402 100644 --- a/include/neoSYCL/sycl/platform.hpp +++ b/include/neoSYCL/sycl/platform.hpp @@ -1,56 +1,78 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ - -#include "neoSYCL/sycl/device_selector.hpp" -#include "neoSYCL/sycl/info/device_type.hpp" +#pragma once #include "neoSYCL/sycl/info/param_traits.hpp" -#include "neoSYCL/sycl/detail/platform_info.hpp" -#include "neoSYCL/sycl/detail/device_type.hpp" -#include "neoSYCL/sycl/detail/registered_platforms.hpp" +#include "neoSYCL/sycl/info/device_type.hpp" +#include "neoSYCL/sycl/info/platform.hpp" +#include "neoSYCL/sycl/info/device.hpp" namespace neosycl::sycl { +namespace detail { +class platform_impl; +}; + class device; +/////////////////////////////////////////////////////////////////////////////// class platform { -public: - platform() : platform_info(new detail::default_platform_info) {} + friend class initial_platform_builder; - platform(const shared_ptr_class &info) : platform_info(info) {} +public: + platform(const platform& rhs) = default; + platform(platform&& rhs) = default; + ~platform() = default; -// explicit platform(cl_platform_id platformID); + platform& operator=(const platform& rhs) = default; + platform& operator=(platform&& rhs) = default; - explicit platform(const device_selector &deviceSelector) : - platform_info(deviceSelector.get_platform_info()) {} + friend bool operator==(const platform& lhs, const platform& rhs); + friend bool operator!=(const platform& lhs, const platform& rhs); -/* -- 
common interface members -- */ -// cl_platform_id get() const; + // get a static platform object by default (= REGISTERED[0]) + // explicit platform() { *this = get_default_platform(); } + explicit platform() : impl_(nullptr) {} - vector_class get_devices(info::device_type = info::device_type::all) const; + explicit platform(cl_platform_id platformID) { + throw unimplemented(); + } - template - typename info::param_traits::return_type get_info() const; + explicit platform(const device_selector& deviceSelector); - bool has_extension(const string_class &extension) const { - return platform_info->has_extension(extension); + /* -- common interface members -- */ + /* platform is not associated with OpenCL => 0 */ + cl_platform_id get() const { + return 0; } - bool is_host() const { - return platform_info->is_host(); - } + vector_class + get_devices(info::device_type = info::device_type::all) const; - static vector_class get_platforms() { - vector_class ret; - for (const shared_ptr_class &info: detail::REGISTERED_PLATFORMS) { - ret.push_back(platform(info)); - } - return ret; + template + typename info::param_traits::return_type + get_info() const { + return info::param_traits::value; } + bool has_extension(const string_class& extension) const; + + bool is_host() const; + + static vector_class get_platforms(); + static platform get_default_platform(); + static platform register_all_devices(); + private: - shared_ptr_class platform_info; + shared_ptr_class impl_; + + // INTERNAL USE ONLY: create the default platform + static vector_class REGISTERED; + explicit platform(detail::platform_impl* impl); }; +bool operator==(const platform& lhs, const platform& rhs) { + return lhs.impl_ == rhs.impl_; +} +bool operator!=(const platform& lhs, const platform& rhs) { + return !(lhs == rhs); } -#endif //CUSTOM_SYCL_INCLUDE_SYCL_PLATFORM_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/program.hpp b/include/neoSYCL/sycl/program.hpp index c7e9302..e09423e 100644 --- 
a/include/neoSYCL/sycl/program.hpp +++ b/include/neoSYCL/sycl/program.hpp @@ -1,48 +1,56 @@ -#ifndef NEOSYCL_INCLUDE_NEOSYCL_SYCL_PROGRAM_HPP_ -#define NEOSYCL_INCLUDE_NEOSYCL_SYCL_PROGRAM_HPP_ +#pragma once -#include "neoSYCL/sycl/property_list.hpp" #include "neoSYCL/sycl/info/program.hpp" namespace neosycl::sycl { -class kernel; +namespace detail { +class program_impl; +class program_data; +}; // namespace detail -enum class program_state { - none, - compiled, - linked -}; +class handler; + +enum class program_state { none, compiled, linked }; +/////////////////////////////////////////////////////////////////////////////// class program { - public: +public: + friend class handler; + using data = detail::program_data; + program() = delete; - explicit program(const context &context, - const property_list &propList = {}); + explicit program(const context& context, const property_list& propList = {}) { + init_(context, context.get_devices()); + } - program(const context &context, vector_class deviceList, - const property_list &propList = {}); + program(const context& context, vector_class deviceList, + const property_list& propList = {}) { + init_(context, deviceList); + } - program(vector_class &programList, - const property_list &propList = {}); + program(vector_class& programList, + const property_list& propList = {}); - program(vector_class &programList, - string_class linkOptions, - const property_list &propList = {}); + program(vector_class& programList, string_class linkOptions, + const property_list& propList = {}); -// program(const context &context, cl_program clProgram); + // program(const context &context, cl_program clProgram); -// cl_program get() const; + /* -- common interface members -- */ + cl_program get() const { + throw unimplemented(); + } bool is_host() const; - template + template void compile_with_kernel_type(string_class compileOptions = ""); void compile_with_source(string_class kernelSource, string_class compileOptions = ""); - template + 
template void build_with_kernel_type(string_class buildOptions = ""); void build_with_source(string_class kernelSource, @@ -50,18 +58,19 @@ class program { void link(string_class linkOptions = ""); -// template -// bool has_kernel() const; + template + bool has_kernel() const; bool has_kernel(string_class kernelName) const; -// template -// kernel get_kernel() const; + template + kernel get_kernel() const; kernel get_kernel(string_class kernelName) const; - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type + get_info() const; vector_class> get_binaries() const; @@ -76,8 +85,14 @@ class program { string_class get_build_options() const; program_state get_state() const; -}; -} + // INTERNAL USE ONLY + shared_ptr_class get_data(device dev) const; + +private: + shared_ptr_class impl_; + + void init_(context c, vector_class deviceList); +}; -#endif //NEOSYCL_INCLUDE_NEOSYCL_SYCL_PROGRAM_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/property_list.hpp b/include/neoSYCL/sycl/property_list.hpp index f8a4372..d7a1bc6 100644 --- a/include/neoSYCL/sycl/property_list.hpp +++ b/include/neoSYCL/sycl/property_list.hpp @@ -1,24 +1,21 @@ #ifndef SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ #define SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ -#include "neoSYCL/sycl/exception.hpp" - namespace neosycl::sycl { class property_list { - template + template bool has_property() const { throw unimplemented(); } - template + template propertyT get_property() const { throw unimplemented(); } - }; -} +} // namespace neosycl::sycl -#endif //SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ +#endif // SYCL_INCLUDE_CL_SYCL_PROPERTY_LIST_HPP_ diff --git a/include/neoSYCL/sycl/queue.hpp b/include/neoSYCL/sycl/queue.hpp index f4657c4..ebb4e08 100644 --- a/include/neoSYCL/sycl/queue.hpp +++ b/include/neoSYCL/sycl/queue.hpp @@ -1,61 +1,67 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_QUEUE_HPP_ -#define 
CUSTOM_SYCL_INCLUDE_SYCL_QUEUE_HPP_ - -#include - -#include "neoSYCL/sycl/exception.hpp" -#include "neoSYCL/sycl/types.hpp" -#include "neoSYCL/sycl/device_selector.hpp" +#pragma once #include "neoSYCL/sycl/info/queue.hpp" -#include "neoSYCL/sycl/property_list.hpp" -#include "neoSYCL/sycl/handler.hpp" -#include "neoSYCL/sycl/platform.hpp" -#include "neoSYCL/sycl/context.hpp" #include "neoSYCL/sycl/detail/task_counter.hpp" namespace neosycl::sycl { +/////////////////////////////////////////////////////////////////////////////// class queue { public: - explicit queue(const property_list &propList = {}) : - bind_device(), counter(new detail::task_counter()) {} - - explicit queue(const async_handler &asyncHandler, const property_list &propList = {}) : - bind_device(), counter(new detail::task_counter()), err_handler(asyncHandler) {} - - explicit queue(const device_selector &deviceSelector, const property_list &propList = {}) - : bind_device(deviceSelector.select_device()), counter(new detail::task_counter()) {} - - explicit queue(const device_selector &deviceSelector, - const async_handler &asyncHandler, const property_list &propList = {}) - : bind_device(deviceSelector.select_device()), counter(new detail::task_counter()), - err_handler(asyncHandler) {} - - explicit queue(const device &syclDevice, const property_list &propList = {}) : - bind_device(syclDevice), counter(new detail::task_counter()) {} - - explicit queue(const device &syclDevice, const async_handler &asyncHandler, - const property_list &propList = {}) : - bind_device(syclDevice), counter(new detail::task_counter()), err_handler(asyncHandler) {} - - explicit queue(const context &syclContext, const device_selector &deviceSelector, - const property_list &propList = {}); - - explicit queue(const context &syclContext, const device_selector &deviceSelector, - const async_handler &asyncHandler, const property_list &propList = {}); - - explicit queue(const context &syclContext, const device &syclDevice, - const 
property_list &propList = {}); - - explicit queue(const context &syclContext, const device &syclDevice, - const async_handler &asyncHandler, const property_list &propList = {}); - -// explicit queue(cl_command_queue clQueue, const context &syclContext, -// const async_handler &asyncHandler = {}); - -// cl_command_queue get() const; - - context get_context() const; + explicit queue(const property_list& propList = {}) + : bind_device(device::get_default_device()), + counter(new detail::task_counter()), ctx(bind_device), prog(ctx) {} + + explicit queue(const async_handler& asyncHandler, + const property_list& propList = {}) + : bind_device(device::get_default_device()), + counter(new detail::task_counter()), err_handler(asyncHandler), + ctx(bind_device), prog(ctx) {} + + explicit queue(const device_selector& deviceSelector, + const property_list& propList = {}) + : bind_device(deviceSelector.select_device()), + counter(new detail::task_counter()), ctx(bind_device), prog(ctx) {} + + explicit queue(const device_selector& deviceSelector, + const async_handler& asyncHandler, + const property_list& propList = {}) + : bind_device(deviceSelector.select_device()), + counter(new detail::task_counter()), err_handler(asyncHandler), + ctx(bind_device), prog(ctx) {} + + explicit queue(const device& syclDevice, const property_list& propList = {}) + : bind_device(syclDevice), counter(new detail::task_counter()), + ctx(bind_device), prog(ctx) {} + + explicit queue(const device& syclDevice, const async_handler& asyncHandler, + const property_list& propList = {}) + : bind_device(syclDevice), counter(new detail::task_counter()), + err_handler(asyncHandler), ctx(bind_device), prog(ctx) {} + + explicit queue(const context& syclContext, + const device_selector& deviceSelector, + const property_list& propList = {}); + + explicit queue(const context& syclContext, + const device_selector& deviceSelector, + const async_handler& asyncHandler, + const property_list& propList = {}); + + explicit 
queue(const context& syclContext, const device& syclDevice, + const property_list& propList = {}); + + explicit queue(const context& syclContext, const device& syclDevice, + const async_handler& asyncHandler, + const property_list& propList = {}); + + // explicit queue(cl_command_queue clQueue, const context &syclContext, + // const async_handler &asyncHandler = {}); + + // cl_command_queue get() const; + + context get_context() const { + return ctx; + } device get_device() const { return bind_device; @@ -65,22 +71,56 @@ class queue { return bind_device.is_host(); } - template + template typename info::param_traits::return_type get_info() const; - template +#ifndef DISABLE_MULTI_THREAD_SUPPORT + template + event submit(T cgf) { + counter->incr(); + std::thread t([f = cgf, d = bind_device, p = prog, c = counter]() { + try { + handler command_group_handler(d, p, c); + f(command_group_handler); + } + catch (std::exception& e) { + PRINT_ERR("%s", e.what()); + throw; + } + catch (...) { + PRINT_ERR("unknown exception"); + throw; + } + c->decr(); + }); + t.detach(); + return event(); + } +#else + /* this may run each command group faster but all command groups will be + * executed sequencially (i.e. no task parallelism) */ + template event submit(T cgf) { try { - handler command_group_handler(bind_device, counter); + handler command_group_handler(bind_device, prog, counter); cgf(command_group_handler); - } catch (...) { + } + catch (std::exception& e) { + PRINT_ERR("%s", e.what()); + throw; + } + catch (...) 
{ + PRINT_ERR("unknown exception"); throw; } return event(); } +#endif - template - event submit(T cgf, const queue &secondaryQueue); + template + event submit(T cgf, const queue& secondaryQueue) { + throw unimplemented(); + } void wait() { counter->wait(); @@ -96,10 +136,10 @@ class queue { private: device bind_device; - async_handler err_handler; shared_ptr_class counter; + async_handler err_handler; + context ctx; + program prog; }; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_QUEUE_HPP_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/range.hpp b/include/neoSYCL/sycl/range.hpp index 1891c0e..9b6db5a 100644 --- a/include/neoSYCL/sycl/range.hpp +++ b/include/neoSYCL/sycl/range.hpp @@ -1,27 +1,24 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ -#define CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ - +#pragma once #include "neoSYCL/sycl/detail/container/array_nd.hpp" -#include "neoSYCL/sycl/op_def.hpp" namespace neosycl::sycl { -template +template struct range { - template> - range(size_t dim0) :data(dim0) {} + template > + range(size_t dim0) : data(dim0) {} - template> + template > range(size_t dim0, size_t dim1) : data(dim0, dim1) {} - template> + template > range(size_t dim0, size_t dim1, size_t dim2) : data(dim0, dim1, dim2) {} size_t get(int dimension) const { return data[dimension]; } - size_t &operator[](int dimension) { + size_t& operator[](int dimension) { return data[dimension]; } @@ -116,6 +113,4 @@ struct range { detail::container::ArrayND data; }; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_RANGE_H_ +} // namespace neosycl::sycl diff --git a/include/neoSYCL/sycl/types.hpp b/include/neoSYCL/sycl/types.hpp index 25ac2a3..293d912 100644 --- a/include/neoSYCL/sycl/types.hpp +++ b/include/neoSYCL/sycl/types.hpp @@ -1,6 +1,4 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SYCL_TYPES_HPP_ -#define CUSTOM_SYCL_INCLUDE_SYCL_TYPES_HPP_ - +#pragma once #include #include #include @@ -11,33 +9,38 @@ namespace neosycl::sycl { -template > +template > using vector_class = std::vector; using 
string_class = std::string; -template +template using function_class = std::function; using mutex_class = std::mutex; -template +template using shared_ptr_class = std::shared_ptr; -template +template using unique_ptr_class = std::unique_ptr; -template +template using weak_ptr_class = std::weak_ptr; -template +template using hash_class = std::hash; using exception_ptr_class = std::exception_ptr; -template +template using buffer_allocator = std::allocator; -} - -#endif //CUSTOM_SYCL_INCLUDE_SYCL_TYPES_HPP_ +/* OpenCL interop (minimum type definition only) */ +using cl_platform_id = u_int64_t; +using cl_device_id = u_int64_t; +using cl_context = u_int64_t; +using cl_program = u_int64_t; +using cl_kernel = u_int64_t; +using cl_uint = uint; +} // namespace neosycl::sycl diff --git a/kernel_generator/CMakeLists.txt b/kernel_generator/CMakeLists.txt deleted file mode 100644 index f6024d7..0000000 --- a/kernel_generator/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ - -# find Clang -find_package(Clang REQUIRED CONFIG) -include_directories(${CLANG_INCLUDE_DIRS}) -add_definitions(${CLANG_DEFINITIONS}) -message(STATUS "Clang_FOUND ${Clang_FOUND}") -message(STATUS "Clang_DIR ${Clang_DIR}") -message(STATUS "Using ClangConfig.cmake in: ${Clang_DIR}") - -# find LLVM -find_package(LLVM REQUIRED CONFIG) - -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -message(STATUS "LLVM_FOUND ${LLVM_FOUND}") -message(STATUS "LLVM_DIR ${LLVM_DIR}") -message(STATUS "LLVM_INCLUDE_DIRS: ${LLVM_INCLUDE_DIRS}") -message(STATUS "LLVM_DEFINITIONS: ${LLVM_DEFINITIONS}") -message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - -add_definitions(${LLVM_DEFINITIONS}) -include_directories(${LLVM_INCLUDE_DIRS}) -list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") - -add_subdirectory(third_party) - -# include all headers here -include_directories( - include - third_party/fmt/include -) - - -if (CMAKE_BUILD_TYPE STREQUAL Debug) - ADD_DEFINITIONS(-DDEBUG) -endif () - -add_subdirectory(src) - diff --git 
a/kernel_generator/include/exceptions.h b/kernel_generator/include/exceptions.h deleted file mode 100644 index 282c645..0000000 --- a/kernel_generator/include/exceptions.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ -#define CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ - -namespace sycl { - -class BaseException : public std::exception { - private: - std::string message; - - public: - BaseException(const std::string &message) : message(message) {} - - const char *what() const noexcept override { - return this->message.c_str(); - } -}; - -class ClangCastException : public BaseException { - public: - ClangCastException(const std::string &message) : BaseException(message) {} -}; - -class KernelValidateException : public BaseException { - public: - KernelValidateException(const std::string &message) : BaseException(message) {} -}; - -} - -#endif //CUSTOM_SYCL_INCLUDE_EXCEPTIONS_H_ diff --git a/kernel_generator/include/helpers.h b/kernel_generator/include/helpers.h deleted file mode 100644 index 364c663..0000000 --- a/kernel_generator/include/helpers.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_HELPERS_H_ -#define CUSTOM_SYCL_INCLUDE_HELPERS_H_ - -#include - -#include "clang/AST/AST.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" - -#include "exceptions.h" - -#include -#include - -#ifdef DEBUG -#define PRINT_DEBUG(...) 
std::cout<< "[DEBUG] "<< fmt::format(__VA_ARGS__) < -std::string decl2str(CompilerInstance &ci, T *d) { - SourceManager &sm = ci.getSourceManager(); - SourceLocation start(d->getBeginLoc()), end(d->getEndLoc()); - SourceLocation e(Lexer::getLocForEndOfToken(end, 0, sm, ci.getLangOpts())); - return std::string(sm.getCharacterData(start), sm.getCharacterData(e) - sm.getCharacterData(start)); -} - -template -T *clang_cast(N any) { - if (isa(any)) { - return cast(any); - } else { - any->dump(); - throw ClangCastException("Clang node cast failed"); - } -} - -} - -#endif //CUSTOM_SYCL_INCLUDE_HELPERS_H_ diff --git a/kernel_generator/include/kernel.h b/kernel_generator/include/kernel.h deleted file mode 100644 index fe0d329..0000000 --- a/kernel_generator/include/kernel.h +++ /dev/null @@ -1,78 +0,0 @@ -// -// Created by WhiteBlue on 2020/5/20. -// - -#ifndef CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ -#define CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ - -#include -#include -#include -#include - -#include "clang/AST/AST.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" - -#include "helpers.h" - -#include "fmt/format.h" - -using namespace clang; - -namespace sycl { - -struct KernelArgument { - std::string name; - std::string type; - int dimensions; -}; - -struct KernelInfo { - std::vector params; - std::string kernel_name; - std::string kernel_body; - std::string index_name; - bool parallel; - - KernelInfo(std::vector params, - std::string kernel_name, - std::string kernel_body) - : params(std::move(params)), - kernel_name(std::move(kernel_name)), 
- kernel_body(std::move(kernel_body)), - parallel(false) {} - - KernelInfo(std::vector params, - std::string kernel_name, - std::string kernel_body, - std::string index_name) - : params(std::move(params)), - kernel_name(std::move(kernel_name)), - kernel_body(std::move(kernel_body)), - index_name(std::move(index_name)), - parallel(true) {} -}; - -struct ProgramContext { - std::map kernels; - std::map structs; -}; - -std::vector analyze_arguments_dependency(CompilerInstance &ci, - const CXXRecordDecl *lambda_func_decl, - ProgramContext &context); - -} - -#endif //CUSTOM_SYCL_RUNTIME_KERNEL_HPP_ diff --git a/kernel_generator/include/kernel_translator.h b/kernel_generator/include/kernel_translator.h deleted file mode 100644 index 2b480a4..0000000 --- a/kernel_generator/include/kernel_translator.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ -#define CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ - -#include "kernel.h" -#include "fmt/format.h" - -namespace sycl { - -class KernelTranslator { - - public: - virtual std::string body_to_decl_str(const ProgramContext &context, const KernelInfo &info) = 0; - - virtual std::string before_kernel(const ProgramContext &context) = 0; - - virtual std::string after_kernel(const ProgramContext &context) = 0; - -}; - -}; - -#endif //CUSTOM_SYCL_INCLUDE_KERNEL_TRANSLATOR_H_ diff --git a/kernel_generator/include/parallel_task.h b/kernel_generator/include/parallel_task.h deleted file mode 100644 index 1840de4..0000000 --- a/kernel_generator/include/parallel_task.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ -#define CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include 
"clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Lex/Preprocessor.h" - -#include "exceptions.h" -#include "kernel.h" - -namespace sycl { - -KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context); - -} - -#endif //CUSTOM_SYCL_INCLUDE_PARALLEL_TASK_H_ diff --git a/kernel_generator/include/single_task.h b/kernel_generator/include/single_task.h deleted file mode 100644 index 74f1b44..0000000 --- a/kernel_generator/include/single_task.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ -#define CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ASTImporter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Rewrite/Frontend/FrontendActions.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" -#include "clang/Frontend/FrontendPluginRegistry.h" - -#include "exceptions.h" -#include "helpers.h" -#include "kernel.h" - -using namespace clang; - -namespace sycl { - -KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context); - -} - -#endif //CUSTOM_SYCL_INCLUDE_SINGLE_TASK_H_ diff --git a/kernel_generator/include/ve_kernel_translator.h b/kernel_generator/include/ve_kernel_translator.h deleted file mode 100644 index 1ee2f22..0000000 --- 
a/kernel_generator/include/ve_kernel_translator.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ -#define CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ - -#include "kernel_translator.h" - -namespace sycl { - -class VEKernelTranslator : public KernelTranslator { - public: - std::string body_to_decl_str(const ProgramContext &context, const KernelInfo &info) override; - - std::string before_kernel(const ProgramContext &context) override; - - std::string after_kernel(const ProgramContext &context) override; - -}; - -} - -#endif //CUSTOM_SYCL_INCLUDE_VE_KERNEL_TRANSLATOR_H_ diff --git a/kernel_generator/src/CMakeLists.txt b/kernel_generator/src/CMakeLists.txt deleted file mode 100644 index 05e85f0..0000000 --- a/kernel_generator/src/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ - -# add executable file -add_executable( - clang_tool - - single_task.cpp - parallel_task.cpp - kernel.cpp - clang_tool.cpp - ve_kernel_translator.cpp -) - -# link clang libraries -target_link_libraries(clang_tool - - clangFrontend - clangSerialization - clangDriver - clangParse - clangSema - clangAnalysis - clangAST - clangBasic - clangEdit - clangLex - clangTooling - clangASTMatchers - clangAnalysis - clangEdit - clangAST - clangLex - clangRewrite - clangToolingCore - - ${llvm_libs} - - fmt - ) - - - diff --git a/kernel_generator/src/clang_tool.cpp b/kernel_generator/src/clang_tool.cpp deleted file mode 100644 index 08a1d3f..0000000 --- a/kernel_generator/src/clang_tool.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include -#include -#include - -#include "clang/AST/AST.h" -#include "clang/AST/Mangle.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include 
"clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring/Extract/Extract.h" -#include "clang/Tooling/Tooling.h" -#include "fmt/format.h" - -#include "kernel.h" -#include "parallel_task.h" -#include "single_task.h" -#include "ve_kernel_translator.h" - -static llvm::cl::OptionCategory MyToolCategory("Additional options"); -static llvm::cl::opt OutputNameOption("o", - llvm::cl::desc("output filename"), - llvm::cl::value_desc("filename"), - llvm::cl::cat(MyToolCategory)); - -static llvm::cl::extrahelp CommonHelp(clang::tooling::CommonOptionsParser::HelpMessage); -static llvm::cl::extrahelp MoreHelp("SYCL Kernel generate tool"); - -const static std::string KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME = "HIGHLIGHT_KERNEL_SINGLE_TASK"; -const static std::string KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME = "HIGHLIGHT_KERNEL_PARALLEL"; -const static std::string DEFAULT_OUTPUT_NAME = "kernel"; - -using namespace sycl; - -class SYCLVisitor : public clang::RecursiveASTVisitor { -private: - clang::Rewriter &rewriter; - clang::SourceManager &manager; - clang::CompilerInstance &instance; - ProgramContext context; - -public: - SYCLVisitor(clang::CompilerInstance &ci, clang::SourceManager &sm, clang::Rewriter &re) - : instance(ci), rewriter(re), manager(sm) { - } - - virtual bool shouldVisitTemplateInstantiations() { return true; } - - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - // For debugging, dumping the AST nodes will show which nodes are already - // being visited. - Declaration->getDeclName().getUsingDirectiveName(); - - // The return value indicates whether we want the visitation to proceed. - // Return false to stop the traversal of the AST. 
- return true; - } - - bool VisitStmt(clang::Stmt *s) { - try { - - if (clang::isa(s)) { - clang::CallExpr *call_expr = clang::cast(s); - clang::FunctionDecl *callee = call_expr->getDirectCallee(); - if (callee && callee->getIdentifier()) { - // Get the func which name start with SYCL_PREFIX - if (callee->getName().compare(KERNEL_HIGHLIGHT_SINGLE_TASK_FUNC_NAME) == 0) { - KernelInfo info = parse_single_task_func(this->instance, callee, context); - if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert(std::pair(info.kernel_name, info)); - } - } else if (callee->getName().compare(KERNEL_HIGHLIGHT_PARALLEL_FUNC_NAME) == 0) { - KernelInfo info = parse_parallel_task_func(this->instance, callee, context); - if (context.kernels.count(info.kernel_name) == 0) { - context.kernels.insert(std::pair(info.kernel_name, info)); - } - } - } - } - } catch (BaseException e) { - llvm::errs() << e.what() << "\n"; - return true; - } - return true; - } - - ProgramContext get_context() { - return context; - } -}; - -class SYCLASTConsumer : public clang::ASTConsumer { -private: - std::unique_ptr visitor; - clang::Rewriter rewriter; - clang::SourceManager &manager; - clang::CompilerInstance &instance; - VEKernelTranslator translator; - -public: - explicit SYCLASTConsumer(clang::CompilerInstance &ci) - : instance(ci), - manager(ci.getSourceManager()), - visitor(std::make_unique(ci, ci.getSourceManager(), this->rewriter)) { - this->rewriter.setSourceMgr(ci.getSourceManager(), ci.getLangOpts()); - } - // Retrieve the AST analysis result - virtual void HandleTranslationUnit(clang::ASTContext &ctx) { - visitor->TraverseAST(ctx); - - std::string file_name = DEFAULT_OUTPUT_NAME; - if (OutputNameOption.size() == 1) { - file_name = OutputNameOption.c_str(); - } - - ProgramContext program_context = visitor->get_context(); - - int kernel_count = program_context.kernels.size(); - PRINT_INFO("Found {} kernels", kernel_count); - if (kernel_count == 0) { - return; - } - - // create 
files - std::ofstream kernel_out; - kernel_out.open(file_name + ".c", std::ios::out); - - // Write include headers here - std::string kernel_code; - kernel_code.append(translator.before_kernel(program_context)).append(LINE_BREAK); - - // Output all kernels - auto kernels = program_context.kernels; - for (auto &kernel : kernels) { - std::string kernel_str = translator.body_to_decl_str(program_context, kernel.second); - kernel_code.append(kernel_str).append(LINE_BREAK); - } - kernel_code.append(translator.after_kernel(program_context)).append(LINE_BREAK); - - // write kernel code - kernel_out << kernel_code << std::endl; - - kernel_out.close(); - } -}; - -class SYCLFrontendAction : public clang::PluginASTAction { -public: - virtual std::unique_ptr - CreateASTConsumer(clang::CompilerInstance &ci, llvm::StringRef file) { - return std::make_unique(ci); - } - - bool ParseArgs(const clang::CompilerInstance &ci, - const std::vector &args) { - return true; - } -}; - -int main(int argc, const char **argv) { - llvm::Expected - op = clang::tooling::CommonOptionsParser::create(argc, argv, MyToolCategory, llvm::cl::OneOrMore); - clang::tooling::ClangTool tool(op->getCompilations(), op->getSourcePathList()); - return tool.run(clang::tooling::newFrontendActionFactory().get()); -} \ No newline at end of file diff --git a/kernel_generator/src/kernel.cpp b/kernel_generator/src/kernel.cpp deleted file mode 100644 index 9bfe5f7..0000000 --- a/kernel_generator/src/kernel.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "kernel.h" - -namespace sycl { - -std::vector analyze_arguments_dependency( - CompilerInstance &ci, const CXXRecordDecl *lambda_func_decl, ProgramContext &context) { - std::vector args; - - // we decide the argument order by parent context - for (Decl *d:lambda_func_decl->getParent()->decls()) { - if (isa(d)) { - VarDecl *var = cast(d); - CXXRecordDecl *raw_decl = var->getType()->getAsCXXRecordDecl(); - if (!raw_decl) { - continue; - } - std::string name = 
var->getIdentifier()->getName().str(); - - ClassTemplateSpecializationDecl *template_decl = clang_cast(raw_decl); - auto template_args = template_decl->getTemplateArgs().asArray(); - - if (template_args.size() != 4) { - throw KernelValidateException("Accessor should have 4 template args"); - } - - TemplateArgument accessor_type_tmp = template_args[0]; - TemplateArgument dimensions_tmp = template_args[1]; - TemplateArgument mode_tmp = template_args[2]; - TemplateArgument target_tmp = template_args[3]; - - QualType accessor_type = accessor_type_tmp.getAsType(); - int field_dimensions = dimensions_tmp.getAsIntegral().getExtValue(); - std::string field_type = accessor_type.getAsString(); - - if (!accessor_type->isBuiltinType()) { - // not builtin type - CXXRecordDecl *type_decl = accessor_type->getAsCXXRecordDecl(); - if (type_decl) { - if (type_decl->getIdentifier()) { - std::string def_name = type_decl->getIdentifier()->getName().str(); - std::string def_body = decl2str(ci, type_decl); - - if (context.structs.count(def_name) == 0) { - context.structs.insert(std::pair(def_name, def_body)); - } - -#ifdef DEBUG - std::cout << "Definition name: " << def_name << std::endl; - std::cout << "========= Definition body start =========" << std::endl; - std::cout << def_body << std::endl; - std::cout << "========= Definition body end =========" << std::endl; -#endif - } - } - } - - args.push_back(KernelArgument{name, field_type, field_dimensions}); - - PRINT_INFO("Found a lambda field decl, Type: {}, Name: {}, Dimensions: {}", field_type, name, field_dimensions); - } - } - - return args; -}; - -} - - diff --git a/kernel_generator/src/parallel_task.cpp b/kernel_generator/src/parallel_task.cpp deleted file mode 100644 index 050e72b..0000000 --- a/kernel_generator/src/parallel_task.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "parallel_task.h" - -namespace sycl { - -KernelInfo parse_parallel_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context) { - if 
(callee->getNumParams() != 2) { - throw KernelValidateException("Parallel kernel must have 2 param"); - } - const TemplateArgumentList *template_args = callee->getTemplateSpecializationArgs(); - if (template_args == nullptr || template_args->size() != 3) { - throw KernelValidateException("Parallel kernel must have 3 template args"); - } - - const TemplateArgument &classname_arg = template_args->get(0); - const TemplateArgument &lambda_func_arg = template_args->get(1); - - if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - if (lambda_func_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - QualType classname_type = classname_arg.getAsType(); - QualType lambda_func_type = lambda_func_arg.getAsType(); - - std::string mangledName; - clang::MangleContext *mangleContext = ci.getASTContext().createMangleContext(); - llvm::raw_string_ostream ostream(mangledName); - mangleContext->mangleCXXRTTI(classname_type, ostream); - ostream.flush(); - std::string kernelName = mangledName.substr(4, mangledName.size()); - - // Get classname here, will be used as kernel func name - std::string classname = classname_type->getAsRecordDecl()->getDeclName().getAsString(); - - CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - - std::vector kernel_arguments = analyze_arguments_dependency(ci, lambda_func_decl, context); - - CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); - - if (lambda_decl->getNumParams() != 1) { - throw KernelValidateException("Parallel Kernel should have Index"); - } - - std::string index_name = lambda_decl->getParamDecl(0)->getIdentifier()->getName().str(); - -#ifdef DEBUG - std::cout << "Parallel kernel name: " << kernelName << " , index_name: " << index_name << std::endl; - std::cout << 
"========= Parallel kernel body start =========" << std::endl; - std::cout << func_body << std::endl; - std::cout << "========= Parallel kernel body end =========" << std::endl; -#endif - - KernelInfo info(kernel_arguments, kernelName, func_body, index_name); - - return info; -} - -} \ No newline at end of file diff --git a/kernel_generator/src/single_task.cpp b/kernel_generator/src/single_task.cpp deleted file mode 100644 index 6f4e5f1..0000000 --- a/kernel_generator/src/single_task.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "single_task.h" - -namespace sycl { - -KernelInfo parse_single_task_func(CompilerInstance &ci, const FunctionDecl *callee, ProgramContext &context) { - if (callee->getNumParams() != 1) { - throw KernelValidateException("Single-task kernel must have 1 param"); - } - const TemplateArgumentList *template_args = callee->getTemplateSpecializationArgs(); - if (template_args == nullptr || template_args->size() != 2) { - throw KernelValidateException("Single-task kernel must have 2 template args"); - } - - const TemplateArgument &classname_arg = template_args->get(0); - const TemplateArgument &lambda_func_arg = template_args->get(1); - - if (classname_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - if (lambda_func_arg.getKind() != TemplateArgument::ArgKind::Type) { - throw KernelValidateException("Template 'ArgKind' must be 'Type'"); - } - - QualType classname_type = classname_arg.getAsType(); - QualType lambda_func_type = lambda_func_arg.getAsType(); - - std::string mangledName; - clang::MangleContext *mangleContext = ci.getASTContext().createMangleContext(); - llvm::raw_string_ostream ostream(mangledName); - mangleContext->mangleCXXRTTI(classname_type, ostream); - ostream.flush(); - std::string kernelName = mangledName.substr(4, mangledName.size()); - - // Get classname here, will be used as kernel func name - std::cout << classname_type->getTypeClassName() << 
std::endl; - std::string classname = classname_type->getAsRecordDecl()->getDeclName().getAsString(); - - CXXRecordDecl *lambda_func_decl = lambda_func_type->getAsCXXRecordDecl(); - - std::vector kernel_arguments = analyze_arguments_dependency(ci, lambda_func_decl, context); - - CXXMethodDecl *lambda_decl = lambda_func_decl->getLambdaCallOperator(); - std::string func_body = decl2str(ci, lambda_decl->getBody()); - -#ifdef DEBUG - std::cout << "Single kernel name: " << kernelName << std::endl; - std::cout << "========= Single Kernel body start =========" << std::endl; - std::cout << func_body << std::endl; - std::cout << "========= Single Kernel body end =========" << std::endl; -#endif - - KernelInfo info(kernel_arguments, kernelName, func_body); - - return info; -}; - -} - diff --git a/kernel_generator/src/ve_kernel_translator.cpp b/kernel_generator/src/ve_kernel_translator.cpp deleted file mode 100644 index 20b578f..0000000 --- a/kernel_generator/src/ve_kernel_translator.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "ve_kernel_translator.h" - -namespace sycl { - -std::string VEKernelTranslator::body_to_decl_str(const ProgramContext &context, const KernelInfo &info) { - std::string func_params; - - // generate function params - for (const KernelArgument &arg:info.params) { - func_params += fmt::format("{} *{}, ", arg.type, arg.name); - } - - if (info.parallel) { - // generate parallel for kernel here - std::string body = fmt::format("\nfor(int {0}=0;{0}").append(LINE_BREAK); - ret.append("#include ").append(LINE_BREAK); - for (auto &def:context.structs) { - ret.append(def.second).append(";").append(LINE_BREAK); - } - return ret; -} - -std::string VEKernelTranslator::after_kernel(const ProgramContext &context) { - return ""; -} - -} - diff --git a/kernel_generator/third_party/CMakeLists.txt b/kernel_generator/third_party/CMakeLists.txt deleted file mode 100644 index 21ba0d7..0000000 --- a/kernel_generator/third_party/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ - 
-add_subdirectory(fmt) \ No newline at end of file diff --git a/kernel_generator/third_party/fmt/CMakeLists.txt b/kernel_generator/third_party/fmt/CMakeLists.txt deleted file mode 100644 index f43d5ce..0000000 --- a/kernel_generator/third_party/fmt/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -include_directories(include) -add_library(fmt SHARED src/format.cc) \ No newline at end of file diff --git a/kernel_generator/third_party/fmt/include/fmt/core.h b/kernel_generator/third_party/fmt/include/fmt/core.h deleted file mode 100644 index 0e0824f..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/core.h +++ /dev/null @@ -1,1796 +0,0 @@ -// Formatting library for C++ - the core API -// -// Copyright (c) 2012 - present, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. - -#ifndef FMT_CORE_H_ -#define FMT_CORE_H_ - -#include // std::FILE -#include -#include -#include -#include -#include -#include -#include - -// The fmt library version in the form major * 10000 + minor * 100 + patch. 
-#define FMT_VERSION 60201 - -#ifdef __has_feature -# define FMT_HAS_FEATURE(x) __has_feature(x) -#else -# define FMT_HAS_FEATURE(x) 0 -#endif - -#if defined(__has_include) && !defined(__INTELLISENSE__) && \ - !(defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1600) -# define FMT_HAS_INCLUDE(x) __has_include(x) -#else -# define FMT_HAS_INCLUDE(x) 0 -#endif - -#ifdef __has_cpp_attribute -# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define FMT_HAS_CPP_ATTRIBUTE(x) 0 -#endif - -#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ - (__cplusplus >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ - (__cplusplus >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#ifdef __clang__ -# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) -#else -# define FMT_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) -# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define FMT_GCC_VERSION 0 -#endif - -#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) -# define FMT_HAS_GXX_CXX11 FMT_GCC_VERSION -#else -# define FMT_HAS_GXX_CXX11 0 -#endif - -#ifdef __NVCC__ -# define FMT_NVCC __NVCC__ -#else -# define FMT_NVCC 0 -#endif - -#ifdef _MSC_VER -# define FMT_MSC_VER _MSC_VER -#else -# define FMT_MSC_VER 0 -#endif - -// Check if relaxed C++14 constexpr is supported. -// GCC doesn't allow throw in constexpr until version 6 (bug 67371). 
-#ifndef FMT_USE_CONSTEXPR -# define FMT_USE_CONSTEXPR \ - (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VER >= 1910 || \ - (FMT_GCC_VERSION >= 600 && __cplusplus >= 201402L)) && \ - !FMT_NVCC -#endif -#if FMT_USE_CONSTEXPR -# define FMT_CONSTEXPR constexpr -# define FMT_CONSTEXPR_DECL constexpr -#else -# define FMT_CONSTEXPR inline -# define FMT_CONSTEXPR_DECL -#endif - -#ifndef FMT_OVERRIDE -# if FMT_HAS_FEATURE(cxx_override) || \ - (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 -# define FMT_OVERRIDE override -# else -# define FMT_OVERRIDE -# endif -#endif - -// Check if exceptions are disabled. -#ifndef FMT_EXCEPTIONS -# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ - FMT_MSC_VER && !_HAS_EXCEPTIONS -# define FMT_EXCEPTIONS 0 -# else -# define FMT_EXCEPTIONS 1 -# endif -#endif - -// Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature). -#ifndef FMT_USE_NOEXCEPT -# define FMT_USE_NOEXCEPT 0 -#endif - -#if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \ - (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 -# define FMT_DETECTED_NOEXCEPT noexcept -# define FMT_HAS_CXX11_NOEXCEPT 1 -#else -# define FMT_DETECTED_NOEXCEPT throw() -# define FMT_HAS_CXX11_NOEXCEPT 0 -#endif - -#ifndef FMT_NOEXCEPT -# if FMT_EXCEPTIONS || FMT_HAS_CXX11_NOEXCEPT -# define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT -# else -# define FMT_NOEXCEPT -# endif -#endif - -// [[noreturn]] is disabled on MSVC and NVCC because of bogus unreachable code -// warnings. 
-#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VER && \ - !FMT_NVCC -# define FMT_NORETURN [[noreturn]] -#else -# define FMT_NORETURN -#endif - -#ifndef FMT_MAYBE_UNUSED -# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) -# define FMT_MAYBE_UNUSED [[maybe_unused]] -# else -# define FMT_MAYBE_UNUSED -# endif -#endif - -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if defined(__GNUC__) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VER -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - -// Workaround broken [[deprecated]] in the Intel, PGI and NVCC compilers. -#if defined(__INTEL_COMPILER) || defined(__PGI) || FMT_NVCC -# define FMT_DEPRECATED_ALIAS -#else -# define FMT_DEPRECATED_ALIAS FMT_DEPRECATED -#endif - -#ifndef FMT_BEGIN_NAMESPACE -# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ - FMT_MSC_VER >= 1900 -# define FMT_INLINE_NAMESPACE inline namespace -# define FMT_END_NAMESPACE \ - } \ - } -# else -# define FMT_INLINE_NAMESPACE namespace -# define FMT_END_NAMESPACE \ - } \ - using namespace v6; \ - } -# endif -# define FMT_BEGIN_NAMESPACE \ - namespace fmt { \ - FMT_INLINE_NAMESPACE v6 { -#endif - -#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# if FMT_MSC_VER -# define FMT_NO_W4275 __pragma(warning(suppress : 4275)) -# else -# define FMT_NO_W4275 -# endif -# define FMT_CLASS_API FMT_NO_W4275 -# ifdef FMT_EXPORT -# define FMT_API __declspec(dllexport) -# elif defined(FMT_SHARED) -# define FMT_API __declspec(dllimport) -# define FMT_EXTERN_TEMPLATE_API FMT_API -# endif -#endif -#ifndef FMT_CLASS_API -# define FMT_CLASS_API -#endif -#ifndef FMT_API -# if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_API __attribute__((visibility("default"))) -# define FMT_EXTERN_TEMPLATE_API FMT_API -# define 
FMT_INSTANTIATION_DEF_API -# else -# define FMT_API -# endif -#endif -#ifndef FMT_EXTERN_TEMPLATE_API -# define FMT_EXTERN_TEMPLATE_API -#endif -#ifndef FMT_INSTANTIATION_DEF_API -# define FMT_INSTANTIATION_DEF_API FMT_API -#endif - -#ifndef FMT_HEADER_ONLY -# define FMT_EXTERN extern -#else -# define FMT_EXTERN -#endif - -// libc++ supports string_view in pre-c++17. -#if (FMT_HAS_INCLUDE() && \ - (__cplusplus > 201402L || defined(_LIBCPP_VERSION))) || \ - (defined(_MSVC_LANG) && _MSVC_LANG > 201402L && _MSC_VER >= 1910) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && __cplusplus >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - -#ifndef FMT_UNICODE -# define FMT_UNICODE !FMT_MSC_VER -#endif -#if FMT_UNICODE && FMT_MSC_VER -# pragma execution_character_set("utf-8") -#endif - -FMT_BEGIN_NAMESPACE - -// Implementations of enable_if_t and other metafunctions for older systems. -template -using enable_if_t = typename std::enable_if::type; -template -using conditional_t = typename std::conditional::type; -template using bool_constant = std::integral_constant; -template -using remove_reference_t = typename std::remove_reference::type; -template -using remove_const_t = typename std::remove_const::type; -template -using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; -template using type_identity_t = typename type_identity::type; - -struct monostate {}; - -// An enable_if helper to be used in template parameters which results in much -// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed -// to workaround a bug in MSVC 2019 (see #1140 and #1186). -#define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0 - -namespace internal { - -// A helper function to suppress bogus "conditional expression is constant" -// warnings. 
-template FMT_CONSTEXPR T const_check(T value) { return value; } - -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. -template struct void_t_impl { using type = void; }; - -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, - const char* message); - -#ifndef FMT_ASSERT -# ifdef NDEBUG -// FMT_ASSERT is not empty to avoid -Werror=empty-body. -# define FMT_ASSERT(condition, message) ((void)0) -# else -# define FMT_ASSERT(condition, message) \ - ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ - ? (void)0 \ - : ::fmt::internal::assert_fail(__FILE__, __LINE__, (message))) -# endif -#endif - -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - -#ifdef FMT_USE_INT128 -// Do nothing. -#elif defined(__SIZEOF_INT128__) && !FMT_NVCC -# define FMT_USE_INT128 1 -using int128_t = __int128_t; -using uint128_t = __uint128_t; -#else -# define FMT_USE_INT128 0 -#endif -#if !FMT_USE_INT128 -struct int128_t {}; -struct uint128_t {}; -#endif - -// Casts a nonnegative integer to unsigned. -template -FMT_CONSTEXPR typename std::make_unsigned::type to_unsigned(Int value) { - FMT_ASSERT(value >= 0, "negative value"); - return static_cast::type>(value); -} - -constexpr unsigned char micro[] = "\u00B5"; - -template constexpr bool is_unicode() { - return FMT_UNICODE || sizeof(Char) != 1 || - (sizeof(micro) == 3 && micro[0] == 0xC2 && micro[1] == 0xB5); -} - -#ifdef __cpp_char8_t -using char8_type = char8_t; -#else -enum char8_type : unsigned char {}; -#endif -} // namespace internal - -template -using void_t = typename internal::void_t_impl::type; - -/** - An implementation of ``std::basic_string_view`` for pre-C++17. It provides a - subset of the API. 
``fmt::basic_string_view`` is used for format strings even - if ``std::string_view`` is available to prevent issues when a library is - compiled with a different ``-std`` option than the client code (which is not - recommended). - */ -template class basic_string_view { - private: - const Char* data_; - size_t size_; - - public: - using char_type FMT_DEPRECATED_ALIAS = Char; - using value_type = Char; - using iterator = const Char*; - - FMT_CONSTEXPR basic_string_view() FMT_NOEXCEPT : data_(nullptr), size_(0) {} - - /** Constructs a string reference object from a C string and a size. */ - FMT_CONSTEXPR basic_string_view(const Char* s, size_t count) FMT_NOEXCEPT - : data_(s), - size_(count) {} - - /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. - \endrst - */ -#if __cplusplus >= 201703L // C++17's char_traits::length() is constexpr. - FMT_CONSTEXPR -#endif - basic_string_view(const Char* s) - : data_(s), size_(std::char_traits::length(s)) {} - - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) FMT_NOEXCEPT - : data_(s.data()), - size_(s.size()) {} - - template < - typename S, - FMT_ENABLE_IF(std::is_same>::value)> - FMT_CONSTEXPR basic_string_view(S s) FMT_NOEXCEPT : data_(s.data()), - size_(s.size()) {} - - /** Returns a pointer to the string data. */ - FMT_CONSTEXPR const Char* data() const { return data_; } - - /** Returns the string size. */ - FMT_CONSTEXPR size_t size() const { return size_; } - - FMT_CONSTEXPR iterator begin() const { return data_; } - FMT_CONSTEXPR iterator end() const { return data_ + size_; } - - FMT_CONSTEXPR const Char& operator[](size_t pos) const { return data_[pos]; } - - FMT_CONSTEXPR void remove_prefix(size_t n) { - data_ += n; - size_ -= n; - } - - // Lexicographically compare this string reference to other. 
- int compare(basic_string_view other) const { - size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); - if (result == 0) - result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); - return result; - } - - friend bool operator==(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) == 0; - } - friend bool operator!=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) != 0; - } - friend bool operator<(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) < 0; - } - friend bool operator<=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) <= 0; - } - friend bool operator>(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) > 0; - } - friend bool operator>=(basic_string_view lhs, basic_string_view rhs) { - return lhs.compare(rhs) >= 0; - } -}; - -using string_view = basic_string_view; -using wstring_view = basic_string_view; - -#ifndef __cpp_char8_t -// char8_t is deprecated; use char instead. -using char8_t FMT_DEPRECATED_ALIAS = internal::char8_type; -#endif - -/** Specifies if ``T`` is a character type. Can be specialized by users. */ -template struct is_char : std::false_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; -template <> struct is_char : std::true_type {}; - -/** - \rst - Returns a string view of `s`. In order to add custom string type support to - {fmt} provide an overload of `to_string_view` for it in the same namespace as - the type for the argument-dependent lookup to work. 
- - **Example**:: - - namespace my_ns { - inline string_view to_string_view(const my_string& s) { - return {s.data(), s.length()}; - } - } - std::string message = fmt::format(my_string("The answer is {}"), 42); - \endrst - */ -template ::value)> -inline basic_string_view to_string_view(const Char* s) { - return s; -} - -template -inline basic_string_view to_string_view( - const std::basic_string& s) { - return s; -} - -template -inline basic_string_view to_string_view(basic_string_view s) { - return s; -} - -template >::value)> -inline basic_string_view to_string_view( - internal::std_string_view s) { - return s; -} - -// A base class for compile-time strings. It is defined in the fmt namespace to -// make formatting functions visible via ADL, e.g. format(fmt("{}"), 42). -struct compile_string {}; - -template -struct is_compile_string : std::is_base_of {}; - -template ::value)> -constexpr basic_string_view to_string_view(const S& s) { - return s; -} - -namespace internal { -void to_string_view(...); -using fmt::v6::to_string_view; - -// Specifies whether S is a string type convertible to fmt::basic_string_view. -// It should be a constexpr function but MSVC 2017 fails to compile it in -// enable_if and MSVC 2015 fails to compile it as an alias template. -template -struct is_string : std::is_class()))> { -}; - -template struct char_t_impl {}; -template struct char_t_impl::value>> { - using result = decltype(to_string_view(std::declval())); - using type = typename result::value_type; -}; - -struct error_handler { - FMT_CONSTEXPR error_handler() = default; - FMT_CONSTEXPR error_handler(const error_handler&) = default; - - // This function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN FMT_API void on_error(const char* message); -}; -} // namespace internal - -/** String's character type. 
*/ -template using char_t = typename internal::char_t_impl::type; - -/** - \rst - Parsing context consisting of a format string range being parsed and an - argument counter for automatic indexing. - - You can use one of the following type aliases for common character types: - - +-----------------------+-------------------------------------+ - | Type | Definition | - +=======================+=====================================+ - | format_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ - | wformat_parse_context | basic_format_parse_context | - +-----------------------+-------------------------------------+ - \endrst - */ -template -class basic_format_parse_context : private ErrorHandler { - private: - basic_string_view format_str_; - int next_arg_id_; - - public: - using char_type = Char; - using iterator = typename basic_string_view::iterator; - - explicit FMT_CONSTEXPR basic_format_parse_context( - basic_string_view format_str, ErrorHandler eh = ErrorHandler()) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {} - - /** - Returns an iterator to the beginning of the format string range being - parsed. - */ - FMT_CONSTEXPR iterator begin() const FMT_NOEXCEPT { - return format_str_.begin(); - } - - /** - Returns an iterator past the end of the format string range being parsed. - */ - FMT_CONSTEXPR iterator end() const FMT_NOEXCEPT { return format_str_.end(); } - - /** Advances the begin iterator to ``it``. */ - FMT_CONSTEXPR void advance_to(iterator it) { - format_str_.remove_prefix(internal::to_unsigned(it - begin())); - } - - /** - Reports an error if using the manual argument indexing; otherwise returns - the next argument index and switches to the automatic indexing. 
- */ - FMT_CONSTEXPR int next_arg_id() { - if (next_arg_id_ >= 0) return next_arg_id_++; - on_error("cannot switch from manual to automatic argument indexing"); - return 0; - } - - /** - Reports an error if using the automatic argument indexing; otherwise - switches to the manual indexing. - */ - FMT_CONSTEXPR void check_arg_id(int) { - if (next_arg_id_ > 0) - on_error("cannot switch from automatic to manual argument indexing"); - else - next_arg_id_ = -1; - } - - FMT_CONSTEXPR void check_arg_id(basic_string_view) {} - - FMT_CONSTEXPR void on_error(const char* message) { - ErrorHandler::on_error(message); - } - - FMT_CONSTEXPR ErrorHandler error_handler() const { return *this; } -}; - -using format_parse_context = basic_format_parse_context; -using wformat_parse_context = basic_format_parse_context; - -template -using basic_parse_context FMT_DEPRECATED_ALIAS = - basic_format_parse_context; -using parse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; -using wparse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; - -template class basic_format_arg; -template class basic_format_args; - -// A formatter for objects of type T. -template -struct formatter { - // A deleted default constructor indicates a disabled formatter. - formatter() = delete; -}; - -template -struct FMT_DEPRECATED convert_to_int - : bool_constant::value && - std::is_convertible::value> {}; - -// Specifies if T has an enabled formatter specialization. A type can be -// formattable even if it doesn't have a formatter e.g. via a conversion. -template -using has_formatter = - std::is_constructible>; - -namespace internal { - -/** A contiguous memory buffer with an optional growing ability. */ -template class buffer { - private: - T* ptr_; - std::size_t size_; - std::size_t capacity_; - - protected: - // Don't initialize ptr_ since it is not accessed to save a few cycles. 
- buffer(std::size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {} - - buffer(T* p = nullptr, std::size_t sz = 0, std::size_t cap = 0) FMT_NOEXCEPT - : ptr_(p), - size_(sz), - capacity_(cap) {} - - /** Sets the buffer data and capacity. */ - void set(T* buf_data, std::size_t buf_capacity) FMT_NOEXCEPT { - ptr_ = buf_data; - capacity_ = buf_capacity; - } - - /** Increases the buffer capacity to hold at least *capacity* elements. */ - virtual void grow(std::size_t capacity) = 0; - - public: - using value_type = T; - using const_reference = const T&; - - buffer(const buffer&) = delete; - void operator=(const buffer&) = delete; - virtual ~buffer() = default; - - T* begin() FMT_NOEXCEPT { return ptr_; } - T* end() FMT_NOEXCEPT { return ptr_ + size_; } - - const T* begin() const FMT_NOEXCEPT { return ptr_; } - const T* end() const FMT_NOEXCEPT { return ptr_ + size_; } - - /** Returns the size of this buffer. */ - std::size_t size() const FMT_NOEXCEPT { return size_; } - - /** Returns the capacity of this buffer. */ - std::size_t capacity() const FMT_NOEXCEPT { return capacity_; } - - /** Returns a pointer to the buffer data. */ - T* data() FMT_NOEXCEPT { return ptr_; } - - /** Returns a pointer to the buffer data. */ - const T* data() const FMT_NOEXCEPT { return ptr_; } - - /** - Resizes the buffer. If T is a POD type new elements may not be initialized. - */ - void resize(std::size_t new_size) { - reserve(new_size); - size_ = new_size; - } - - /** Clears this buffer. */ - void clear() { size_ = 0; } - - /** Reserves space to store at least *capacity* elements. */ - void reserve(std::size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); - } - - void push_back(const T& value) { - reserve(size_ + 1); - ptr_[size_++] = value; - } - - /** Appends data to the end of the buffer. 
*/ - template void append(const U* begin, const U* end); - - template T& operator[](I index) { return ptr_[index]; } - template const T& operator[](I index) const { - return ptr_[index]; - } -}; - -// A container-backed buffer. -template -class container_buffer : public buffer { - private: - Container& container_; - - protected: - void grow(std::size_t capacity) FMT_OVERRIDE { - container_.resize(capacity); - this->set(&container_[0], capacity); - } - - public: - explicit container_buffer(Container& c) - : buffer(c.size()), container_(c) {} -}; - -// Extracts a reference to the container from back_insert_iterator. -template -inline Container& get_container(std::back_insert_iterator it) { - using bi_iterator = std::back_insert_iterator; - struct accessor : bi_iterator { - accessor(bi_iterator iter) : bi_iterator(iter) {} - using bi_iterator::container; - }; - return *accessor(it).container; -} - -template -struct fallback_formatter { - fallback_formatter() = delete; -}; - -// Specifies if T has an enabled fallback_formatter specialization. -template -using has_fallback_formatter = - std::is_constructible>; - -template struct named_arg_base; -template struct named_arg; - -enum class type { - none_type, - named_arg_type, - // Integer types should go first, - int_type, - uint_type, - long_long_type, - ulong_long_type, - int128_type, - uint128_type, - bool_type, - char_type, - last_integer_type = char_type, - // followed by floating-point types. - float_type, - double_type, - long_double_type, - last_numeric_type = long_double_type, - cstring_type, - string_type, - pointer_type, - custom_type -}; - -// Maps core type T to the corresponding type enum constant. 
-template -struct type_constant : std::integral_constant {}; - -#define FMT_TYPE_CONSTANT(Type, constant) \ - template \ - struct type_constant \ - : std::integral_constant {} - -FMT_TYPE_CONSTANT(const named_arg_base&, named_arg_type); -FMT_TYPE_CONSTANT(int, int_type); -FMT_TYPE_CONSTANT(unsigned, uint_type); -FMT_TYPE_CONSTANT(long long, long_long_type); -FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); -FMT_TYPE_CONSTANT(int128_t, int128_type); -FMT_TYPE_CONSTANT(uint128_t, uint128_type); -FMT_TYPE_CONSTANT(bool, bool_type); -FMT_TYPE_CONSTANT(Char, char_type); -FMT_TYPE_CONSTANT(float, float_type); -FMT_TYPE_CONSTANT(double, double_type); -FMT_TYPE_CONSTANT(long double, long_double_type); -FMT_TYPE_CONSTANT(const Char*, cstring_type); -FMT_TYPE_CONSTANT(basic_string_view, string_type); -FMT_TYPE_CONSTANT(const void*, pointer_type); - -FMT_CONSTEXPR bool is_integral_type(type t) { - FMT_ASSERT(t != type::named_arg_type, "invalid argument type"); - return t > type::none_type && t <= type::last_integer_type; -} - -FMT_CONSTEXPR bool is_arithmetic_type(type t) { - FMT_ASSERT(t != type::named_arg_type, "invalid argument type"); - return t > type::none_type && t <= type::last_numeric_type; -} - -template struct string_value { - const Char* data; - std::size_t size; -}; - -template struct custom_value { - using parse_context = basic_format_parse_context; - const void* value; - void (*format)(const void* arg, - typename Context::parse_context_type& parse_ctx, Context& ctx); -}; - -// A formatting argument value. 
-template class value { - public: - using char_type = typename Context::char_type; - - union { - int int_value; - unsigned uint_value; - long long long_long_value; - unsigned long long ulong_long_value; - int128_t int128_value; - uint128_t uint128_value; - bool bool_value; - char_type char_value; - float float_value; - double double_value; - long double long_double_value; - const void* pointer; - string_value string; - custom_value custom; - const named_arg_base* named_arg; - }; - - FMT_CONSTEXPR value(int val = 0) : int_value(val) {} - FMT_CONSTEXPR value(unsigned val) : uint_value(val) {} - value(long long val) : long_long_value(val) {} - value(unsigned long long val) : ulong_long_value(val) {} - value(int128_t val) : int128_value(val) {} - value(uint128_t val) : uint128_value(val) {} - value(float val) : float_value(val) {} - value(double val) : double_value(val) {} - value(long double val) : long_double_value(val) {} - value(bool val) : bool_value(val) {} - value(char_type val) : char_value(val) {} - value(const char_type* val) { string.data = val; } - value(basic_string_view val) { - string.data = val.data(); - string.size = val.size(); - } - value(const void* val) : pointer(val) {} - - template value(const T& val) { - custom.value = &val; - // Get the formatter type through the context to allow different contexts - // have different extension points, e.g. `formatter` for `format` and - // `printf_formatter` for `printf`. - custom.format = format_custom_arg< - T, conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>>; - } - - value(const named_arg_base& val) { named_arg = &val; } - - private: - // Formats an argument of a custom type, such as a user-defined class. 
- template - static void format_custom_arg(const void* arg, - typename Context::parse_context_type& parse_ctx, - Context& ctx) { - Formatter f; - parse_ctx.advance_to(f.parse(parse_ctx)); - ctx.advance_to(f.format(*static_cast(arg), ctx)); - } -}; - -template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value); - -// To minimize the number of types we need to deal with, long is translated -// either to int or to long long depending on its size. -enum { long_short = sizeof(long) == sizeof(int) }; -using long_type = conditional_t; -using ulong_type = conditional_t; - -// Maps formatting arguments to core types. -template struct arg_mapper { - using char_type = typename Context::char_type; - - FMT_CONSTEXPR int map(signed char val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned char val) { return val; } - FMT_CONSTEXPR int map(short val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned short val) { return val; } - FMT_CONSTEXPR int map(int val) { return val; } - FMT_CONSTEXPR unsigned map(unsigned val) { return val; } - FMT_CONSTEXPR long_type map(long val) { return val; } - FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; } - FMT_CONSTEXPR long long map(long long val) { return val; } - FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; } - FMT_CONSTEXPR int128_t map(int128_t val) { return val; } - FMT_CONSTEXPR uint128_t map(uint128_t val) { return val; } - FMT_CONSTEXPR bool map(bool val) { return val; } - - template ::value)> - FMT_CONSTEXPR char_type map(T val) { - static_assert( - std::is_same::value || std::is_same::value, - "mixing character types is disallowed"); - return val; - } - - FMT_CONSTEXPR float map(float val) { return val; } - FMT_CONSTEXPR double map(double val) { return val; } - FMT_CONSTEXPR long double map(long double val) { return val; } - - FMT_CONSTEXPR const char_type* map(char_type* val) { return val; } - FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; } - template 
::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - static_assert(std::is_same>::value, - "mixing character types is disallowed"); - return to_string_view(val); - } - template , T>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - return basic_string_view(val); - } - template < - typename T, - FMT_ENABLE_IF( - std::is_constructible, T>::value && - !std::is_constructible, T>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR basic_string_view map(const T& val) { - return std_string_view(val); - } - FMT_CONSTEXPR const char* map(const signed char* val) { - static_assert(std::is_same::value, "invalid string type"); - return reinterpret_cast(val); - } - FMT_CONSTEXPR const char* map(const unsigned char* val) { - static_assert(std::is_same::value, "invalid string type"); - return reinterpret_cast(val); - } - - FMT_CONSTEXPR const void* map(void* val) { return val; } - FMT_CONSTEXPR const void* map(const void* val) { return val; } - FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; } - template FMT_CONSTEXPR int map(const T*) { - // Formatting of arbitrary pointers is disallowed. If you want to output - // a pointer cast it to "void *" or "const void *". In particular, this - // forbids formatting of "[const] volatile char *" which is printed as bool - // by iostreams. 
- static_assert(!sizeof(T), "formatting of non-void pointers is disallowed"); - return 0; - } - - template ::value && - !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR auto map(const T& val) - -> decltype(std::declval().map( - static_cast::type>(val))) { - return map(static_cast::type>(val)); - } - template ::value && !is_char::value && - (has_formatter::value || - has_fallback_formatter::value))> - FMT_CONSTEXPR const T& map(const T& val) { - return val; - } - - template - FMT_CONSTEXPR const named_arg_base& map( - const named_arg& val) { - auto arg = make_arg(val.value); - std::memcpy(val.data, &arg, sizeof(arg)); - return val; - } - - int map(...) { - constexpr bool formattable = sizeof(Context) == 0; - static_assert( - formattable, - "Cannot format argument. To make type T formattable provide a " - "formatter specialization: " - "https://fmt.dev/latest/api.html#formatting-user-defined-types"); - return 0; - } -}; - -// A type constant after applying arg_mapper. -template -using mapped_type_constant = - type_constant().map(std::declval())), - typename Context::char_type>; - -enum { packed_arg_bits = 5 }; -// Maximum number of arguments with packed types. -enum { max_packed_args = 63 / packed_arg_bits }; -enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; - -template class arg_map; -} // namespace internal - -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. 
-template class basic_format_arg { - private: - internal::value value_; - internal::type type_; - - template - friend FMT_CONSTEXPR basic_format_arg internal::make_arg( - const T& value); - - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - - friend class basic_format_args; - friend class internal::arg_map; - - using char_type = typename Context::char_type; - - public: - class handle { - public: - explicit handle(internal::custom_value custom) : custom_(custom) {} - - void format(typename Context::parse_context_type& parse_ctx, - Context& ctx) const { - custom_.format(custom_.value, parse_ctx, ctx); - } - - private: - internal::custom_value custom_; - }; - - FMT_CONSTEXPR basic_format_arg() : type_(internal::type::none_type) {} - - FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT { - return type_ != internal::type::none_type; - } - - internal::type type() const { return type_; } - - bool is_integral() const { return internal::is_integral_type(type_); } - bool is_arithmetic() const { return internal::is_arithmetic_type(type_); } -}; - -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. 
- \endrst - */ -template -FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)) { - using char_type = typename Context::char_type; - switch (arg.type_) { - case internal::type::none_type: - break; - case internal::type::named_arg_type: - FMT_ASSERT(false, "invalid argument type"); - break; - case internal::type::int_type: - return vis(arg.value_.int_value); - case internal::type::uint_type: - return vis(arg.value_.uint_value); - case internal::type::long_long_type: - return vis(arg.value_.long_long_value); - case internal::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); -#if FMT_USE_INT128 - case internal::type::int128_type: - return vis(arg.value_.int128_value); - case internal::type::uint128_type: - return vis(arg.value_.uint128_value); -#else - case internal::type::int128_type: - case internal::type::uint128_type: - break; -#endif - case internal::type::bool_type: - return vis(arg.value_.bool_value); - case internal::type::char_type: - return vis(arg.value_.char_value); - case internal::type::float_type: - return vis(arg.value_.float_value); - case internal::type::double_type: - return vis(arg.value_.double_value); - case internal::type::long_double_type: - return vis(arg.value_.long_double_value); - case internal::type::cstring_type: - return vis(arg.value_.string.data); - case internal::type::string_type: - return vis(basic_string_view(arg.value_.string.data, - arg.value_.string.size)); - case internal::type::pointer_type: - return vis(arg.value_.pointer); - case internal::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); -} - -namespace internal { -// A map from argument names to their values for named arguments. 
-template class arg_map { - private: - using char_type = typename Context::char_type; - - struct entry { - basic_string_view name; - basic_format_arg arg; - }; - - entry* map_; - unsigned size_; - - void push_back(value val) { - const auto& named = *val.named_arg; - map_[size_] = {named.name, named.template deserialize()}; - ++size_; - } - - public: - arg_map(const arg_map&) = delete; - void operator=(const arg_map&) = delete; - arg_map() : map_(nullptr), size_(0) {} - void init(const basic_format_args& args); - ~arg_map() { delete[] map_; } - - basic_format_arg find(basic_string_view name) const { - // The list is unsorted, so just return the first matching name. - for (entry *it = map_, *end = map_ + size_; it != end; ++it) { - if (it->name == name) return it->arg; - } - return {}; - } -}; - -// A type-erased reference to an std::locale to avoid heavy include. -class locale_ref { - private: - const void* locale_; // A type-erased pointer to std::locale. - - public: - locale_ref() : locale_(nullptr) {} - template explicit locale_ref(const Locale& loc); - - explicit operator bool() const FMT_NOEXCEPT { return locale_ != nullptr; } - - template Locale get() const; -}; - -template constexpr unsigned long long encode_types() { return 0; } - -template -constexpr unsigned long long encode_types() { - return static_cast(mapped_type_constant::value) | - (encode_types() << packed_arg_bits); -} - -template -FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { - basic_format_arg arg; - arg.type_ = mapped_type_constant::value; - arg.value_ = arg_mapper().map(value); - return arg; -} - -// The type template parameter is there to avoid an ODR violation when using -// a fallback formatter in one translation unit and an implicit conversion in -// another (not recommended). 
-template -inline value make_arg(const T& val) { - return arg_mapper().map(val); -} - -template -inline basic_format_arg make_arg(const T& value) { - return make_arg(value); -} - -template struct is_reference_wrapper : std::false_type {}; - -template -struct is_reference_wrapper> : std::true_type {}; - -class dynamic_arg_list { - // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for - // templates it doesn't complain about inability to deduce single translation - // unit for placing vtable. So storage_node_base is made a fake template. - template struct node { - virtual ~node() = default; - std::unique_ptr> next; - }; - - template struct typed_node : node<> { - T value; - - template - FMT_CONSTEXPR typed_node(const Arg& arg) : value(arg) {} - - template - FMT_CONSTEXPR typed_node(const basic_string_view& arg) - : value(arg.data(), arg.size()) {} - }; - - std::unique_ptr> head_; - - public: - template const T& push(const Arg& arg) { - auto node = std::unique_ptr>(new typed_node(arg)); - auto& value = node->value; - node->next = std::move(head_); - head_ = std::move(node); - return value; - } -}; -} // namespace internal - -// Formatting context. -template class basic_format_context { - public: - /** The character type for the output. */ - using char_type = Char; - - private: - OutputIt out_; - basic_format_args args_; - internal::arg_map map_; - internal::locale_ref loc_; - - public: - using iterator = OutputIt; - using format_arg = basic_format_arg; - using parse_context_type = basic_format_parse_context; - template using formatter_type = formatter; - - basic_format_context(const basic_format_context&) = delete; - void operator=(const basic_format_context&) = delete; - /** - Constructs a ``basic_format_context`` object. References to the arguments are - stored in the object so make sure they have appropriate lifetimes. 
- */ - basic_format_context(OutputIt out, - basic_format_args ctx_args, - internal::locale_ref loc = internal::locale_ref()) - : out_(out), args_(ctx_args), loc_(loc) {} - - format_arg arg(int id) const { return args_.get(id); } - - // Checks if manual indexing is used and returns the argument with the - // specified name. - format_arg arg(basic_string_view name); - - internal::error_handler error_handler() { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } - - // Returns an iterator to the beginning of the output range. - iterator out() { return out_; } - - // Advances the begin iterator to ``it``. - void advance_to(iterator it) { out_ = it; } - - internal::locale_ref locale() { return loc_; } -}; - -template -using buffer_context = - basic_format_context>, - Char>; -using format_context = buffer_context; -using wformat_context = buffer_context; - -/** - \rst - An array of references to arguments. It can be implicitly converted into - `~fmt::basic_format_args` for passing into type-erased formatting functions - such as `~fmt::vformat`. - \endrst - */ -template -class format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. - : public basic_format_args -#endif -{ - private: - static const size_t num_args = sizeof...(Args); - static const bool is_packed = num_args < internal::max_packed_args; - - using value_type = conditional_t, - basic_format_arg>; - - // If the arguments are not packed, add one more element to mark the end. - value_type data_[num_args + (num_args == 0 ? 1 : 0)]; - - friend class basic_format_args; - - public: - static constexpr unsigned long long types = - is_packed ? internal::encode_types() - : internal::is_unpacked_bit | num_args; - - format_arg_store(const Args&... 
args) - : -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - basic_format_args(*this), -#endif - data_{internal::make_arg< - is_packed, Context, - internal::mapped_type_constant::value>(args)...} { - } -}; - -/** - \rst - Constructs an `~fmt::format_arg_store` object that contains references to - arguments and can be implicitly converted to `~fmt::format_args`. `Context` - can be omitted in which case it defaults to `~fmt::context`. - See `~fmt::arg` for lifetime considerations. - \endrst - */ -template -inline format_arg_store make_format_args( - const Args&... args) { - return {args...}; -} - -/** - \rst - A dynamic version of `fmt::format_arg_store<>`. - It's equipped with a storage to potentially temporary objects which lifetime - could be shorter than the format arguments object. - - It can be implicitly converted into `~fmt::basic_format_args` for passing - into type-erased formatting functions such as `~fmt::vformat`. - \endrst - */ -template -class dynamic_format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. - : public basic_format_args -#endif -{ - private: - using char_type = typename Context::char_type; - - template struct need_copy { - static constexpr internal::type mapped_type = - internal::mapped_type_constant::value; - - enum { - value = !(internal::is_reference_wrapper::value || - std::is_same>::value || - std::is_same>::value || - (mapped_type != internal::type::cstring_type && - mapped_type != internal::type::string_type && - mapped_type != internal::type::custom_type && - mapped_type != internal::type::named_arg_type)) - }; - }; - - template - using stored_type = conditional_t::value, - std::basic_string, T>; - - // Storage of basic_format_arg must be contiguous. - std::vector> data_; - - // Storage of arguments not fitting into basic_format_arg must grow - // without relocation because items in data_ refer to it. 
- internal::dynamic_arg_list dynamic_args_; - - friend class basic_format_args; - - unsigned long long get_types() const { - return internal::is_unpacked_bit | data_.size(); - } - - template void emplace_arg(const T& arg) { - data_.emplace_back(internal::make_arg(arg)); - } - - public: - /** - \rst - Adds an argument into the dynamic store for later passing to a formating - function. - - Note that custom types and string types (but not string views!) are copied - into the store with dynamic memory (in addition to resizing vector). - - **Example**:: - - fmt::dynamic_format_arg_store store; - store.push_back(42); - store.push_back("abc"); - store.push_back(1.5f); - std::string result = fmt::vformat("{} and {} and {}", store); - \endrst - */ - template void push_back(const T& arg) { - static_assert( - !std::is_base_of, T>::value, - "named arguments are not supported yet"); - if (internal::const_check(need_copy::value)) - emplace_arg(dynamic_args_.push>(arg)); - else - emplace_arg(arg); - } - - /** - Adds a reference to the argument into the dynamic store for later passing to - a formating function. - */ - template void push_back(std::reference_wrapper arg) { - static_assert( - need_copy::value, - "objects of built-in types and string views are always copied"); - emplace_arg(arg.get()); - } -}; - -/** - \rst - A view of a collection of formatting arguments. To avoid lifetime issues it - should only be used as a parameter type in type-erased functions such as - ``vformat``:: - - void vlog(string_view format_str, format_args args); // OK - format_args args = make_format_args(42); // Error: dangling reference - \endrst - */ -template class basic_format_args { - public: - using size_type = int; - using format_arg = basic_format_arg; - - private: - // To reduce compiled code size per formatting function call, types of first - // max_packed_args arguments are passed in the types_ field. 
- unsigned long long types_; - union { - // If the number of arguments is less than max_packed_args, the argument - // values are stored in values_, otherwise they are stored in args_. - // This is done to reduce compiled code size as storing larger objects - // may require more code (at least on x86-64) even if the same amount of - // data is actually copied to stack. It saves ~10% on the bloat test. - const internal::value* values_; - const format_arg* args_; - }; - - bool is_packed() const { return (types_ & internal::is_unpacked_bit) == 0; } - - internal::type type(int index) const { - int shift = index * internal::packed_arg_bits; - unsigned int mask = (1 << internal::packed_arg_bits) - 1; - return static_cast((types_ >> shift) & mask); - } - - friend class internal::arg_map; - - void set_data(const internal::value* values) { values_ = values; } - void set_data(const format_arg* args) { args_ = args; } - - format_arg do_get(int index) const { - format_arg arg; - if (!is_packed()) { - auto num_args = max_size(); - if (index < num_args) arg = args_[index]; - return arg; - } - if (index > internal::max_packed_args) return arg; - arg.type_ = type(index); - if (arg.type_ == internal::type::none_type) return arg; - internal::value& val = arg.value_; - val = values_[index]; - return arg; - } - - public: - basic_format_args() : types_(0) {} - - /** - \rst - Constructs a `basic_format_args` object from `~fmt::format_arg_store`. - \endrst - */ - template - basic_format_args(const format_arg_store& store) - : types_(store.types) { - set_data(store.data_); - } - - /** - \rst - Constructs a `basic_format_args` object from - `~fmt::dynamic_format_arg_store`. - \endrst - */ - basic_format_args(const dynamic_format_arg_store& store) - : types_(store.get_types()) { - set_data(store.data_.data()); - } - - /** - \rst - Constructs a `basic_format_args` object from a dynamic set of arguments. 
- \endrst - */ - basic_format_args(const format_arg* args, int count) - : types_(internal::is_unpacked_bit | internal::to_unsigned(count)) { - set_data(args); - } - - /** Returns the argument at specified index. */ - format_arg get(int index) const { - format_arg arg = do_get(index); - if (arg.type_ == internal::type::named_arg_type) - arg = arg.value_.named_arg->template deserialize(); - return arg; - } - - int max_size() const { - unsigned long long max_packed = internal::max_packed_args; - return static_cast(is_packed() ? max_packed - : types_ & ~internal::is_unpacked_bit); - } -}; - -/** An alias to ``basic_format_args``. */ -// It is a separate type rather than an alias to make symbols readable. -struct format_args : basic_format_args { - template - format_args(Args&&... args) - : basic_format_args(static_cast(args)...) {} -}; -struct wformat_args : basic_format_args { - template - wformat_args(Args&&... args) - : basic_format_args(static_cast(args)...) {} -}; - -template struct is_contiguous : std::false_type {}; - -template -struct is_contiguous> : std::true_type {}; - -template -struct is_contiguous> : std::true_type {}; - -namespace internal { - -template -struct is_contiguous_back_insert_iterator : std::false_type {}; -template -struct is_contiguous_back_insert_iterator> - : is_contiguous {}; - -template struct named_arg_base { - basic_string_view name; - - // Serialized value. 
- mutable char data[sizeof(basic_format_arg>)]; - - named_arg_base(basic_string_view nm) : name(nm) {} - - template basic_format_arg deserialize() const { - basic_format_arg arg; - std::memcpy(&arg, data, sizeof(basic_format_arg)); - return arg; - } -}; - -struct view {}; - -template -struct named_arg : view, named_arg_base { - const T& value; - - named_arg(basic_string_view name, const T& val) - : named_arg_base(name), value(val) {} -}; - -template ::value)> -inline void check_format_string(const S&) { -#if defined(FMT_ENFORCE_COMPILE_STRING) - static_assert(is_compile_string::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to " - "utilize FMT_STRING() or fmt()."); -#endif -} -template ::value)> -void check_format_string(S); - -template struct bool_pack; -template -using all_true = - std::is_same, bool_pack>; - -template > -inline format_arg_store, remove_reference_t...> -make_args_checked(const S& format_str, - const remove_reference_t&... args) { - static_assert( - all_true<(!std::is_base_of>::value || - !std::is_reference::value)...>::value, - "passing views as lvalues is disallowed"); - check_format_string(format_str); - return {args...}; -} - -template -std::basic_string vformat( - basic_string_view format_str, - basic_format_args>> args); - -template -typename buffer_context::iterator vformat_to( - buffer& buf, basic_string_view format_str, - basic_format_args>> args); - -template ::value)> -inline void vprint_mojibake(std::FILE*, basic_string_view, const Args&) {} - -FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); -#ifndef _WIN32 -inline void vprint_mojibake(std::FILE*, string_view, format_args) {} -#endif -} // namespace internal - -/** - \rst - Returns a named argument to be used in a formatting function. It should only - be used in a call to a formatting function. 
- - **Example**:: - - fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); - \endrst - */ -template > -inline internal::named_arg arg(const S& name, const T& arg) { - static_assert(internal::is_string::value, ""); - return {name, arg}; -} - -// Disable nested named arguments, e.g. ``arg("a", arg("b", 42))``. -template -void arg(S, internal::named_arg) = delete; - -/** Formats a string and writes the output to ``out``. */ -// GCC 8 and earlier cannot handle std::back_insert_iterator with -// vformat_to(...) overload, so SFINAE on iterator type instead. -template , - FMT_ENABLE_IF( - internal::is_contiguous_back_insert_iterator::value)> -OutputIt vformat_to( - OutputIt out, const S& format_str, - basic_format_args>> args) { - using container = remove_reference_t; - internal::container_buffer buf((internal::get_container(out))); - internal::vformat_to(buf, to_string_view(format_str), args); - return out; -} - -template ::value&& internal::is_string::value)> -inline std::back_insert_iterator format_to( - std::back_insert_iterator out, const S& format_str, - Args&&... args) { - return vformat_to(out, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -template > -inline std::basic_string vformat( - const S& format_str, - basic_format_args>> args) { - return internal::vformat(to_string_view(format_str), args); -} - -/** - \rst - Formats arguments and returns the result as a string. - - **Example**:: - - #include - std::string message = fmt::format("The answer is {}", 42); - \endrst -*/ -// Pass char_t as a default template parameter instead of using -// std::basic_string> to reduce the symbol size. -template > -inline std::basic_string format(const S& format_str, Args&&... 
args) { - return internal::vformat( - to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -FMT_API void vprint(string_view, format_args); -FMT_API void vprint(std::FILE*, string_view, format_args); - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes the - output to the file ``f``. Strings are assumed to be Unicode-encoded unless the - ``FMT_UNICODE`` macro is set to 0. - - **Example**:: - - fmt::print(stderr, "Don't {}!", "panic"); - \endrst - */ -template > -inline void print(std::FILE* f, const S& format_str, Args&&... args) { - return internal::is_unicode() - ? vprint(f, to_string_view(format_str), - internal::make_args_checked(format_str, args...)) - : internal::vprint_mojibake( - f, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} - -/** - \rst - Formats ``args`` according to specifications in ``format_str`` and writes - the output to ``stdout``. Strings are assumed to be Unicode-encoded unless - the ``FMT_UNICODE`` macro is set to 0. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", 1.23); - \endrst - */ -template > -inline void print(const S& format_str, Args&&... args) { - return internal::is_unicode() - ? vprint(to_string_view(format_str), - internal::make_args_checked(format_str, args...)) - : internal::vprint_mojibake( - stdout, to_string_view(format_str), - internal::make_args_checked(format_str, args...)); -} -FMT_END_NAMESPACE - -#endif // FMT_CORE_H_ diff --git a/kernel_generator/third_party/fmt/include/fmt/format-inl.h b/kernel_generator/third_party/fmt/include/fmt/format-inl.h deleted file mode 100644 index f632714..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/format-inl.h +++ /dev/null @@ -1,1403 +0,0 @@ -// Formatting library for C++ - implementation -// -// Copyright (c) 2012 - 2016, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. 
- -#ifndef FMT_FORMAT_INL_H_ -#define FMT_FORMAT_INL_H_ - -#include -#include -#include -#include -#include -#include // for std::memmove -#include - -#include "format.h" -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) -# include -#endif - -#ifdef _WIN32 -# include -# include -#endif - -#ifdef _MSC_VER -# pragma warning(push) -# pragma warning(disable : 4702) // unreachable code -#endif - -// Dummy implementations of strerror_r and strerror_s called if corresponding -// system functions are not available. -inline fmt::internal::null<> strerror_r(int, char*, ...) { return {}; } -inline fmt::internal::null<> strerror_s(char*, std::size_t, ...) { return {}; } - -FMT_BEGIN_NAMESPACE -namespace internal { - -FMT_FUNC void assert_fail(const char* file, int line, const char* message) { - print(stderr, "{}:{}: assertion failed: {}", file, line, message); - std::abort(); -} - -#ifndef _MSC_VER -# define FMT_SNPRINTF snprintf -#else // _MSC_VER -inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) { - va_list args; - va_start(args, format); - int result = vsnprintf_s(buffer, size, _TRUNCATE, format, args); - va_end(args); - return result; -} -# define FMT_SNPRINTF fmt_snprintf -#endif // _MSC_VER - -// A portable thread-safe version of strerror. -// Sets buffer to point to a string describing the error code. -// This can be either a pointer to a string stored in buffer, -// or a pointer to some static immutable string. -// Returns one of the following values: -// 0 - success -// ERANGE - buffer is not large enough to store the error message -// other - failure -// Buffer should be at least of size 1. -FMT_FUNC int safe_strerror(int error_code, char*& buffer, - std::size_t buffer_size) FMT_NOEXCEPT { - FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer"); - - class dispatcher { - private: - int error_code_; - char*& buffer_; - std::size_t buffer_size_; - - // A noop assignment operator to avoid bogus warnings. 
- void operator=(const dispatcher&) {} - - // Handle the result of XSI-compliant version of strerror_r. - int handle(int result) { - // glibc versions before 2.13 return result in errno. - return result == -1 ? errno : result; - } - - // Handle the result of GNU-specific version of strerror_r. - FMT_MAYBE_UNUSED - int handle(char* message) { - // If the buffer is full then the message is probably truncated. - if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1) - return ERANGE; - buffer_ = message; - return 0; - } - - // Handle the case when strerror_r is not available. - FMT_MAYBE_UNUSED - int handle(internal::null<>) { - return fallback(strerror_s(buffer_, buffer_size_, error_code_)); - } - - // Fallback to strerror_s when strerror_r is not available. - FMT_MAYBE_UNUSED - int fallback(int result) { - // If the buffer is full then the message is probably truncated. - return result == 0 && strlen(buffer_) == buffer_size_ - 1 ? ERANGE - : result; - } - -#if !FMT_MSC_VER - // Fallback to strerror if strerror_r and strerror_s are not available. - int fallback(internal::null<>) { - errno = 0; - buffer_ = strerror(error_code_); - return errno; - } -#endif - - public: - dispatcher(int err_code, char*& buf, std::size_t buf_size) - : error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {} - - int run() { return handle(strerror_r(error_code_, buffer_, buffer_size_)); } - }; - return dispatcher(error_code, buffer, buffer_size).run(); -} - -FMT_FUNC void format_error_code(internal::buffer& out, int error_code, - string_view message) FMT_NOEXCEPT { - // Report error code making sure that the output fits into - // inline_buffer_size to avoid dynamic memory allocation and potential - // bad_alloc. - out.resize(0); - static const char SEP[] = ": "; - static const char ERROR_STR[] = "error "; - // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. 
- std::size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2; - auto abs_value = static_cast>(error_code); - if (internal::is_negative(error_code)) { - abs_value = 0 - abs_value; - ++error_code_size; - } - error_code_size += internal::to_unsigned(internal::count_digits(abs_value)); - internal::writer w(out); - if (message.size() <= inline_buffer_size - error_code_size) { - w.write(message); - w.write(SEP); - } - w.write(ERROR_STR); - w.write(error_code); - assert(out.size() <= inline_buffer_size); -} - -FMT_FUNC void report_error(format_func func, int error_code, - string_view message) FMT_NOEXCEPT { - memory_buffer full_message; - func(full_message, error_code, message); - // Don't use fwrite_fully because the latter may throw. - (void)std::fwrite(full_message.data(), full_message.size(), 1, stderr); - std::fputc('\n', stderr); -} - -// A wrapper around fwrite that throws on error. -FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); - if (written < count) FMT_THROW(system_error(errno, "cannot write to file")); -} -} // namespace internal - -#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) -namespace internal { - -template -locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { - static_assert(std::is_same::value, ""); -} - -template Locale locale_ref::get() const { - static_assert(std::is_same::value, ""); - return locale_ ? 
*static_cast(locale_) : std::locale(); -} - -template FMT_FUNC std::string grouping_impl(locale_ref loc) { - return std::use_facet>(loc.get()).grouping(); -} -template FMT_FUNC Char thousands_sep_impl(locale_ref loc) { - return std::use_facet>(loc.get()) - .thousands_sep(); -} -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { - return std::use_facet>(loc.get()) - .decimal_point(); -} -} // namespace internal -#else -template -FMT_FUNC std::string internal::grouping_impl(locale_ref) { - return "\03"; -} -template -FMT_FUNC Char internal::thousands_sep_impl(locale_ref) { - return FMT_STATIC_THOUSANDS_SEPARATOR; -} -template -FMT_FUNC Char internal::decimal_point_impl(locale_ref) { - return '.'; -} -#endif - -FMT_API FMT_FUNC format_error::~format_error() FMT_NOEXCEPT = default; -FMT_API FMT_FUNC system_error::~system_error() FMT_NOEXCEPT = default; - -FMT_FUNC void system_error::init(int err_code, string_view format_str, - format_args args) { - error_code_ = err_code; - memory_buffer buffer; - format_system_error(buffer, err_code, vformat(format_str, args)); - std::runtime_error& base = *this; - base = std::runtime_error(to_string(buffer)); -} - -namespace internal { - -template <> FMT_FUNC int count_digits<4>(internal::fallback_uintptr n) { - // fallback_uintptr is always stored in little endian. - int i = static_cast(sizeof(void*)) - 1; - while (i > 0 && n.value[i] == 0) --i; - auto char_digits = std::numeric_limits::digits / 4; - return i >= 0 ? 
i * char_digits + count_digits<4, unsigned>(n.value[i]) : 1; -} - -template -const char basic_data::digits[] = - "0001020304050607080910111213141516171819" - "2021222324252627282930313233343536373839" - "4041424344454647484950515253545556575859" - "6061626364656667686970717273747576777879" - "8081828384858687888990919293949596979899"; - -template -const char basic_data::hex_digits[] = "0123456789abcdef"; - -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 - -template -const uint64_t basic_data::powers_of_10_64[] = { - 1, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -template -const uint32_t basic_data::zero_or_powers_of_10_32[] = {0, - FMT_POWERS_OF_10(1)}; - -template -const uint64_t basic_data::zero_or_powers_of_10_64[] = { - 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; - -// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. -// These are generated by support/compute-powers.py. 
-template -const uint64_t basic_data::pow10_significands[] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, -}; - -// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding -// to significands above. 
-template -const int16_t basic_data::pow10_exponents[] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; - -template -const char basic_data::foreground_color[] = "\x1b[38;2;"; -template -const char basic_data::background_color[] = "\x1b[48;2;"; -template const char basic_data::reset_color[] = "\x1b[0m"; -template const wchar_t basic_data::wreset_color[] = L"\x1b[0m"; -template const char basic_data::signs[] = {0, '-', '+', ' '}; - -template struct bits { - static FMT_CONSTEXPR_DECL const int value = - static_cast(sizeof(T) * std::numeric_limits::digits); -}; - -class fp; -template fp normalize(fp value); - -// Lower (upper) boundary is a value half way between a floating-point value -// and its predecessor (successor). Boundaries have the same exponent as the -// value so only significands are stored. -struct boundaries { - uint64_t lower; - uint64_t upper; -}; - -// A handmade floating-point number f * pow(2, e). -class fp { - private: - using significand_type = uint64_t; - - public: - significand_type f; - int e; - - // All sizes are in bits. - // Subtract 1 to account for an implicit most significant bit in the - // normalized form. - static FMT_CONSTEXPR_DECL const int double_significand_size = - std::numeric_limits::digits - 1; - static FMT_CONSTEXPR_DECL const uint64_t implicit_bit = - 1ULL << double_significand_size; - static FMT_CONSTEXPR_DECL const int significand_size = - bits::value; - - fp() : f(0), e(0) {} - fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {} - - // Constructs fp from an IEEE754 double. 
It is a template to prevent compile - // errors on platforms where double is not IEEE754. - template explicit fp(Double d) { assign(d); } - - // Assigns d to this and return true iff predecessor is closer than successor. - template - bool assign(Double d) { - // Assume double is in the format [sign][exponent][significand]. - using limits = std::numeric_limits; - const int exponent_size = - bits::value - double_significand_size - 1; // -1 for sign - const uint64_t significand_mask = implicit_bit - 1; - const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask; - const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1; - auto u = bit_cast(d); - f = u & significand_mask; - int biased_e = - static_cast((u & exponent_mask) >> double_significand_size); - // Predecessor is closer if d is a normalized power of 2 (f == 0) other than - // the smallest normalized number (biased_e > 1). - bool is_predecessor_closer = f == 0 && biased_e > 1; - if (biased_e != 0) - f += implicit_bit; - else - biased_e = 1; // Subnormals use biased exponent 1 (min exponent). - e = biased_e - exponent_bias - double_significand_size; - return is_predecessor_closer; - } - - template - bool assign(Double) { - *this = fp(); - return false; - } - - // Assigns d to this together with computing lower and upper boundaries, - // where a boundary is a value half way between the number and its predecessor - // (lower) or successor (upper). The upper boundary is normalized and lower - // has the same exponent but may be not normalized. - template boundaries assign_with_boundaries(Double d) { - bool is_lower_closer = assign(d); - fp lower = - is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1); - // 1 in normalize accounts for the exponent shift above. 
- fp upper = normalize<1>(fp((f << 1) + 1, e - 1)); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } - - template boundaries assign_float_with_boundaries(Double d) { - assign(d); - constexpr int min_normal_e = std::numeric_limits::min_exponent - - std::numeric_limits::digits; - significand_type half_ulp = 1 << (std::numeric_limits::digits - - std::numeric_limits::digits - 1); - if (min_normal_e > e) half_ulp <<= min_normal_e - e; - fp upper = normalize<0>(fp(f + half_ulp, e)); - fp lower = fp( - f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e); - lower.f <<= lower.e - upper.e; - return boundaries{lower.f, upper.f}; - } -}; - -// Normalizes the value converted from double and multiplied by (1 << SHIFT). -template fp normalize(fp value) { - // Handle subnormals. - const auto shifted_implicit_bit = fp::implicit_bit << SHIFT; - while ((value.f & shifted_implicit_bit) == 0) { - value.f <<= 1; - --value.e; - } - // Subtract 1 to account for hidden bit. - const auto offset = - fp::significand_size - fp::double_significand_size - SHIFT - 1; - value.f <<= offset; - value.e -= offset; - return value; -} - -inline bool operator==(fp x, fp y) { return x.f == y.f && x.e == y.e; } - -// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { -#if FMT_USE_INT128 - auto product = static_cast<__uint128_t>(lhs) * rhs; - auto f = static_cast(product >> 64); - return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; -#else - // Multiply 32-bit parts of significands. - uint64_t mask = (1ULL << 32) - 1; - uint64_t a = lhs >> 32, b = lhs & mask; - uint64_t c = rhs >> 32, d = rhs & mask; - uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; - // Compute mid 64-bit of result and round. 
- uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); - return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); -#endif -} - -inline fp operator*(fp x, fp y) { return {multiply(x.f, y.f), x.e + y.e + 64}; } - -// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its -// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. -inline fp get_cached_power(int min_exponent, int& pow10_exponent) { - const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10)) - int index = static_cast( - ((min_exponent + fp::significand_size - 1) * one_over_log2_10 + - ((int64_t(1) << 32) - 1)) // ceil - >> 32 // arithmetic shift - ); - // Decimal exponent of the first (smallest) cached power of 10. - const int first_dec_exp = -348; - // Difference between 2 consecutive decimal exponents in cached powers of 10. - const int dec_exp_step = 8; - index = (index - first_dec_exp - 1) / dec_exp_step + 1; - pow10_exponent = first_dec_exp + index * dec_exp_step; - return {data::pow10_significands[index], data::pow10_exponents[index]}; -} - -// A simple accumulator to hold the sums of terms in bigint::square if uint128_t -// is not available. -struct accumulator { - uint64_t lower; - uint64_t upper; - - accumulator() : lower(0), upper(0) {} - explicit operator uint32_t() const { return static_cast(lower); } - - void operator+=(uint64_t n) { - lower += n; - if (lower < n) ++upper; - } - void operator>>=(int shift) { - assert(shift == 32); - (void)shift; - lower = (upper << 32) | (lower >> 32); - upper >>= 32; - } -}; - -class bigint { - private: - // A bigint is stored as an array of bigits (big digits), with bigit at index - // 0 being the least significant one. 
- using bigit = uint32_t; - using double_bigit = uint64_t; - enum { bigits_capacity = 32 }; - basic_memory_buffer bigits_; - int exp_; - - bigit operator[](int index) const { return bigits_[to_unsigned(index)]; } - bigit& operator[](int index) { return bigits_[to_unsigned(index)]; } - - static FMT_CONSTEXPR_DECL const int bigit_bits = bits::value; - - friend struct formatter; - - void subtract_bigits(int index, bigit other, bigit& borrow) { - auto result = static_cast((*this)[index]) - other - borrow; - (*this)[index] = static_cast(result); - borrow = static_cast(result >> (bigit_bits * 2 - 1)); - } - - void remove_leading_zeros() { - int num_bigits = static_cast(bigits_.size()) - 1; - while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; - bigits_.resize(to_unsigned(num_bigits + 1)); - } - - // Computes *this -= other assuming aligned bigints and *this >= other. - void subtract_aligned(const bigint& other) { - FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); - FMT_ASSERT(compare(*this, other) >= 0, ""); - bigit borrow = 0; - int i = other.exp_ - exp_; - for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) { - subtract_bigits(i, other.bigits_[j], borrow); - } - while (borrow > 0) subtract_bigits(i, 0, borrow); - remove_leading_zeros(); - } - - void multiply(uint32_t value) { - const double_bigit wide_value = value; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - double_bigit result = bigits_[i] * wide_value + carry; - bigits_[i] = static_cast(result); - carry = static_cast(result >> bigit_bits); - } - if (carry != 0) bigits_.push_back(carry); - } - - void multiply(uint64_t value) { - const bigit mask = ~bigit(0); - const double_bigit lower = value & mask; - const double_bigit upper = value >> bigit_bits; - double_bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - double_bigit result = bigits_[i] * lower + (carry & mask); - carry = - bigits_[i] * upper + (result >> bigit_bits) + (carry >> 
bigit_bits); - bigits_[i] = static_cast(result); - } - while (carry != 0) { - bigits_.push_back(carry & mask); - carry >>= bigit_bits; - } - } - - public: - bigint() : exp_(0) {} - explicit bigint(uint64_t n) { assign(n); } - ~bigint() { assert(bigits_.capacity() <= bigits_capacity); } - - bigint(const bigint&) = delete; - void operator=(const bigint&) = delete; - - void assign(const bigint& other) { - bigits_.resize(other.bigits_.size()); - auto data = other.bigits_.data(); - std::copy(data, data + other.bigits_.size(), bigits_.data()); - exp_ = other.exp_; - } - - void assign(uint64_t n) { - size_t num_bigits = 0; - do { - bigits_[num_bigits++] = n & ~bigit(0); - n >>= bigit_bits; - } while (n != 0); - bigits_.resize(num_bigits); - exp_ = 0; - } - - int num_bigits() const { return static_cast(bigits_.size()) + exp_; } - - bigint& operator<<=(int shift) { - assert(shift >= 0); - exp_ += shift / bigit_bits; - shift %= bigit_bits; - if (shift == 0) return *this; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - bigit c = bigits_[i] >> (bigit_bits - shift); - bigits_[i] = (bigits_[i] << shift) + carry; - carry = c; - } - if (carry != 0) bigits_.push_back(carry); - return *this; - } - - template bigint& operator*=(Int value) { - FMT_ASSERT(value > 0, ""); - multiply(uint32_or_64_or_128_t(value)); - return *this; - } - - friend int compare(const bigint& lhs, const bigint& rhs) { - int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); - if (num_lhs_bigits != num_rhs_bigits) - return num_lhs_bigits > num_rhs_bigits ? 1 : -1; - int i = static_cast(lhs.bigits_.size()) - 1; - int j = static_cast(rhs.bigits_.size()) - 1; - int end = i - j; - if (end < 0) end = 0; - for (; i >= end; --i, --j) { - bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; - if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; - } - if (i != j) return i > j ? 1 : -1; - return 0; - } - - // Returns compare(lhs1 + lhs2, rhs). 
- friend int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { - int max_lhs_bigits = (std::max)(lhs1.num_bigits(), lhs2.num_bigits()); - int num_rhs_bigits = rhs.num_bigits(); - if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; - if (max_lhs_bigits > num_rhs_bigits) return 1; - auto get_bigit = [](const bigint& n, int i) -> bigit { - return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; - }; - double_bigit borrow = 0; - int min_exp = (std::min)((std::min)(lhs1.exp_, lhs2.exp_), rhs.exp_); - for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { - double_bigit sum = - static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); - bigit rhs_bigit = get_bigit(rhs, i); - if (sum > rhs_bigit + borrow) return 1; - borrow = rhs_bigit + borrow - sum; - if (borrow > 1) return -1; - borrow <<= bigit_bits; - } - return borrow != 0 ? -1 : 0; - } - - // Assigns pow(10, exp) to this bigint. - void assign_pow10(int exp) { - assert(exp >= 0); - if (exp == 0) return assign(1); - // Find the top bit. - int bitmask = 1; - while (exp >= bitmask) bitmask <<= 1; - bitmask >>= 1; - // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by - // repeated squaring and multiplication. - assign(5); - bitmask >>= 1; - while (bitmask != 0) { - square(); - if ((exp & bitmask) != 0) *this *= 5; - bitmask >>= 1; - } - *this <<= exp; // Multiply by pow(2, exp) by shifting. - } - - void square() { - basic_memory_buffer n(std::move(bigits_)); - int num_bigits = static_cast(bigits_.size()); - int num_result_bigits = 2 * num_bigits; - bigits_.resize(to_unsigned(num_result_bigits)); - using accumulator_t = conditional_t; - auto sum = accumulator_t(); - for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { - // Compute bigit at position bigit_index of the result by adding - // cross-product terms n[i] * n[j] such that i + j == bigit_index. 
- for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { - // Most terms are multiplied twice which can be optimized in the future. - sum += static_cast(n[i]) * n[j]; - } - (*this)[bigit_index] = static_cast(sum); - sum >>= bits::value; // Compute the carry. - } - // Do the same for the top half. - for (int bigit_index = num_bigits; bigit_index < num_result_bigits; - ++bigit_index) { - for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) - sum += static_cast(n[i++]) * n[j--]; - (*this)[bigit_index] = static_cast(sum); - sum >>= bits::value; - } - --num_result_bigits; - remove_leading_zeros(); - exp_ *= 2; - } - - // Divides this bignum by divisor, assigning the remainder to this and - // returning the quotient. - int divmod_assign(const bigint& divisor) { - FMT_ASSERT(this != &divisor, ""); - if (compare(*this, divisor) < 0) return 0; - int num_bigits = static_cast(bigits_.size()); - FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); - int exp_difference = exp_ - divisor.exp_; - if (exp_difference > 0) { - // Align bigints by adding trailing zeros to simplify subtraction. - bigits_.resize(to_unsigned(num_bigits + exp_difference)); - for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) - bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); - exp_ -= exp_difference; - } - int quotient = 0; - do { - subtract_aligned(divisor); - ++quotient; - } while (compare(*this, divisor) >= 0); - return quotient; - } -}; - -enum class round_direction { unknown, up, down }; - -// Given the divisor (normally a power of 10), the remainder = v % divisor for -// some number v and the error, returns whether v should be rounded up, down, or -// whether the rounding direction can't be determined due to error. -// error should be less than divisor / 2. 
-inline round_direction get_round_direction(uint64_t divisor, uint64_t remainder, - uint64_t error) { - FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. - FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. - FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. - // Round down if (remainder + error) * 2 <= divisor. - if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) - return round_direction::down; - // Round up if (remainder - error) * 2 >= divisor. - if (remainder >= error && - remainder - error >= divisor - (remainder - error)) { - return round_direction::up; - } - return round_direction::unknown; -} - -namespace digits { -enum result { - more, // Generate more digits. - done, // Done generating digits. - error // Digit generation cancelled due to an error. -}; -} - -// A version of count_digits optimized for grisu_gen_digits. -inline int grisu_count_digits(uint32_t n) { - if (n < 10) return 1; - if (n < 100) return 2; - if (n < 1000) return 3; - if (n < 10000) return 4; - if (n < 100000) return 5; - if (n < 1000000) return 6; - if (n < 10000000) return 7; - if (n < 100000000) return 8; - if (n < 1000000000) return 9; - return 10; -} - -// Generates output using the Grisu digit-gen algorithm. -// error: the size of the region (lower, upper) outside of which numbers -// definitely do not round to value (Delta in Grisu3). -template -FMT_ALWAYS_INLINE digits::result grisu_gen_digits(fp value, uint64_t error, - int& exp, Handler& handler) { - const fp one(1ULL << -value.e, value.e); - // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be - // zero because it contains a product of two 64-bit numbers with MSB set (due - // to normalization) - 1, shifted right by at most 60 bits. 
- auto integral = static_cast(value.f >> -one.e); - FMT_ASSERT(integral != 0, ""); - FMT_ASSERT(integral == value.f >> -one.e, ""); - // The fractional part of scaled value (p2 in Grisu) c = value % one. - uint64_t fractional = value.f & (one.f - 1); - exp = grisu_count_digits(integral); // kappa in Grisu. - // Divide by 10 to prevent overflow. - auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e, - value.f / 10, error * 10, exp); - if (result != digits::more) return result; - // Generate digits for the integral part. This can produce up to 10 digits. - do { - uint32_t digit = 0; - auto divmod_integral = [&](uint32_t divisor) { - digit = integral / divisor; - integral %= divisor; - }; - // This optimization by Milo Yip reduces the number of integer divisions by - // one per iteration. - switch (exp) { - case 10: - divmod_integral(1000000000); - break; - case 9: - divmod_integral(100000000); - break; - case 8: - divmod_integral(10000000); - break; - case 7: - divmod_integral(1000000); - break; - case 6: - divmod_integral(100000); - break; - case 5: - divmod_integral(10000); - break; - case 4: - divmod_integral(1000); - break; - case 3: - divmod_integral(100); - break; - case 2: - divmod_integral(10); - break; - case 1: - digit = integral; - integral = 0; - break; - default: - FMT_ASSERT(false, "invalid number of digits"); - } - --exp; - uint64_t remainder = - (static_cast(integral) << -one.e) + fractional; - result = handler.on_digit(static_cast('0' + digit), - data::powers_of_10_64[exp] << -one.e, remainder, - error, exp, true); - if (result != digits::more) return result; - } while (exp > 0); - // Generate digits for the fractional part. 
- for (;;) { - fractional *= 10; - error *= 10; - char digit = - static_cast('0' + static_cast(fractional >> -one.e)); - fractional &= one.f - 1; - --exp; - result = handler.on_digit(digit, one.f, fractional, error, exp, false); - if (result != digits::more) return result; - } -} - -// The fixed precision digit handler. -struct fixed_handler { - char* buf; - int size; - int precision; - int exp10; - bool fixed; - - digits::result on_start(uint64_t divisor, uint64_t remainder, uint64_t error, - int& exp) { - // Non-fixed formats require at least one digit and no precision adjustment. - if (!fixed) return digits::more; - // Adjust fixed precision by exponent because it is relative to decimal - // point. - precision += exp + exp10; - // Check if precision is satisfied just by leading zeros, e.g. - // format("{:.2f}", 0.001) gives "0.00" without generating any digits. - if (precision > 0) return digits::more; - if (precision < 0) return digits::done; - auto dir = get_round_direction(divisor, remainder, error); - if (dir == round_direction::unknown) return digits::error; - buf[size++] = dir == round_direction::up ? '1' : '0'; - return digits::done; - } - - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int, bool integral) { - FMT_ASSERT(remainder < divisor, ""); - buf[size++] = digit; - if (size < precision) return digits::more; - if (!integral) { - // Check if error * 2 < divisor with overflow prevention. - // The check is not needed for the integral part because error = 1 - // and divisor > (1 << 32) there. - if (error >= divisor || error >= divisor - error) return digits::error; - } else { - FMT_ASSERT(error == 1 && divisor > 2, ""); - } - auto dir = get_round_direction(divisor, remainder, error); - if (dir != round_direction::up) - return dir == round_direction::down ? 
digits::done : digits::error; - ++buf[size - 1]; - for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] > '9') { - buf[0] = '1'; - buf[size++] = '0'; - } - return digits::done; - } -}; - -// The shortest representation digit handler. -struct grisu_shortest_handler { - char* buf; - int size; - // Distance between scaled value and upper bound (wp_W in Grisu3). - uint64_t diff; - - digits::result on_start(uint64_t, uint64_t, uint64_t, int&) { - return digits::more; - } - - // Decrement the generated number approaching value from above. - void round(uint64_t d, uint64_t divisor, uint64_t& remainder, - uint64_t error) { - while ( - remainder < d && error - remainder >= divisor && - (remainder + divisor < d || d - remainder >= remainder + divisor - d)) { - --buf[size - 1]; - remainder += divisor; - } - } - - // Implements Grisu's round_weed. - digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder, - uint64_t error, int exp, bool integral) { - buf[size++] = digit; - if (remainder >= error) return digits::more; - uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp]; - uint64_t up = (diff - 1) * unit; // wp_Wup - round(up, divisor, remainder, error); - uint64_t down = (diff + 1) * unit; // wp_Wdown - if (remainder < down && error - remainder >= divisor && - (remainder + divisor < down || - down - remainder > remainder + divisor - down)) { - return digits::error; - } - return 2 * unit <= remainder && remainder <= error - 4 * unit - ? digits::done - : digits::error; - } -}; - -// Formats value using a variation of the Fixed-Precision Positive -// Floating-Point Printout ((FPP)^2) algorithm by Steele & White: -// https://fmt.dev/p372-steele.pdf. -template -void fallback_format(Double d, buffer& buf, int& exp10) { - bigint numerator; // 2 * R in (FPP)^2. - bigint denominator; // 2 * S in (FPP)^2. - // lower and upper are differences between value and corresponding boundaries. 
- bigint lower; // (M^- in (FPP)^2). - bigint upper_store; // upper's value if different from lower. - bigint* upper = nullptr; // (M^+ in (FPP)^2). - fp value; - // Shift numerator and denominator by an extra bit or two (if lower boundary - // is closer) to make lower and upper integers. This eliminates multiplication - // by 2 during later computations. - // TODO: handle float - int shift = value.assign(d) ? 2 : 1; - uint64_t significand = value.f << shift; - if (value.e >= 0) { - numerator.assign(significand); - numerator <<= value.e; - lower.assign(1); - lower <<= value.e; - if (shift != 1) { - upper_store.assign(1); - upper_store <<= value.e + 1; - upper = &upper_store; - } - denominator.assign_pow10(exp10); - denominator <<= 1; - } else if (exp10 < 0) { - numerator.assign_pow10(-exp10); - lower.assign(numerator); - if (shift != 1) { - upper_store.assign(numerator); - upper_store <<= 1; - upper = &upper_store; - } - numerator *= significand; - denominator.assign(1); - denominator <<= shift - value.e; - } else { - numerator.assign(significand); - denominator.assign_pow10(exp10); - denominator <<= shift - value.e; - lower.assign(1); - if (shift != 1) { - upper_store.assign(1ULL << 1); - upper = &upper_store; - } - } - if (!upper) upper = &lower; - // Invariant: value == (numerator / denominator) * pow(10, exp10). - bool even = (value.f & 1) == 0; - int num_digits = 0; - char* data = buf.data(); - for (;;) { - int digit = numerator.divmod_assign(denominator); - bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. - // numerator + upper >[=] pow10: - bool high = add_compare(numerator, *upper, denominator) + even > 0; - data[num_digits++] = static_cast('0' + digit); - if (low || high) { - if (!low) { - ++data[num_digits - 1]; - } else if (high) { - int result = add_compare(numerator, numerator, denominator); - // Round half to even. 
- if (result > 0 || (result == 0 && (digit % 2) != 0)) - ++data[num_digits - 1]; - } - buf.resize(to_unsigned(num_digits)); - exp10 -= num_digits - 1; - return; - } - numerator *= 10; - lower *= 10; - if (upper != &lower) *upper *= 10; - } -} - -// Formats value using the Grisu algorithm -// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf) -// if T is a IEEE754 binary32 or binary64 and snprintf otherwise. -template -int format_float(T value, int precision, float_specs specs, buffer& buf) { - static_assert(!std::is_same::value, ""); - FMT_ASSERT(value >= 0, "value is negative"); - - const bool fixed = specs.format == float_format::fixed; - if (value <= 0) { // <= instead of == to silence a warning. - if (precision <= 0 || !fixed) { - buf.push_back('0'); - return 0; - } - buf.resize(to_unsigned(precision)); - std::uninitialized_fill_n(buf.data(), precision, '0'); - return -precision; - } - - if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf); - - int exp = 0; - const int min_exp = -60; // alpha in Grisu. - int cached_exp10 = 0; // K in Grisu. - if (precision < 0) { - fp fp_value; - auto boundaries = specs.binary32 - ? fp_value.assign_float_with_boundaries(value) - : fp_value.assign_with_boundaries(value); - fp_value = normalize(fp_value); - // Find a cached power of 10 such that multiplying value by it will bring - // the exponent in the range [min_exp, -32]. - const fp cached_pow = get_cached_power( - min_exp - (fp_value.e + fp::significand_size), cached_exp10); - // Multiply value and boundaries by the cached power of 10. - fp_value = fp_value * cached_pow; - boundaries.lower = multiply(boundaries.lower, cached_pow.f); - boundaries.upper = multiply(boundaries.upper, cached_pow.f); - assert(min_exp <= fp_value.e && fp_value.e <= -32); - --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}. - ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}. 
- // Numbers outside of (lower, upper) definitely do not round to value. - grisu_shortest_handler handler{buf.data(), 0, - boundaries.upper - fp_value.f}; - auto result = - grisu_gen_digits(fp(boundaries.upper, fp_value.e), - boundaries.upper - boundaries.lower, exp, handler); - if (result == digits::error) { - exp += handler.size - cached_exp10 - 1; - fallback_format(value, buf, exp); - return exp; - } - buf.resize(to_unsigned(handler.size)); - } else { - if (precision > 17) return snprintf_float(value, precision, specs, buf); - fp normalized = normalize(fp(value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::significand_size), cached_exp10); - normalized = normalized * cached_pow; - fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) - return snprintf_float(value, precision, specs, buf); - int num_digits = handler.size; - if (!fixed) { - // Remove trailing zeros. - while (num_digits > 0 && buf[num_digits - 1] == '0') { - --num_digits; - ++exp; - } - } - buf.resize(to_unsigned(num_digits)); - } - return exp - cached_exp10; -} - -template -int snprintf_float(T value, int precision, float_specs specs, - buffer& buf) { - // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. - FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); - static_assert(!std::is_same::value, ""); - - // Subtract 1 to account for the difference in precision since we use %e for - // both general and exponent format. - if (specs.format == float_format::general || - specs.format == float_format::exp) - precision = (precision >= 0 ? precision : 6) - 1; - - // Build the format string. - enum { max_format_size = 7 }; // Ths longest format is "%#.*Le". 
- char format[max_format_size]; - char* format_ptr = format; - *format_ptr++ = '%'; - if (specs.showpoint && specs.format == float_format::hex) *format_ptr++ = '#'; - if (precision >= 0) { - *format_ptr++ = '.'; - *format_ptr++ = '*'; - } - if (std::is_same()) *format_ptr++ = 'L'; - *format_ptr++ = specs.format != float_format::hex - ? (specs.format == float_format::fixed ? 'f' : 'e') - : (specs.upper ? 'A' : 'a'); - *format_ptr = '\0'; - - // Format using snprintf. - auto offset = buf.size(); - for (;;) { - auto begin = buf.data() + offset; - auto capacity = buf.capacity() - offset; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (precision > 100000) - throw std::runtime_error( - "fuzz mode - avoid large allocation inside snprintf"); -#endif - // Suppress the warning about a nonliteral format string. - // Cannot use auto becase of a bug in MinGW (#1532). - int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; - int result = precision >= 0 - ? snprintf_ptr(begin, capacity, format, precision, value) - : snprintf_ptr(begin, capacity, format, value); - if (result < 0) { - buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially. - continue; - } - auto size = to_unsigned(result); - // Size equal to capacity means that the last character was truncated. - if (size >= capacity) { - buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'. - continue; - } - auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; - if (specs.format == float_format::fixed) { - if (precision == 0) { - buf.resize(size); - return 0; - } - // Find and remove the decimal point. - auto end = begin + size, p = end; - do { - --p; - } while (is_digit(*p)); - int fraction_size = static_cast(end - p - 1); - std::memmove(p, p + 1, to_unsigned(fraction_size)); - buf.resize(size - 1); - return -fraction_size; - } - if (specs.format == float_format::hex) { - buf.resize(size + offset); - return 0; - } - // Find and parse the exponent. 
- auto end = begin + size, exp_pos = end; - do { - --exp_pos; - } while (*exp_pos != 'e'); - char sign = exp_pos[1]; - assert(sign == '+' || sign == '-'); - int exp = 0; - auto p = exp_pos + 2; // Skip 'e' and sign. - do { - assert(is_digit(*p)); - exp = exp * 10 + (*p++ - '0'); - } while (p != end); - if (sign == '-') exp = -exp; - int fraction_size = 0; - if (exp_pos != begin + 1) { - // Remove trailing zeros. - auto fraction_end = exp_pos - 1; - while (*fraction_end == '0') --fraction_end; - // Move the fractional part left to get rid of the decimal point. - fraction_size = static_cast(fraction_end - begin - 1); - std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size)); - } - buf.resize(to_unsigned(fraction_size) + offset + 1); - return exp - fraction_size; - } -} - -// A public domain branchless UTF-8 decoder by Christopher Wellons: -// https://github.com/skeeto/branchless-utf8 -/* Decode the next character, c, from buf, reporting errors in e. - * - * Since this is a branchless decoder, four bytes will be read from the - * buffer regardless of the actual length of the next character. This - * means the buffer _must_ have at least three bytes of zero padding - * following the end of the data stream. - * - * Errors are reported in e, which will be non-zero if the parsed - * character was somehow invalid: invalid byte sequence, non-canonical - * encoding, or a surrogate half. - * - * The function returns a pointer to the next character. When an error - * occurs, this pointer will be a guess that depends on the particular - * error, but it will always advance at least one byte. 
- */ -FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) { - static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; - static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; - static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; - static const int shiftc[] = {0, 18, 12, 6, 0}; - static const int shifte[] = {0, 6, 4, 2, 0}; - - auto s = reinterpret_cast(buf); - int len = lengths[s[0] >> 3]; - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - const char* next = buf + len + !len; - - // Assume a four-byte character and load four bytes. Unused bits are - // shifted out. - *c = uint32_t(s[0] & masks[len]) << 18; - *c |= uint32_t(s[1] & 0x3f) << 12; - *c |= uint32_t(s[2] & 0x3f) << 6; - *c |= uint32_t(s[3] & 0x3f) << 0; - *c >>= shiftc[len]; - - // Accumulate the various error conditions. - *e = (*c < mins[len]) << 6; // non-canonical encoding - *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? - *e |= (*c > 0x10FFFF) << 8; // out of range? - *e |= (s[1] & 0xc0) >> 2; - *e |= (s[2] & 0xc0) >> 4; - *e |= (s[3]) >> 6; - *e ^= 0x2a; // top two bits of each tail byte correct? 
- *e >>= shifte[len]; - - return next; -} -} // namespace internal - -template <> struct formatter { - format_parse_context::iterator parse(format_parse_context& ctx) { - return ctx.begin(); - } - - format_context::iterator format(const internal::bigint& n, - format_context& ctx) { - auto out = ctx.out(); - bool first = true; - for (auto i = n.bigits_.size(); i > 0; --i) { - auto value = n.bigits_[i - 1u]; - if (first) { - out = format_to(out, "{:x}", value); - first = false; - continue; - } - out = format_to(out, "{:08x}", value); - } - if (n.exp_ > 0) - out = format_to(out, "p{}", n.exp_ * internal::bigint::bigit_bits); - return out; - } -}; - -FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) { - auto transcode = [this](const char* p) { - auto cp = uint32_t(); - auto error = 0; - p = utf8_decode(p, &cp, &error); - if (error != 0) FMT_THROW(std::runtime_error("invalid utf8")); - if (cp <= 0xFFFF) { - buffer_.push_back(static_cast(cp)); - } else { - cp -= 0x10000; - buffer_.push_back(static_cast(0xD800 + (cp >> 10))); - buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); - } - return p; - }; - auto p = s.data(); - const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. 
- if (s.size() >= block_size) { - for (auto end = p + s.size() - block_size + 1; p < end;) p = transcode(p); - } - if (auto num_chars_left = s.data() + s.size() - p) { - char buf[2 * block_size - 1] = {}; - memcpy(buf, p, to_unsigned(num_chars_left)); - p = buf; - do { - p = transcode(p); - } while (p - buf < num_chars_left); - } - buffer_.push_back(0); -} - -FMT_FUNC void format_system_error(internal::buffer& out, int error_code, - string_view message) FMT_NOEXCEPT { - FMT_TRY { - memory_buffer buf; - buf.resize(inline_buffer_size); - for (;;) { - char* system_message = &buf[0]; - int result = - internal::safe_strerror(error_code, system_message, buf.size()); - if (result == 0) { - internal::writer w(out); - w.write(message); - w.write(": "); - w.write(system_message); - return; - } - if (result != ERANGE) - break; // Can't get error message, report error code instead. - buf.resize(buf.size() * 2); - } - } - FMT_CATCH(...) {} - format_error_code(out, error_code, message); -} - -FMT_FUNC void internal::error_handler::on_error(const char* message) { - FMT_THROW(format_error(message)); -} - -FMT_FUNC void report_system_error(int error_code, - fmt::string_view message) FMT_NOEXCEPT { - report_error(format_system_error, error_code, message); -} - -FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { - memory_buffer buffer; - internal::vformat_to(buffer, format_str, - basic_format_args>(args)); -#ifdef _WIN32 - auto fd = _fileno(f); - if (_isatty(fd)) { - internal::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size())); - auto written = DWORD(); - if (!WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), &written, - nullptr)) { - FMT_THROW(format_error("failed to write to console")); - } - return; - } -#endif - internal::fwrite_fully(buffer.data(), 1, buffer.size(), f); -} - -#ifdef _WIN32 -// Print assuming legacy (non-Unicode) encoding. 
-FMT_FUNC void internal::vprint_mojibake(std::FILE* f, string_view format_str, - format_args args) { - memory_buffer buffer; - internal::vformat_to(buffer, format_str, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); -} -#endif - -FMT_FUNC void vprint(string_view format_str, format_args args) { - vprint(stdout, format_str, args); -} - -FMT_END_NAMESPACE - -#ifdef _MSC_VER -# pragma warning(pop) -#endif - -#endif // FMT_FORMAT_INL_H_ diff --git a/kernel_generator/third_party/fmt/include/fmt/format.h b/kernel_generator/third_party/fmt/include/fmt/format.h deleted file mode 100644 index 4e96539..0000000 --- a/kernel_generator/third_party/fmt/include/fmt/format.h +++ /dev/null @@ -1,3648 +0,0 @@ -/* - Formatting library for C++ - - Copyright (c) 2012 - present, Victor Zverovich - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - --- Optional exception to the license --- - - As an exception, if, as a result of your compiling your source code, portions - of this Software are embedded into a machine-executable object form of such - source code, you may redistribute such embedded portions in such object form - without including the above copyright and permission notices. - */ - -#ifndef FMT_FORMAT_H_ -#define FMT_FORMAT_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "core.h" - -#ifdef FMT_DEPRECATED_INCLUDE_OS -# include "os.h" -#endif - -#ifdef __INTEL_COMPILER -# define FMT_ICC_VERSION __INTEL_COMPILER -#elif defined(__ICL) -# define FMT_ICC_VERSION __ICL -#else -# define FMT_ICC_VERSION 0 -#endif - -#ifdef __NVCC__ -# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) -#else -# define FMT_CUDA_VERSION 0 -#endif - -#ifdef __has_builtin -# define FMT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define FMT_HAS_BUILTIN(x) 0 -#endif - -#if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_NOINLINE __attribute__((noinline)) -#else -# define FMT_NOINLINE -#endif - -#if __cplusplus == 201103L || __cplusplus == 201402L -# if defined(__clang__) -# define FMT_FALLTHROUGH [[clang::fallthrough]] -# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) -# define FMT_FALLTHROUGH [[gnu::fallthrough]] -# else -# define FMT_FALLTHROUGH -# endif -#elif FMT_HAS_CPP17_ATTRIBUTE(fallthrough) || \ - (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) -# define FMT_FALLTHROUGH [[fallthrough]] -#else -# define FMT_FALLTHROUGH -#endif - -#ifndef FMT_THROW -# if FMT_EXCEPTIONS -# if FMT_MSC_VER || FMT_NVCC -FMT_BEGIN_NAMESPACE -namespace internal { -template inline void do_throw(const Exception& x) { - // Silence unreachable code warnings in MSVC and NVCC because these - // are nearly impossible to fix in a generic code. 
- volatile bool b = true; - if (b) throw x; -} -} // namespace internal -FMT_END_NAMESPACE -# define FMT_THROW(x) internal::do_throw(x) -# else -# define FMT_THROW(x) throw x -# endif -# else -# define FMT_THROW(x) \ - do { \ - static_cast(sizeof(x)); \ - FMT_ASSERT(false, ""); \ - } while (false) -# endif -#endif - -#if FMT_EXCEPTIONS -# define FMT_TRY try -# define FMT_CATCH(x) catch (x) -#else -# define FMT_TRY if (true) -# define FMT_CATCH(x) if (false) -#endif - -#ifndef FMT_USE_USER_DEFINED_LITERALS -// For Intel and NVIDIA compilers both they and the system gcc/msc support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ - FMT_MSC_VER >= 1900) && \ - (!(FMT_ICC_VERSION || FMT_CUDA_VERSION) || FMT_ICC_VERSION >= 1500 || \ - FMT_CUDA_VERSION >= 700) -# define FMT_USE_USER_DEFINED_LITERALS 1 -# else -# define FMT_USE_USER_DEFINED_LITERALS 0 -# endif -#endif - -#ifndef FMT_USE_UDL_TEMPLATE -// EDG front end based compilers (icc, nvcc) and GCC < 6.4 do not propertly -// support UDL templates and GCC >= 9 warns about them. 
-# if FMT_USE_USER_DEFINED_LITERALS && FMT_ICC_VERSION == 0 && \ - FMT_CUDA_VERSION == 0 && \ - ((FMT_GCC_VERSION >= 604 && FMT_GCC_VERSION <= 900 && \ - __cplusplus >= 201402L) || \ - FMT_CLANG_VERSION >= 304) -# define FMT_USE_UDL_TEMPLATE 1 -# else -# define FMT_USE_UDL_TEMPLATE 0 -# endif -#endif - -#ifndef FMT_USE_FLOAT -# define FMT_USE_FLOAT 1 -#endif - -#ifndef FMT_USE_DOUBLE -# define FMT_USE_DOUBLE 1 -#endif - -#ifndef FMT_USE_LONG_DOUBLE -# define FMT_USE_LONG_DOUBLE 1 -#endif - -// __builtin_clz is broken in clang with Microsoft CodeGen: -// https://github.com/fmtlib/fmt/issues/519 -#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER -# define FMT_BUILTIN_CLZ(n) __builtin_clz(n) -#endif -#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER -# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) -#endif - -// Some compilers masquerade as both MSVC and GCC-likes or otherwise support -// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the -// MSVC intrinsics if the clz and clzll builtins are not available. -#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) -# include // _BitScanReverse, _BitScanReverse64 - -FMT_BEGIN_NAMESPACE -namespace internal { -// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. -# ifndef __clang__ -# pragma intrinsic(_BitScanReverse) -# endif -inline uint32_t clz(uint32_t x) { - unsigned long r = 0; - _BitScanReverse(&r, x); - - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. 
-# pragma warning(suppress : 6102) - return 31 - r; -} -# define FMT_BUILTIN_CLZ(n) internal::clz(n) - -# if defined(_WIN64) && !defined(__clang__) -# pragma intrinsic(_BitScanReverse64) -# endif - -inline uint32_t clzll(uint64_t x) { - unsigned long r = 0; -# ifdef _WIN64 - _BitScanReverse64(&r, x); -# else - // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 - (r + 32); - - // Scan the low 32 bits. - _BitScanReverse(&r, static_cast(x)); -# endif - - FMT_ASSERT(x != 0, ""); - // Static analysis complains about using uninitialized data - // "r", but the only way that can happen is if "x" is 0, - // which the callers guarantee to not happen. -# pragma warning(suppress : 6102) - return 63 - r; -} -# define FMT_BUILTIN_CLZLL(n) internal::clzll(n) -} // namespace internal -FMT_END_NAMESPACE -#endif - -// Enable the deprecated numeric alignment. -#ifndef FMT_NUMERIC_ALIGN -# define FMT_NUMERIC_ALIGN 1 -#endif - -// Enable the deprecated percent specifier. -#ifndef FMT_DEPRECATED_PERCENT -# define FMT_DEPRECATED_PERCENT 0 -#endif - -FMT_BEGIN_NAMESPACE -namespace internal { - -// An equivalent of `*reinterpret_cast(&source)` that doesn't have -// undefined behavior (e.g. due to type aliasing). -// Example: uint64_t d = bit_cast(2.718); -template -inline Dest bit_cast(const Source& source) { - static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); - Dest dest; - std::memcpy(&dest, &source, sizeof(dest)); - return dest; -} - -inline bool is_big_endian() { - const auto u = 1u; - struct bytes { - char data[sizeof(u)]; - }; - return bit_cast(u).data[0] == 0; -} - -// A fallback implementation of uintptr_t for systems that lack it. 
-struct fallback_uintptr { - unsigned char value[sizeof(void*)]; - - fallback_uintptr() = default; - explicit fallback_uintptr(const void* p) { - *this = bit_cast(p); - if (is_big_endian()) { - for (size_t i = 0, j = sizeof(void*) - 1; i < j; ++i, --j) - std::swap(value[i], value[j]); - } - } -}; -#ifdef UINTPTR_MAX -using uintptr_t = ::uintptr_t; -inline uintptr_t to_uintptr(const void* p) { return bit_cast(p); } -#else -using uintptr_t = fallback_uintptr; -inline fallback_uintptr to_uintptr(const void* p) { - return fallback_uintptr(p); -} -#endif - -// Returns the largest possible value for type T. Same as -// std::numeric_limits::max() but shorter and not affected by the max macro. -template constexpr T max_value() { - return (std::numeric_limits::max)(); -} -template constexpr int num_bits() { - return std::numeric_limits::digits; -} -template <> constexpr int num_bits() { - return static_cast(sizeof(void*) * - std::numeric_limits::digits); -} - -// An approximation of iterator_t for pre-C++20 systems. -template -using iterator_t = decltype(std::begin(std::declval())); - -// Detect the iterator category of *any* given type in a SFINAE-friendly way. -// Unfortunately, older implementations of std::iterator_traits are not safe -// for use in a SFINAE-context. -template -struct iterator_category : std::false_type {}; - -template struct iterator_category { - using type = std::random_access_iterator_tag; -}; - -template -struct iterator_category> { - using type = typename It::iterator_category; -}; - -// Detect if *any* given type models the OutputIterator concept. -template class is_output_iterator { - // Check for mutability because all iterator categories derived from - // std::input_iterator_tag *may* also meet the requirements of an - // OutputIterator, thereby falling into the category of 'mutable iterators' - // [iterator.requirements.general] clause 4. The compiler reveals this - // property only at the point of *actually dereferencing* the iterator! 
- template - static decltype(*(std::declval())) test(std::input_iterator_tag); - template static char& test(std::output_iterator_tag); - template static const char& test(...); - - using type = decltype(test(typename iterator_category::type{})); - - public: - enum { value = !std::is_const>::value }; -}; - -// A workaround for std::string not having mutable data() until C++17. -template inline Char* get_data(std::basic_string& s) { - return &s[0]; -} -template -inline typename Container::value_type* get_data(Container& c) { - return c.data(); -} - -#if defined(_SECURE_SCL) && _SECURE_SCL -// Make a checked iterator to avoid MSVC warnings. -template using checked_ptr = stdext::checked_array_iterator; -template checked_ptr make_checked(T* p, std::size_t size) { - return {p, size}; -} -#else -template using checked_ptr = T*; -template inline T* make_checked(T* p, std::size_t) { return p; } -#endif - -template ::value)> -inline checked_ptr reserve( - std::back_insert_iterator& it, std::size_t n) { - Container& c = get_container(it); - std::size_t size = c.size(); - c.resize(size + n); - return make_checked(get_data(c) + size, n); -} - -template -inline Iterator& reserve(Iterator& it, std::size_t) { - return it; -} - -// An output iterator that counts the number of objects written to it and -// discards them. -class counting_iterator { - private: - std::size_t count_; - - public: - using iterator_category = std::output_iterator_tag; - using difference_type = std::ptrdiff_t; - using pointer = void; - using reference = void; - using _Unchecked_type = counting_iterator; // Mark iterator as checked. 
- - struct value_type { - template void operator=(const T&) {} - }; - - counting_iterator() : count_(0) {} - - std::size_t count() const { return count_; } - - counting_iterator& operator++() { - ++count_; - return *this; - } - - counting_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type operator*() const { return {}; } -}; - -template class truncating_iterator_base { - protected: - OutputIt out_; - std::size_t limit_; - std::size_t count_; - - truncating_iterator_base(OutputIt out, std::size_t limit) - : out_(out), limit_(limit), count_(0) {} - - public: - using iterator_category = std::output_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = void; - using pointer = void; - using reference = void; - using _Unchecked_type = - truncating_iterator_base; // Mark iterator as checked. - - OutputIt base() const { return out_; } - std::size_t count() const { return count_; } -}; - -// An output iterator that truncates the output and counts the number of objects -// written to it. -template ::value_type>::type> -class truncating_iterator; - -template -class truncating_iterator - : public truncating_iterator_base { - mutable typename truncating_iterator_base::value_type blackhole_; - - public: - using value_type = typename truncating_iterator_base::value_type; - - truncating_iterator(OutputIt out, std::size_t limit) - : truncating_iterator_base(out, limit) {} - - truncating_iterator& operator++() { - if (this->count_++ < this->limit_) ++this->out_; - return *this; - } - - truncating_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type& operator*() const { - return this->count_ < this->limit_ ? 
*this->out_ : blackhole_; - } -}; - -template -class truncating_iterator - : public truncating_iterator_base { - public: - truncating_iterator(OutputIt out, std::size_t limit) - : truncating_iterator_base(out, limit) {} - - template truncating_iterator& operator=(T val) { - if (this->count_++ < this->limit_) *this->out_++ = val; - return *this; - } - - truncating_iterator& operator++() { return *this; } - truncating_iterator& operator++(int) { return *this; } - truncating_iterator& operator*() { return *this; } -}; - -// A range with the specified output iterator and value type. -template -class output_range { - private: - OutputIt it_; - - public: - using value_type = T; - using iterator = OutputIt; - struct sentinel {}; - - explicit output_range(OutputIt it) : it_(it) {} - OutputIt begin() const { return it_; } - sentinel end() const { return {}; } // Sentinel is not used yet. -}; - -template -inline size_t count_code_points(basic_string_view s) { - return s.size(); -} - -// Counts the number of code points in a UTF-8 string. -inline size_t count_code_points(basic_string_view s) { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80) ++num_code_points; - } - return num_code_points; -} - -inline size_t count_code_points(basic_string_view s) { - return count_code_points(basic_string_view( - reinterpret_cast(s.data()), s.size())); -} - -template -inline size_t code_point_index(basic_string_view s, size_t n) { - size_t size = s.size(); - return n < size ? n : size; -} - -// Calculates the index of the nth code point in a UTF-8 string. 
-inline size_t code_point_index(basic_string_view s, size_t n) { - const char8_type* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) { - return i; - } - } - return s.size(); -} - -inline char8_type to_char8_t(char c) { return static_cast(c); } - -template -using needs_conversion = bool_constant< - std::is_same::value_type, - char>::value && - std::is_same::value>; - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::copy(begin, end, it); -} - -template ::value)> -OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { - return std::transform(begin, end, it, to_char8_t); -} - -#ifndef FMT_USE_GRISU -# define FMT_USE_GRISU 1 -#endif - -template constexpr bool use_grisu() { - return FMT_USE_GRISU && std::numeric_limits::is_iec559 && - sizeof(T) <= sizeof(double); -} - -template -template -void buffer::append(const U* begin, const U* end) { - std::size_t new_size = size_ + to_unsigned(end - begin); - reserve(new_size); - std::uninitialized_copy(begin, end, make_checked(ptr_, capacity_) + size_); - size_ = new_size; -} -} // namespace internal - -// A range with an iterator appending to a buffer. 
-template -class buffer_range : public internal::output_range< - std::back_insert_iterator>, T> { - public: - using iterator = std::back_insert_iterator>; - using internal::output_range::output_range; - buffer_range(internal::buffer& buf) - : internal::output_range(std::back_inserter(buf)) {} -}; - -class FMT_DEPRECATED u8string_view - : public basic_string_view { - public: - u8string_view(const char* s) - : basic_string_view( - reinterpret_cast(s)) {} - u8string_view(const char* s, size_t count) FMT_NOEXCEPT - : basic_string_view( - reinterpret_cast(s), count) {} -}; - -#if FMT_USE_USER_DEFINED_LITERALS -inline namespace literals { -FMT_DEPRECATED inline basic_string_view operator"" _u( - const char* s, std::size_t n) { - return {reinterpret_cast(s), n}; -} -} // namespace literals -#endif - -// The number of characters to store in the basic_memory_buffer object itself -// to avoid dynamic memory allocation. -enum { inline_buffer_size = 500 }; - -/** - \rst - A dynamically growing memory buffer for trivially copyable/constructible types - with the first ``SIZE`` elements stored in the object itself. - - You can use one of the following type aliases for common character types: - - +----------------+------------------------------+ - | Type | Definition | - +================+==============================+ - | memory_buffer | basic_memory_buffer | - +----------------+------------------------------+ - | wmemory_buffer | basic_memory_buffer | - +----------------+------------------------------+ - - **Example**:: - - fmt::memory_buffer out; - format_to(out, "The answer is {}.", 42); - - This will append the following output to the ``out`` object: - - .. code-block:: none - - The answer is 42. - - The output can be converted to an ``std::string`` with ``to_string(out)``. - \endrst - */ -template > -class basic_memory_buffer : private Allocator, public internal::buffer { - private: - T store_[SIZE]; - - // Deallocate memory allocated by the buffer. 
- void deallocate() { - T* data = this->data(); - if (data != store_) Allocator::deallocate(data, this->capacity()); - } - - protected: - void grow(std::size_t size) FMT_OVERRIDE; - - public: - using value_type = T; - using const_reference = const T&; - - explicit basic_memory_buffer(const Allocator& alloc = Allocator()) - : Allocator(alloc) { - this->set(store_, SIZE); - } - ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } - - private: - // Move data from other to this buffer. - void move(basic_memory_buffer& other) { - Allocator &this_alloc = *this, &other_alloc = other; - this_alloc = std::move(other_alloc); - T* data = other.data(); - std::size_t size = other.size(), capacity = other.capacity(); - if (data == other.store_) { - this->set(store_, capacity); - std::uninitialized_copy(other.store_, other.store_ + size, - internal::make_checked(store_, capacity)); - } else { - this->set(data, capacity); - // Set pointer to the inline array so that delete is not called - // when deallocating. - other.set(other.store_, 0); - } - this->resize(size); - } - - public: - /** - \rst - Constructs a :class:`fmt::basic_memory_buffer` object moving the content - of the other object to it. - \endrst - */ - basic_memory_buffer(basic_memory_buffer&& other) FMT_NOEXCEPT { move(other); } - - /** - \rst - Moves the content of the other ``basic_memory_buffer`` object to this one. - \endrst - */ - basic_memory_buffer& operator=(basic_memory_buffer&& other) FMT_NOEXCEPT { - FMT_ASSERT(this != &other, ""); - deallocate(); - move(other); - return *this; - } - - // Returns a copy of the allocator associated with this buffer. 
- Allocator get_allocator() const { return *this; } -}; - -template -void basic_memory_buffer::grow(std::size_t size) { -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (size > 1000) throw std::runtime_error("fuzz mode - won't grow that much"); -#endif - std::size_t old_capacity = this->capacity(); - std::size_t new_capacity = old_capacity + old_capacity / 2; - if (size > new_capacity) new_capacity = size; - T* old_data = this->data(); - T* new_data = std::allocator_traits::allocate(*this, new_capacity); - // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy(old_data, old_data + this->size(), - internal::make_checked(new_data, new_capacity)); - this->set(new_data, new_capacity); - // deallocate must not throw according to the standard, but even if it does, - // the buffer already uses the new storage and will deallocate it in - // destructor. - if (old_data != store_) Allocator::deallocate(old_data, old_capacity); -} - -using memory_buffer = basic_memory_buffer; -using wmemory_buffer = basic_memory_buffer; - -/** A formatting error such as invalid format string. */ -FMT_CLASS_API -class FMT_API format_error : public std::runtime_error { - public: - explicit format_error(const char* message) : std::runtime_error(message) {} - explicit format_error(const std::string& message) - : std::runtime_error(message) {} - format_error(const format_error&) = default; - format_error& operator=(const format_error&) = default; - format_error(format_error&&) = default; - format_error& operator=(format_error&&) = default; - ~format_error() FMT_NOEXCEPT FMT_OVERRIDE; -}; - -namespace internal { - -// Returns true if value is negative, false otherwise. -// Same as `value < 0` but doesn't produce warnings if T is an unsigned type. 
-template ::is_signed)> -FMT_CONSTEXPR bool is_negative(T value) { - return value < 0; -} -template ::is_signed)> -FMT_CONSTEXPR bool is_negative(T) { - return false; -} - -template ::value)> -FMT_CONSTEXPR bool is_supported_floating_point(T) { - return (std::is_same::value && FMT_USE_FLOAT) || - (std::is_same::value && FMT_USE_DOUBLE) || - (std::is_same::value && FMT_USE_LONG_DOUBLE); -} - -// Smallest of uint32_t, uint64_t, uint128_t that is large enough to -// represent all values of T. -template -using uint32_or_64_or_128_t = conditional_t< - std::numeric_limits::digits <= 32, uint32_t, - conditional_t::digits <= 64, uint64_t, uint128_t>>; - -// Static data is placed in this class template for the header-only config. -template struct FMT_EXTERN_TEMPLATE_API basic_data { - static const uint64_t powers_of_10_64[]; - static const uint32_t zero_or_powers_of_10_32[]; - static const uint64_t zero_or_powers_of_10_64[]; - static const uint64_t pow10_significands[]; - static const int16_t pow10_exponents[]; - static const char digits[]; - static const char hex_digits[]; - static const char foreground_color[]; - static const char background_color[]; - static const char reset_color[5]; - static const wchar_t wreset_color[5]; - static const char signs[]; -}; - -FMT_EXTERN template struct basic_data; - -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct data : basic_data<> {}; - -#ifdef FMT_BUILTIN_CLZLL -// Returns the number of decimal digits in n. Leading zeros are not counted -// except for n == 0 in which case count_digits returns 1. -inline int count_digits(uint64_t n) { - // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. - int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_64[t]) + 1; -} -#else -// Fallback version of count_digits used when __builtin_clz is not available. 
-inline int count_digits(uint64_t n) { - int count = 1; - for (;;) { - // Integer division is slow so do it for a group of four digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - if (n < 10) return count; - if (n < 100) return count + 1; - if (n < 1000) return count + 2; - if (n < 10000) return count + 3; - n /= 10000u; - count += 4; - } -} -#endif - -#if FMT_USE_INT128 -inline int count_digits(uint128_t n) { - int count = 1; - for (;;) { - // Integer division is slow so do it for a group of four digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. - if (n < 10) return count; - if (n < 100) return count + 1; - if (n < 1000) return count + 2; - if (n < 10000) return count + 3; - n /= 10000U; - count += 4; - } -} -#endif - -// Counts the number of digits in n. BITS = log2(radix). -template inline int count_digits(UInt n) { - int num_digits = 0; - do { - ++num_digits; - } while ((n >>= BITS) != 0); - return num_digits; -} - -template <> int count_digits<4>(internal::fallback_uintptr n); - -#if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) -#else -# define FMT_ALWAYS_INLINE -#endif - -#ifdef FMT_BUILTIN_CLZ -// Optional version of count_digits for better performance on 32-bit platforms. 
-inline int count_digits(uint32_t n) { - int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; - return t - (n < data::zero_or_powers_of_10_32[t]) + 1; -} -#endif - -template FMT_API std::string grouping_impl(locale_ref loc); -template inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); -} -template <> inline std::string grouping(locale_ref loc) { - return grouping_impl(loc); -} - -template FMT_API Char thousands_sep_impl(locale_ref loc); -template inline Char thousands_sep(locale_ref loc) { - return Char(thousands_sep_impl(loc)); -} -template <> inline wchar_t thousands_sep(locale_ref loc) { - return thousands_sep_impl(loc); -} - -template FMT_API Char decimal_point_impl(locale_ref loc); -template inline Char decimal_point(locale_ref loc) { - return Char(decimal_point_impl(loc)); -} -template <> inline wchar_t decimal_point(locale_ref loc) { - return decimal_point_impl(loc); -} - -// Formats a decimal unsigned integer value writing into buffer. -// add_thousands_sep is called after writing each char to add a thousands -// separator if necessary. -template -inline Char* format_decimal(Char* buffer, UInt value, int num_digits, - F add_thousands_sep) { - FMT_ASSERT(num_digits >= 0, "invalid digit count"); - buffer += num_digits; - Char* end = buffer; - while (value >= 100) { - // Integer division is slow so do it for a group of two digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. 
- auto index = static_cast((value % 100) * 2); - value /= 100; - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); - add_thousands_sep(buffer); - } - if (value < 10) { - *--buffer = static_cast('0' + value); - return end; - } - auto index = static_cast(value * 2); - *--buffer = static_cast(data::digits[index + 1]); - add_thousands_sep(buffer); - *--buffer = static_cast(data::digits[index]); - return end; -} - -template constexpr int digits10() FMT_NOEXCEPT { - return std::numeric_limits::digits10; -} -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } -template <> constexpr int digits10() FMT_NOEXCEPT { return 38; } - -template -inline Iterator format_decimal(Iterator out, UInt value, int num_digits, - F add_thousands_sep) { - FMT_ASSERT(num_digits >= 0, "invalid digit count"); - // Buffer should be large enough to hold all digits (<= digits10 + 1). - enum { max_size = digits10() + 1 }; - Char buffer[2 * max_size]; - auto end = format_decimal(buffer, value, num_digits, add_thousands_sep); - return internal::copy_str(buffer, end, out); -} - -template -inline It format_decimal(It out, UInt value, int num_digits) { - return format_decimal(out, value, num_digits, [](Char*) {}); -} - -template -inline Char* format_uint(Char* buffer, UInt value, int num_digits, - bool upper = false) { - buffer += num_digits; - Char* end = buffer; - do { - const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits; - unsigned digit = (value & ((1 << BASE_BITS) - 1)); - *--buffer = static_cast(BASE_BITS < 4 ? 
static_cast('0' + digit) - : digits[digit]); - } while ((value >>= BASE_BITS) != 0); - return end; -} - -template -Char* format_uint(Char* buffer, internal::fallback_uintptr n, int num_digits, - bool = false) { - auto char_digits = std::numeric_limits::digits / 4; - int start = (num_digits + char_digits - 1) / char_digits - 1; - if (int start_digits = num_digits % char_digits) { - unsigned value = n.value[start--]; - buffer = format_uint(buffer, value, start_digits); - } - for (; start >= 0; --start) { - unsigned value = n.value[start]; - buffer += char_digits; - auto p = buffer; - for (int i = 0; i < char_digits; ++i) { - unsigned digit = (value & ((1 << BASE_BITS) - 1)); - *--p = static_cast(data::hex_digits[digit]); - value >>= BASE_BITS; - } - } - return buffer; -} - -template -inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { - // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; - format_uint(buffer, value, num_digits, upper); - return internal::copy_str(buffer, buffer + num_digits, out); -} - -// A converter from UTF-8 to UTF-16. -class utf8_to_utf16 { - private: - wmemory_buffer buffer_; - - public: - FMT_API explicit utf8_to_utf16(string_view s); - operator wstring_view() const { return {&buffer_[0], size()}; } - size_t size() const { return buffer_.size() - 1; } - const wchar_t* c_str() const { return &buffer_[0]; } - std::wstring str() const { return {&buffer_[0], size()}; } -}; - -template struct null {}; - -// Workaround an array initialization issue in gcc 4.8. 
-template struct fill_t { - private: - enum { max_size = 4 }; - Char data_[max_size]; - unsigned char size_; - - public: - FMT_CONSTEXPR void operator=(basic_string_view s) { - auto size = s.size(); - if (size > max_size) { - FMT_THROW(format_error("invalid fill")); - return; - } - for (size_t i = 0; i < size; ++i) data_[i] = s[i]; - size_ = static_cast(size); - } - - size_t size() const { return size_; } - const Char* data() const { return data_; } - - FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; } - FMT_CONSTEXPR const Char& operator[](size_t index) const { - return data_[index]; - } - - static FMT_CONSTEXPR fill_t make() { - auto fill = fill_t(); - fill[0] = Char(' '); - fill.size_ = 1; - return fill; - } -}; -} // namespace internal - -// We cannot use enum classes as bit fields because of a gcc bug -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. -namespace align { -enum type { none, left, right, center, numeric }; -} -using align_t = align::type; - -namespace sign { -enum type { none, minus, plus, space }; -} -using sign_t = sign::type; - -// Format specifiers for built-in and string types. -template struct basic_format_specs { - int width; - int precision; - char type; - align_t align : 4; - sign_t sign : 3; - bool alt : 1; // Alternate form ('#'). - internal::fill_t fill; - - constexpr basic_format_specs() - : width(0), - precision(-1), - type(0), - align(align::none), - sign(sign::none), - alt(false), - fill(internal::fill_t::make()) {} -}; - -using format_specs = basic_format_specs; - -namespace internal { - -// A floating-point presentation format. -enum class float_format : unsigned char { - general, // General: exponent notation or fixed point based on magnitude. - exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. - fixed, // Fixed point with the default precision of 6, e.g. 0.0012. 
- hex -}; - -struct float_specs { - int precision; - float_format format : 8; - sign_t sign : 8; - bool upper : 1; - bool locale : 1; - bool percent : 1; - bool binary32 : 1; - bool use_grisu : 1; - bool showpoint : 1; -}; - -// Writes the exponent exp in the form "[+-]d{2,3}" to buffer. -template It write_exponent(int exp, It it) { - FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); - if (exp < 0) { - *it++ = static_cast('-'); - exp = -exp; - } else { - *it++ = static_cast('+'); - } - if (exp >= 100) { - const char* top = data::digits + (exp / 100) * 2; - if (exp >= 1000) *it++ = static_cast(top[0]); - *it++ = static_cast(top[1]); - exp %= 100; - } - const char* d = data::digits + exp * 2; - *it++ = static_cast(d[0]); - *it++ = static_cast(d[1]); - return it; -} - -template class float_writer { - private: - // The number is given as v = digits_ * pow(10, exp_). - const char* digits_; - int num_digits_; - int exp_; - size_t size_; - float_specs specs_; - Char decimal_point_; - - template It prettify(It it) const { - // pow(10, full_exp - 1) <= v <= pow(10, full_exp). - int full_exp = num_digits_ + exp_; - if (specs_.format == float_format::exp) { - // Insert a decimal point after the first digit and add an exponent. - *it++ = static_cast(*digits_); - int num_zeros = specs_.precision - num_digits_; - if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_; - it = copy_str(digits_ + 1, digits_ + num_digits_, it); - if (num_zeros > 0 && specs_.showpoint) - it = std::fill_n(it, num_zeros, static_cast('0')); - *it++ = static_cast(specs_.upper ? 
'E' : 'e'); - return write_exponent(full_exp - 1, it); - } - if (num_digits_ <= full_exp) { - // 1234e7 -> 12340000000[.0+] - it = copy_str(digits_, digits_ + num_digits_, it); - it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); - if (specs_.showpoint || specs_.precision < 0) { - *it++ = decimal_point_; - int num_zeros = specs_.precision - full_exp; - if (num_zeros <= 0) { - if (specs_.format != float_format::fixed) - *it++ = static_cast('0'); - return it; - } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (num_zeros > 1000) - throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); -#endif - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else if (full_exp > 0) { - // 1234e-2 -> 12.34[0+] - it = copy_str(digits_, digits_ + full_exp, it); - if (!specs_.showpoint) { - // Remove trailing zeros. - int num_digits = num_digits_; - while (num_digits > full_exp && digits_[num_digits - 1] == '0') - --num_digits; - if (num_digits != full_exp) *it++ = decimal_point_; - return copy_str(digits_ + full_exp, digits_ + num_digits, it); - } - *it++ = decimal_point_; - it = copy_str(digits_ + full_exp, digits_ + num_digits_, it); - if (specs_.precision > num_digits_) { - // Add trailing zeros. - int num_zeros = specs_.precision - num_digits_; - it = std::fill_n(it, num_zeros, static_cast('0')); - } - } else { - // 1234e-6 -> 0.001234 - *it++ = static_cast('0'); - int num_zeros = -full_exp; - int num_digits = num_digits_; - if (num_digits == 0 && specs_.precision >= 0 && - specs_.precision < num_zeros) { - num_zeros = specs_.precision; - } - // Remove trailing zeros. 
- if (!specs_.showpoint) - while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; - if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) { - *it++ = decimal_point_; - it = std::fill_n(it, num_zeros, static_cast('0')); - it = copy_str(digits_, digits_ + num_digits, it); - } - } - return it; - } - - public: - float_writer(const char* digits, int num_digits, int exp, float_specs specs, - Char decimal_point) - : digits_(digits), - num_digits_(num_digits), - exp_(exp), - specs_(specs), - decimal_point_(decimal_point) { - int full_exp = num_digits + exp - 1; - int precision = specs.precision > 0 ? specs.precision : 16; - if (specs_.format == float_format::general && - !(full_exp >= -4 && full_exp < precision)) { - specs_.format = float_format::exp; - } - size_ = prettify(counting_iterator()).count(); - size_ += specs.sign ? 1 : 0; - } - - size_t size() const { return size_; } - size_t width() const { return size(); } - - template void operator()(It&& it) { - if (specs_.sign) *it++ = static_cast(data::signs[specs_.sign]); - it = prettify(it); - } -}; - -template -int format_float(T value, int precision, float_specs specs, buffer& buf); - -// Formats a floating-point number with snprintf. 
-template -int snprintf_float(T value, int precision, float_specs specs, - buffer& buf); - -template T promote_float(T value) { return value; } -inline double promote_float(float value) { return static_cast(value); } - -template -FMT_CONSTEXPR void handle_int_type_spec(char spec, Handler&& handler) { - switch (spec) { - case 0: - case 'd': - handler.on_dec(); - break; - case 'x': - case 'X': - handler.on_hex(); - break; - case 'b': - case 'B': - handler.on_bin(); - break; - case 'o': - handler.on_oct(); - break; - case 'n': - case 'L': - handler.on_num(); - break; - default: - handler.on_error(); - } -} - -template -FMT_CONSTEXPR float_specs parse_float_type_spec( - const basic_format_specs& specs, ErrorHandler&& eh = {}) { - auto result = float_specs(); - result.showpoint = specs.alt; - switch (specs.type) { - case 0: - result.format = float_format::general; - result.showpoint |= specs.precision > 0; - break; - case 'G': - result.upper = true; - FMT_FALLTHROUGH; - case 'g': - result.format = float_format::general; - break; - case 'E': - result.upper = true; - FMT_FALLTHROUGH; - case 'e': - result.format = float_format::exp; - result.showpoint |= specs.precision != 0; - break; - case 'F': - result.upper = true; - FMT_FALLTHROUGH; - case 'f': - result.format = float_format::fixed; - result.showpoint |= specs.precision != 0; - break; -#if FMT_DEPRECATED_PERCENT - case '%': - result.format = float_format::fixed; - result.percent = true; - break; -#endif - case 'A': - result.upper = true; - FMT_FALLTHROUGH; - case 'a': - result.format = float_format::hex; - break; - case 'n': - result.locale = true; - break; - default: - eh.on_error("invalid type specifier"); - break; - } - return result; -} - -template -FMT_CONSTEXPR void handle_char_specs(const basic_format_specs* specs, - Handler&& handler) { - if (!specs) return handler.on_char(); - if (specs->type && specs->type != 'c') return handler.on_int(); - if (specs->align == align::numeric || specs->sign != sign::none || 
specs->alt) - handler.on_error("invalid format specifier for char"); - handler.on_char(); -} - -template -FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) { - if (spec == 0 || spec == 's') - handler.on_string(); - else if (spec == 'p') - handler.on_pointer(); - else - handler.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 's') eh.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { - if (spec != 0 && spec != 'p') eh.on_error("invalid type specifier"); -} - -template class int_type_checker : private ErrorHandler { - public: - FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_dec() {} - FMT_CONSTEXPR void on_hex() {} - FMT_CONSTEXPR void on_bin() {} - FMT_CONSTEXPR void on_oct() {} - FMT_CONSTEXPR void on_num() {} - - FMT_CONSTEXPR void on_error() { - ErrorHandler::on_error("invalid type specifier"); - } -}; - -template -class char_specs_checker : public ErrorHandler { - private: - char type_; - - public: - FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh) - : ErrorHandler(eh), type_(type) {} - - FMT_CONSTEXPR void on_int() { - handle_int_type_spec(type_, int_type_checker(*this)); - } - FMT_CONSTEXPR void on_char() {} -}; - -template -class cstring_type_checker : public ErrorHandler { - public: - FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh) - : ErrorHandler(eh) {} - - FMT_CONSTEXPR void on_string() {} - FMT_CONSTEXPR void on_pointer() {} -}; - -template -void arg_map::init(const basic_format_args& args) { - if (map_) return; - map_ = new entry[internal::to_unsigned(args.max_size())]; - if (args.is_packed()) { - for (int i = 0;; ++i) { - internal::type arg_type = args.type(i); - if (arg_type == internal::type::none_type) return; - if (arg_type == internal::type::named_arg_type) - 
push_back(args.values_[i]); - } - } - for (int i = 0, n = args.max_size(); i < n; ++i) { - auto type = args.args_[i].type_; - if (type == internal::type::named_arg_type) push_back(args.args_[i].value_); - } -} - -template struct nonfinite_writer { - sign_t sign; - const char* str; - static constexpr size_t str_size = 3; - - size_t size() const { return str_size + (sign ? 1 : 0); } - size_t width() const { return size(); } - - template void operator()(It&& it) const { - if (sign) *it++ = static_cast(data::signs[sign]); - it = copy_str(str, str + str_size, it); - } -}; - -template -FMT_NOINLINE OutputIt fill(OutputIt it, size_t n, const fill_t& fill) { - auto fill_size = fill.size(); - if (fill_size == 1) return std::fill_n(it, n, fill[0]); - for (size_t i = 0; i < n; ++i) it = std::copy_n(fill.data(), fill_size, it); - return it; -} - -// This template provides operations for formatting and writing data into a -// character range. -template class basic_writer { - public: - using char_type = typename Range::value_type; - using iterator = typename Range::iterator; - using format_specs = basic_format_specs; - - private: - iterator out_; // Output iterator. - locale_ref locale_; - - // Attempts to reserve space for n extra characters in the output range. - // Returns a pointer to the reserved range or a reference to out_. - auto reserve(std::size_t n) -> decltype(internal::reserve(out_, n)) { - return internal::reserve(out_, n); - } - - template struct padded_int_writer { - size_t size_; - string_view prefix; - char_type fill; - std::size_t padding; - F f; - - size_t size() const { return size_; } - size_t width() const { return size_; } - - template void operator()(It&& it) const { - if (prefix.size() != 0) - it = copy_str(prefix.begin(), prefix.end(), it); - it = std::fill_n(it, padding, fill); - f(it); - } - }; - - // Writes an integer in the format - // - // where are written by f(it). 
- template - void write_int(int num_digits, string_view prefix, format_specs specs, F f) { - std::size_t size = prefix.size() + to_unsigned(num_digits); - char_type fill = specs.fill[0]; - std::size_t padding = 0; - if (specs.align == align::numeric) { - auto unsiged_width = to_unsigned(specs.width); - if (unsiged_width > size) { - padding = unsiged_width - size; - size = unsiged_width; - } - } else if (specs.precision > num_digits) { - size = prefix.size() + to_unsigned(specs.precision); - padding = to_unsigned(specs.precision - num_digits); - fill = static_cast('0'); - } - if (specs.align == align::none) specs.align = align::right; - write_padded(specs, padded_int_writer{size, prefix, fill, padding, f}); - } - - // Writes a decimal integer. - template void write_decimal(Int value) { - auto abs_value = static_cast>(value); - bool negative = is_negative(value); - // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. - if (negative) abs_value = ~abs_value + 1; - int num_digits = count_digits(abs_value); - auto&& it = reserve((negative ? 1 : 0) + static_cast(num_digits)); - if (negative) *it++ = static_cast('-'); - it = format_decimal(it, abs_value, num_digits); - } - - // The handle_int_type_spec handler that writes an integer. - template struct int_writer { - using unsigned_type = uint32_or_64_or_128_t; - - basic_writer& writer; - const Specs& specs; - unsigned_type abs_value; - char prefix[4]; - unsigned prefix_size; - - string_view get_prefix() const { return string_view(prefix, prefix_size); } - - int_writer(basic_writer& w, Int value, const Specs& s) - : writer(w), - specs(s), - abs_value(static_cast(value)), - prefix_size(0) { - if (is_negative(value)) { - prefix[0] = '-'; - ++prefix_size; - abs_value = 0 - abs_value; - } else if (specs.sign != sign::none && specs.sign != sign::minus) { - prefix[0] = specs.sign == sign::plus ? 
'+' : ' '; - ++prefix_size; - } - } - - struct dec_writer { - unsigned_type abs_value; - int num_digits; - - template void operator()(It&& it) const { - it = internal::format_decimal(it, abs_value, num_digits); - } - }; - - void on_dec() { - int num_digits = count_digits(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - dec_writer{abs_value, num_digits}); - } - - struct hex_writer { - int_writer& self; - int num_digits; - - template void operator()(It&& it) const { - it = format_uint<4, char_type>(it, self.abs_value, num_digits, - self.specs.type != 'x'); - } - }; - - void on_hex() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = specs.type; - } - int num_digits = count_digits<4>(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - hex_writer{*this, num_digits}); - } - - template struct bin_writer { - unsigned_type abs_value; - int num_digits; - - template void operator()(It&& it) const { - it = format_uint(it, abs_value, num_digits); - } - }; - - void on_bin() { - if (specs.alt) { - prefix[prefix_size++] = '0'; - prefix[prefix_size++] = static_cast(specs.type); - } - int num_digits = count_digits<1>(abs_value); - writer.write_int(num_digits, get_prefix(), specs, - bin_writer<1>{abs_value, num_digits}); - } - - void on_oct() { - int num_digits = count_digits<3>(abs_value); - if (specs.alt && specs.precision <= num_digits && abs_value != 0) { - // Octal prefix '0' is counted as a digit, so only add it if precision - // is not greater than the number of digits. - prefix[prefix_size++] = '0'; - } - writer.write_int(num_digits, get_prefix(), specs, - bin_writer<3>{abs_value, num_digits}); - } - - enum { sep_size = 1 }; - - struct num_writer { - unsigned_type abs_value; - int size; - const std::string& groups; - char_type sep; - - template void operator()(It&& it) const { - basic_string_view s(&sep, sep_size); - // Index of a decimal digit with the least significant digit having - // index 0. 
- int digit_index = 0; - std::string::const_iterator group = groups.cbegin(); - it = format_decimal( - it, abs_value, size, - [this, s, &group, &digit_index](char_type*& buffer) { - if (*group <= 0 || ++digit_index % *group != 0 || - *group == max_value()) - return; - if (group + 1 != groups.cend()) { - digit_index = 0; - ++group; - } - buffer -= s.size(); - std::uninitialized_copy(s.data(), s.data() + s.size(), - make_checked(buffer, s.size())); - }); - } - }; - - void on_num() { - std::string groups = grouping(writer.locale_); - if (groups.empty()) return on_dec(); - auto sep = thousands_sep(writer.locale_); - if (!sep) return on_dec(); - int num_digits = count_digits(abs_value); - int size = num_digits; - std::string::const_iterator group = groups.cbegin(); - while (group != groups.cend() && num_digits > *group && *group > 0 && - *group != max_value()) { - size += sep_size; - num_digits -= *group; - ++group; - } - if (group == groups.cend()) - size += sep_size * ((num_digits - 1) / groups.back()); - writer.write_int(size, get_prefix(), specs, - num_writer{abs_value, size, groups, sep}); - } - - FMT_NORETURN void on_error() { - FMT_THROW(format_error("invalid type specifier")); - } - }; - - template struct str_writer { - const Char* s; - size_t size_; - - size_t size() const { return size_; } - size_t width() const { - return count_code_points(basic_string_view(s, size_)); - } - - template void operator()(It&& it) const { - it = copy_str(s, s + size_, it); - } - }; - - struct bytes_writer { - string_view bytes; - - size_t size() const { return bytes.size(); } - size_t width() const { return bytes.size(); } - - template void operator()(It&& it) const { - const char* data = bytes.data(); - it = copy_str(data, data + size(), it); - } - }; - - template struct pointer_writer { - UIntPtr value; - int num_digits; - - size_t size() const { return to_unsigned(num_digits) + 2; } - size_t width() const { return size(); } - - template void operator()(It&& it) const { - *it++ 
= static_cast('0'); - *it++ = static_cast('x'); - it = format_uint<4, char_type>(it, value, num_digits); - } - }; - - public: - explicit basic_writer(Range out, locale_ref loc = locale_ref()) - : out_(out.begin()), locale_(loc) {} - - iterator out() const { return out_; } - - // Writes a value in the format - // - // where is written by f(it). - template void write_padded(const format_specs& specs, F&& f) { - // User-perceived width (in code points). - unsigned width = to_unsigned(specs.width); - size_t size = f.size(); // The number of code units. - size_t num_code_points = width != 0 ? f.width() : size; - if (width <= num_code_points) return f(reserve(size)); - size_t padding = width - num_code_points; - size_t fill_size = specs.fill.size(); - auto&& it = reserve(size + padding * fill_size); - if (specs.align == align::right) { - it = fill(it, padding, specs.fill); - f(it); - } else if (specs.align == align::center) { - std::size_t left_padding = padding / 2; - it = fill(it, left_padding, specs.fill); - f(it); - it = fill(it, padding - left_padding, specs.fill); - } else { - f(it); - it = fill(it, padding, specs.fill); - } - } - - void write(int value) { write_decimal(value); } - void write(long value) { write_decimal(value); } - void write(long long value) { write_decimal(value); } - - void write(unsigned value) { write_decimal(value); } - void write(unsigned long value) { write_decimal(value); } - void write(unsigned long long value) { write_decimal(value); } - -#if FMT_USE_INT128 - void write(int128_t value) { write_decimal(value); } - void write(uint128_t value) { write_decimal(value); } -#endif - - template - void write_int(T value, const Spec& spec) { - handle_int_type_spec(spec.type, int_writer(*this, value, spec)); - } - - template ::value)> - void write(T value, format_specs specs = {}) { - if (const_check(!is_supported_floating_point(value))) { - return; - } - float_specs fspecs = parse_float_type_spec(specs); - fspecs.sign = specs.sign; - if 
(std::signbit(value)) { // value < 0 is false for NaN so use signbit. - fspecs.sign = sign::minus; - value = -value; - } else if (fspecs.sign == sign::minus) { - fspecs.sign = sign::none; - } - - if (!std::isfinite(value)) { - auto str = std::isinf(value) ? (fspecs.upper ? "INF" : "inf") - : (fspecs.upper ? "NAN" : "nan"); - return write_padded(specs, nonfinite_writer{fspecs.sign, str}); - } - - if (specs.align == align::none) { - specs.align = align::right; - } else if (specs.align == align::numeric) { - if (fspecs.sign) { - auto&& it = reserve(1); - *it++ = static_cast(data::signs[fspecs.sign]); - fspecs.sign = sign::none; - if (specs.width != 0) --specs.width; - } - specs.align = align::right; - } - - memory_buffer buffer; - if (fspecs.format == float_format::hex) { - if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]); - snprintf_float(promote_float(value), specs.precision, fspecs, buffer); - write_padded(specs, str_writer{buffer.data(), buffer.size()}); - return; - } - int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6; - if (fspecs.format == float_format::exp) { - if (precision == max_value()) - FMT_THROW(format_error("number is too big")); - else - ++precision; - } - if (const_check(std::is_same())) fspecs.binary32 = true; - fspecs.use_grisu = use_grisu(); - if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) value *= 100; - int exp = format_float(promote_float(value), precision, fspecs, buffer); - if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) { - buffer.push_back('%'); - --exp; // Adjust decimal place position. - } - fspecs.precision = precision; - char_type point = fspecs.locale ? 
decimal_point(locale_) - : static_cast('.'); - write_padded(specs, float_writer(buffer.data(), - static_cast(buffer.size()), - exp, fspecs, point)); - } - - void write(char value) { - auto&& it = reserve(1); - *it++ = value; - } - - template ::value)> - void write(Char value) { - auto&& it = reserve(1); - *it++ = value; - } - - void write(string_view value) { - auto&& it = reserve(value.size()); - it = copy_str(value.begin(), value.end(), it); - } - void write(wstring_view value) { - static_assert(std::is_same::value, ""); - auto&& it = reserve(value.size()); - it = std::copy(value.begin(), value.end(), it); - } - - template - void write(const Char* s, std::size_t size, const format_specs& specs) { - write_padded(specs, str_writer{s, size}); - } - - template - void write(basic_string_view s, const format_specs& specs = {}) { - const Char* data = s.data(); - std::size_t size = s.size(); - if (specs.precision >= 0 && to_unsigned(specs.precision) < size) - size = code_point_index(s, to_unsigned(specs.precision)); - write(data, size, specs); - } - - void write_bytes(string_view bytes, const format_specs& specs) { - write_padded(specs, bytes_writer{bytes}); - } - - template - void write_pointer(UIntPtr value, const format_specs* specs) { - int num_digits = count_digits<4>(value); - auto pw = pointer_writer{value, num_digits}; - if (!specs) return pw(reserve(to_unsigned(num_digits) + 2)); - format_specs specs_copy = *specs; - if (specs_copy.align == align::none) specs_copy.align = align::right; - write_padded(specs_copy, pw); - } -}; - -using writer = basic_writer>; - -template struct is_integral : std::is_integral {}; -template <> struct is_integral : std::true_type {}; -template <> struct is_integral : std::true_type {}; - -template -class arg_formatter_base { - public: - using char_type = typename Range::value_type; - using iterator = typename Range::iterator; - using format_specs = basic_format_specs; - - private: - using writer_type = basic_writer; - writer_type 
writer_; - format_specs* specs_; - - struct char_writer { - char_type value; - - size_t size() const { return 1; } - size_t width() const { return 1; } - - template void operator()(It&& it) const { *it++ = value; } - }; - - void write_char(char_type value) { - if (specs_) - writer_.write_padded(*specs_, char_writer{value}); - else - writer_.write(value); - } - - void write_pointer(const void* p) { - writer_.write_pointer(internal::to_uintptr(p), specs_); - } - - protected: - writer_type& writer() { return writer_; } - FMT_DEPRECATED format_specs* spec() { return specs_; } - format_specs* specs() { return specs_; } - iterator out() { return writer_.out(); } - - void write(bool value) { - string_view sv(value ? "true" : "false"); - specs_ ? writer_.write(sv, *specs_) : writer_.write(sv); - } - - void write(const char_type* value) { - if (!value) { - FMT_THROW(format_error("string pointer is null")); - } else { - auto length = std::char_traits::length(value); - basic_string_view sv(value, length); - specs_ ? writer_.write(sv, *specs_) : writer_.write(sv); - } - } - - public: - arg_formatter_base(Range r, format_specs* s, locale_ref loc) - : writer_(r, loc), specs_(s) {} - - iterator operator()(monostate) { - FMT_ASSERT(false, "invalid argument type"); - return out(); - } - - template ::value)> - iterator operator()(T value) { - if (specs_) - writer_.write_int(value, *specs_); - else - writer_.write(value); - return out(); - } - - iterator operator()(char_type value) { - internal::handle_char_specs( - specs_, char_spec_handler(*this, static_cast(value))); - return out(); - } - - iterator operator()(bool value) { - if (specs_ && specs_->type) return (*this)(value ? 1 : 0); - write(value != 0); - return out(); - } - - template ::value)> - iterator operator()(T value) { - if (const_check(is_supported_floating_point(value))) - writer_.write(value, specs_ ? 
*specs_ : format_specs()); - else - FMT_ASSERT(false, "unsupported float argument type"); - return out(); - } - - struct char_spec_handler : ErrorHandler { - arg_formatter_base& formatter; - char_type value; - - char_spec_handler(arg_formatter_base& f, char_type val) - : formatter(f), value(val) {} - - void on_int() { - if (formatter.specs_) - formatter.writer_.write_int(value, *formatter.specs_); - else - formatter.writer_.write(value); - } - void on_char() { formatter.write_char(value); } - }; - - struct cstring_spec_handler : internal::error_handler { - arg_formatter_base& formatter; - const char_type* value; - - cstring_spec_handler(arg_formatter_base& f, const char_type* val) - : formatter(f), value(val) {} - - void on_string() { formatter.write(value); } - void on_pointer() { formatter.write_pointer(value); } - }; - - iterator operator()(const char_type* value) { - if (!specs_) return write(value), out(); - internal::handle_cstring_type_spec(specs_->type, - cstring_spec_handler(*this, value)); - return out(); - } - - iterator operator()(basic_string_view value) { - if (specs_) { - internal::check_string_type_spec(specs_->type, internal::error_handler()); - writer_.write(value, *specs_); - } else { - writer_.write(value); - } - return out(); - } - - iterator operator()(const void* value) { - if (specs_) - check_pointer_type_spec(specs_->type, internal::error_handler()); - write_pointer(value); - return out(); - } -}; - -template FMT_CONSTEXPR bool is_name_start(Char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; -} - -// Parses the range [begin, end) as an unsigned integer. This function assumes -// that the range is non-empty and the first character is a digit. -template -FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end, - ErrorHandler&& eh) { - FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); - unsigned value = 0; - // Convert to unsigned to prevent a warning. 
- constexpr unsigned max_int = max_value(); - unsigned big = max_int / 10; - do { - // Check for overflow. - if (value > big) { - value = max_int + 1; - break; - } - value = value * 10 + unsigned(*begin - '0'); - ++begin; - } while (begin != end && '0' <= *begin && *begin <= '9'); - if (value > max_int) eh.on_error("number is too big"); - return static_cast(value); -} - -template class custom_formatter { - private: - using char_type = typename Context::char_type; - - basic_format_parse_context& parse_ctx_; - Context& ctx_; - - public: - explicit custom_formatter(basic_format_parse_context& parse_ctx, - Context& ctx) - : parse_ctx_(parse_ctx), ctx_(ctx) {} - - bool operator()(typename basic_format_arg::handle h) const { - h.format(parse_ctx_, ctx_); - return true; - } - - template bool operator()(T) const { return false; } -}; - -template -using is_integer = - bool_constant::value && !std::is_same::value && - !std::is_same::value && - !std::is_same::value>; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { - if (is_negative(value)) handler_.on_error("negative width"); - return static_cast(value); - } - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T) { - handler_.on_error("width is not integer"); - return 0; - } - - private: - ErrorHandler& handler_; -}; - -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T value) { - if (is_negative(value)) handler_.on_error("negative precision"); - return static_cast(value); - } - - template ::value)> - FMT_CONSTEXPR unsigned long long operator()(T) { - handler_.on_error("precision is not integer"); - return 0; - } - - private: - ErrorHandler& handler_; -}; - -// A format specifier handler that sets fields in 
basic_format_specs. -template class specs_setter { - public: - explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) - : specs_(specs) {} - - FMT_CONSTEXPR specs_setter(const specs_setter& other) - : specs_(other.specs_) {} - - FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - specs_.fill = fill; - } - FMT_CONSTEXPR void on_plus() { specs_.sign = sign::plus; } - FMT_CONSTEXPR void on_minus() { specs_.sign = sign::minus; } - FMT_CONSTEXPR void on_space() { specs_.sign = sign::space; } - FMT_CONSTEXPR void on_hash() { specs_.alt = true; } - - FMT_CONSTEXPR void on_zero() { - specs_.align = align::numeric; - specs_.fill[0] = Char('0'); - } - - FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } - FMT_CONSTEXPR void on_precision(int precision) { - specs_.precision = precision; - } - FMT_CONSTEXPR void end_precision() {} - - FMT_CONSTEXPR void on_type(Char type) { - specs_.type = static_cast(type); - } - - protected: - basic_format_specs& specs_; -}; - -template class numeric_specs_checker { - public: - FMT_CONSTEXPR numeric_specs_checker(ErrorHandler& eh, internal::type arg_type) - : error_handler_(eh), arg_type_(arg_type) {} - - FMT_CONSTEXPR void require_numeric_argument() { - if (!is_arithmetic_type(arg_type_)) - error_handler_.on_error("format specifier requires numeric argument"); - } - - FMT_CONSTEXPR void check_sign() { - require_numeric_argument(); - if (is_integral_type(arg_type_) && arg_type_ != type::int_type && - arg_type_ != type::long_long_type && arg_type_ != type::char_type) { - error_handler_.on_error("format specifier requires signed argument"); - } - } - - FMT_CONSTEXPR void check_precision() { - if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) - error_handler_.on_error("precision not allowed for this argument type"); - } - - private: - ErrorHandler& error_handler_; - internal::type arg_type_; -}; - -// A format specifier handler that 
checks if specifiers are consistent with the -// argument type. -template class specs_checker : public Handler { - public: - FMT_CONSTEXPR specs_checker(const Handler& handler, internal::type arg_type) - : Handler(handler), checker_(*this, arg_type) {} - - FMT_CONSTEXPR specs_checker(const specs_checker& other) - : Handler(other), checker_(*this, other.arg_type_) {} - - FMT_CONSTEXPR void on_align(align_t align) { - if (align == align::numeric) checker_.require_numeric_argument(); - Handler::on_align(align); - } - - FMT_CONSTEXPR void on_plus() { - checker_.check_sign(); - Handler::on_plus(); - } - - FMT_CONSTEXPR void on_minus() { - checker_.check_sign(); - Handler::on_minus(); - } - - FMT_CONSTEXPR void on_space() { - checker_.check_sign(); - Handler::on_space(); - } - - FMT_CONSTEXPR void on_hash() { - checker_.require_numeric_argument(); - Handler::on_hash(); - } - - FMT_CONSTEXPR void on_zero() { - checker_.require_numeric_argument(); - Handler::on_zero(); - } - - FMT_CONSTEXPR void end_precision() { checker_.check_precision(); } - - private: - numeric_specs_checker checker_; -}; - -template