diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index 64b92c0..f868cc1 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -111,6 +111,9 @@ mv "./temp_devcontainer.json" ${base_devcontainer_file} # Create an array to keep track of valid subdirectory names valid_subdirs=() +# The img folder should not be removed: +valid_subdirs+=("img") + # For each unique combination for combination in $combinations; do cuda_version=$(echo "$combination" | jq -r '.cuda') diff --git a/ci/build_nvbench.sh b/ci/build_nvbench.sh index cc245d3..e9ba372 100755 --- a/ci/build_nvbench.sh +++ b/ci/build_nvbench.sh @@ -8,6 +8,23 @@ PRESET="nvbench-ci" CMAKE_OPTIONS="" +function version_lt() { + local lhs="${1//v/}" + local rhs="${2//v/}" + # If the versions are equal, return false + [ "$lhs" = "$rhs" ] && return 1 + # If the left-hand side is less than the right-hand side, return true + [ "$lhs" = `echo -e "$lhs\n$rhs" | sort -V | head -n1` ] +} + +# If CUDA_COMPILER is nvcc and the version < 11.3, disable CUPTI +if [[ "$CUDA_COMPILER" == *"nvcc"* ]]; then + CUDA_VERSION=$(nvcc --version | grep release | sed -r 's/.*release ([0-9.]+).*/\1/') + if version_lt "$CUDA_VERSION" "11.3"; then + CMAKE_OPTIONS+=" -DNVBench_ENABLE_CUPTI=OFF " + fi +fi + configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" print_time_summary diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 9959473..563dac0 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -47,40 +47,40 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # Configurations that will run for every PR pull_request: nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10} - - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} - - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12} + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 } + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 } # Fails to compile simple input on CTK12.4. Try to add later. - # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18, extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"} - - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} - - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} + # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18, extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"} + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019 } + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022 } diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index 1ac8bd1..7240698 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -20,7 +20,7 @@ $CMAKE_OPTIONS = "" # Append any arguments pass in on the command line If($ARG_CMAKE_OPTIONS -ne "") { - $CMAKE_OPTIONS += "$ARG_CMAKE_OPTIONS" + $CMAKE_OPTIONS += " $ARG_CMAKE_OPTIONS" } configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" diff --git a/ci/windows/test_nvbench.ps1 b/ci/windows/test_nvbench.ps1 index bcd9f2c..4ee5106 100644 --- a/ci/windows/test_nvbench.ps1 +++ b/ci/windows/test_nvbench.ps1 @@ -20,7 +20,7 @@ $CMAKE_OPTIONS = "" # Append any arguments pass in on the command line If($ARG_CMAKE_OPTIONS -ne "") { - $CMAKE_OPTIONS += "$ARG_CMAKE_OPTIONS" + $CMAKE_OPTIONS += " $ARG_CMAKE_OPTIONS" } configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" diff --git a/cmake/NVBenchNVML.cmake b/cmake/NVBenchNVML.cmake index f2aadbb..4b005f3 100644 --- a/cmake/NVBenchNVML.cmake +++ b/cmake/NVBenchNVML.cmake @@ -1,37 +1,43 @@ -# Since this file is installed, we need to make sure that the CUDAToolkit has -# been found by consumers: -if (NOT TARGET CUDA::toolkit) - find_package(CUDAToolkit REQUIRED) -endif() - -if (WIN32) - # The CUDA:: targets currently don't provide dll locations through the - # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries - # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls` - # CMake function from copying the dlls to the build / install directories. - # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845 - # and the other CMake issues it links to. - # - # We create a nvbench-specific target that configures the nvml interface as - # described here: - # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538 - # - # Use find_file instead of find_library, which would search for a .lib file. - # This is also nice because find_file searches recursively (find_library - # does not) and some versions of CTK nest nvml.dll several directories deep - # under C:\Windows\System32. - find_file(NVBench_NVML_DLL nvml.dll REQUIRED - DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32." - PATHS "C:/Windows/System32" - ) - mark_as_advanced(NVBench_NVML_DLL) - add_library(nvbench::nvml SHARED IMPORTED) - target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit) - set_target_properties(nvbench::nvml PROPERTIES - IMPORTED_LOCATION "${NVBench_NVML_DLL}" - IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}" - ) -else() - # Linux is much easier... - add_library(nvbench::nvml ALIAS CUDA::nvml) -endif() +# Since this file is installed, we need to make sure that the CUDAToolkit has +# been found by consumers: +if (NOT TARGET CUDA::toolkit) + find_package(CUDAToolkit REQUIRED) +endif() + +if (WIN32) + # The CUDA:: targets currently don't provide dll locations through the + # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries + # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls` + # CMake function from copying the dlls to the build / install directories. + # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845 + # and the other CMake issues it links to. + # + # We create a nvbench-specific target that configures the nvml interface as + # described here: + # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538 + # + # Use find_file instead of find_library, which would search for a .lib file. + # This is also nice because find_file searches recursively (find_library + # does not) and some versions of CTK nest nvml.dll several directories deep + # under C:\Windows\System32. + find_file(NVBench_NVML_DLL nvml.dll + DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32." + PATHS "C:/Windows/System32" + ) + mark_as_advanced(NVBench_NVML_DLL) +endif() + +if (NVBench_NVML_DLL) + add_library(nvbench::nvml SHARED IMPORTED) + target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit) + set_target_properties(nvbench::nvml PROPERTIES + IMPORTED_LOCATION "${NVBench_NVML_DLL}" + IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}" + ) +elseif(TARGET CUDA::nvml) + add_library(nvbench::nvml ALIAS CUDA::nvml) +else() + message(FATAL_ERROR "Could not find nvml.dll or CUDA::nvml target. " + "Set -DNVBench_ENABLE_NVML=OFF to disable NVML support " + "or set -DNVBench_NVML_DLL to the full path to nvml.dll on Windows.") +endif() diff --git a/nvbench/main.cuh b/nvbench/main.cuh index bcdced5..cd809ba 100644 --- a/nvbench/main.cuh +++ b/nvbench/main.cuh @@ -28,14 +28,135 @@ #include #include +// Advanced users can rebuild NVBench's `main` function using the macros in this file, or replace +// them with customized implementations. + +// Customization point, called before NVBench initialization. +#ifndef NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv) []() {}() +#endif + +// Customization point, called after NVBench initialization. +#ifndef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) []() {}() +#endif + +// Customization point, called before NVBench parsing. Update argc/argv if needed. +// argc/argv are the usual command line arguments types. The ARGS version of this +// macro is a bit more convenient. +#ifndef NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER(argc, argv) []() {}() +#endif + +// Customization point, called before NVBench parsing. Update args if needed. +// Args is a vector of strings, each element is an argument. +#ifndef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) []() {}() +#endif + +// Customization point, called before NVBench parsing. +#ifndef NVBENCH_MAIN_PARSE_CUSTOM_PRE +#define NVBENCH_MAIN_PARSE_CUSTOM_PRE(parser, args) []() {}() +#endif + +// Customization point, called after NVBench parsing. +#ifndef NVBENCH_MAIN_PARSE_CUSTOM_POST +#define NVBENCH_MAIN_PARSE_CUSTOM_POST(parser) []() {}() +#endif + +// Customization point, called before NVBench finalization. +#ifndef NVBENCH_MAIN_FINALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_FINALIZE_CUSTOM_PRE() []() {}() +#endif + +// Customization point, called after NVBench finalization. +#ifndef NVBENCH_MAIN_FINALIZE_CUSTOM_POST +#define NVBENCH_MAIN_FINALIZE_CUSTOM_POST() []() {}() +#endif + +// Customization point, use to catch addition exceptions. +#ifndef NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#define NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#endif + +/************************************ Default implementation **************************************/ + +#ifndef NVBENCH_MAIN #define NVBENCH_MAIN \ - int main(int argc, char const *const *argv) \ + int main(int argc, char **argv) \ try \ { \ NVBENCH_MAIN_BODY(argc, argv); \ - NVBENCH_CUDA_CALL(cudaDeviceReset()); \ return 0; \ } \ + NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM \ + NVBENCH_MAIN_CATCH_EXCEPTIONS +#endif + +#ifndef NVBENCH_MAIN_BODY +#define NVBENCH_MAIN_BODY(argc, argv) \ + NVBENCH_MAIN_INITIALIZE(argc, argv); \ + { \ + NVBENCH_MAIN_PARSE(argc, argv); \ + \ + NVBENCH_MAIN_PRINT_PREAMBLE(parser); \ + NVBENCH_MAIN_RUN_BENCHMARKS(parser); \ + NVBENCH_MAIN_PRINT_EPILOGUE(parser); \ + \ + NVBENCH_MAIN_PRINT_RESULTS(parser); \ + } /* Tear down parser before finalization */ \ + NVBENCH_MAIN_FINALIZE(); \ + return 0; +#endif + +#ifndef NVBENCH_MAIN_INITIALIZE +#define NVBENCH_MAIN_INITIALIZE(argc, argv) \ + { /* Open a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv); \ + nvbench::detail::main_initialize(argc, argv); \ + { /* Open a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) +#endif + +#ifndef NVBENCH_MAIN_PARSE +#define NVBENCH_MAIN_PARSE(argc, argv) \ + NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER(argc, argv); \ + std::vector args = nvbench::detail::main_convert_args(argc, argv); \ + NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args); \ + nvbench::option_parser parser; \ + NVBENCH_MAIN_PARSE_CUSTOM_PRE(parser, args); \ + parser.parse(args); \ + NVBENCH_MAIN_PARSE_CUSTOM_POST(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_PREAMBLE +#define NVBENCH_MAIN_PRINT_PREAMBLE(parser) nvbench::detail::main_print_preamble(parser) +#endif + +#ifndef NVBENCH_MAIN_RUN_BENCHMARKS +#define NVBENCH_MAIN_RUN_BENCHMARKS(parser) nvbench::detail::main_run_benchmarks(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_EPILOGUE +#define NVBENCH_MAIN_PRINT_EPILOGUE(parser) nvbench::detail::main_print_epilogue(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_RESULTS +#define NVBENCH_MAIN_PRINT_RESULTS(parser) nvbench::detail::main_print_results(parser) +#endif + +#ifndef NVBENCH_MAIN_FINALIZE +#define NVBENCH_MAIN_FINALIZE() \ + NVBENCH_MAIN_FINALIZE_CUSTOM_PRE(); \ + } /* Close a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + nvbench::detail::main_finalize(); \ + NVBENCH_MAIN_FINALIZE_CUSTOM_POST(); \ + } /* Close a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + []() {}() +#endif + +#ifndef NVBENCH_MAIN_CATCH_EXCEPTIONS +#define NVBENCH_MAIN_CATCH_EXCEPTIONS \ catch (std::exception & e) \ { \ std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; \ @@ -46,56 +167,87 @@ std::cerr << "\nNVBench encountered an unknown error.\n"; \ return 1; \ } - -#ifdef NVBENCH_HAS_CUPTI -#define NVBENCH_INITIALIZE_DRIVER_API NVBENCH_DRIVER_API_CALL(cuInit(0)) -#else -// clang-format off -#define NVBENCH_INITIALIZE_DRIVER_API do {} while (false) -// clang-format on #endif -#define NVBENCH_MAIN_PARSE(argc, argv) \ - nvbench::option_parser parser; \ - parser.parse(argc, argv) +namespace nvbench::detail +{ -// See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING +inline void set_env(const char *name, const char *value) +{ #ifdef _MSC_VER -#define NVBENCH_INITIALIZE_CUDA_ENV _putenv_s("CUDA_MODULE_LOADING", "EAGER") + _putenv_s(name, value); #else -#define NVBENCH_INITIALIZE_CUDA_ENV setenv("CUDA_MODULE_LOADING", "EAGER", 1) + setenv(name, value, 1); #endif +} -#define NVBENCH_INITIALIZE_BENCHMARKS() \ - nvbench::benchmark_manager::get().initialize() +inline void main_initialize(int, char **) +{ + // See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING + set_env("CUDA_MODULE_LOADING", "EAGER"); -#define NVBENCH_MAIN_BODY(argc, argv) \ - do \ - { \ - NVBENCH_INITIALIZE_CUDA_ENV; \ - NVBENCH_INITIALIZE_DRIVER_API; \ - NVBENCH_INITIALIZE_BENCHMARKS(); \ - NVBENCH_MAIN_PARSE(argc, argv); \ - auto &printer = parser.get_printer(); \ - \ - printer.print_device_info(); \ - printer.print_log_preamble(); \ - auto &benchmarks = parser.get_benchmarks(); \ - \ - std::size_t total_states = 0; \ - for (auto &bench_ptr : benchmarks) \ - { \ - total_states += bench_ptr->get_config_count(); \ - } \ - printer.set_total_state_count(total_states); \ - \ - printer.set_completed_state_count(0); \ - for (auto &bench_ptr : benchmarks) \ - { \ - bench_ptr->set_printer(printer); \ - bench_ptr->run(); \ - bench_ptr->clear_printer(); \ - } \ - printer.print_log_epilogue(); \ - printer.print_benchmark_results(benchmarks); \ - } while (false) + // Initialize CUDA driver API if needed: +#ifdef NVBENCH_HAS_CUPTI + NVBENCH_DRIVER_API_CALL(cuInit(0)); +#endif + + // Initialize the benchmarks *after* setting up the CUDA environment: + nvbench::benchmark_manager::get().initialize(); +} + +inline std::vector main_convert_args(int argc, char const *const *argv) +{ + std::vector args; + for (int i = 0; i < argc; ++i) + { + args.push_back(argv[i]); + } + return args; +} + +inline void main_print_preamble(option_parser &parser) +{ + auto &printer = parser.get_printer(); + + printer.print_device_info(); + printer.print_log_preamble(); +} + +inline void main_run_benchmarks(option_parser &parser) +{ + auto &printer = parser.get_printer(); + auto &benchmarks = parser.get_benchmarks(); + + std::size_t total_states = 0; + for (auto &bench_ptr : benchmarks) + { + total_states += bench_ptr->get_config_count(); + } + + printer.set_completed_state_count(0); + printer.set_total_state_count(total_states); + + for (auto &bench_ptr : benchmarks) + { + bench_ptr->set_printer(printer); + bench_ptr->run(); + bench_ptr->clear_printer(); + } +} + +inline void main_print_epilogue(option_parser &parser) +{ + auto &printer = parser.get_printer(); + printer.print_log_epilogue(); +} + +inline void main_print_results(option_parser &parser) +{ + auto &printer = parser.get_printer(); + auto &benchmarks = parser.get_benchmarks(); + printer.print_benchmark_results(benchmarks); +} + +inline void main_finalize() { NVBENCH_CUDA_CALL(cudaDeviceReset()); } + +} // namespace nvbench::detail diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 1535878..4a03000 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -6,6 +6,9 @@ set(test_srcs cpu_timer.cu criterion_manager.cu criterion_params.cu + custom_main_custom_args.cu + custom_main_custom_exceptions.cu + custom_main_global_state_raii.cu enum_type_list.cu entropy_criterion.cu float64_axis.cu @@ -24,7 +27,12 @@ set(test_srcs type_list.cu ) -# Metatarget for all examples: +# Custom arguments: +# CTest commands+args can't be modified after creation, so we need to rely on substitution. +set(NVBench_TEST_ARGS_nvbench.test.custom_main_custom_args "--quiet" "--my-custom-arg" "--run-once" "-d" "0") +set(NVBench_TEST_ARGS_nvbench.test.custom_main_custom_exceptions "--quiet" "--run-once" "-d" "0") + +# Metatarget for all tests: add_custom_target(nvbench.test.all) add_dependencies(nvbench.all nvbench.test.all) @@ -36,10 +44,14 @@ foreach(test_src IN LISTS test_srcs) target_link_libraries(${test_name} PRIVATE nvbench::nvbench fmt) set_target_properties(${test_name} PROPERTIES COMPILE_FEATURES cuda_std_17) nvbench_config_target(${test_name}) - add_test(NAME ${test_name} COMMAND "$") + add_test(NAME ${test_name} COMMAND "$" ${NVBench_TEST_ARGS_${test_name}}) add_dependencies(nvbench.test.all ${test_name}) endforeach() +set_tests_properties(nvbench.test.custom_main_custom_exceptions PROPERTIES + PASS_REGULAR_EXPRESSION "Custom error detected: Expected exception thrown." +) + add_subdirectory(cmake) add_subdirectory(device) diff --git a/testing/custom_main_custom_args.cu b/testing/custom_main_custom_args.cu new file mode 100644 index 0000000..f7e331e --- /dev/null +++ b/testing/custom_main_custom_args.cu @@ -0,0 +1,132 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "nvbench/cuda_call.cuh" + +/****************************************************************************** + * Install custom parser. + * sSee for more details. + ******************************************************************************/ + +// +// Step 1: Define a custom argument handler that accepts a vector of strings. +// - This handler should modify the vector in place to remove any custom +// arguments it handles. NVbench will then parse the remaining arguments. +// - The handler should also update any application state needed to handle +// the custom arguments. +// + +// User code to handle a specific argument: +void handle_my_custom_arg(); + +// NVBench hook for modiifying the command line arguments before parsing: +void custom_arg_handler(std::vector &args) +{ + // Handle and remove "--my-custom-arg" + if (auto it = std::find(args.begin(), args.end(), "--my-custom-arg"); it != args.end()) + { + handle_my_custom_arg(); + args.erase(it); + } +} + +// +// Step 2: Install the custom argument handler. +// - This is done by defining a macro that invokes the custom argument handler. +// + +// Install the custom argument handler: +// Either define this before any NVBench headers are included, or undefine and redefine: +#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) custom_arg_handler(args) + +// Step 3: Define `main` +// +// After installing the custom argument handler, define the main function using: +// +// ``` +// NVBENCH_MAIN +// ``` +// +// Here, this is done at the end of this file. + +/****************************************************************************** + * Unit test verification: + ******************************************************************************/ + +// Track whether the args are found / handled. +bool h_custom_arg_found = false; +bool h_handled_on_device = false; +__device__ bool d_custom_arg_found = false; +__device__ bool d_handled_on_device = false; + +// Copy host values to device: +void copy_host_state_to_device() +{ + NVBENCH_CUDA_CALL(cudaMemcpyToSymbol(d_custom_arg_found, &h_custom_arg_found, sizeof(bool))); + NVBENCH_CUDA_CALL(cudaMemcpyToSymbol(d_handled_on_device, &h_handled_on_device, sizeof(bool))); +} + +// Copy device values to host: +void copy_device_state_to_host() +{ + NVBENCH_CUDA_CALL(cudaMemcpyFromSymbol(&h_custom_arg_found, d_custom_arg_found, sizeof(bool))); + NVBENCH_CUDA_CALL(cudaMemcpyFromSymbol(&h_handled_on_device, d_handled_on_device, sizeof(bool))); +} + +void handle_my_custom_arg() +{ + h_custom_arg_found = true; + copy_host_state_to_device(); +} + +void verify() +{ + copy_device_state_to_host(); + if (!h_custom_arg_found) + { + throw std::runtime_error("Custom argument not detected."); + } + if (!h_handled_on_device) + { + throw std::runtime_error("Custom argument not handled on device."); + } +} + +// Install a verification check to ensure the custom argument was handled. +// Use the `PRE` finalize hook to ensure we check device state before resetting the context. +#undef NVBENCH_MAIN_FINALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_FINALIZE_CUSTOM_PRE() verify() + +// Simple kernel/benchmark to make sure that the handler can successfully modify CUDA state: +__global__ void kernel() +{ + if (d_custom_arg_found) + { + d_handled_on_device = true; + } +} +void bench(nvbench::state &state) +{ + state.exec([](nvbench::launch &) { kernel<<<1, 1>>>(); }); +} +NVBENCH_BENCH(bench); + +// Define the customized main function: +NVBENCH_MAIN diff --git a/testing/custom_main_custom_exceptions.cu b/testing/custom_main_custom_exceptions.cu new file mode 100644 index 0000000..b1f9b9c --- /dev/null +++ b/testing/custom_main_custom_exceptions.cu @@ -0,0 +1,64 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +/****************************************************************************** + * Install exception handler around the NVBench main body. This is used + * to print helpful information when a user exception is thrown before exiting. + * + * Note that this will **NOT** be used when a benchmark throws an exception. + * That will fail the benchmark and note the exception, and continue + * execution. + * + * This is used to catch exceptions in user extensions of NVBench, things like + * customized initialization, command line parsing, finalization, etc. See + * for more details. + ******************************************************************************/ + +struct user_exception : public std::runtime_error +{ + user_exception() + : std::runtime_error("Expected exception thrown.") + {} +}; + +// User code to handle user exception: +void handle_my_exception(user_exception &e) +{ + std::cerr << "Custom error detected: " << e.what() << std::endl; + std::exit(1); +} + +// Install the exception handler around the NVBench main body. +// NVBench will have sensible defaults for common exceptions following this if no terminating catch +// block is defined. +// Either define this before any NVBench headers are included, or undefine and redefine. +#undef NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#define NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM \ + catch (user_exception & e) { handle_my_exception(e); } + +// For testing purposes, install a argument parser that throws: +void really_robust_argument_parser(std::vector &) { throw user_exception(); } +#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) really_robust_argument_parser(args); + +// Define the customized main function: +NVBENCH_MAIN diff --git a/testing/custom_main_global_state_raii.cu b/testing/custom_main_global_state_raii.cu new file mode 100644 index 0000000..e3584ab --- /dev/null +++ b/testing/custom_main_global_state_raii.cu @@ -0,0 +1,121 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +/****************************************************************************** + * Test having global state that is initialized and finalized via RAII. + *****************************************************************************/ + +struct raii +{ + const char m_ref_data[6]; + char *m_data; + bool m_cuda; + + const char *m_outer_data; + bool m_outer_cuda; + + explicit raii(bool cuda, char *outer_data = nullptr, bool outer_cuda = false) + : m_ref_data{'a', 'b', 'c', '1', '2', '3'} + , m_data(nullptr) + , m_cuda(cuda) + , m_outer_data(outer_data) + , m_outer_cuda(outer_cuda) + { + if (m_cuda) + { + printf("(%p) RAII test: allocating device memory\n", this); + NVBENCH_CUDA_CALL(cudaMalloc(&m_data, 6)); + NVBENCH_CUDA_CALL(cudaMemcpy(m_data, m_ref_data, 6, cudaMemcpyHostToDevice)); + } + else + { + printf("(%p) RAII test: allocating host memory\n", this); + m_data = new char[6]; + std::copy(m_ref_data, m_ref_data + 6, m_data); + } + } + + ~raii() + { + this->verify(); + if (m_cuda) + { + printf("(%p) RAII test: invalidating device memory\n", this); + NVBENCH_CUDA_CALL(cudaMemset(m_data, 0, 6)); + printf("(%p) RAII test: freeing device memory\n", this); + NVBENCH_CUDA_CALL(cudaFree(m_data)); + } + else + { + printf("(%p) RAII test: invalidating host memory\n", this); + std::fill(m_data, m_data + 6, '\0'); + printf("(%p) RAII test: freeing host memory\n", this); + delete[] m_data; + } + } + + void verify() noexcept + { + printf("(%p) RAII test: verifying instance state\n", this); + this->verify(m_cuda, m_data); + if (m_outer_data) + { + printf("(%p) RAII test: verifying outer state\n", this); + this->verify(m_outer_cuda, m_outer_data); + } + } + + void verify(bool cuda, const char *data) noexcept + { + if (cuda) + { + char test_data[6]; + NVBENCH_CUDA_CALL(cudaMemcpy(test_data, data, 6, cudaMemcpyDeviceToHost)); + if (strncmp(test_data, m_ref_data, 6) != 0) + { + printf("(%p) RAII test failed: device data mismatch\n", this); + std::exit(1); + } + } + else + { + if (strncmp(data, m_ref_data, 6) != 0) + { + printf("(%p) RAII test failed: host data mismatch\n", this); + std::exit(1); + } + } + } +}; + +// These will be destroyed in the opposite order in which they are created: + +#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv) raii raii_outer(false); + +#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \ + [[maybe_unused]] raii raii_inner(true, raii_outer.m_data, raii_outer.m_cuda); + +NVBENCH_MAIN