diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index ae87e0f63..5ddd91ead 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -110,10 +110,13 @@ jobs: working-directory: ${{github.workspace}}/build shell: bash run: | - cmake -S $GITHUB_WORKSPACE/doc/dox/dev/devsamp/main -B test_install_devsamp_main -DCMAKE_PREFIX_PATH=${{github.workspace}}/install || (cat test_install_devsamp_main/CMakeFiles/CMakeOutput.log && cat test_install_devsamp_main/CMakeFiles/CMakeError.log) - cmake --build test_install_devsamp_main + cmake -S $GITHUB_WORKSPACE/doc/dox/dev/devsamp/helloworld -B test_install_devsamp_helloworld -DCMAKE_PREFIX_PATH=${{github.workspace}}/install || (cat test_install_devsamp_helloworld/CMakeFiles/CMakeOutput.log && cat test_install_devsamp_helloworld/CMakeFiles/CMakeError.log) + cmake --build test_install_devsamp_helloworld + test_install_devsamp_helloworld/helloworld-parsec + test_install_devsamp_helloworld/helloworld-mad cmake -S $GITHUB_WORKSPACE/doc/dox/dev/devsamp/fibonacci -B test_install_devsamp_fibonacci -DCMAKE_PREFIX_PATH=${{github.workspace}}/install || (cat test_install_devsamp_fibonacci/CMakeFiles/CMakeOutput.log && cat test_install_devsamp_fibonacci/CMakeFiles/CMakeError.log) cmake --build test_install_devsamp_fibonacci + test_install_devsamp_fibonacci/fibonacci-parsec cmake -E make_directory test_install_userexamples cat > test_install_userexamples/CMakeLists.txt < cmake -S . -B build && cmake --build build --target hw-parsec +> cmake -S . -B build && cmake --build build --target helloworld-parsec ``` +The complete example, including the CMake build harness using a slightly easier way to build the executable (using `add_ttg_executable` CMake macro), can be found in [dox examples](https://github.com/TESSEorg/ttg/tree/master/doc/dox/dev/devsamp/helloworld). + ## "Hello, World!" Walkthrough Although it does not involve any useful flow of computation and/or data, the above "Hello, World!" 
TTG program introduces several key TTG concepts and illustrates what you need to do to write a complete TTG program. So let's walk through it. @@ -95,7 +97,7 @@ Every TTG program must: - make TTG executable and kickstart the execution by sending a control or data message to the TTG, - shut down the runtime -Let's go over each of these steps using the "Hello, World!" example. +Let's go over each of these steps using the "Hello, World!" example. The complete example, including the CMake build harness, can be found in [dox examples](https://github.com/TESSEorg/ttg/tree/master/doc/dox/dev/devsamp/helloworld). ### Select the TTG Backend @@ -138,7 +140,7 @@ To make a TTG create and connect one or more TTs. The simplest TTG consists of a The "Hello, World!" example contains a single TT that executes a single task (hence, task ID can be omitted, i.e., void) that does not take and produce any data. The easiest way to make such a TT is by wrapping a callable (e.g., a lambda) with `ttg::make_tt`: ```cpp - auto tt = ttg::make_tt([]() { std::cout << "Hello, World!"; }); + auto tt = ttg::make_tt([]() { std::cout << "Hello, World!\n"; }); ``` ## Execute TTG @@ -243,6 +245,7 @@ $F_{n-1},F_{n-2} \to F_{n}$). To illustrate the real power of TTG let's tweak the problem slightly: instead of computing first $N$ Fibonacci numbers let's find the largest Fibonacci number smaller than some $N$. The key difference in the latter case is that, unlike the former, the number of tasks is NOT known a priori; furthermore, to make a decision whether we need to compute next Fibonacci number we must examine the value returned by the previous task. This is an example of data-dependent tasking, where the decision which (if any) task to execute next depends on the values produced by previous tasks. The ability to compose regular as well as data-dependent task graphs is a distinguishing strength of TTG.
To make things even more interesting, we will demonstrate how to implement such program both for execution on CPUs as well as on accelerators (GPUs). +The complete examples, including the CMake build harness, can be found in [dox examples](https://github.com/TESSEorg/ttg/tree/master/doc/dox/dev/devsamp/fibonacci). ### The CPU Version @@ -450,8 +453,6 @@ Here's the CUDA version of the device kernel and its host-side wrapper; ROCm and `cu_next_value` is the device kernel that evaluates $F_{n+1}$ from $F_{n}$ and $F_{n-1}$. `next_value` is a host function that launches `cu_next_value`; this is the function called in the `fib` task. -The complete example, including the CMake build harness, can be found in [dox examples](https://github.com/TESSEorg/ttg/tree/master/doc/dox/dev/devsamp/fibonacci). - ## Debugging TTG Programs ### TTG Visualization diff --git a/doc/dox/dev/devsamp/fibonacci/CMakeLists.txt b/doc/dox/dev/devsamp/fibonacci/CMakeLists.txt index e5058cb43..7c57bde7d 100644 --- a/doc/dox/dev/devsamp/fibonacci/CMakeLists.txt +++ b/doc/dox/dev/devsamp/fibonacci/CMakeLists.txt @@ -2,6 +2,11 @@ cmake_minimum_required(VERSION 3.14) project(ttg-devsample-fibonacci) find_package(ttg REQUIRED) +if (NOT TARGET ttg-parsec) # else build from source + include(FetchContent) + FetchContent_Declare(ttg GIT_REPOSITORY https://github.com/TESSEorg/ttg.git) + FetchContent_MakeAvailable( ttg ) +endif() add_ttg_executable(fibonacci fibonacci.cc NOT_EXCLUDE_FROM_ALL) # Fib device test @@ -11,4 +16,4 @@ if (TTG_HAVE_CUDA) fibonacci_cuda_kernel.h fibonacci_cuda_kernel.cu LINK_LIBRARIES std::coroutine RUNTIMES "parsec" NOT_EXCLUDE_FROM_ALL) -endif() \ No newline at end of file +endif() diff --git a/doc/dox/dev/devsamp/fibonacci/README.md b/doc/dox/dev/devsamp/fibonacci/README.md new file mode 100644 index 000000000..b2812041c --- /dev/null +++ b/doc/dox/dev/devsamp/fibonacci/README.md @@ -0,0 +1,17 @@ +# Largest Fibonacci number + +This directory contains TTG programs computing the 
largest Fibonacci number smaller than $N$: + +- CPU version: `fibonacci.cc` +- Device version: `fibonacci_device.cc` + - CUDA kernel: `fibonacci_cuda_kernel.{cu,h}` + +## Build + +After TTG has been installed to `/path/to/ttg`, do this: + +- configure: `cmake -S . -B build -DCMAKE_PREFIX_PATH="/path/to/ttg"` +- build: + - CPU version: `cmake --build build --target fibonacci` + - CUDA version (TTG must have been configured with CUDA support): `cmake --build build --target fibonacci_cuda` +- run: `./build/fibonacci-parsec N` or `./build/fibonacci_cuda-parsec N` diff --git a/doc/dox/dev/devsamp/helloworld/CMakeLists.txt b/doc/dox/dev/devsamp/helloworld/CMakeLists.txt new file mode 100644 index 000000000..c7fddf47f --- /dev/null +++ b/doc/dox/dev/devsamp/helloworld/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.14) +project(ttg-devsample-helloworld) + +find_package(ttg REQUIRED) +if (NOT TARGET ttg-parsec) # else build from source + include(FetchContent) + FetchContent_Declare(ttg GIT_REPOSITORY https://github.com/TESSEorg/ttg.git) + FetchContent_MakeAvailable( ttg ) +endif() + +add_ttg_executable(helloworld helloworld.cpp NOT_EXCLUDE_FROM_ALL) diff --git a/doc/dox/dev/devsamp/helloworld/README.md b/doc/dox/dev/devsamp/helloworld/README.md new file mode 100644 index 000000000..4513cd489 --- /dev/null +++ b/doc/dox/dev/devsamp/helloworld/README.md @@ -0,0 +1,11 @@ +# TTG "Hello World" + +This directory contains the TTG "Hello World" program + +## Build + +After TTG has been installed to `/path/to/ttg`, do this: + +- configure: `cmake -S .
-B build -DCMAKE_PREFIX_PATH="/path/to/ttg"` +- build: `cmake --build build` +- run: `./build/helloworld-parsec` or `./build/helloworld-mad` diff --git a/doc/dox/dev/devsamp/helloworld/helloworld.cpp b/doc/dox/dev/devsamp/helloworld/helloworld.cpp new file mode 100644 index 000000000..b59d939b4 --- /dev/null +++ b/doc/dox/dev/devsamp/helloworld/helloworld.cpp @@ -0,0 +1,17 @@ +#include <ttg.h> + +using namespace ttg; + + int main(int argc, char *argv[]) { + ttg::initialize(argc, argv); + + auto tt = ttg::make_tt([]() { std::cout << "Hello, World!\n"; }); + + ttg::make_graph_executable(tt); + ttg::execute(); + if (ttg::get_default_world().rank() == 0) tt->invoke(); + ttg::fence(); + + ttg::finalize(); + return 0; +} diff --git a/doc/dox/dev/devsamp/main/CMakeLists.txt b/doc/dox/dev/devsamp/main/CMakeLists.txt deleted file mode 100644 index 5a127cd97..000000000 --- a/doc/dox/dev/devsamp/main/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(ttg-devsample-main) - -find_package(ttg REQUIRED) - -add_ttg_executable(test test.cpp NOT_EXCLUDE_FROM_ALL) diff --git a/doc/dox/dev/devsamp/main/test.cpp b/doc/dox/dev/devsamp/main/test.cpp deleted file mode 100644 index a0cc7b73c..000000000 --- a/doc/dox/dev/devsamp/main/test.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include <ttg.h> - -using namespace ttg; - -int main(int argc, char* argv[]) { - initialize(argc, argv); - ttg_finalize(); -} diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 5c3d4012c..e1fb7d685 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -32,14 +32,6 @@ add_ttg_executable(serialization serialization.cc unit_main.cpp add_ttg_executable(serialization_boost serialization_boost.cc LINK_LIBRARIES ttg-serialization-boost RUNTIMES "parsec") -# Fib device test -if (TTG_HAVE_CUDA) - add_ttg_executable(fibonacci_device fibonacci_device.cc - fibonacci_cuda_kernel.h - fibonacci_cuda_kernel.cu - LINK_LIBRARIES std::coroutine RUNTIMES "parsec") -endif() -
# TODO: convert into unit test #if (TARGET MADworld) #add_executable(splitmd_serialization splitmd_serialization.cc unit_main.cpp) diff --git a/tests/unit/fibonacci_cuda_kernel.cu b/tests/unit/fibonacci_cuda_kernel.cu deleted file mode 100644 index 6fa316468..000000000 --- a/tests/unit/fibonacci_cuda_kernel.cu +++ /dev/null @@ -1,15 +0,0 @@ -#include "fibonacci_cuda_kernel.h" - -#ifdef TTG_HAVE_CUDA - -__global__ void cu_next_value(int64_t* fn_and_fnm1) { - int64_t fnp1 = fn_and_fnm1[0] + fn_and_fnm1[1]; - fn_and_fnm1[1] = fn_and_fnm1[0]; - fn_and_fnm1[0] = fnp1; -} - -void next_value(int64_t* fn_and_fnm1) { - cu_next_value<<<1, 1>>>(fn_and_fnm1); -} - -#endif // TTG_HAVE_CUDA diff --git a/tests/unit/fibonacci_cuda_kernel.h b/tests/unit/fibonacci_cuda_kernel.h deleted file mode 100644 index a096ec3f1..000000000 --- a/tests/unit/fibonacci_cuda_kernel.h +++ /dev/null @@ -1,4 +0,0 @@ -#include "ttg/config.h" -#include - -void next_value(int64_t* fn_and_fnm1); \ No newline at end of file diff --git a/tests/unit/fibonacci_device.cc b/tests/unit/fibonacci_device.cc deleted file mode 100644 index a1603cb58..000000000 --- a/tests/unit/fibonacci_device.cc +++ /dev/null @@ -1,88 +0,0 @@ -#include - -#if defined(TTG_HAVE_CUDA) -#define ES ttg::ExecutionSpace::CUDA -#include "cuda_runtime.h" -#include "fibonacci_cuda_kernel.h" -#else -#error " CUDA is required to build this test!" -#endif - -#include "ttg/serialization.h" - -const int64_t F_n_max = 1000; -/// N.B. 
contains values of F_n and F_{n-1} -struct Fn : public ttg::TTValue { - std::unique_ptr F; // F[0] = F_n, F[1] = F_{n-1} - ttg::Buffer b; - - Fn() : F(std::make_unique(2)), b(F.get(), 2) { F[0] = 1; F[1] = 0; } - - Fn(const Fn&) = delete; - Fn(Fn&& other) = default; - Fn& operator=(const Fn& other) = delete; - Fn& operator=(Fn&& other) = default; - - template - void serialize(Archive& ar) { - ttg::ttg_abort(); - } - template - void serialize(Archive& ar, const unsigned int) { - ttg::ttg_abort(); - } -}; - -auto make_ttg_fib_lt(const int64_t F_n_max = 1000) { - ttg::Edge f2f; - ttg::Edge f2p; - - auto fib = ttg::make_tt( - [=](int64_t n, Fn&& f_n) -> ttg::device::Task { - assert(n > 0); - ttg::trace("in fib: n=", n, " F_n=", f_n.F[0]); - - co_await ttg::device::select(f_n.b); - - next_value(f_n.b.current_device_ptr()); - - // wait for the task to complete and the values to be brought back to the host - co_await ttg::device::wait(f_n.b); - - if (f_n.F[0] < F_n_max) { - co_await ttg::device::forward(ttg::device::send<0>(n + 1, std::move(f_n))); - } else { - co_await ttg::device::forward(ttg::device::sendv<1>(std::move(f_n))); - } - }, - ttg::edges(f2f), ttg::edges(f2f, f2p), "fib"); - auto print = ttg::make_tt( - [=](Fn&& f_n) { - std::cout << "The largest Fibonacci number smaller than " << F_n_max << " is " << f_n.F[1] << std::endl; - }, - ttg::edges(f2p), ttg::edges(), "print"); - - auto ins = std::make_tuple(fib->template in<0>()); - std::vector> ops; - ops.emplace_back(std::move(fib)); - ops.emplace_back(std::move(print)); - return make_ttg(std::move(ops), ins, std::make_tuple(), "Fib_n < N"); -} - -int main(int argc, char* argv[]) { - ttg::initialize(argc, argv, -1); - ttg::trace_on(); - int64_t N = 1000; - if (argc > 1) N = std::atol(argv[1]); - auto fib = make_ttg_fib_lt(N); // computes largest F_n < N - - ttg::make_graph_executable(fib.get()); - if (ttg::default_execution_context().rank() == 0) - fib->template in<0>()->send(1, Fn{});; - - 
ttg::execute(ttg::ttg_default_execution_context()); - ttg::fence(ttg::ttg_default_execution_context()); - - ttg::finalize(); - return 0; -}