Skip to content

Commit

Permalink
updated the Fibonacci device example to be roughly correct :)
Browse files Browse the repository at this point in the history
  • Loading branch information
evaleev committed Mar 20, 2024
1 parent 42ef7d8 commit 4d525ef
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 51 deletions.
15 changes: 10 additions & 5 deletions tests/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ list(APPEND ut_src fibonacci-coro.cc)
list(APPEND ut_src device_coro.cc)
if (TTG_HAVE_CUDA)
list(APPEND ut_src cuda_kernel.cu)
# fibonacci device example; the kernel source is listed with its .cu extension
# (the file in this directory is fibonacci_cuda_kernel.cu) so that CMake
# compiles it as CUDA
list(APPEND ut_src
fibonacci_device.cc
fibonacci_cuda_kernel.h
fibonacci_cuda_kernel.cu
)
endif(TTG_HAVE_CUDA)
list(APPEND ut_libs std::coroutine)

Expand All @@ -25,11 +31,10 @@ add_ttg_executable(core-unittests-ttg "${ut_src}" LINK_LIBRARIES "${ut_libs}" CO
add_ttg_executable(serialization serialization.cc unit_main.cpp
LINK_LIBRARIES Catch2::Catch2 ttg-serialization $<TARGET_NAME_IF_EXISTS:BTAS::BTAS>
COMPILE_DEFINITIONS $<$<TARGET_EXISTS:BTAS::BTAS>:TTG_HAS_BTAS=1>)
#target_link_libraries(serialization "Catch2::Catch2;ttg-serialization")
#if (TARGET BTAS::BTAS)
# target_link_libraries(serialization BTAS::BTAS)
# target_compile_definitions(serialization PRIVATE TTG_HAS_BTAS=1)
#endif (TARGET BTAS::BTAS)

# Boost serialization test: checks low-level codegen
add_ttg_executable(serialization_boost serialization_boost.cc
LINK_LIBRARIES ttg-serialization-boost RUNTIMES "parsec")

# TODO: convert into unit test
#if (TARGET MADworld)
Expand Down
26 changes: 7 additions & 19 deletions tests/unit/fibonacci_cuda_kernel.cu
Original file line number Diff line number Diff line change
@@ -1,27 +1,15 @@
#include "cuda_kernel.h"
#include "fibonacci_cuda_kernel.h"

#ifdef TTG_HAVE_CUDA

__global__ void cu_calculate_fibonacci(int64_t* results, std::size_t n) {
int tx = threadIdx.x; // Thread index

if (tx == 0) {
int64_t a = 0, b = 1, c;
if (n == 0) {
results[tx] = a;
return;
}
for (int i = 2; i <= n; i++) {
c = a + b;
a = b;
b = c;
}
results[tx] = b;
}
/// Advances a pair of consecutive Fibonacci numbers by one step, in place.
///
/// On entry fn_and_fnm1[0] and fn_and_fnm1[1] are read as F_n and F_{n-1};
/// on exit they hold F_{n+1} = F_n + F_{n-1} and F_n, respectively.
/// The body does no thread indexing, so every launched thread would perform
/// the same read-modify-write; the host wrapper in this file launches it
/// as <<<1, 1>>>.
__global__ void cu_next_value(int64_t* fn_and_fnm1) {
int64_t fnp1 = fn_and_fnm1[0] + fn_and_fnm1[1];  // F_{n+1}
fn_and_fnm1[1] = fn_and_fnm1[0];  // the old F_n becomes the new F_{n-1}
fn_and_fnm1[0] = fnp1;
}

void calculate_fibonacci(int64_t* results, std::size_t n) {
cu_calculate_fibonacci<<<1, 1>>>(results, n); // Adjust <<<1, 1>>> as needed for parallel computation
/// Host-side entry point: launches cu_next_value on fn_and_fnm1 with one block
/// of one thread.
///
/// The pointer is passed unchanged to the kernel, which dereferences
/// elements [0] and [1], so it must be valid on the device.
/// The launch names no stream, and this function neither checks for launch
/// errors nor synchronizes before returning.
/// NOTE(review): presumably the caller's runtime orders this launch with its
/// own transfers -- confirm that launching without an explicit stream is intended.
void next_value(int64_t* fn_and_fnm1) {
cu_next_value<<<1, 1>>>(fn_and_fnm1);
}

#endif // TTG_HAVE_CUDA
2 changes: 1 addition & 1 deletion tests/unit/fibonacci_cuda_kernel.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "ttg/config.h"
#include <cinttypes>

void calculate_fibonacci(int64_t* result, std::size_t n);
/// Launches a device kernel that replaces the pair {F_n, F_{n-1}} stored at
/// fn_and_fnm1[0] and fn_and_fnm1[1] with {F_{n+1}, F_n}.
/// fn_and_fnm1 is dereferenced by the kernel, so it must be a device-accessible
/// pointer to at least two int64_t values.
void next_value(int64_t* fn_and_fnm1);
54 changes: 28 additions & 26 deletions tests/unit/fibonacci_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@
// Define TTG_USE_CUDA only if CUDA support is desired and available
#ifdef TTG_USE_CUDA
#include "cuda_runtime.h"
#include "cuda_kernel.h"
#include "fibonacci_cuda_kernel.h"
#endif

#include "ttg/serialization.h"

// Defaults to CUDA if available; can be overridden by defining TTG_USE_XXX for another backend
#define ES ttg::default_execution_space()

struct A : public ttg::TTValue<A> {
int64_t value;
ttg::Buffer<int64_t> buffer;
/// N.B. contains values of F_n and F_{n-1}
struct Fn : public ttg::TTValue<Fn> {
int64_t F[2] = {1, 0}; // F[0] = F_n, F[1] = F_{n-1}
ttg::Buffer<int64_t> b;

A() : value(0), buffer(&value, 1) {}
A(int64_t val) : value(val), buffer(&value, 1) {}
Fn() : b(&F[0], 2) {}

A(A&& other) = default;
A& operator=(A&& other) = default;
Fn(Fn&& other) = default;
Fn& operator=(Fn&& other) = default;

template <typename Archive>
void serialize(Archive& ar) {
Expand All @@ -34,39 +34,41 @@ struct A : public ttg::TTValue<A> {

int main(int argc, char* argv[]) {
ttg::initialize(argc, argv, -1);
const int64_t N = 20;
const int64_t F_n_max = 1000;

ttg::Edge<int64_t, A> f2f;
ttg::Edge<void, A> f2p;
ttg::Edge<int64_t, Fn> f2f;
ttg::Edge<void, Fn> f2p;

auto fib = ttg::make_tt<ES>(
[=](int64_t n, A& F_nms) -> ttg::device::Task {
if (n <= N) {
co_await ttg::device::select(F_nms.buffer);

int64_t result = calculate_fibonacci(n);

A F_n(result);
if (n < N) {
co_await ttg::device::send<0>(n + 1, F_n);
} else {
co_await ttg::device::sendv<1>(F_n);
}
[=](int64_t n, Fn& f_n) -> ttg::device::Task {
assert(n > 0);

co_await ttg::device::select(f_n.b);

next_value(f_n.b.current_device_ptr());

// wait for the task to complete and the values to be brought back to the host
co_await ttg::device::wait(f_n.b);

if (f_n.F[0] < F_n_max) {
co_await ttg::device::forward(ttg::device::send<0>(n + 1, f_n));
} else {
co_await ttg::device::forward(ttg::device::sendv<1>(f_n));
}
},
ttg::edges(f2f),
ttg::edges(f2f, f2p),
"fib");

auto print = ttg::make_tt([](A F_N) {
std::cout << "The " << N << "th Fibonacci number is " << F_N.value << std::endl;
auto print = ttg::make_tt([](Fn f_n) {
std::cout << "The largest Fibonacci number smaller than" << F_n_max << " is " << f_n.F[1] << std::endl;
},
ttg::edges(f2p),
ttg::edges(),
"print");

ttg::make_graph_executable(fib.get());
if (ttg::default_execution_context().rank() == 0) fib->invoke(2, A(1));
if (ttg::default_execution_context().rank() == 0) fib->invoke(1, Fn{});

ttg::execute(ttg_default_execution_context());
ttg::fence(ttg_default_execution_context());
Expand Down

0 comments on commit 4d525ef

Please sign in to comment.