Skip to content

Commit

Permalink
ENH: nested parallel reductions
Browse files Browse the repository at this point in the history
  • Loading branch information
Adrian-Diaz committed Oct 1, 2024
1 parent e0fee3f commit d3fa311
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 26 deletions.
49 changes: 44 additions & 5 deletions examples/main_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,11 +875,11 @@ int main(int argc, char* argv[])
FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, {
hierTest3D(i_i, j_j, k_k) = 0.0;
});
FOR_FIRST(hiersize, {
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int i_i = TEAM_ID;
//const int i_i = TEAM_ID;
FOR_SECOND(j_j, i_i, hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
Expand All @@ -905,12 +905,16 @@ int main(int argc, char* argv[])
FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, {
hierTest3D(i_i, j_j, k_k) = i_i*hiersize*hiersize+j_j*hiersize+k_k;
});
FOR_FIRST_EASY(i_i,hiersize, {

printf("\n\n\nHierarchical Reduce\n");
//2D nesting
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
//const int i_i = TEAM_ID;
double result = 0;
double lsum;
FOR_REDUCE_SUM_SECOND(j_j, i_i, hiersize, lsum, {
lsum += hierTest3D(i_i,j_j,0);
// Kokkos::parallel_for( \
Expand All @@ -920,11 +924,46 @@ int main(int argc, char* argv[])
// int jend = (j_j+1)*32;
}, result);
hierTest1D(i_i)= result;
//printf("value at %d is %f\n", i_i, hierTest1D(i_i));
});
Kokkos::fence();
for (int ppp = 0; ppp < hiersize; ppp++) {
//printf("%f\n", hierTest1D(ppp));
// printf("%f\n", hierTest2D(3,ppp));
// printf("%f\n", hierTest3D(3,3,ppp));
}
printf("\n\n");

printf("\n\n\nHierarchical Vectorized Reduce\n");
//3D vector nesting
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
//const int i_i = TEAM_ID;
double result = 0;
double lsum;
FOR_SECOND(j_j, i_i, hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
// hierTest2D(i_i,j_j) = i_i * (j_j+1);
// int jstart = j_j*32;
// int jend = (j_j+1)*32;
FOR_REDUCE_SUM_THIRD(k_k, i_i, j_j, lsum, {
lsum += hierTest3D(i_i,j_j,k_k);
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
// hierTest2D(i_i,j_j) = i_i * (j_j+1);
// int jstart = j_j*32;
// int jend = (j_j+1)*32;
}, result);
hierTest2D(i_i,j_j)= result;
//printf("value at %d , %d is %f\n", i_i, j_j, hierTest2D(i_i,j_j));
});
});
Kokkos::fence();
printf("\n\n\nHierarchical Reduce\n");
for (int ppp = 0; ppp < hiersize; ppp++) {
printf("%f\n", hierTest1D(ppp));
//printf("%f\n", hierTest1D(ppp));
// printf("%f\n", hierTest2D(3,ppp));
// printf("%f\n", hierTest3D(3,3,ppp));
}
Expand Down
26 changes: 16 additions & 10 deletions src/include/kokkos_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -8040,11 +8040,14 @@ void DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values(
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
void DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values_sparse(T val) {
Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim1_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int i_i = teamMember.league_rank();
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(i_i) ), [&] ( const int (j_j) ) {
array_(dim2_*i_i+j_j) = val;
});
// Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim1_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
// const int i_i = teamMember.league_rank();
// Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(i_i) ), [&] ( const int (j_j) ) {
// array_(dim2_*i_i+j_j) = val;
// });
// });
Kokkos::parallel_for("SetValues_DynamicRaggedRightArrayKokkos", length_, KOKKOS_CLASS_LAMBDA(const int i) {
array_(i) = val;
});
}
// Get the name of the view
Expand Down Expand Up @@ -8256,11 +8259,14 @@ void DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values(T
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
void DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values_sparse(T val) {
Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim2_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int j_j = teamMember.league_rank();
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(j_j) ), [&] ( const int (i_i) ) {
array_(dim1_*j_j+i_i) = val;
});
// Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim2_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
// const int j_j = teamMember.league_rank();
// Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(j_j) ), [&] ( const int (i_i) ) {
// array_(dim1_*j_j+i_i) = val;
// });
// });
Kokkos::parallel_for("SetValues_DynamicRaggedDownArrayKokkos", length_, KOKKOS_CLASS_LAMBDA(const int i) {
array_(i) = val;
});
}
// Get the name of the view
Expand Down
20 changes: 9 additions & 11 deletions src/include/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,19 +445,11 @@ THREAD_ID \
teamMember.team_rank()

#define \
FOR_FIRST(x1, fcn) \
FOR_FIRST(i, x1, fcn) \
Kokkos::parallel_for( \
Kokkos::TeamPolicy<>( x1, Kokkos::AUTO, 32 ), \
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) \
{fcn} )

#define \
FOR_FIRST_EASY(i, x1, fcn) \
Kokkos::parallel_for( \
Kokkos::TeamPolicy<>( x1, Kokkos::AUTO, 32 ), \
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) \
{ const int i = TEAM_ID; \
fcn} )
{ const int i = TEAM_ID; fcn} )

#define \
FOR_SECOND(j, y0, y1, fcn) \
Expand All @@ -468,7 +460,7 @@ Kokkos::parallel_for( \
#define \
FOR_REDUCE_SUM_SECOND(j, y0, y1, lsum, fcn, result) \
Kokkos::parallel_reduce( \
Kokkos::TeamThreadRange( teamMember, y0, y1 ), [&] ( const int (j), decltype(result) &(lsum) ) \
Kokkos::TeamThreadRange( teamMember, y0, y1 ), [&] ( const int (j), decltype(lsum) &(lsum) ) \
{fcn}, result )

#define \
Expand All @@ -477,6 +469,12 @@ Kokkos::parallel_for( \
Kokkos::ThreadVectorRange( teamMember, z0, z1 ), [&] ( const int (k) ) \
{fcn} )

// FOR_REDUCE_SUM_THIRD: innermost (vector-level) reduction of a nested
// hierarchical loop. Expands to a Kokkos::parallel_reduce issued over
// Kokkos::ThreadVectorRange( teamMember, z0, z1 ).
//   k      - name given to the `const int` loop index of the generated lambda
//   z0, z1 - range bounds forwarded unchanged to Kokkos::ThreadVectorRange
//   lsum   - name of the accumulator reference seen by the loop body; its type
//            is obtained with decltype(lsum), so a variable with that same name
//            must already be declared in the enclosing scope
//   fcn    - loop body, pasted verbatim between the lambda's braces
//   result - passed as the last argument of Kokkos::parallel_reduce, i.e. the
//            destination of the reduced value
// The expansion refers to `teamMember`, so the macro is only usable where that
// name is in scope (for example inside the lambda generated by FOR_FIRST).
// The generated lambda captures by reference ([&]).
// NOTE(review): the "SUM" in the name relies on parallel_reduce's default
// reduction for a plain scalar result and on the body accumulating with `+=`;
// confirm against the Kokkos documentation.
#define \
FOR_REDUCE_SUM_THIRD(k, z0, z1, lsum, fcn, result) \
Kokkos::parallel_reduce( \
Kokkos::ThreadVectorRange( teamMember, z0, z1 ), [&] ( const int (k), decltype(lsum) &(lsum) ) \
{fcn}, result )

//Kokkos Initialize
#define \
MATAR_KOKKOS_INIT \
Expand Down

0 comments on commit d3fa311

Please sign in to comment.