diff --git a/benchmarks/lbm/src/Config.cpp b/benchmarks/lbm/src/Config.cpp index 4a380539..ae30c720 100644 --- a/benchmarks/lbm/src/Config.cpp +++ b/benchmarks/lbm/src/Config.cpp @@ -109,6 +109,22 @@ auto Config::parseArgs(const int argc, char* argv[]) std::cout << "Benchmark example " << '\n'; std::cout << "./lbm --deviceType gpu --deviceIds 0 1 2 3 4 --grid dGrid --domain-size 100 --max-iter 2000 --computeFP double --storageFP double --nOCC --huGrid --benchmark --warmup-iter 10 --repetitions 5" << '\n'; + std::cout <<" ./lbm --deviceType gpu\\\n" + " --deviceIds 0\\\n" + " --grid dGrid\\\n" + " --domain-size 100\\\n" + " --max-iter 1000\\\n" + " --computeFP float\\\n" + " --storageFP float\\\n" + " --occ none\\\n" + " --transferMode put\\\n" + " --stencilSemantic grid\\\n" + " --spaceCurve sweep\\\n" + " --collision bgk\\\n" + " --streamingMethod pull\\\n" + " --lattice d3q19\\\n" + " --vti 10\n"; + return -1; } diff --git a/benchmarks/lbm/src/Lbm.h b/benchmarks/lbm/src/Lbm.h index 132a3d99..ce465ce4 100644 --- a/benchmarks/lbm/src/Lbm.h +++ b/benchmarks/lbm/src/Lbm.h @@ -127,6 +127,10 @@ struct Lbm ContainerFactory::Common::computeWallNghMask(cellFlagField, cellFlagField) .run(Neon::Backend::mainStreamIdx); + cellFlagField.newHaloUpdate(Neon::set::StencilSemantic::standard, + Neon::set::TransferMode::get, + Neon::Execution::device) + .run(Neon::Backend::mainStreamIdx); metrics::recordProblemSetupMetrics(grid.getBackend(), *reportPtr, setBcClockStart); } @@ -157,12 +161,17 @@ struct Lbm ops.push_back(even); std::stringstream appName; - if (iteration % 2 == 0) - appName << "LBM_push_even"; - else + if (skIdx % 2 == 0) appName << "LBM_pull_even"; + else + appName << "LBM_pull_odd"; skeleton.at(skIdx).sequence(ops, appName.str(), opt); + + if (skIdx % 2 == 0) + skeleton.at(skIdx).ioToDot("lbm-pull-even","lbm_pull_even",true); + else + skeleton.at(skIdx).ioToDot("lbm-pull-odd","lbm_pull_odd", true); } } { @@ -200,7 +209,7 @@ struct Lbm if (iteration % 2 == 0) appName 
<< "LBM_push_even"; else - appName << "LBM_pull_even"; + appName << "LBM_push_odd"; skeleton.at(skIdx).sequence(ops, appName.str(), opt); } @@ -226,13 +235,13 @@ struct Lbm cellFlagField, lbmParameters.omega, pFieldList.at(0)); - appName << "LBM_push_even"; + appName << "LBM_aa_even"; } else { lbmIteration = ContainerFactory::AA::Odd::iteration( cellFlagField, lbmParameters.omega, pFieldList.at(0)); - appName << "LBM_pull_even"; + appName << "LBM_aa_odd"; } std::vector ops; skeleton.at(skIdx) = Neon::skeleton::Skeleton(pFieldList[0].getBackend()); @@ -299,6 +308,10 @@ struct Lbm done = true; } if constexpr (method == lbm::Method::pull) { + pop.newHaloUpdate(Neon::set::StencilSemantic::standard, + Neon::set::TransferMode::get, + Neon::Execution::device) + .run(Neon::Backend::mainStreamIdx); auto computeRhoAndU = ContainerFactory::Pull::computeRhoAndU(pop, cellFlagField, rho, u); computeRhoAndU.run(Neon::Backend::mainStreamIdx); done = true; @@ -326,8 +339,8 @@ struct Lbm iterIdStr = std::string(numDigits - std::min(numDigits, iterIdStr.length()), '0') + iterIdStr; // pop.ioToVtk("pop_" + iterIdStr, "pop", false); - u.ioToVtk("u_" + iterIdStr, "u", false); - rho.ioToVtk("rho_" + iterIdStr, "rho", false); + u.ioToVtk("u_" + iterIdStr, "u", false, Neon::IoFileType::BINARY); + rho.ioToVtk("rho_" + iterIdStr, "rho", false, Neon::IoFileType::BINARY); cellFlagField.template ioToVtk("cellFlagField_" + iterIdStr, "flag", false); #if 0 diff --git a/libNeonCore/include/Neon/core/types/Macros.h b/libNeonCore/include/Neon/core/types/Macros.h index 5e909d3a..bcecdbb7 100644 --- a/libNeonCore/include/Neon/core/types/Macros.h +++ b/libNeonCore/include/Neon/core/types/Macros.h @@ -206,7 +206,7 @@ #define NEON_RESTRICT restrict #endif -#ifdef NEON_COMPILER_CUDA +#if defined(NEON_COMPILER_CUDA) && !defined(_WIN32) #define NEON_RESTRICT __restrict__ #endif diff --git a/libNeonDomain/include/Neon/domain/details/dGrid/dField_imp.h 
b/libNeonDomain/include/Neon/domain/details/dGrid/dField_imp.h index 49f57dbd..11dda19e 100644 --- a/libNeonDomain/include/Neon/domain/details/dGrid/dField_imp.h +++ b/libNeonDomain/include/Neon/domain/details/dGrid/dField_imp.h @@ -26,7 +26,8 @@ dField::dField(const std::string& fieldUserName, T(0), dataUse, memoryOptions, - haloStatus) { + haloStatus) +{ // only works if dims in x and y direction for all partitions match for (int i = 0; i < dims.size() - 1; ++i) { @@ -88,7 +89,7 @@ dField::dField(const std::string& fieldUserName, { // Setting up partitions Neon::aGrid const& aGrid = mData->grid->helpFieldMemoryAllocator(); - mData->memoryField = aGrid.newField(fieldUserName + "-storage", cardinality, T(), dataUse, memoryOptions); + mData->memoryField = aGrid.newField(fieldUserName + "-storage", cardinality, T(), dataUse, memoryOptions); // const int setCardinality = mData->grid->getBackend().getDeviceCount(); mData->partitionTable.forEachConfiguration( [&](Neon::Execution execution, @@ -306,7 +307,7 @@ auto dField::operator()(const Neon::index_3d& idxGlobal, auto& partition = mData->partitionTable.getPartition(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD); - auto& span = mData->grid->getSpan(Neon::Execution::host,partitionIdx, Neon::DataView::STANDARD); + auto& span = mData->grid->getSpan(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD); Idx idx; bool isOk = span.setAndValidate(idx, localIDx.x, localIDx.y, localIDx.z); if (!isOk) { @@ -326,7 +327,7 @@ auto dField::getReference(const Neon::index_3d& idxGlobal, auto& partition = mData->partitionTable.getPartition(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD); - auto& span = mData->grid->getSpan(Neon::Execution::host,partitionIdx, Neon::DataView::STANDARD); + auto& span = mData->grid->getSpan(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD); Idx idx; bool isOk = span.setAndValidate(idx, localIDx.x, localIDx.y, localIDx.z); if (!isOk) { @@ -484,6 +485,81 
@@ auto dField::initHaloUpdateTable() transfersVec.push_back(transfer); } }); + + mData->latticeHaloUpdateTable.forEachPutConfiguration( + bk, [&](Neon::SetIdx setIdxSrc, + Execution execution, + Neon::domain::tool::partitioning::ByDirection byDirection, + std::vector& transfersVec) { + { + using namespace Neon::domain::tool::partitioning; + + Neon::SetIdx setIdxDst = getNghSetIdx(setIdxSrc, byDirection); + + int r = grid.getStencil().getRadius(); + + std::array partitions; + std::array, Data::EndPointsUtils::nConfigs> ghostZBeginIdx; + std::array, Data::EndPointsUtils::nConfigs> boundaryZBeginIdx; + std::array memPhyDim; + + partitions[Data::EndPoints::dst] = &this->getPartition(execution, setIdxDst, Neon::DataView::STANDARD); + partitions[Data::EndPoints::src] = &this->getPartition(execution, setIdxSrc, Neon::DataView::STANDARD); + + for (auto endPoint : {Data::EndPoints::dst, Data::EndPoints::src}) { + ghostZBeginIdx[endPoint][static_cast(ByDirection::down)] = 0; + boundaryZBeginIdx[endPoint][static_cast(ByDirection::down)] = r; + boundaryZBeginIdx[endPoint][static_cast(ByDirection::up)] = partitions[endPoint]->dim().z; + ghostZBeginIdx[endPoint][static_cast(ByDirection::up)] = partitions[endPoint]->dim().z + r; + + memPhyDim[endPoint] = Neon::size_4d( + 1, + size_t(partitions[endPoint]->dim().x), + size_t(partitions[endPoint]->dim().x) * partitions[endPoint]->dim().y, + size_t(partitions[endPoint]->dim().x) * partitions[endPoint]->dim().y * (partitions[endPoint]->dim().z + 2 * r)); + } + + for (int j = 0; j < this->getCardinality(); j++) { + auto const& stencil = this->getGrid().getStencil(); + if (this->getCardinality() != stencil.nPoints()) { + continue; + } + T* srcMem = partitions[Data::EndPoints::src]->mem(); + T* dstMem = partitions[Data::EndPoints::dst]->mem(); + + Neon::size_4d srcBoundaryBuff(0, 0, boundaryZBeginIdx[Data::EndPoints::src][static_cast(byDirection)], j); + Neon::size_4d dstGhostBuff(0, 0, 
ghostZBeginIdx[Data::EndPoints::dst][static_cast(ByDirectionUtils::invert(byDirection))], j); + + // std::cout << "To " << dstGhostBuff << " prt " << partitions[Data::EndPoints::dst]->prtID() << " From " << srcBoundaryBuff << "(src dim" << partitions[Data::EndPoints::src]->dim() << ")" << std::endl; + // std::cout << "dst mem " << partitions[Data::EndPoints::dst]->mem() << " " << std::endl; + // std::cout << "dst pitch " << (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum() << " " << std::endl; + // std::cout << "dst dstGhostBuff " << dstGhostBuff << " " << std::endl; + // std::cout << "dst pitch all" << memPhyDim[Data::EndPoints::dst] << " " << std::endl; + + Neon::set::MemoryTransfer transfer({setIdxDst, dstMem + (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum(), dstGhostBuff}, + {setIdxSrc, srcMem + (srcBoundaryBuff * memPhyDim[Data::EndPoints::src]).rSum(), srcBoundaryBuff}, + sizeof(T) * + r * + partitions[Data::EndPoints::src]->dim().x * + partitions[Data::EndPoints::src]->dim().y); + if (ByDirection::up == byDirection && bk.isLastDevice(setIdxSrc)) { + return; + } + + if (ByDirection::down == byDirection && bk.isFirstDevice(setIdxSrc)) { + return; + } + if (ByDirection::up == byDirection && !(stencil.points()[j].z > 0)) { + continue; + } + if (ByDirection::down == byDirection && !(stencil.points()[j].z < 0)) { + continue; + } + // std::cout << transfer.toString() << std::endl; + transfersVec.push_back(transfer); + } + } + }); // // mData->latticeHaloUpdateTable.forEachPutConfiguration( // bk, [&](Neon::SetIdx setIdxSrc, @@ -608,7 +684,33 @@ auto dField:: execution); } } else { - NEON_DEV_UNDER_CONSTRUCTION(""); + auto transfers = bk.template newDataSet>(); + if (this->getMemoryOptions().getOrder() == Neon::MemoryLayout::structOfArrays) { + for (auto byDirection : {tool::partitioning::ByDirection::up, + tool::partitioning::ByDirection::down}) { + + auto const& tableEntryByDir = mData->latticeHaloUpdateTable.get(transferMode, + execution, + 
byDirection); + + tableEntryByDir.forEachSeq([&](SetIdx setIdx, auto const& tableEntryByDirBySetIdx) { + transfers[setIdx].insert(std::end(transfers[setIdx]), + std::begin(tableEntryByDirBySetIdx), + std::end(tableEntryByDirBySetIdx)); + }); + } + dataTransferContainer = + Neon::set::Container::factoryDataTransfer( + *this, + transferMode, + stencilSemantic, + transfers, + execution); + + + } else { + NEON_DEV_UNDER_CONSTRUCTION(""); + } } Neon::set::Container SyncContainer = Neon::set::Container::factorySynchronization( diff --git a/libNeonDomain/include/Neon/domain/details/dGrid/dGrid_imp.h b/libNeonDomain/include/Neon/domain/details/dGrid/dGrid_imp.h index a6fbf1aa..a263400a 100644 --- a/libNeonDomain/include/Neon/domain/details/dGrid/dGrid_imp.h +++ b/libNeonDomain/include/Neon/domain/details/dGrid/dGrid_imp.h @@ -91,15 +91,17 @@ dGrid::dGrid(const Neon::Backend& backend, Neon::DataView dw, dSpan& span) { span.mDataView = dw; - span.mZHaloRadius = setCardinality == 1 ? 0 : mData->halo.z; - span.mZBoundaryRadius = mData->halo.z; + span.mZghostRadius = setCardinality == 1 ? 0 : mData->halo.z; + span.mZboundaryRadius = mData->halo.z; + span.mMaxZInDomain = mData->partitionDims[setIdx].z; switch (dw) { case Neon::DataView::STANDARD: { // Only works z partitions. assert(mData->halo.x == 0 && mData->halo.y == 0); - span.mDim = mData->partitionDims[setIdx]; + span.mSpanDim = mData->partitionDims[setIdx]; + break; } case Neon::DataView::BOUNDARY: { @@ -107,8 +109,8 @@ dGrid::dGrid(const Neon::Backend& backend, // Only works z partitions. assert(mData->halo.x == 0 && mData->halo.y == 0); - span.mDim = mData->partitionDims[setIdx]; - span.mDim.z = span.mZBoundaryRadius * 2; + span.mSpanDim = mData->partitionDims[setIdx]; + span.mSpanDim.z = span.mZboundaryRadius * 2; break; } @@ -117,12 +119,12 @@ dGrid::dGrid(const Neon::Backend& backend, // Only works z partitions. 
assert(mData->halo.x == 0 && mData->halo.y == 0); - span.mDim = mData->partitionDims[setIdx]; - span.mDim.z = span.mDim.z - span.mZBoundaryRadius * 2; - if (span.mDim.z <= 0 && setCardinality > 1) { + span.mSpanDim = mData->partitionDims[setIdx]; + span.mSpanDim.z = span.mSpanDim.z - span.mZboundaryRadius * 2; + if (span.mSpanDim.z <= 0 && setCardinality > 1) { NeonException exp("dGrid"); exp << "The grid size is too small to support the data view model correctly \n"; - exp << span.mDim << " for setIdx " << setIdx << " and device " << getDevSet().devId(setIdx); + exp << span.mSpanDim << " for setIdx " << setIdx << " and device " << getDevSet().devId(setIdx); NEON_THROW(exp); } @@ -140,7 +142,7 @@ dGrid::dGrid(const Neon::Backend& backend, Neon::DataView dw, int& count) { if (Execution::host == execution) { - count = mData->spanTable.getSpan(Neon::Execution::host, setIdx, dw).mDim.rMul(); + count = mData->spanTable.getSpan(Neon::Execution::host, setIdx, dw).mSpanDim.rMul(); } }); } diff --git a/libNeonDomain/include/Neon/domain/details/dGrid/dSpan.h b/libNeonDomain/include/Neon/domain/details/dGrid/dSpan.h index 74ab5ff3..c81baace 100644 --- a/libNeonDomain/include/Neon/domain/details/dGrid/dSpan.h +++ b/libNeonDomain/include/Neon/domain/details/dGrid/dSpan.h @@ -43,11 +43,12 @@ class dSpan private: Neon::DataView mDataView; - int mZHaloRadius; - int mZBoundaryRadius; - Neon::index_3d mDim /** Dimension of the span, its values depends on the mDataView*/; + int mZghostRadius; + int mZboundaryRadius; + int mMaxZInDomain; + Neon::index_3d mSpanDim /** Dimension of the span, its values depends on the mDataView*/; }; -} // namespace Neon::domain::details::dGrid +} // namespace Neon::domain::details::dGrid #include "dSpan_imp.h" \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/dGrid/dSpan_imp.h b/libNeonDomain/include/Neon/domain/details/dGrid/dSpan_imp.h index 9fb56572..37bea7d7 100644 --- 
a/libNeonDomain/include/Neon/domain/details/dGrid/dSpan_imp.h +++ b/libNeonDomain/include/Neon/domain/details/dGrid/dSpan_imp.h @@ -14,25 +14,25 @@ dSpan::setAndValidate(Idx& idx, idx.setLocation().y = int(y); idx.setLocation().z = int(z); - if (idx.getLocation() < mDim) { + if (idx.getLocation() < mSpanDim) { res = true; } switch (mDataView) { case Neon::DataView::STANDARD: { - idx.setLocation().z += mZHaloRadius; + idx.setLocation().z += mZghostRadius; return res; } case Neon::DataView::INTERNAL: { - idx.setLocation().z += mZHaloRadius + mZBoundaryRadius; + idx.setLocation().z += mZghostRadius + mZboundaryRadius; return res; } case Neon::DataView::BOUNDARY: { - idx.setLocation().z += idx.getLocation().z < mZBoundaryRadius - ? 0 - : (mDim.z - 1) + (-1 * mZBoundaryRadius /* we remove zBoundaryRadius as the first zBoundaryRadius will manage the lower slices */); - idx.setLocation().z += mZHaloRadius; + idx.setLocation().z += idx.getLocation().z < mZboundaryRadius + ? 0 + : (mMaxZInDomain - 1) + (-1 * mZboundaryRadius /* we remove zBoundaryRadius as the first zBoundaryRadius will manage the lower slices */); + idx.setLocation().z += mZghostRadius; return res; } @@ -51,19 +51,19 @@ NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetDataView() NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetZHaloRadius() const -> int const& { - return mZHaloRadius; + return mZghostRadius; } NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetZBoundaryRadius() const -> int const& { - return mZBoundaryRadius; + return mZboundaryRadius; } NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetDim() const -> Neon::index_3d const& { - return mDim; + return mSpanDim; } } // namespace Neon::domain::details::dGrid \ No newline at end of file diff --git a/libNeonDomain/src/domain/details/dGrid/dGrid.cpp b/libNeonDomain/src/domain/details/dGrid/dGrid.cpp index 890642b3..ec8b24d8 100644 --- a/libNeonDomain/src/domain/details/dGrid/dGrid.cpp +++ b/libNeonDomain/src/domain/details/dGrid/dGrid.cpp @@ -59,7 +59,7 
@@ auto dGrid::getLaunchParameters(const Neon::DataView dataView, auto dimsByDataView = getBackend().devSet().newDataSet([&](Neon::SetIdx const& setIdx, auto& value) { - value = getSpan(Neon::Execution::host, setIdx, dataView).mDim; + value = getSpan(Neon::Execution::host, setIdx, dataView).mSpanDim; }); ret.set(Neon::sys::GpuLaunchInfo::domainGridMode, diff --git a/libNeonDomain/tests/domain-map/src/gtests.cpp b/libNeonDomain/tests/domain-map/src/gtests.cpp index 50d6e34d..c48511b7 100644 --- a/libNeonDomain/tests/domain-map/src/gtests.cpp +++ b/libNeonDomain/tests/domain-map/src/gtests.cpp @@ -13,6 +13,15 @@ TEST(domain_map, dGrid) 1); } +TEST(domain_map_dataView, dGrid) +{ + int nGpus = 2; + using Type = int64_t; + runAllTestConfiguration(std::function(map::dataView::run), + nGpus, + 2); +} + TEST(domain_map, eGrid) { int nGpus = 3; diff --git a/libNeonDomain/tests/domain-map/src/map.cu b/libNeonDomain/tests/domain-map/src/map.cu index b001d832..2ed92ddb 100644 --- a/libNeonDomain/tests/domain-map/src/map.cu +++ b/libNeonDomain/tests/domain-map/src/map.cu @@ -1,10 +1,10 @@ #include #include "Neon/domain/Grids.h" +#include "Neon/domain/details/dGridSoA/dGridSoA.h" #include "Neon/domain/tools/TestData.h" #include "TestInformation.h" #include "gtest/gtest.h" -#include "Neon/domain/details/dGridSoA/dGridSoA.h" namespace map { @@ -32,6 +32,27 @@ auto mapContainer_axpy(int streamIdx, }); } +template +auto mapContainer_add(int streamIdx, + typename Field::Type& val, + Field& fieldB) + -> Neon::set::Container +{ + const auto& grid = fieldB.getGrid(); + return grid.newContainer( + "mapContainer_add", + [&, val](Neon::set::Loader& loader) { + auto b = loader.load(fieldB); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Field::Idx& e) mutable { + for (int i = 0; i < b.cardinality(); i++) { + // printf("GPU %ld <- %ld + %ld\n", lc(e, i) , la(e, i) , val); + b(e, i) += val; + } + }; + }); +} + using namespace Neon::domain::tool::testing; template @@ -78,5 +99,53 @@ 
template auto run(TestData&) - template auto run(TestData&) -> void; template auto run(TestData&) -> void; +namespace dataView { +template +auto run(TestData& data) -> void +{ + + using Type = typename TestData::Type; + auto& grid = data.getGrid(); + const std::string appName = TestInformation::fullName(grid.getImplementationName()); + + data.resetValuesToLinear(1, 100); + T val = T(33); + + { // NEON + const Neon::index_3d dim = grid.getDimension(); + std::vector elements; + + auto& X = data.getField(FieldNames::X); + auto& Y = data.getField(FieldNames::Y); + + + mapContainer_axpy(Neon::Backend::mainStreamIdx, + val, X, Y) + .run(0, Neon::DataView::BOUNDARY); + + mapContainer_axpy(Neon::Backend::mainStreamIdx, + val, X, Y) + .run(0, Neon::DataView::INTERNAL); + + X.updateHostData(0); + Y.updateHostData(0); + + data.getBackend().sync(0); + } + + { // Golden data + auto& X = data.getIODomain(FieldNames::X); + auto& Y = data.getIODomain(FieldNames::Y); + data.axpy(&val, X, Y); + } + + bool isOk = data.compare(FieldNames::Y); + ASSERT_TRUE(isOk); +} +template auto run(TestData&) -> void; +template auto run(TestData&) -> void; +template auto run(TestData&) -> void; +template auto run(TestData&) -> void; +} // namespace dataView } // namespace map \ No newline at end of file diff --git a/libNeonDomain/tests/domain-map/src/map.h b/libNeonDomain/tests/domain-map/src/map.h index 16073657..99864a3f 100644 --- a/libNeonDomain/tests/domain-map/src/map.h +++ b/libNeonDomain/tests/domain-map/src/map.h @@ -3,8 +3,8 @@ #include #include "Neon/domain/Grids.h" -#include "Neon/domain/tools/TestData.h" #include "Neon/domain/details/dGridSoA/dGridSoA.h" +#include "Neon/domain/tools/TestData.h" namespace map { @@ -18,5 +18,16 @@ extern template auto run(TestData(TestData&) -> void; extern template auto run(TestData&) -> void; +namespace dataView { + +template +auto run(TestData& data) -> void; + +extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; 
+extern template auto run(TestData&) -> void; +extern template auto run(TestData&) -> void; + +} // namespace dataView } // namespace map diff --git a/libNeonSkeleton/tests/unit/sUt_skeletonOnStreams/src/sUt_skeleton.Stencil.cu b/libNeonSkeleton/tests/unit/sUt_skeletonOnStreams/src/sUt_skeleton.Stencil.cu index 0170936c..2e2a2929 100644 --- a/libNeonSkeleton/tests/unit/sUt_skeletonOnStreams/src/sUt_skeleton.Stencil.cu +++ b/libNeonSkeleton/tests/unit/sUt_skeletonOnStreams/src/sUt_skeleton.Stencil.cu @@ -160,7 +160,7 @@ void SingleStencil(TestData& data, } template -void SingleStencilOCC(TestData& data) +void SingleStencilStandardOCC(TestData& data) { SingleStencil(data, Neon::skeleton::Occ::standard, Neon::set::TransferMode::get); } @@ -208,4 +208,14 @@ TEST(SingleStencil_NoOCC, bGrid) // using Grid = Neon::dGrid; using Type = int32_t; runAllTestConfiguration("bGrid_t", SingleStencilNoOCC, nGpus, 1); +} + +TEST(SingleStencil_StandardOCC, bGrid) +{ + int nGpus = 1; + using Grid = Neon::bGrid; + // using Grid = Neon::domain::eGrid; + // using Grid = Neon::dGrid; + using Type = int32_t; + runAllTestConfiguration("bGrid_t", SingleStencilStandardOCC, nGpus, 1); } \ No newline at end of file diff --git a/libNeonSkeleton/tests/unit/skeleton-stencil/src/runHelper.h b/libNeonSkeleton/tests/unit/skeleton-stencil/src/runHelper.h index 4858b819..8cd53082 100644 --- a/libNeonSkeleton/tests/unit/skeleton-stencil/src/runHelper.h +++ b/libNeonSkeleton/tests/unit/skeleton-stencil/src/runHelper.h @@ -22,10 +22,11 @@ using namespace Neon::domain::tool::testing; using namespace Neon::domain::tool; template -void runAllTestConfiguration(const std::string& gname, - std::function&)> f, - int nGpus, - int minNumGpus) +void runAllTestConfiguration(const std::string& gname, + std::function&, Neon::skeleton::Occ)> f, + Neon::skeleton::Occ occ, + int nGpus, + int minNumGpus) { if (Neon::sys::globalSpace::gpuSysObjStorage.numDevs() > 0) { std::vector nGpuTest; @@ -69,7 +70,7 @@ void 
runAllTestConfiguration(const std::string& gname, NEON_INFO(testData.toString()); - f(testData); + f(gname, testData, occ); } } } diff --git a/libNeonSkeleton/tests/unit/skeleton-stencil/src/stencil.cu b/libNeonSkeleton/tests/unit/skeleton-stencil/src/stencil.cu index 0e88980a..095959f9 100644 --- a/libNeonSkeleton/tests/unit/skeleton-stencil/src/stencil.cu +++ b/libNeonSkeleton/tests/unit/skeleton-stencil/src/stencil.cu @@ -59,7 +59,9 @@ auto laplaceOnIntegers(const Field& filedA, template -void singleStencil(TestData& data) +void singleStencil(std::string testName, + TestData& data, + Neon::skeleton::Occ occ) { using Type = typename TestData::Type; @@ -82,7 +84,9 @@ void singleStencil(TestData& data) ops.push_back(laplaceOnIntegers(Y, X)); Neon::skeleton::Skeleton skl(data.getBackend()); - skl.sequence(ops, "sUt_dGridStencil"); + Neon::skeleton::Options opt(occ, Neon::set::TransferMode::get); + skl.sequence(ops, testName, opt); + skl.ioToDot(testName, testName, true); for (int j = 0; j < nIterations; j++) { skl.run(); @@ -108,20 +112,29 @@ void singleStencil(TestData& data) ASSERT_TRUE(isOk); } -TEST(singleStencil, dGrid) +TEST(skeleton_stencil_occ_none, dGrid) { int nGpus = 1; using Grid = Neon::dGrid; using Type = int32_t; constexpr int C = 0; - runAllTestConfiguration("dGrid", singleStencil, nGpus, 1); + runAllTestConfiguration("skeleton_stencil_occ_none_dGrid", singleStencil, Neon::skeleton::Occ::none, nGpus, 1); } -TEST(singleStencil, bGridSingleGpu) +TEST(skeleton_stencil_occ_standard, dGrid) +{ + int nGpus = 1; + using Grid = Neon::dGrid; + using Type = int32_t; + constexpr int C = 0; + runAllTestConfiguration("skeleton_stencil_occ_standard_dGrid", singleStencil, Neon::skeleton::Occ::standard, nGpus, 1); +} + +TEST(skeleton_stencil, bGridSingleGpu) { int nGpus = 1; using Grid = Neon::bGrid; using Type = int32_t; constexpr int C = 0; - runAllTestConfiguration("bGrid", singleStencil, nGpus, 1); + runAllTestConfiguration("bGrid", singleStencil, 
Neon::skeleton::Occ::none, nGpus, 1); } \ No newline at end of file