Skip to content

Commit

Permalink
Merge branch 'disaggregated-dGRid' into lattice-benchmark-lbm
Browse files Browse the repository at this point in the history
  • Loading branch information
massimim committed Oct 10, 2023
2 parents a79ef8b + b574b49 commit 4e690c6
Show file tree
Hide file tree
Showing 14 changed files with 300 additions and 53 deletions.
16 changes: 16 additions & 0 deletions benchmarks/lbm/src/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ auto Config::parseArgs(const int argc, char* argv[])
std::cout << "Benchmark example " << '\n';
std::cout << "./lbm --deviceType gpu --deviceIds 0 1 2 3 4 --grid dGrid --domain-size 100 --max-iter 2000 --computeFP double --storageFP double --nOCC --huGrid --benchmark --warmup-iter 10 --repetitions 5" << '\n';

std::cout <<" ./lbm --deviceType gpu\\\n"
" --deviceIds 0\\\n"
" --grid dGrid\\\n"
" --domain-size 100\\\n"
" --max-iter 1000\\\n"
" --computeFP float\\\n"
" --storageFP float\\\n"
" --occ none\\\n"
" --transferMode put\\\n"
" --stencilSemantic grid\\\n"
" --spaceCurve sweep\\\n"
" --collision bgk\\\n"
" --streamingMethod pull\\\n"
" --lattice d3q19\\\n"
" --vti 10";

return -1;
}

Expand Down
29 changes: 21 additions & 8 deletions benchmarks/lbm/src/Lbm.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ struct Lbm
ContainerFactory::Common::computeWallNghMask(cellFlagField,
cellFlagField)
.run(Neon::Backend::mainStreamIdx);
cellFlagField.newHaloUpdate(Neon::set::StencilSemantic::standard,
Neon::set::TransferMode::get,
Neon::Execution::device)
.run(Neon::Backend::mainStreamIdx);
metrics::recordProblemSetupMetrics(grid.getBackend(), *reportPtr, setBcClockStart);
}

Expand Down Expand Up @@ -157,12 +161,17 @@ struct Lbm
ops.push_back(even);
std::stringstream appName;

if (iteration % 2 == 0)
appName << "LBM_push_even";
else
if (skIdx % 2 == 0)
appName << "LBM_pull_even";
else
appName << "LBM_pull_odd";

skeleton.at(skIdx).sequence(ops, appName.str(), opt);

if (skIdx % 2 == 0)
skeleton.at(skIdx).ioToDot("lbm-pull-even","lbm_pull_even",true);
else
skeleton.at(skIdx).ioToDot("lbm-pull-odd","lbm_pull_even", true);
}
}
{
Expand Down Expand Up @@ -200,7 +209,7 @@ struct Lbm
if (iteration % 2 == 0)
appName << "LBM_push_even";
else
appName << "LBM_pull_even";
appName << "LBM_push_odd";
skeleton.at(skIdx).sequence(ops, appName.str(), opt);
}

Expand All @@ -226,13 +235,13 @@ struct Lbm
cellFlagField,
lbmParameters.omega,
pFieldList.at(0));
appName << "LBM_push_even";
appName << "LBM_aa_even";
} else {
lbmIteration = ContainerFactory::AA::Odd::iteration(
cellFlagField,
lbmParameters.omega,
pFieldList.at(0));
appName << "LBM_pull_even";
appName << "LBM_aa_even";
}
std::vector<Neon::set::Container> ops;
skeleton.at(skIdx) = Neon::skeleton::Skeleton(pFieldList[0].getBackend());
Expand Down Expand Up @@ -299,6 +308,10 @@ struct Lbm
done = true;
}
if constexpr (method == lbm::Method::pull) {
pop.newHaloUpdate(Neon::set::StencilSemantic::standard,
Neon::set::TransferMode::get,
Neon::Execution::device)
.run(Neon::Backend::mainStreamIdx);
auto computeRhoAndU = ContainerFactory::Pull::computeRhoAndU(pop, cellFlagField, rho, u);
computeRhoAndU.run(Neon::Backend::mainStreamIdx);
done = true;
Expand Down Expand Up @@ -326,8 +339,8 @@ struct Lbm
iterIdStr = std::string(numDigits - std::min(numDigits, iterIdStr.length()), '0') + iterIdStr;

// pop.ioToVtk("pop_" + iterIdStr, "pop", false);
u.ioToVtk("u_" + iterIdStr, "u", false);
rho.ioToVtk("rho_" + iterIdStr, "rho", false);
u.ioToVtk("u_" + iterIdStr, "u", false, Neon::IoFileType::BINARY);
rho.ioToVtk("rho_" + iterIdStr, "rho", false, Neon::IoFileType::BINARY);
cellFlagField.template ioToVtk<int>("cellFlagField_" + iterIdStr, "flag", false);

#if 0
Expand Down
2 changes: 1 addition & 1 deletion libNeonCore/include/Neon/core/types/Macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@
#define NEON_RESTRICT restrict
#endif

#ifdef NEON_COMPILER_CUDA
#if defined(NEON_COMPILER_CUDA) && !defined(_WIN32)
#define NEON_RESTRICT __restrict__
#endif

Expand Down
112 changes: 107 additions & 5 deletions libNeonDomain/include/Neon/domain/details/dGrid/dField_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ dField<T, C>::dField(const std::string& fieldUserName,
T(0),
dataUse,
memoryOptions,
haloStatus) {
haloStatus)
{

// only works if dims in x and y direction for all partitions match
for (int i = 0; i < dims.size() - 1; ++i) {
Expand Down Expand Up @@ -88,7 +89,7 @@ dField<T, C>::dField(const std::string& fieldUserName,

{ // Setting up partitions
Neon::aGrid const& aGrid = mData->grid->helpFieldMemoryAllocator();
mData->memoryField = aGrid.newField<T,C>(fieldUserName + "-storage", cardinality, T(), dataUse, memoryOptions);
mData->memoryField = aGrid.newField<T, C>(fieldUserName + "-storage", cardinality, T(), dataUse, memoryOptions);
// const int setCardinality = mData->grid->getBackend().getDeviceCount();
mData->partitionTable.forEachConfiguration(
[&](Neon::Execution execution,
Expand Down Expand Up @@ -306,7 +307,7 @@ auto dField<T, C>::operator()(const Neon::index_3d& idxGlobal,
auto& partition = mData->partitionTable.getPartition(Neon::Execution::host,
partitionIdx,
Neon::DataView::STANDARD);
auto& span = mData->grid->getSpan(Neon::Execution::host,partitionIdx, Neon::DataView::STANDARD);
auto& span = mData->grid->getSpan(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD);
Idx idx;
bool isOk = span.setAndValidate(idx, localIDx.x, localIDx.y, localIDx.z);
if (!isOk) {
Expand All @@ -326,7 +327,7 @@ auto dField<T, C>::getReference(const Neon::index_3d& idxGlobal,
auto& partition = mData->partitionTable.getPartition(Neon::Execution::host,
partitionIdx,
Neon::DataView::STANDARD);
auto& span = mData->grid->getSpan(Neon::Execution::host,partitionIdx, Neon::DataView::STANDARD);
auto& span = mData->grid->getSpan(Neon::Execution::host, partitionIdx, Neon::DataView::STANDARD);
Idx idx;
bool isOk = span.setAndValidate(idx, localIDx.x, localIDx.y, localIDx.z);
if (!isOk) {
Expand Down Expand Up @@ -484,6 +485,81 @@ auto dField<T, C>::initHaloUpdateTable()
transfersVec.push_back(transfer);
}
});

mData->latticeHaloUpdateTable.forEachPutConfiguration(
bk, [&](Neon::SetIdx setIdxSrc,
Execution execution,
Neon::domain::tool::partitioning::ByDirection byDirection,
std::vector<Neon::set::MemoryTransfer>& transfersVec) {
{
using namespace Neon::domain::tool::partitioning;

Neon::SetIdx setIdxDst = getNghSetIdx(setIdxSrc, byDirection);

int r = grid.getStencil().getRadius();

std::array<Partition*, Data::EndPointsUtils::nConfigs> partitions;
std::array<std::array<int, ByDirectionUtils::nConfigs>, Data::EndPointsUtils::nConfigs> ghostZBeginIdx;
std::array<std::array<int, ByDirectionUtils::nConfigs>, Data::EndPointsUtils::nConfigs> boundaryZBeginIdx;
std::array<Neon::size_4d, Data::EndPointsUtils::nConfigs> memPhyDim;

partitions[Data::EndPoints::dst] = &this->getPartition(execution, setIdxDst, Neon::DataView::STANDARD);
partitions[Data::EndPoints::src] = &this->getPartition(execution, setIdxSrc, Neon::DataView::STANDARD);

for (auto endPoint : {Data::EndPoints::dst, Data::EndPoints::src}) {
ghostZBeginIdx[endPoint][static_cast<int>(ByDirection::down)] = 0;
boundaryZBeginIdx[endPoint][static_cast<int>(ByDirection::down)] = r;
boundaryZBeginIdx[endPoint][static_cast<int>(ByDirection::up)] = partitions[endPoint]->dim().z;
ghostZBeginIdx[endPoint][static_cast<int>(ByDirection::up)] = partitions[endPoint]->dim().z + r;

memPhyDim[endPoint] = Neon::size_4d(
1,
size_t(partitions[endPoint]->dim().x),
size_t(partitions[endPoint]->dim().x) * partitions[endPoint]->dim().y,
size_t(partitions[endPoint]->dim().x) * partitions[endPoint]->dim().y * (partitions[endPoint]->dim().z + 2 * r));
}

for (int j = 0; j < this->getCardinality(); j++) {
auto const& stencil = this->getGrid().getStencil();
if (this->getCardinality() != stencil.nPoints()) {
continue;
}
T* srcMem = partitions[Data::EndPoints::src]->mem();
T* dstMem = partitions[Data::EndPoints::dst]->mem();

Neon::size_4d srcBoundaryBuff(0, 0, boundaryZBeginIdx[Data::EndPoints::src][static_cast<int>(byDirection)], j);
Neon::size_4d dstGhostBuff(0, 0, ghostZBeginIdx[Data::EndPoints::dst][static_cast<int>(ByDirectionUtils::invert(byDirection))], j);

// std::cout << "To " << dstGhostBuff << " prt " << partitions[Data::EndPoints::dst]->prtID() << " From " << srcBoundaryBuff << "(src dim" << partitions[Data::EndPoints::src]->dim() << ")" << std::endl;
// std::cout << "dst mem " << partitions[Data::EndPoints::dst]->mem() << " " << std::endl;
// std::cout << "dst pitch " << (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum() << " " << std::endl;
// std::cout << "dst dstGhostBuff " << dstGhostBuff << " " << std::endl;
// std::cout << "dst pitch all" << memPhyDim[Data::EndPoints::dst] << " " << std::endl;

Neon::set::MemoryTransfer transfer({setIdxDst, dstMem + (dstGhostBuff * memPhyDim[Data::EndPoints::dst]).rSum(), dstGhostBuff},
{setIdxSrc, srcMem + (srcBoundaryBuff * memPhyDim[Data::EndPoints::src]).rSum(), srcBoundaryBuff},
sizeof(T) *
r *
partitions[Data::EndPoints::src]->dim().x *
partitions[Data::EndPoints::src]->dim().y);
if (ByDirection::up == byDirection && bk.isLastDevice(setIdxSrc)) {
return;
}

if (ByDirection::down == byDirection && bk.isFirstDevice(setIdxSrc)) {
return;
}
if (ByDirection::up == byDirection && !(stencil.points()[j].z > 0)) {
continue;
}
if (ByDirection::down == byDirection && !(stencil.points()[j].z < 0)) {
continue;
}
// std::cout << transfer.toString() << std::endl;
transfersVec.push_back(transfer);
}
}
});
//
// mData->latticeHaloUpdateTable.forEachPutConfiguration(
// bk, [&](Neon::SetIdx setIdxSrc,
Expand Down Expand Up @@ -608,7 +684,33 @@ auto dField<T, C>::
execution);
}
} else {
NEON_DEV_UNDER_CONSTRUCTION("");
auto transfers = bk.template newDataSet<std::vector<Neon::set::MemoryTransfer>>();
if (this->getMemoryOptions().getOrder() == Neon::MemoryLayout::structOfArrays) {
for (auto byDirection : {tool::partitioning::ByDirection::up,
tool::partitioning::ByDirection::down}) {

auto const& tableEntryByDir = mData->latticeHaloUpdateTable.get(transferMode,
execution,
byDirection);

tableEntryByDir.forEachSeq([&](SetIdx setIdx, auto const& tableEntryByDirBySetIdx) {
transfers[setIdx].insert(std::end(transfers[setIdx]),
std::begin(tableEntryByDirBySetIdx),
std::end(tableEntryByDirBySetIdx));
});
}
dataTransferContainer =
Neon::set::Container::factoryDataTransfer(
*this,
transferMode,
stencilSemantic,
transfers,
execution);


} else {
NEON_DEV_UNDER_CONSTRUCTION("");
}
}
Neon::set::Container SyncContainer =
Neon::set::Container::factorySynchronization(
Expand Down
22 changes: 12 additions & 10 deletions libNeonDomain/include/Neon/domain/details/dGrid/dGrid_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,24 +91,26 @@ dGrid::dGrid(const Neon::Backend& backend,
Neon::DataView dw,
dSpan& span) {
span.mDataView = dw;
span.mZHaloRadius = setCardinality == 1 ? 0 : mData->halo.z;
span.mZBoundaryRadius = mData->halo.z;
span.mZghostRadius = setCardinality == 1 ? 0 : mData->halo.z;
span.mZboundaryRadius = mData->halo.z;
span.mMaxZInDomain = mData->partitionDims[setIdx].z;

switch (dw) {
case Neon::DataView::STANDARD: {
// Only works z partitions.
assert(mData->halo.x == 0 && mData->halo.y == 0);

span.mDim = mData->partitionDims[setIdx];
span.mSpanDim = mData->partitionDims[setIdx];

break;
}
case Neon::DataView::BOUNDARY: {
auto dims = getDevSet().newDataSet<index_3d>();
// Only works z partitions.
assert(mData->halo.x == 0 && mData->halo.y == 0);

span.mDim = mData->partitionDims[setIdx];
span.mDim.z = span.mZBoundaryRadius * 2;
span.mSpanDim = mData->partitionDims[setIdx];
span.mSpanDim.z = span.mZboundaryRadius * 2;

break;
}
Expand All @@ -117,12 +119,12 @@ dGrid::dGrid(const Neon::Backend& backend,
// Only works z partitions.
assert(mData->halo.x == 0 && mData->halo.y == 0);

span.mDim = mData->partitionDims[setIdx];
span.mDim.z = span.mDim.z - span.mZBoundaryRadius * 2;
if (span.mDim.z <= 0 && setCardinality > 1) {
span.mSpanDim = mData->partitionDims[setIdx];
span.mSpanDim.z = span.mSpanDim.z - span.mZboundaryRadius * 2;
if (span.mSpanDim.z <= 0 && setCardinality > 1) {
NeonException exp("dGrid");
exp << "The grid size is too small to support the data view model correctly \n";
exp << span.mDim << " for setIdx " << setIdx << " and device " << getDevSet().devId(setIdx);
exp << span.mSpanDim << " for setIdx " << setIdx << " and device " << getDevSet().devId(setIdx);
NEON_THROW(exp);
}

Expand All @@ -140,7 +142,7 @@ dGrid::dGrid(const Neon::Backend& backend,
Neon::DataView dw,
int& count) {
if (Execution::host == execution) {
count = mData->spanTable.getSpan(Neon::Execution::host, setIdx, dw).mDim.rMul();
count = mData->spanTable.getSpan(Neon::Execution::host, setIdx, dw).mSpanDim.rMul();
}
});
}
Expand Down
9 changes: 5 additions & 4 deletions libNeonDomain/include/Neon/domain/details/dGrid/dSpan.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ class dSpan

private:
Neon::DataView mDataView;
int mZHaloRadius;
int mZBoundaryRadius;
Neon::index_3d mDim /** Dimension of the span, its values depends on the mDataView*/;
int mZghostRadius;
int mZboundaryRadius;
int mMaxZInDomain;
Neon::index_3d mSpanDim /** Dimension of the span, its values depends on the mDataView*/;
};

} // namespace Neon::domain::details::dGrid
} // namespace Neon::domain::details::dGrid

#include "dSpan_imp.h"
20 changes: 10 additions & 10 deletions libNeonDomain/include/Neon/domain/details/dGrid/dSpan_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,25 @@ dSpan::setAndValidate(Idx& idx,
idx.setLocation().y = int(y);
idx.setLocation().z = int(z);

if (idx.getLocation() < mDim) {
if (idx.getLocation() < mSpanDim) {
res = true;
}

switch (mDataView) {
case Neon::DataView::STANDARD: {
idx.setLocation().z += mZHaloRadius;
idx.setLocation().z += mZghostRadius;
return res;
}
case Neon::DataView::INTERNAL: {
idx.setLocation().z += mZHaloRadius + mZBoundaryRadius;
idx.setLocation().z += mZghostRadius + mZboundaryRadius;
return res;
}
case Neon::DataView::BOUNDARY: {

idx.setLocation().z += idx.getLocation().z < mZBoundaryRadius
? 0
: (mDim.z - 1) + (-1 * mZBoundaryRadius /* we remove zBoundaryRadius as the first zBoundaryRadius will manage the lower slices */);
idx.setLocation().z += mZHaloRadius;
idx.setLocation().z += idx.getLocation().z < mZboundaryRadius
? 0
: (mMaxZInDomain - 1) + (-1 * mZboundaryRadius /* we remove zBoundaryRadius as the first zBoundaryRadius will manage the lower slices */);
idx.setLocation().z += mZghostRadius;

return res;
}
Expand All @@ -51,19 +51,19 @@ NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetDataView()
// Accessor that returns, by const reference, the dSpan member holding the
// z-direction halo (ghost) radius. Callable from host and device code via
// NEON_CUDA_HOST_DEVICE.
// NOTE(review): this listing is a rendered diff, so the return statement shows
// up twice (`mZHaloRadius`, then the renamed `mZghostRadius`); presumably only
// the second line exists in the committed file -- confirm against the repository.
NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetZHaloRadius()
const -> int const&
{
return mZHaloRadius;
return mZghostRadius;
}

// Accessor that returns, by const reference, the dSpan member holding the
// z-direction boundary radius.
// NOTE(review): this listing is a rendered diff, so the return statement shows
// up twice (`mZBoundaryRadius`, then the renamed `mZboundaryRadius`); presumably
// only the second line exists in the committed file -- confirm against the repository.
NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetZBoundaryRadius()
const -> int const&
{
return mZBoundaryRadius;
return mZboundaryRadius;
}

// Accessor that returns, by const reference, the 3D dimension stored in the
// span (the same member that setAndValidate compares locations against).
// NOTE(review): this listing is a rendered diff, so the return statement shows
// up twice (`mDim`, then the renamed `mSpanDim`); presumably only the second
// line exists in the committed file -- confirm against the repository.
NEON_CUDA_HOST_DEVICE inline auto dSpan::helpGetDim()
const -> Neon::index_3d const&
{
return mDim;
return mSpanDim;
}

} // namespace Neon::domain::details::dGrid
Loading

0 comments on commit 4e690c6

Please sign in to comment.