Skip to content

Commit

Permalink
fix mPartition
Browse files Browse the repository at this point in the history
parent testing is okay now
  • Loading branch information
Ahdhn committed Jul 4, 2023
1 parent 9b681e0 commit c1326a3
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 263 deletions.
2 changes: 1 addition & 1 deletion apps/lbmMultiRes/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ uint32_t init(Neon::domain::mGrid& grid,
#endif

if (!in.hasChildren(cell)) {
const Neon::index_3d idx = in.mapToGlobal(cell);
const Neon::index_3d idx = in.getGlobalIndex(cell);

//pop
for (int q = 0; q < Q; ++q) {
Expand Down
6 changes: 6 additions & 0 deletions libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class bIndex
NEON_CUDA_HOST_DEVICE inline auto getDataBlockIdx() const -> DataBlockIdx const&;
NEON_CUDA_HOST_DEVICE inline auto setInDataBlockIdx(InDataBlockIdx const&) -> void;
NEON_CUDA_HOST_DEVICE inline auto setDataBlockIdx(DataBlockIdx const&) -> void;
// Returns true unless mDataBlockIdx equals the largest value DataBlockIdx can represent.
// NOTE(review): that maximum is presumably the "no block" sentinel - confirm where mDataBlockIdx is assigned.
NEON_CUDA_HOST_DEVICE inline auto isActive() -> bool;
// the local index within the block
InDataBlockIdx mInDataBlockIdx;
DataBlockIdx mDataBlockIdx{};
Expand Down Expand Up @@ -130,6 +131,11 @@ NEON_CUDA_HOST_DEVICE auto bIndex<SBlock>::getInDataBlockIdx() const -> const bI
return mInDataBlockIdx;
}

// Reports whether this index is considered active: it is active unless its
// data-block id equals the largest value DataBlockIdx can hold.
// NOTE(review): that maximum is presumably reserved as the "no block" marker - confirm with the code that fills mDataBlockIdx.
template <typename SBlock>
NEON_CUDA_HOST_DEVICE auto bIndex<SBlock>::isActive() -> bool
{
    using BlockId = typename bIndex::DataBlockIdx;
    const bool refersToBlock = (mDataBlockIdx != std::numeric_limits<BlockId>::max());
    return refersToBlock;
}

} // namespace Neon::domain::details::bGrid

Expand Down
10 changes: 9 additions & 1 deletion libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,22 @@ class bPartition
helpGetNghIdx(const Idx& idx)
const -> Idx;

// Neighbour lookup with a run-time offset that takes the block-connectivity table as an explicit argument.
// The (idx, offset) overload forwards here with mBlockConnectivity; the table is read to fill the
// data-block id of a neighbour that is resolved through block connectivity.
NEON_CUDA_HOST_DEVICE inline auto
helpGetNghIdx(const Idx& idx, const NghIdx& offset, const typename Idx::DataBlockIdx* blockConnectivity)
const -> Idx;

// Neighbour lookup with the offset fixed at compile time (xOff, yOff, zOff) and the block-connectivity
// table passed explicitly. The (idx)-only templated overload forwards here with mBlockConnectivity.
template <int xOff, int yOff, int zOff>
NEON_CUDA_HOST_DEVICE inline auto
helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity)
const -> Idx;

int mCardinality;
T* mMem;
NghIdx const* NEON_RESTRICT mStencilNghIndex;
typename Idx::DataBlockIdx const* NEON_RESTRICT mBlockConnectivity;
typename SBlock::BitMask const* NEON_RESTRICT mMask;
Neon::int32_3d const* NEON_RESTRICT mOrigin;
int mSetIdx;
int mMultiResDiscreteIdxSpacing = 1;
};

} // namespace Neon::domain::details::bGrid
Expand Down
26 changes: 21 additions & 5 deletions libNeonDomain/include/Neon/domain/details/bGrid/bPartition_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
location.x += gidx.mInDataBlockIdx.x;
location.y += gidx.mInDataBlockIdx.y;
location.z += gidx.mInDataBlockIdx.z;
if constexpr (SBlock::isMultiResMode) {
return location * mMultiResDiscreteIdxSpacing;
}
return location;
}

Expand Down Expand Up @@ -132,6 +129,16 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
const NghIdx& offset)
const -> Idx
{
return this->helpGetNghIdx(idx, offset, mBlockConnectivity);
}

template <typename T, int C, typename SBlock>
NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
helpGetNghIdx(const Idx& idx,
const NghIdx& offset,
const typename Idx::DataBlockIdx* blockConnectivity)
const -> Idx
{

typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + offset.x,
idx.mInDataBlockIdx.y + offset.y,
Expand Down Expand Up @@ -185,7 +192,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
(xFlag + 1) +
(yFlag + 1) * 3 +
(zFlag + 1) * 9;
remoteNghIdx.mDataBlockIdx = mBlockConnectivity[connectivityJump];
remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump];

return remoteNghIdx;
} else {
Expand All @@ -202,6 +209,15 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
helpGetNghIdx(const Idx& idx)
const -> Idx
{
return this->helpGetNghIdx<xOff, yOff, zOff>(idx, mBlockConnectivity);
}

template <typename T, int C, typename SBlock>
template <int xOff, int yOff, int zOff>
NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity)
const -> Idx
{

typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + xOff,
idx.mInDataBlockIdx.y + yOff,
Expand Down Expand Up @@ -275,7 +291,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition<T, C, SBlock>::
(xFlag + 1) +
(yFlag + 1) * 3 +
(zFlag + 1) * 9;
remoteNghIdx.mDataBlockIdx = mBlockConnectivity[connectivityJump];
remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump];

return remoteNghIdx;
} else {
Expand Down
6 changes: 6 additions & 0 deletions libNeonDomain/include/Neon/domain/details/mGrid/mField.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class mField

// Visits cells of the given level and hands each one to `fun`.
//   level          - grid level to iterate over
//   fun            - callback receiving an index_3d, the cardinality and a mutable reference to the value
//   filterOverlaps - when true (default) and level > 0, a cell that is also inside the domain of
//                    level - 1 is flagged as not active; pass false to disable that check
//                    (NOTE(review): non-active cells are presumably skipped - confirm in the definition)
//   mode           - compute mode; marked [[maybe_unused]] in the definition
auto forEachActiveCell(int level,
const std::function<void(const Neon::index_3d&, const int& cardinality, T&)>& fun,
bool filterOverlaps = true,
Neon::computeMode_t::computeMode_e mode = Neon::computeMode_t::computeMode_e::par) -> void;


Expand All @@ -92,6 +93,11 @@ class mField

auto load(Neon::set::Loader loader, int level, Neon::MultiResCompute compute) const -> const typename xField<T, C>::Partition&;

// Exposes the backend of the grid this field is attached to.
auto getBackend() const -> const Backend&
{
    auto& owningGrid = *(mData->grid);
    return owningGrid.getBackend();
}

private:
mField(const std::string& name,
const mGrid& grid,
Expand Down
101 changes: 50 additions & 51 deletions libNeonDomain/include/Neon/domain/details/mGrid/mField_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,60 +33,58 @@ mField<T, C>::mField(const std::string& name,

for (int l = 0; l < descriptor.getDepth(); ++l) {

auto parent = mData->grid->getParentsBlockID(l);
auto parentLocalID = mData->grid->getParentLocalID(l);
auto childBlockID = mData->grid->getChildBlockID(l);


for (int dvID = 0; dvID < Neon::DataViewUtil::nConfig; dvID++) {
mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID] = mData->grid->getBackend().devSet().template newDataSet<Partition>();
mData->fields[l].mData->mPartitions[PartitionBackend::gpu][dvID] = mData->grid->getBackend().devSet().template newDataSet<Partition>();

for (int32_t gpuID = 0; gpuID < int32_t(mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID].size()); gpuID++) {

auto setIdx = Neon::SetIdx(gpuID);

mData->fields[l].getPartition(Neon::Execution::host, setIdx, Neon::DataView(dvID)) =
Neon::domain::details::mGrid::mPartition<T, C>(
l,
mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent
(l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //child
cardinality,
mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
parent.rawMem(gpuID, Neon::DeviceType::CPU),
parentLocalID.rawMem(gpuID, Neon::DeviceType::CPU),
mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
(l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask
(l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CPU),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor
mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::host, setIdx),
refFactorSet.rawMem(gpuID, Neon::DeviceType::CPU),
spacingSet.rawMem(gpuID, Neon::DeviceType::CPU));

mData->fields[l].getPartition(Neon::Execution::device, setIdx, Neon::DataView(dvID)) =
Neon::domain::details::mGrid::mPartition<T, C>(
l,
mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent
(l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //child
cardinality,
mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
parent.rawMem(gpuID, Neon::DeviceType::CUDA),
parentLocalID.rawMem(gpuID, Neon::DeviceType::CUDA),
mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
(l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask
(l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CUDA),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor
mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::device, setIdx),
refFactorSet.rawMem(gpuID, Neon::DeviceType::CUDA),
spacingSet.rawMem(gpuID, Neon::DeviceType::CUDA));
}
//for (int dvID = 0; dvID < Neon::DataViewUtil::nConfig; dvID++) {
int dvID = 0;

mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID] = mData->grid->getBackend().devSet().template newDataSet<Partition>();
mData->fields[l].mData->mPartitions[PartitionBackend::gpu][dvID] = mData->grid->getBackend().devSet().template newDataSet<Partition>();

for (int32_t gpuID = 0; gpuID < int32_t(mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID].size()); gpuID++) {

auto setIdx = Neon::SetIdx(gpuID);

mData->fields[l].getPartition(Neon::DeviceType::CPU, setIdx, Neon::DataView(dvID)) =
Neon::domain::details::mGrid::mPartition<T, C>(
l,
mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent
(l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //child
cardinality,
mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->getParentsBlockID(l).rawMem(gpuID, Neon::DeviceType::CPU),
mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(),
(l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask
(l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CPU),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor
mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::host, setIdx),
refFactorSet.rawMem(gpuID, Neon::DeviceType::CPU),
spacingSet.rawMem(gpuID, Neon::DeviceType::CPU));

mData->fields[l].getPartition(Neon::DeviceType::CUDA, setIdx, Neon::DataView(dvID)) =
Neon::domain::details::mGrid::mPartition<T, C>(
l,
mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent
(l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //child
cardinality,
mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->getParentsBlockID(l).rawMem(gpuID, Neon::DeviceType::CUDA),
mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(),
(l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask
(l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CUDA),
(l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor
mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::device, setIdx),
refFactorSet.rawMem(gpuID, Neon::DeviceType::CUDA),
spacingSet.rawMem(gpuID, Neon::DeviceType::CUDA));
}
//}
}
}

Expand All @@ -97,6 +95,7 @@ auto mField<T, C>::forEachActiveCell(
const std::function<void(const Neon::index_3d&,
const int& cardinality,
T&)>& fun,
bool filterOverlaps,
[[maybe_unused]] Neon::computeMode_t::computeMode_e mode)
-> void
{
Expand Down Expand Up @@ -133,7 +132,7 @@ auto mField<T, C>::forEachActiveCell(
if ((*(mData->grid))(level).isInsideDomain(voxelGlobalID)) {

bool active = true;
if (level > 0) {
if (level > 0 && filterOverlaps) {
active = !((*(mData->grid))(level - 1).isInsideDomain(voxelGlobalID));
}

Expand Down
14 changes: 1 addition & 13 deletions libNeonDomain/include/Neon/domain/details/mGrid/mGrid.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,7 @@ class mGrid
int level,
LoadingLambda lambda) const -> Neon::set::Container;


/*auto getLaunchParameters(Neon::DataView dataView,
const Neon::index_3d& blockSize,
const size_t& sharedMem,
int level) const -> Neon::set::LaunchParameters;*/


auto getParentsBlockID(int level) const -> const Neon::set::MemSet<uint32_t>&;
auto getParentLocalID(int level) const -> const Neon::set::MemSet<Idx::InDataBlockIdx>&;
auto getParentsBlockID(int level) const -> Neon::set::MemSet<uint32_t>&;
auto getChildBlockID(int level) const -> const Neon::set::MemSet<uint32_t>&;


Expand Down Expand Up @@ -201,10 +193,6 @@ class mGrid
//Given a block at level L, we store R children block IDs for each block in L where R is the refinement factor
std::vector<Neon::set::MemSet<Idx::DataBlockIdx>> mChildBlockID;

//store the parent local index within its block
std::vector<Neon::set::MemSet<Idx::InDataBlockIdx>> mParentLocalID;


//grid levels refinement factors
Neon::set::MemSet<int> mRefFactors;

Expand Down
Loading

0 comments on commit c1326a3

Please sign in to comment.