diff --git a/apps/lbmMultiRes/init.h b/apps/lbmMultiRes/init.h index e150b1f6..1e5ab5a5 100644 --- a/apps/lbmMultiRes/init.h +++ b/apps/lbmMultiRes/init.h @@ -62,7 +62,7 @@ uint32_t init(Neon::domain::mGrid& grid, #endif if (!in.hasChildren(cell)) { - const Neon::index_3d idx = in.mapToGlobal(cell); + const Neon::index_3d idx = in.getGlobalIndex(cell); //pop for (int q = 0; q < Q; ++q) { diff --git a/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h b/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h index bbf103d1..69f8bd42 100644 --- a/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h +++ b/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h @@ -102,6 +102,7 @@ class bIndex NEON_CUDA_HOST_DEVICE inline auto getDataBlockIdx() const -> DataBlockIdx const&; NEON_CUDA_HOST_DEVICE inline auto setInDataBlockIdx(InDataBlockIdx const&) -> void; NEON_CUDA_HOST_DEVICE inline auto setDataBlockIdx(DataBlockIdx const&) -> void; + NEON_CUDA_HOST_DEVICE inline auto isActive() -> bool; // the local index within the block InDataBlockIdx mInDataBlockIdx; DataBlockIdx mDataBlockIdx{}; @@ -130,6 +131,11 @@ NEON_CUDA_HOST_DEVICE auto bIndex::getInDataBlockIdx() const -> const bI return mInDataBlockIdx; } +template +NEON_CUDA_HOST_DEVICE auto bIndex::isActive() -> bool +{ + return mDataBlockIdx != std::numeric_limits::max(); +} } // namespace Neon::domain::details::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h index d54ef6a3..a169ee56 100644 --- a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h +++ b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition.h @@ -138,6 +138,15 @@ class bPartition helpGetNghIdx(const Idx& idx) const -> Idx; + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx, const NghIdx& offset, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx; + + template + NEON_CUDA_HOST_DEVICE inline auto + helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx; + int mCardinality; T* mMem; NghIdx const* NEON_RESTRICT mStencilNghIndex; @@ -145,7 +154,6 @@ class bPartition typename SBlock::BitMask const* NEON_RESTRICT mMask; Neon::int32_3d const* NEON_RESTRICT mOrigin; int mSetIdx; - int mMultiResDiscreteIdxSpacing = 1; }; } // namespace Neon::domain::details::bGrid diff --git a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition_imp.h b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition_imp.h index b1f36d4d..ec456913 100644 --- a/libNeonDomain/include/Neon/domain/details/bGrid/bPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/details/bGrid/bPartition_imp.h @@ -45,9 +45,6 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition:: location.x += gidx.mInDataBlockIdx.x; location.y += gidx.mInDataBlockIdx.y; location.z += gidx.mInDataBlockIdx.z; - if constexpr (SBlock::isMultiResMode) { - return location * mMultiResDiscreteIdxSpacing; - } return location; } @@ -132,6 +129,16 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition:: const NghIdx& offset) const -> Idx { + return this->helpGetNghIdx(idx, offset, mBlockConnectivity); +} + +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, + const NghIdx& offset, + const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + offset.x, idx.mInDataBlockIdx.y + offset.y, @@ -185,7 +192,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition:: (xFlag + 1) + (yFlag + 1) * 3 + (zFlag + 1) * 9; - remoteNghIdx.mDataBlockIdx = mBlockConnectivity[connectivityJump]; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; return remoteNghIdx; } else { @@ -202,6 +209,15 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition:: helpGetNghIdx(const Idx& idx) const -> Idx { + return this->helpGetNghIdx(idx, mBlockConnectivity); +} + +template +template +NEON_CUDA_HOST_DEVICE inline auto bPartition:: + helpGetNghIdx(const Idx& idx, const typename Idx::DataBlockIdx* blockConnectivity) + const -> Idx +{ typename Idx::InDataBlockIdx ngh(idx.mInDataBlockIdx.x + xOff, idx.mInDataBlockIdx.y + yOff, @@ -275,7 +291,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition:: (xFlag + 1) + (yFlag + 1) * 3 + (zFlag + 1) * 9; - remoteNghIdx.mDataBlockIdx = mBlockConnectivity[connectivityJump]; + remoteNghIdx.mDataBlockIdx = blockConnectivity[connectivityJump]; return remoteNghIdx; } else { diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mField.h b/libNeonDomain/include/Neon/domain/details/mGrid/mField.h index cf5d0c5e..59b1a299 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mField.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mField.h @@ -79,6 +79,7 @@ class mField auto forEachActiveCell(int level, const std::function& fun, + bool filterOverlaps = true, Neon::computeMode_t::computeMode_e mode = Neon::computeMode_t::computeMode_e::par) -> void; @@ -92,6 +93,11 @@ class mField auto load(Neon::set::Loader loader, int level, Neon::MultiResCompute compute) const -> const typename xField::Partition&; + auto getBackend() const -> const Backend& + { + return mData->grid->getBackend(); + } + private: mField(const std::string& name, const mGrid& grid, diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mField_imp.h b/libNeonDomain/include/Neon/domain/details/mGrid/mField_imp.h index 5612e7c6..f9182ae8 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mField_imp.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mField_imp.h @@ -33,60 +33,58 @@ mField::mField(const std::string& name, for (int l = 0; l < descriptor.getDepth(); ++l) { - auto parent = mData->grid->getParentsBlockID(l); - auto parentLocalID = mData->grid->getParentLocalID(l); auto childBlockID = mData->grid->getChildBlockID(l); - for (int dvID = 0; dvID < Neon::DataViewUtil::nConfig; dvID++) { - mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID] = mData->grid->getBackend().devSet().template newDataSet(); - mData->fields[l].mData->mPartitions[PartitionBackend::gpu][dvID] = mData->grid->getBackend().devSet().template newDataSet(); - - for (int32_t gpuID = 0; gpuID < int32_t(mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID].size()); gpuID++) { - - auto setIdx = Neon::SetIdx(gpuID); - - mData->fields[l].getPartition(Neon::Execution::host, setIdx, Neon::DataView(dvID)) = - Neon::domain::details::mGrid::mPartition( - l, - mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent - (l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //child - cardinality, - mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), - mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), - parent.rawMem(gpuID, Neon::DeviceType::CPU), - parentLocalID.rawMem(gpuID, Neon::DeviceType::CPU), - mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), - (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask - (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CPU), - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor - mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::host, setIdx), - refFactorSet.rawMem(gpuID, Neon::DeviceType::CPU), - spacingSet.rawMem(gpuID, Neon::DeviceType::CPU)); - - mData->fields[l].getPartition(Neon::Execution::device, setIdx, Neon::DataView(dvID)) = - Neon::domain::details::mGrid::mPartition( - l, - mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent - (l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //child - cardinality, - mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), - mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), - parent.rawMem(gpuID, Neon::DeviceType::CUDA), - parentLocalID.rawMem(gpuID, Neon::DeviceType::CUDA), - mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), - (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask - (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CUDA), - (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor - mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::device, setIdx), - refFactorSet.rawMem(gpuID, Neon::DeviceType::CUDA), - spacingSet.rawMem(gpuID, Neon::DeviceType::CUDA)); - } + //for (int dvID = 0; dvID < Neon::DataViewUtil::nConfig; dvID++) { + int dvID = 0; + + mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID] = mData->grid->getBackend().devSet().template newDataSet(); + mData->fields[l].mData->mPartitions[PartitionBackend::gpu][dvID] = mData->grid->getBackend().devSet().template newDataSet(); + + for (int32_t gpuID = 0; gpuID < int32_t(mData->fields[l].mData->mPartitions[PartitionBackend::cpu][dvID].size()); gpuID++) { + + auto setIdx = Neon::SetIdx(gpuID); + + mData->fields[l].getPartition(Neon::DeviceType::CPU, setIdx, Neon::DataView(dvID)) = + Neon::domain::details::mGrid::mPartition( + l, + mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent + (l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //child + cardinality, + mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), + mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->getParentsBlockID(l).rawMem(gpuID, Neon::DeviceType::CPU), + mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), + (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask + (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CPU), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::host, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor + mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::host, setIdx), + refFactorSet.rawMem(gpuID, Neon::DeviceType::CPU), + spacingSet.rawMem(gpuID, Neon::DeviceType::CPU)); + + mData->fields[l].getPartition(Neon::DeviceType::CUDA, setIdx, Neon::DataView(dvID)) = + Neon::domain::details::mGrid::mPartition( + l, + mData->fields[l].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->fields[l + 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent + (l == 0) ? nullptr : mData->fields[l - 1].mData->field.getMemoryField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //child + cardinality, + mData->grid->operator()(l).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), + mData->grid->operator()(l).helpGetDataBlockOriginField().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->getParentsBlockID(l).rawMem(gpuID, Neon::DeviceType::CUDA), + mData->grid->operator()(l).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), + (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //lower-level mask + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getActiveBitMask().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //upper-level mask + (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CUDA), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).helpGetBlockConnectivity().getPartition(Neon::Execution::device, setIdx, Neon::DataView::STANDARD).mem(), //parent neighbor + mData->grid->operator()(l).helpGetStencilIdTo3dOffset().rawMem(Neon::Execution::device, setIdx), + refFactorSet.rawMem(gpuID, Neon::DeviceType::CUDA), + spacingSet.rawMem(gpuID, Neon::DeviceType::CUDA)); } + //} } } @@ -97,6 +95,7 @@ auto mField::forEachActiveCell( const std::function& fun, + bool filterOverlaps, [[maybe_unused]] Neon::computeMode_t::computeMode_e mode) -> void { @@ -133,7 +132,7 @@ auto mField::forEachActiveCell( if ((*(mData->grid))(level).isInsideDomain(voxelGlobalID)) { bool active = true; - if (level > 0) { + if (level > 0 && filterOverlaps) { active = !((*(mData->grid))(level - 1).isInsideDomain(voxelGlobalID)); } diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mGrid.h b/libNeonDomain/include/Neon/domain/details/mGrid/mGrid.h index a5366b36..2d2634aa 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mGrid.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mGrid.h @@ -136,15 +136,7 @@ class mGrid int level, LoadingLambda lambda) const -> Neon::set::Container; - - /*auto getLaunchParameters(Neon::DataView dataView, - const Neon::index_3d& blockSize, - const size_t& sharedMem, - int level) const -> Neon::set::LaunchParameters;*/ - - - auto getParentsBlockID(int level) const -> const Neon::set::MemSet&; - auto getParentLocalID(int level) const -> const Neon::set::MemSet&; + auto getParentsBlockID(int level) const -> Neon::set::MemSet&; auto getChildBlockID(int level) const -> const Neon::set::MemSet&; @@ -201,10 +193,6 @@ class mGrid //Given a block at level L, we store R children block IDs for each block in L where R is the refinement factor std::vector> mChildBlockID; - //store the parent local index within its block - std::vector> mParentLocalID; - - //gird levels refinement factors Neon::set::MemSet mRefFactors; diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h index 0940c607..11e246f3 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h @@ -38,23 +38,22 @@ class mPartition : public Neon::domain::details::bGrid::bPartition NghData; + NEON_CUDA_HOST_DEVICE inline auto childVal(const Idx& parent_cell, + const NghIdx child, + int card, + const T& alternativeVal) const -> NghData; /** * Get a cell that represents the child of a parent cell * @param parent_cell the parent cell that its child is requested * @param child the child 3d local index relative to the parent */ - NEON_CUDA_HOST_DEVICE inline auto getChild(const Idx& parent_cell, - Neon::int8_3d child) const -> Idx; + NEON_CUDA_HOST_DEVICE inline auto getChild(const Idx& parent_cell, + NghIdx child) const -> Idx; /** @@ -105,7 +104,7 @@ class mPartition : public Neon::domain::details::bGrid::bPartition bool; + NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Idx& cell, const NghIdx nghDir) const -> bool; /** @@ -142,8 +141,8 @@ class mPartition : public Neon::domain::details::bGrid::bPartition Idx; + NEON_CUDA_HOST_DEVICE inline auto getUncle(const Idx& cell, + const NghIdx direction) const -> Idx; /** * The uncle of a cell at level L is a cell at level L+1 and is a neighbor to the cell's parent. @@ -153,10 +152,10 @@ class mPartition : public Neon::domain::details::bGrid::bPartition NghData; + NEON_CUDA_HOST_DEVICE inline auto uncleVal(const Idx& cell, + const NghIdx direction, + int card, + const T& alternativeVal) const -> NghData; /** * @brief similar to the above uncleVal but returns a reference. Additionally, it is now @@ -165,9 +164,9 @@ class mPartition : public Neon::domain::details::bGrid::bPartition T&; + NEON_CUDA_HOST_DEVICE inline auto uncleVal(const Idx& cell, + const NghIdx direction, + int card) const -> T&; /** * Get the refinement factor i.e., number of children at each dimension @@ -182,24 +181,23 @@ class mPartition : public Neon::domain::details::bGrid::bPartition uint32_t; - int mLevel; - T* mMemParent; - T* mMemChild; - Idx::DataBlockIdx* mParentBlockID; - Idx::InDataBlockIdx* mParentLocalID; - MaskT* mMaskLowerLevel; - MaskT* mMaskUpperLevel; - Idx::DataBlockIdx* mChildBlockID; - Idx::DataBlockIdx* mParentNeighbourBlocks; - int* mRefFactors; - int* mSpacing; + int mLevel; + T* mMemParent; + T* mMemChild; + Idx::DataBlockIdx* mParentBlockID; + MaskT* mMaskLowerLevel; + MaskT* mMaskUpperLevel; + Idx::DataBlockIdx* mChildBlockID; + Idx::DataBlockIdx* mParentNeighbourBlocks; + int* mRefFactors; + int* mSpacing; }; } // namespace Neon::domain::details::mGrid diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h index 809484e4..1fbcb68c 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h @@ -8,7 +8,6 @@ mPartition::mPartition() mMemParent(nullptr), mMemChild(nullptr), mParentBlockID(nullptr), - mParentLocalID(nullptr), mMaskLowerLevel(nullptr), mMaskUpperLevel(nullptr), mChildBlockID(nullptr), @@ -18,31 +17,27 @@ mPartition::mPartition() } template -mPartition::mPartition(Neon::DataView /*dataView*/, - int level, - T* mem, - T* memParent, - T* memChild, - int cardinality, - Idx::DataBlockIdx* neighbourBlocks, - Neon::int32_3d* origin, - uint32_t* parentBlockID, - Idx::InDataBlockIdx* parentLocalID, - MaskT* mask, - MaskT* maskLowerLevel, - MaskT* maskUpperLevel, - uint32_t* childBlockID, - uint32_t* parentNeighbourBlocks, - T /*outsideValue*/, - NghIdx* stencilNghIndex, - int* refFactors, - int* spacing) +mPartition::mPartition(int level, + T* mem, + T* memParent, + T* memChild, + int cardinality, + Idx::DataBlockIdx* neighbourBlocks, + Neon::int32_3d* origin, + uint32_t* parentBlockID, + MaskT* mask, + MaskT* maskLowerLevel, + MaskT* maskUpperLevel, + uint32_t* childBlockID, + uint32_t* parentNeighbourBlocks, + NghIdx* stencilNghIndex, + int* refFactors, + int* spacing) : Neon::domain::details::bGrid::bPartition(0, cardinality, mem, neighbourBlocks, mask, origin, stencilNghIndex), mLevel(level), mMemParent(memParent), mMemChild(memChild), mParentBlockID(parentBlockID), - mParentLocalID(parentLocalID), mMaskLowerLevel(maskLowerLevel), mMaskUpperLevel(maskUpperLevel), mChildBlockID(childBlockID), @@ -53,24 +48,14 @@ mPartition::mPartition(Neon::DataView /*dataView*/, } template -NEON_CUDA_HOST_DEVICE inline Neon::index_3d mPartition::mapToGlobal(const Idx& gidx) const +NEON_CUDA_HOST_DEVICE inline Neon::index_3d mPartition::getGlobalIndex(Idx gidx) const { + const int sp = (mLevel == 0) ? 1 : mSpacing[mLevel - 1]; + Neon::index_3d ret = this->mOrigin[gidx.getDataBlockIdx()]; -#ifdef NEON_PLACE_CUDA_DEVICE - if constexpr (Cell::sUseSwirlIndex) { - auto swirl = cell.toSwirl(); - ret.x += swirl.mLocation.x; - ret.y += swirl.mLocation.y; - ret.z += swirl.mLocation.z; - } else { -#endif - const int sp = (mLevel == 0) ? 1 : mSpacing[mLevel - 1]; - ret.x += gidx.getInDataBlockIdx().x * sp; - ret.y += gidx.getInDataBlockIdx().y * sp; - ret.z += gidx.getInDataBlockIdx().z * sp; -#ifdef NEON_PLACE_CUDA_DEVICE - } -#endif + ret.x += gidx.mInDataBlockIdx.x * sp; + ret.y += gidx.mInDataBlockIdx.y * sp; + ret.z += gidx.mInDataBlockIdx.z * sp; return ret; } @@ -93,31 +78,32 @@ inline NEON_CUDA_HOST_DEVICE auto mPartition::childID(const Idx& gidx) con // return the child block id corresponding to this cell // the child block id lives at level mLevel-1 - const uint32_t childPitch = - // stride across all block before cell's block - gidx.getDataBlockIdx() * - gidx.memBlock3DSize.x * gidx.memBlock3DSize * gidx.memBlock3DSize + - // stride within the block - gidx.getInDataBlockIdx().x + - gidx.getInDataBlockIdx().y * gidx.memBlock3DSize.x + - gidx.getInDataBlockIdx().z * gidx.memBlock3DSize.x * gidx.memBlock3DSize.y; - - return mChildBlockID[childPitch]; + //const uint32_t childPitch = + // // stride across all block before cell's block + // gidx.getDataBlockIdx() * + // gidx.memBlock3DSize.x * gidx.memBlock3DSize * gidx.memBlock3DSize + + // // stride within the block + // gidx.getInDataBlockIdx().x + + // gidx.getInDataBlockIdx().y * gidx.memBlock3DSize.x + + // gidx.getInDataBlockIdx().z * gidx.memBlock3DSize.x * gidx.memBlock3DSize.y; + // + //return mChildBlockID[childPitch]; + return std::numeric_limits::max(); } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::getChild(const Idx& parent_cell, - Neon::int8_3d child) const -> Idx +NEON_CUDA_HOST_DEVICE inline auto mPartition::getChild(const Idx& parent_cell, + NghIdx child) const -> Idx { Idx childCell; - if (hasChildren(parent_cell)) { - childCell.getDataBlockIdx = childID(parent_cell); - childCell.mBlockSize = mRefFactors[mLevel - 1]; - childCell.mLocation.x = child.x; - childCell.mLocation.y = child.y; - childCell.mLocation.z = child.z; - childCell.mIsActive = childCell.computeIsActive(mMaskLowerLevel); - } + childCell.mDataBlockIdx = std::numeric_limits::max(); + + //if (hasChildren(parent_cell)) { + // childCell.mDataBlockIdx = childID(parent_cell); + // childCell.mInDataBlockIdx.x = child.x; + // childCell.mInDataBlockIdx.y = child.y; + // childCell.mInDataBlockIdx.z = child.z; + //} return childCell; } @@ -126,37 +112,37 @@ template NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& childCell, int card) -> T& { - return mMemChild[this->pitch(childCell, card)]; + return mMemChild[this->helpGetPitch(childCell, card)]; } template NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& childCell, int card) const -> const T& { - return mMemChild[this->pitch(childCell, card)]; + return mMemChild[this->helpGetPitch(childCell, card)]; } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& parent_cell, - Neon::int8_3d child, - int card, - const T& alternativeVal) const -> NghData +NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& parent_cell, + const NghIdx child, + int card, + const T& alternativeVal) const -> NghData { NghData ret; - ret.value = alternativeVal; - ret.isValid = false; - if (!parent_cell.mIsActive || !hasChildren(parent_cell)) { - return ret; - } + ret.mData = alternativeVal; + ret.mIsValid = false; + //if (!parent_cell.mIsActive || !hasChildren(parent_cell)) { + // return ret; + //} - Idx child_cell = getChild(parent_cell, child); + //Idx child_cell = getChild(parent_cell, child); - if (!child_cell.mIsActive) { - return ret; - } + //if (!child_cell.mIsActive) { + // return ret; + //} - ret.isValid = true; - ret.value = childVal(child_cell, card); + //ret.mIsValid = true; + //ret.mData = childVal(child_cell, card); return ret; } @@ -167,21 +153,21 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Idx& cell) if (mMemChild == nullptr || mMaskLowerLevel == nullptr || mLevel == 0) { return false; } - if (childID(cell) == std::numeric_limits::max()) { + if (childID(cell) == std::numeric_limits::max()) { return false; } return true; } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Idx& cell, const Neon::int8_3d nghDir) const -> bool +NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Idx& cell, const NghIdx nghDir) const -> bool { if (mMemChild == nullptr || mMaskLowerLevel == nullptr || mLevel == 0) { return false; } Idx nghCell = this->getNghCell(cell, nghDir, this->getneighbourBlocksPtr(cell)); - if (!nghCell.isActive()) { + if (!this->isActive(nghCell)) { return false; } return hasChildren(nghCell); @@ -191,11 +177,14 @@ template NEON_CUDA_HOST_DEVICE inline auto mPartition::getParent(const Idx& cell) const -> Idx { Idx parentCell; + parentCell.mDataBlockIdx = std::numeric_limits::max(); if (mMemParent != nullptr) { - parentCell.mBlockID = mParentBlockID[cell.mBlockID]; - parentCell.mLocation = mParentLocalID[cell.mBlockID]; - parentCell.mBlockSize = mRefFactors[mLevel + 1]; - parentCell.mIsActive = true; + parentCell.mDataBlockIdx = mParentBlockID[cell.mDataBlockIdx]; + const Neon::index_3d g = this->getGlobalIndex(cell); + const uint32_t sp = getSpacing(mLevel); + parentCell.mInDataBlockIdx.x = (g.x / sp) % kMemBlockSizeX; + parentCell.mInDataBlockIdx.y = (g.y / sp) % kMemBlockSizeY; + parentCell.mInDataBlockIdx.z = (g.z / sp) % kMemBlockSizeZ; } return parentCell; } @@ -206,7 +195,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::parentVal(const Idx& eId, { auto parentCell = getParent(eId); if (parentCell.isActive()) { - return mMemParent[this->pitch(parentCell, card)]; + return mMemParent[this->helpGetPitch(parentCell, card)]; } } @@ -216,7 +205,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::parentVal(const Idx& eId, { auto parentCell = getParent(eId); if (parentCell.isActive()) { - return mMemParent[this->pitch(parentCell, card)]; + return mMemParent[this->helpGetPitch(parentCell, card)]; } } @@ -230,48 +219,46 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::hasParent(const Idx& cell) c } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Idx& cell, - Neon::int8_3d direction) const -> Idx +NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Idx& cell, + const NghIdx direction) const -> Idx { Idx uncle = getParent(cell); if (uncle.isActive()) { - uncle = this->getNghCell(uncle, direction, (mParentNeighbourBlocks + (26 * uncle.mBlockID))); - uncle.mBlockSize = mRefFactors[mLevel + 1]; - uncle.mIsActive = uncle.mBlockID != std::numeric_limits::max(); - if (uncle.mIsActive) { - uncle.mIsActive = uncle.computeIsActive(mMaskUpperLevel); + uncle = this->helpGetNghIdx(uncle, direction, mParentNeighbourBlocks); + if (!this->isActive(uncle, mMaskUpperLevel)) { + uncle.mDataBlockIdx = std::numeric_limits::max(); } } return uncle; } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::uncleVal(const Idx& cell, - Neon::int8_3d direction, - int card, - const T& alternativeVal) const -> NghData +NEON_CUDA_HOST_DEVICE inline auto mPartition::uncleVal(const Idx& cell, + const NghIdx direction, + int card, + const T& alternativeVal) const -> NghData { NghData ret; - ret.value = alternativeVal; - ret.isValid = false; + ret.mData = alternativeVal; + ret.mIsValid = false; Idx uncle = getUncle(cell, direction); - ret.isValid = uncle.isActive(); - if (ret.isValid) { - ret.value = mMemParent[this->pitch(uncle, card)]; + ret.mIsValid = uncle.isActive(); + if (ret.mIsValid) { + ret.mData = mMemParent[this->helpGetPitch(uncle, card)]; } return ret; } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::uncleVal(const Idx& cell, - Neon::int8_3d direction, - int card) const -> T& +NEON_CUDA_HOST_DEVICE inline auto mPartition::uncleVal(const Idx& cell, + const NghIdx direction, + int card) const -> T& { Idx uncle = getUncle(cell, direction); - assert(uncle.isActive()); - return mMemParent[this->pitch(uncle, card)]; + assert(this->isActive(uncle, mMaskUpperLevel)); + return mMemParent[this->helpGetPitch(uncle, card)]; } } // namespace Neon::domain::details::mGrid \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/xField_imp.h b/libNeonDomain/include/Neon/domain/details/mGrid/xField_imp.h index 25736728..6e134849 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/xField_imp.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/xField_imp.h @@ -79,11 +79,14 @@ auto xField::getPartition(Neon::Execution exec, const Neon::DataView& dataView) const -> const Partition& { - if (exec == Neon::Execution::device) { - return getPartition(Neon::DeviceType::CUDA, idx, dataView); - } if (exec == Neon::Execution::host) { return getPartition(Neon::DeviceType::CPU, idx, dataView); + } else { + if (mData->field.getBackend().runtime() == Neon::Runtime::openmp) { + return getPartition(Neon::DeviceType::CPU, idx, dataView); + } else { + return getPartition(Neon::DeviceType::CUDA, idx, dataView); + } } NEON_THROW_UNSUPPORTED_OPERATION("xField::getPartition() unsupported Execution"); @@ -95,11 +98,14 @@ auto xField::getPartition(Neon::Execution exec, Neon::SetIdx idx, const Neon::DataView& dataView) -> Partition& { - if (exec == Neon::Execution::device) { - return getPartition(Neon::DeviceType::CUDA, idx, dataView); - } if (exec == Neon::Execution::host) { return getPartition(Neon::DeviceType::CPU, idx, dataView); + } else { + if (mData->field.getBackend().runtime() == Neon::Runtime::openmp) { + return getPartition(Neon::DeviceType::CPU, idx, dataView); + } else { + return getPartition(Neon::DeviceType::CUDA, idx, dataView); + } } NEON_THROW_UNSUPPORTED_OPERATION("xField::getPartition() unsupported Execution"); diff --git a/libNeonDomain/src/domain/details/mGrid/mGrid.cpp b/libNeonDomain/src/domain/details/mGrid/mGrid.cpp index 222de396..9859453c 100644 --- a/libNeonDomain/src/domain/details/mGrid/mGrid.cpp +++ b/libNeonDomain/src/domain/details/mGrid/mGrid.cpp @@ -274,8 +274,8 @@ mGrid::mGrid( //parent block ID - mData->mParentBlockID.resize(mData->mDescriptor.getDepth()); - for (int l = 0; l < mData->mDescriptor.getDepth(); ++l) { + mData->mParentBlockID.resize(mData->mDescriptor.getDepth() - 1); + for (int l = 0; l < mData->mDescriptor.getDepth() - 1; ++l) { mData->mParentBlockID[l] = backend.devSet().template newMemSet({Neon::DataUse::HOST_DEVICE}, 1, memOptionsAoS, @@ -314,16 +314,6 @@ mGrid::mGrid( } - //parent local index - mData->mParentLocalID.resize(mData->mDescriptor.getDepth()); - for (int l = 0; l < mData->mDescriptor.getDepth(); ++l) { - mData->mParentLocalID[l] = backend.devSet().template newMemSet({Neon::DataUse::HOST_DEVICE}, - 1, - memOptionsAoS, - mData->grids[l].getBlockViewGrid().getNumActiveCellsPerPartition()); - } - - //descriptor auto descriptorSize = backend.devSet().template newDataSet(); for (int32_t c = 0; c < descriptorSize.cardinality(); ++c) { @@ -432,17 +422,16 @@ mGrid::mGrid( exp << "Something went wrong during constructing mGrid. Can not find the right parent of a block\n"; NEON_THROW(exp); } - mData->mParentBlockID[l].eRef(devID, blockIdx) = parentID.getDataBlockIdx(); - mData->mParentLocalID[l].eRef(devID, blockIdx) = parentID.getInDataBlockIdx(); } }); } if (backend.devType() == Neon::DeviceType::CUDA) { for (int l = 0; l < mData->mDescriptor.getDepth(); ++l) { - mData->mParentBlockID[l].updateDeviceData(backend, 0); - mData->mParentLocalID[l].updateDeviceData(backend, 0); + if (l < mData->mDescriptor.getDepth() - 1) { + mData->mParentBlockID[l].updateDeviceData(backend, 0); + } if (l > 0) { mData->mChildBlockID[l].updateDeviceData(backend, 0); } @@ -514,8 +503,14 @@ auto mGrid::setReduceEngine(Neon::sys::patterns::Engine eng) -> void } } -auto mGrid::getParentsBlockID(int level) const -> const Neon::set::MemSet& +auto mGrid::getParentsBlockID(int level) const -> Neon::set::MemSet& { + if (level >= mData->mDescriptor.getDepth() - 1) { + NeonException exp("mGrid::getParentsBlockID"); + exp << "There is no parent for level " << level << " since the tree depth is " << mData->mDescriptor.getDepth(); + NEON_THROW(exp); + } + return mData->mParentBlockID[level]; } auto mGrid::getChildBlockID(int level) const -> const Neon::set::MemSet& @@ -523,11 +518,6 @@ auto mGrid::getChildBlockID(int level) const -> const Neon::set::MemSetmChildBlockID[level]; } -auto mGrid::getParentLocalID(int level) const -> const Neon::set::MemSet& -{ - return mData->mParentLocalID[level]; -} - auto mGrid::getRefFactors() const -> const Neon::set::MemSet& { return mData->mRefFactors; diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h index 212314ca..6aa427bc 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h @@ -12,7 +12,7 @@ void MultiResParent() SectionX[1] = 16; SectionX[2] = 24; - const Neon::domain::mGridDescriptor descriptor({1, 1, 1}); + Neon::mGridDescriptor<1> descriptor(3); for (auto runtime : {Neon::Runtime::openmp, Neon::Runtime::stream}) { @@ -33,8 +33,7 @@ void MultiResParent() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid111.vtk", false); - + auto XField = grid.newField("XField", 1, -1); auto hasParentField = grid.newField("hasParent", 1, -1); @@ -45,29 +44,31 @@ void MultiResParent() l, [&](const Neon::int32_3d, const int, Type& val) { val = l; - }); + }, + false); hasParentField.forEachActiveCell( l, [&](const Neon::int32_3d, const int, Type& val) { val = -1; - }); + }, + false); } if (bk.runtime() == Neon::Runtime::stream) { XField.updateDeviceData(); hasParentField.updateDeviceData(); } - //XField.ioToVtk("f", "f"); + //XField.ioToVtk("XF", true, true, true, false); for (int level = 0; level < descriptor.getDepth(); ++level) { - auto container = grid.getContainer( + auto container = grid.newContainer( "Parent", level, [&, level](Neon::set::Loader& loader) { auto& xLocal = XField.load(loader, level, Neon::MultiResCompute::MAP); auto& hasParentLocal = hasParentField.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Cell& cell) mutable { + return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Idx& cell) mutable { if (xLocal.hasParent(cell)) { hasParentLocal(cell, 0) = 1; xLocal(cell, 0) = xLocal.parentVal(cell, 0); @@ -97,8 +98,8 @@ void MultiResParent() } else { EXPECT_EQ(val, -1); } - }); - + }, + false); XField.forEachActiveCell( l, @@ -108,7 +109,8 @@ void MultiResParent() } else { EXPECT_EQ(val, l) << "l = " << l << " id = " << id; } - }); + }, + false); } } } @@ -127,7 +129,7 @@ void MultiResAtomicAddParent() const Neon::int32_3d dim(24, 24, 24); const std::vector gpusIds(nGPUs, 0); - const Neon::domain::mGridDescriptor descriptor({1, 1, 1}); + Neon::mGridDescriptor<1> descriptor(3); for (auto runtime : { Neon::Runtime::openmp, @@ -154,7 +156,7 @@ void MultiResAtomicAddParent() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - + auto XField = grid.newField("XField", 1, -1); @@ -175,11 +177,11 @@ void MultiResAtomicAddParent() for (int level = 0; level < descriptor.getDepth(); ++level) { - auto container = grid.getContainer( + auto container = grid.newContainer( "Parent", level, [&, level](Neon::set::Loader& loader) { auto& xLocal = XField.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Cell& cell) mutable { + return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Idx& cell) mutable { if (xLocal.hasParent(cell)) { #ifdef NEON_PLACE_CUDA_DEVICE