From f51c8453fce5fb20f045aa23e32c36ef5434c77a Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 4 Jul 2023 16:56:31 -0400 Subject: [PATCH] Fix child info --- .../Neon/domain/details/bGrid/bIndex.h | 4 +- .../Neon/domain/details/mGrid/mPartition.h | 40 ++++++------ .../domain/details/mGrid/mPartition_imp.h | 61 ++++++++----------- .../src/domain/details/mGrid/mGrid.cpp | 15 +++-- .../unit/sUt_multiRes/src/MultiResChild.h | 14 ++--- 5 files changed, 63 insertions(+), 71 deletions(-) diff --git a/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h b/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h index 69f8bd42..e8b93a38 100644 --- a/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h +++ b/libNeonDomain/include/Neon/domain/details/bGrid/bIndex.h @@ -102,7 +102,7 @@ class bIndex NEON_CUDA_HOST_DEVICE inline auto getDataBlockIdx() const -> DataBlockIdx const&; NEON_CUDA_HOST_DEVICE inline auto setInDataBlockIdx(InDataBlockIdx const&) -> void; NEON_CUDA_HOST_DEVICE inline auto setDataBlockIdx(DataBlockIdx const&) -> void; - NEON_CUDA_HOST_DEVICE inline auto isActive() -> bool; + NEON_CUDA_HOST_DEVICE inline auto isActive() const -> bool; // the local index within the block InDataBlockIdx mInDataBlockIdx; DataBlockIdx mDataBlockIdx{}; @@ -132,7 +132,7 @@ NEON_CUDA_HOST_DEVICE auto bIndex::getInDataBlockIdx() const -> const bI } template -NEON_CUDA_HOST_DEVICE auto bIndex::isActive() -> bool +NEON_CUDA_HOST_DEVICE auto bIndex::isActive() const -> bool { return mDataBlockIdx != std::numeric_limits::max(); } diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h index 11e246f3..ff8e5b08 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition.h @@ -38,42 +38,42 @@ class mPartition : public Neon::domain::details::bGrid::bPartition NghData; /** * Get a cell that represents the child of a parent cell - * @param parent_cell the parent cell that its child is requested + * @param parentCell the parent cell that its child is requested * @param child the child 3d local index relative to the parent */ - NEON_CUDA_HOST_DEVICE inline auto getChild(const Idx& parent_cell, + NEON_CUDA_HOST_DEVICE inline auto getChild(const Idx& parentCell, NghIdx child) const -> Idx; diff --git a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h index 1fbcb68c..69ce1c8e 100644 --- a/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/details/mGrid/mPartition_imp.h @@ -24,12 +24,12 @@ mPartition::mPartition(int level, int cardinality, Idx::DataBlockIdx* neighbourBlocks, Neon::int32_3d* origin, - uint32_t* parentBlockID, + Idx::DataBlockIdx* parentBlockID, MaskT* mask, MaskT* maskLowerLevel, MaskT* maskUpperLevel, - uint32_t* childBlockID, - uint32_t* parentNeighbourBlocks, + Idx::DataBlockIdx* childBlockID, + Idx::DataBlockIdx* parentNeighbourBlocks, NghIdx* stencilNghIndex, int* refFactors, int* spacing) @@ -75,35 +75,28 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::getSpacing(const int level) template inline NEON_CUDA_HOST_DEVICE auto mPartition::childID(const Idx& gidx) const -> uint32_t { - // return the child block id corresponding to this cell - // the child block id lives at level mLevel-1 - - //const uint32_t childPitch = - // // stride across all block before cell's block - // gidx.getDataBlockIdx() * - // gidx.memBlock3DSize.x * gidx.memBlock3DSize * gidx.memBlock3DSize + - // // stride within the block - // gidx.getInDataBlockIdx().x + - // gidx.getInDataBlockIdx().y * gidx.memBlock3DSize.x + - // gidx.getInDataBlockIdx().z * gidx.memBlock3DSize.x * gidx.memBlock3DSize.y; - // - //return mChildBlockID[childPitch]; - return std::numeric_limits::max(); + // return the child block id corresponding to this gidx + // + // gidx.mDataBlockIdx * kMemBlockSizeX * kMemBlockSizeY * kMemBlockSizeZ + + // (i + j * kUserBlockSizeX + k * kUserBlockSizeX * kUserBlockSizeY) * kUserBlockSizeX* kUserBlockSizeY* kUserBlockSizeZ + + // x + y* refFactor + z* refFactor* refFactor + return mChildBlockID[this->helpGetPitch(gidx, 0)]; } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::getChild(const Idx& parent_cell, +NEON_CUDA_HOST_DEVICE inline auto mPartition::getChild(const Idx& parentCell, NghIdx child) const -> Idx { Idx childCell; childCell.mDataBlockIdx = std::numeric_limits::max(); - //if (hasChildren(parent_cell)) { - // childCell.mDataBlockIdx = childID(parent_cell); - // childCell.mInDataBlockIdx.x = child.x; - // childCell.mInDataBlockIdx.y = child.y; - // childCell.mInDataBlockIdx.z = child.z; - //} + if (hasChildren(parentCell)) { + childCell.mDataBlockIdx = childID(parentCell); + int ref = getRefFactor(mLevel); + childCell.mInDataBlockIdx.x = (ref * parentCell.mInDataBlockIdx.x + child.x) % kMemBlockSizeX; + childCell.mInDataBlockIdx.y = (ref * parentCell.mInDataBlockIdx.y + child.y) % kMemBlockSizeY; + childCell.mInDataBlockIdx.z = (ref * parentCell.mInDataBlockIdx.z + child.z) % kMemBlockSizeZ; + } return childCell; } @@ -123,7 +116,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& childCel } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& parent_cell, +NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& parentCell, const NghIdx child, int card, const T& alternativeVal) const -> NghData @@ -131,18 +124,18 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Idx& parent NghData ret; ret.mData = alternativeVal; ret.mIsValid = false; - //if (!parent_cell.mIsActive || !hasChildren(parent_cell)) { - // return ret; - //} + if (!parentCell.isActive() || !hasChildren(parentCell)) { + return ret; + } - //Idx child_cell = getChild(parent_cell, child); + Idx childCell = getChild(parentCell, child); - //if (!child_cell.mIsActive) { - // return ret; - //} + if (!childCell.isActive()) { + return ret; + } - //ret.mIsValid = true; - //ret.mData = childVal(child_cell, card); + ret.mIsValid = true; + ret.mData = childVal(childCell, card); return ret; } diff --git a/libNeonDomain/src/domain/details/mGrid/mGrid.cpp b/libNeonDomain/src/domain/details/mGrid/mGrid.cpp index 9859453c..dcab0d16 100644 --- a/libNeonDomain/src/domain/details/mGrid/mGrid.cpp +++ b/libNeonDomain/src/domain/details/mGrid/mGrid.cpp @@ -390,16 +390,15 @@ mGrid::mGrid( auto [setIdx, childBlockID] = mData->grids[l - 1].helpGetSetIdxAndGridIdx(childId); + uint32_t pitch = blockIdx * kMemBlockSizeX * kMemBlockSizeY * kMemBlockSizeZ + + (i * kUserBlockSizeX + x) + + (j * kUserBlockSizeY + y) * kMemBlockSizeY + + (k * kUserBlockSizeZ + z) * kMemBlockSizeY * kMemBlockSizeZ; + if (setIdx.idx() == -1) { - mData->mChildBlockID[l].eRef(devID, - blockIdx * kMemBlockSizeX * kMemBlockSizeY * kMemBlockSizeZ + - (i + j * kUserBlockSizeX + k * kUserBlockSizeX * kUserBlockSizeY) * kUserBlockSizeX * kUserBlockSizeY * kUserBlockSizeZ + - x + y * refFactor + z * refFactor * refFactor) = std::numeric_limits::max(); + mData->mChildBlockID[l].eRef(devID, pitch) = std::numeric_limits::max(); } else { - mData->mChildBlockID[l].eRef(devID, - blockIdx * kMemBlockSizeX * kMemBlockSizeY * kMemBlockSizeZ + - (i + j * kUserBlockSizeX + k * kUserBlockSizeX * kUserBlockSizeY) * kUserBlockSizeX * kUserBlockSizeY * kUserBlockSizeZ + - x + y * refFactor + z * refFactor * refFactor) = childBlockID.getDataBlockIdx(); + mData->mChildBlockID[l].eRef(devID, pitch) = childBlockID.getDataBlockIdx(); } } } diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h index ffb95b87..e6a6be75 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h @@ -7,9 +7,9 @@ void MultiResChild() const Neon::index_3d dim(24, 24, 24); const std::vector gpuIds(nGPUs, 0); - const Neon::domain::mGridDescriptor descriptor({1, 1, 1}); + Neon::mGridDescriptor<1> descriptor(3); - for (auto runtime : {Neon::Runtime::openmp, Neon::Runtime::stream}) { + for (auto runtime : {Neon::Runtime::openmp /*, Neon::Runtime::stream*/}) { auto bk = Neon::Backend(gpuIds, runtime); @@ -32,7 +32,7 @@ void MultiResChild() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - + auto XField = grid.newField("XField", 1, -1); auto isRefinedField = grid.newField("isRefined", 1, -1); @@ -59,18 +59,18 @@ void MultiResChild() for (int level = 0; level < descriptor.getDepth(); ++level) { - auto container = grid.getContainer( + auto container = grid.newContainer( "hasChildren", level, [&, level, descriptor](Neon::set::Loader& loader) { auto& xLocal = XField.load(loader, level, Neon::MultiResCompute::MAP); auto& isRefinedLocal = isRefinedField.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Cell& cell) mutable { + return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Idx& cell) mutable { if (xLocal.hasChildren(cell)) { isRefinedLocal(cell, 0) = 1; - Neon::index_3d cellOrigin = xLocal.mapToGlobal(cell); + Neon::index_3d cellOrigin = xLocal.getGlobalIndex(cell); const int refFactor = xLocal.getRefFactor(level - 1); @@ -136,4 +136,4 @@ TEST(MultiRes, Child) if (Neon::sys::globalSpace::gpuSysObjStorage.numDevs() > 0) { MultiResChild(); } -} \ No newline at end of file +}