Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Multi Resolution LBM #36

Merged
merged 68 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
60106bb
validation code
Ahdhn Mar 20, 2023
2f110bd
update validation data and plots using Re=100
Ahdhn Mar 22, 2023
181b453
use large grid size
Ahdhn Mar 22, 2023
186c942
lower Re
Ahdhn Mar 22, 2023
7c6a5cb
Filter out overlapping cells in forEachActiveCell and update plots
Ahdhn Mar 22, 2023
bf0e583
update uniform plots
Ahdhn Mar 22, 2023
cdbc7e0
organize plotting code
Ahdhn Mar 23, 2023
985fd1d
Allow Q27
Ahdhn Mar 23, 2023
0e77ad5
enable nvtx on windows
Ahdhn Mar 23, 2023
8e8784a
KBC
Ahdhn Mar 29, 2023
9f52727
fix compilation
Ahdhn Apr 11, 2023
69e7ce4
Fix KBC
Ahdhn Apr 13, 2023
72417ec
use KGB
Apr 13, 2023
c8d9d5b
fix
Ahdhn Apr 13, 2023
429faf1
reorg multi-res LBM app
Ahdhn Apr 20, 2023
fcd3d10
fix cudaErrorInvalidDeviceFunction error
Ahdhn Apr 24, 2023
64d09a7
Update lbmMultiRes.cu
Ahdhn Apr 24, 2023
c8beebf
Hardwired KBG
Ahdhn Apr 24, 2023
71c1434
debugging collideBGKUnrolled
Ahdhn May 3, 2023
b0d8b0a
WIP initiate Store from the fine level
Ahdhn May 4, 2023
6b3c005
storeFine seems to work now
Ahdhn May 8, 2023
db1dfee
fix indexing in storeFine
Ahdhn May 9, 2023
6126b3b
minor tweak to in storeCoarse
Ahdhn May 9, 2023
bc9a4d1
averaging for storeFine
Ahdhn May 10, 2023
413bc3b
minor fix
Ahdhn May 10, 2023
f5ab228
verify simulation in code
Ahdhn May 24, 2023
a359238
fix verify
Ahdhn May 24, 2023
8ae50d7
DAT file
Ahdhn May 25, 2023
d7f5015
minor bug fix
Ahdhn May 25, 2023
96e2040
fix post processing bug
Ahdhn May 30, 2023
2b6d96d
tiny change in plotting script
Ahdhn May 30, 2023
6a9ce01
fix bug with collide and lattice indexing
Ahdhn May 31, 2023
d2efe58
update plots
Ahdhn May 31, 2023
4048f43
stream fused explosion
Ahdhn May 31, 2023
3973bff
plot x velocity
Ahdhn May 31, 2023
bf5a534
formatting log
Ahdhn May 31, 2023
7049b94
formatting log
Ahdhn Jun 1, 2023
454ea1b
use relative difference for verification
Ahdhn Jun 1, 2023
1db902b
use consistent variable naming
Ahdhn Jun 1, 2023
fbd298d
Fix bug with storeFine
Ahdhn Jun 5, 2023
5bdf9c5
stream-fused coalescence
Ahdhn Jun 6, 2023
f97419e
stream fused explosion + coalescence
Ahdhn Jun 6, 2023
9c5f4fa
Merge branch 'develop' of https://github.com/Autodesk/Neon into Multi…
Ahdhn Jun 6, 2023
b033a97
re-apply previous changes
Ahdhn Jun 6, 2023
b06444e
re-enable mGrid tests
Ahdhn Jun 6, 2023
2921f79
re-enable mGrid
Ahdhn Jun 6, 2023
c4616e4
WIP fix mgrid
Ahdhn Jun 20, 2023
5158071
Merge branch 'develop' of https://github.com/Autodesk/Neon into Multi…
Ahdhn Jun 20, 2023
f8dea2e
done with integrating mGrid and mField with the refactored bGrid
Ahdhn Jun 26, 2023
2116890
warnings
Ahdhn Jun 26, 2023
9e4802a
isActive in bPartition
Ahdhn Jun 27, 2023
82a27ee
minor fixes in mGird and mField
Ahdhn Jun 27, 2023
2f16698
runtime depth in mGridDescriptor
Ahdhn Jun 27, 2023
19cb395
simplify (and fix) filtering out overlapping between grids of differe…
Ahdhn Jun 27, 2023
c13b48d
MultiResSingleMap is working
Ahdhn Jun 27, 2023
9b681e0
MultiResStencil is working
Ahdhn Jun 27, 2023
c1326a3
fix mPartition
Ahdhn Jul 4, 2023
f51c845
Fix child info
Ahdhn Jul 4, 2023
686d130
multi-res skeleton and demo
Ahdhn Jul 4, 2023
857992a
fix compilation issues with lbmMultiRes
Ahdhn Jul 5, 2023
a9509e0
fix hasChildren
Ahdhn Jul 5, 2023
64b1693
another one
Ahdhn Jul 5, 2023
9e90850
warnings
Ahdhn Jul 5, 2023
83259f9
minor fix to eGrid
Ahdhn Jul 5, 2023
95e379d
Merge branch 'MultiResLBMOpt' of https://github.com/Autodesk/Neon int…
Ahdhn Jul 5, 2023
f8ed659
fix Partition1D
Ahdhn Jul 5, 2023
6efd74f
Fix childID
Ahdhn Jul 6, 2023
6c35453
Fix uncleOffset
Ahdhn Jul 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ add_subdirectory("libNeonDomain")
add_subdirectory("libNeonSkeleton")
#add_subdirectory("libNeonSolver")
#add_subdirectory("tutorials")
#add_subdirectory("apps")
add_subdirectory("apps")
add_subdirectory("benchmarks")


Expand Down
2 changes: 1 addition & 1 deletion VerifyNeonPRWindows.bat
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ git fetch origin refs/pull/%PR%/head:pull_%PR%
git checkout pull_%PR%
mkdir build
cd build
cmake ..
cmake -G "Visual Studio 16 2019" ..
cmake --build . --config Release -j 10
set ctest_filename=CTestNeonWindowsReport.log
ctest --no-compress-output --output-on-failure -T Test --build-config Release --output-log %ctest_filename%
Expand Down
8 changes: 4 additions & 4 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)

add_subdirectory("fractal")
add_subdirectory("lbm")
add_subdirectory("gameOfLife")
add_subdirectory("poisson")
#add_subdirectory("fractal")
#add_subdirectory("lbm")
#add_subdirectory("gameOfLife")
#add_subdirectory("poisson")
add_subdirectory("lbmMultiRes")
2 changes: 1 addition & 1 deletion apps/lbmMultiRes/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)

set (APP_NAME app-lbmMultiRes)
file(GLOB_RECURSE SrcFiles lbmMultiRes.cu)
file(GLOB_RECURSE SrcFiles lbmMultiRes.cu lattice.h init.h postProcess.h util.h coalescence.h collide.h explosion.h stream.h store.h verify.h)

add_executable(${APP_NAME} ${SrcFiles})

Expand Down
51 changes: 51 additions & 0 deletions apps/lbmMultiRes/coalescence.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#pragma once
#include "lattice.h"

template <typename T, int Q>
inline Neon::set::Container coalescence(Neon::domain::mGrid& grid,
const bool fineInitStore,
const int level,
const Neon::domain::mGrid::Field<int>& sumStore,
const Neon::domain::mGrid::Field<T>& fout,
Neon::domain::mGrid::Field<T>& fin)
{
// Initiated by the coarse level (hence "pull"), this function simply read the missing population
// across the interface between coarse<->fine boundary by reading the population prepare during the store()

return grid.newContainer(
"Coalescence_" + std::to_string(level), level,
[&, level, fineInitStore](Neon::set::Loader& loader) {
const auto& pout = fout.load(loader, level, Neon::MultiResCompute::STENCIL);
const auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::STENCIL);
auto& pin = fin.load(loader, level, Neon::MultiResCompute::MAP);

return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
//If this cell has children i.e., it is been refined, than we should not work on it
//because this cell is only there to allow query and not to operate on
const int refFactor = pout.getRefFactor(level);
if (!pin.hasChildren(cell)) {

for (int q = 0; q < Q; ++q) {
const Neon::int8_3d dir = -getDir(q);
if (dir.x == 0 && dir.y == 0 && dir.z == 0) {
continue;
}
//if we have a neighbor at the same level that has been refined, then cell is on
//the interface and this is where we should do the coalescence
if (pin.hasChildren(cell, dir)) {
auto neighbor = pout.getNghData(cell, dir, q);
if (neighbor.mIsValid) {
if (fineInitStore) {
auto ssVal = ss.getNghData(cell, dir, q);
assert(ssVal.mData != 0);
pin(cell, q) = neighbor.mData / static_cast<T>(ssVal.mData * refFactor);
} else {
pin(cell, q) = neighbor.mData / static_cast<T>(refFactor);
}
}
}
}
}
};
});
}
346 changes: 346 additions & 0 deletions apps/lbmMultiRes/collide.h

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions apps/lbmMultiRes/explosion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once
template <typename T, int Q>
inline Neon::set::Container explosion(Neon::domain::mGrid& grid,
int level,
const Neon::domain::mGrid::Field<T>& fout,
Neon::domain::mGrid::Field<T>& fin)
{
// Initiated by the fine level (hence "pull"), this function performs a coarse (level+1) to
// fine (level) communication or "explosion" by simply distributing copies of coarse grid onto the fine grid.
// In other words, this function updates the "halo" cells of the fine level by making copies of the coarse cell
// values.


return grid.newContainer(
"Explosion_" + std::to_string(level), level,
[&, level](Neon::set::Loader& loader) {
const auto& pout = fout.load(loader, level, Neon::MultiResCompute::STENCIL_UP);
auto pin = fin.load(loader, level, Neon::MultiResCompute::MAP);

return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
//If this cell has children i.e., it is been refined, then we should not work on it
//because this cell is only there to allow query and not to operate on
if (!pin.hasChildren(cell)) {
for (int8_t q = 0; q < Q; ++q) {
const Neon::int8_3d dir = -getDir(q);
if (dir.x == 0 && dir.y == 0 && dir.z == 0) {
continue;
}

//if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction
//we want to only work on cells that interface with L+1 (coarse) cell along q
if (!pin.hasChildren(cell, dir)) {

//try to query the cell along this direction (opposite of the population direction) as we do
//in 'normal' streaming
auto neighborCell = pout.helpGetNghIdx(cell, dir);
if (!neighborCell.isActive()) {
//only if we can not do normal streaming, then we may have a coarser neighbor from which
//we can read this pop

//get the uncle direction/offset i.e., the neighbor of the cell's parent
//this direction/offset is wrt to the cell's parent
Neon::int8_3d uncleDir = uncleOffset(cell.mInDataBlockIdx, dir);

auto uncleLoc = pout.getUncle(cell, uncleDir);

auto uncle = pout.uncleVal(cell, uncleDir, q, T(0));
if (uncle.mIsValid) {
pin(cell, q) = uncle.mData;
}
}
}
}
}
};
});
}
167 changes: 167 additions & 0 deletions apps/lbmMultiRes/init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#pragma once
#include "Neon/Neon.h"
#include "Neon/domain/mGrid.h"

#include "lattice.h"


template <typename T, int Q>
uint32_t init(Neon::domain::mGrid& grid,
Neon::domain::mGrid::Field<int>& sumStore,
Neon::domain::mGrid::Field<T>& fin,
Neon::domain::mGrid::Field<T>& fout,
Neon::domain::mGrid::Field<CellType>& cellType,
Neon::domain::mGrid::Field<T>& vel,
Neon::domain::mGrid::Field<T>& rho,
const Neon::double_3d ulid)
{
uint32_t* dNumActiveVoxels = nullptr;

if (grid(0).getBackend().runtime() == Neon::Runtime::stream) {
cudaMalloc((void**)&dNumActiveVoxels, sizeof(uint32_t));
cudaMemset(dNumActiveVoxels, 0, sizeof(uint32_t));
} else {
dNumActiveVoxels = (uint32_t*)malloc(sizeof(uint32_t));
}

const Neon::index_3d gridDim = grid.getDimension();

//init fields
for (int level = 0; level < grid.getDescriptor().getDepth(); ++level) {

auto container =
grid.newContainer(
"Init_" + std::to_string(level), level,
[&fin, &fout, &cellType, &vel, &rho, &sumStore, level, gridDim, ulid, dNumActiveVoxels](Neon::set::Loader& loader) {
auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP);
auto& out = fout.load(loader, level, Neon::MultiResCompute::MAP);
auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP);
auto& u = vel.load(loader, level, Neon::MultiResCompute::MAP);
auto& rh = rho.load(loader, level, Neon::MultiResCompute::MAP);
auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::MAP);

return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
//velocity and density
u(cell, 0) = 0;
u(cell, 1) = 0;
u(cell, 2) = 0;
rh(cell, 0) = 0;
type(cell, 0) = CellType::bulk;

for (int q = 0; q < Q; ++q) {
ss(cell, q) = 0;
in(cell, q) = 0;
out(cell, q) = 0;
}

#ifdef NEON_PLACE_CUDA_DEVICE
atomicAdd(dNumActiveVoxels, 1);
#else
#pragma omp atomic
dNumActiveVoxels[0] += 1;
#endif

if (!in.hasChildren(cell)) {
const Neon::index_3d idx = in.getGlobalIndex(cell);

//pop
for (int q = 0; q < Q; ++q) {
T pop_init_val = latticeWeights[q];

if (level == 0) {
if (idx.x == 0 || idx.x == gridDim.x - 1 ||
idx.y == 0 || idx.y == gridDim.y - 1 ||
idx.z == 0 || idx.z == gridDim.z - 1) {
type(cell, 0) = CellType::bounceBack;

if (idx.y == gridDim.y - 1) {
type(cell, 0) = CellType::movingWall;
pop_init_val = 0;
for (int d = 0; d < 3; ++d) {
pop_init_val += latticeVelocity[q][d] * ulid.v[d];
}
pop_init_val *= -6. * latticeWeights[q];
} else {
pop_init_val = 0;
}
}
}

out(cell, q) = pop_init_val;
in(cell, q) = pop_init_val;
}
} else {
in(cell, 0) = 0;
out(cell, 0) = 0;
}
};
});

container.run(0);
}


//init sumStore
for (int level = 0; level < grid.getDescriptor().getDepth() - 1; ++level) {

auto container =
grid.newContainer(
"InitSumStore_" + std::to_string(level), level,
[&sumStore, level, gridDim](Neon::set::Loader& loader) {
auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::STENCIL_UP);

return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
if (ss.hasParent(cell)) {

for (int8_t q = 0; q < Q; ++q) {
const Neon::int8_3d qDir = getDir(q);
if (qDir.x == 0 && qDir.y == 0 && qDir.z == 0) {
continue;
}

const Neon::int8_3d uncleDir = uncleOffset(cell.mInDataBlockIdx, qDir);

const auto cn = ss.helpGetNghIdx(cell, uncleDir);

if (!cn.isActive()) {

const auto uncle = ss.getUncle(cell, uncleDir);
if (uncle.isActive()) {

//locate the coarse cell where we should store this cell info
const Neon::int8_3d CsDir = uncleDir - qDir;

const auto cs = ss.getUncle(cell, CsDir);

if (cs.isActive()) {

#ifdef NEON_PLACE_CUDA_DEVICE
atomicAdd(&ss.uncleVal(cell, CsDir, q), int(1));
#else
#pragma omp atomic
ss.uncleVal(cell, CsDir, q) += 1;
#endif
}
}
}
}
}
};
});

container.run(0);
}


grid.getBackend().syncAll();

uint32_t hNumActiveVoxels = 0;
if (grid(0).getBackend().runtime() == Neon::Runtime::stream) {
cudaMemcpy(&hNumActiveVoxels, dNumActiveVoxels, sizeof(uint32_t), cudaMemcpyDeviceToHost);
cudaFree(dNumActiveVoxels);
} else {
hNumActiveVoxels = dNumActiveVoxels[0];
}

return hNumActiveVoxels;
}
Loading