Autodesk · Ahdhn · Jul 6, 2023 · Mar 20, 2023 · Mar 22, 2023 · Mar 22, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -80,7 +80,7 @@ add_subdirectory("libNeonDomain")
 add_subdirectory("libNeonSkeleton")
 #add_subdirectory("libNeonSolver")
 #add_subdirectory("tutorials")
-#add_subdirectory("apps")
+add_subdirectory("apps")
 add_subdirectory("benchmarks")
 
 

diff --git a/VerifyNeonPRWindows.bat b/VerifyNeonPRWindows.bat
@@ -11,7 +11,7 @@ git fetch origin refs/pull/%PR%/head:pull_%PR%
 git checkout  pull_%PR%
 mkdir build
 cd build
-cmake ..
+cmake -G "Visual Studio 16 2019" ..
 cmake --build . --config Release -j 10
 set ctest_filename=CTestNeonWindowsReport.log
 ctest --no-compress-output --output-on-failure -T Test --build-config Release --output-log %ctest_filename%

diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
 
-add_subdirectory("fractal")
-add_subdirectory("lbm")
-add_subdirectory("gameOfLife")
-add_subdirectory("poisson")
+#add_subdirectory("fractal")
+#add_subdirectory("lbm")
+#add_subdirectory("gameOfLife")
+#add_subdirectory("poisson")
 add_subdirectory("lbmMultiRes")
diff --git a/apps/lbmMultiRes/CMakeLists.txt b/apps/lbmMultiRes/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
 
 set (APP_NAME app-lbmMultiRes)
-file(GLOB_RECURSE SrcFiles lbmMultiRes.cu)
+file(GLOB_RECURSE SrcFiles lbmMultiRes.cu lattice.h init.h postProcess.h util.h coalescence.h collide.h explosion.h stream.h store.h verify.h)
 
 add_executable(${APP_NAME} ${SrcFiles})
 

diff --git a/apps/lbmMultiRes/coalescence.h b/apps/lbmMultiRes/coalescence.h
@@ -0,0 +1,51 @@
+#pragma once
+#include "lattice.h"
+
+// Coarse-initiated "pull" at `level`: an unrefined cell next to a refined same-level neighbor reads the
+// population prepared there during store() and writes it to `fin`, scaled down by the refinement factor.
+template <typename T, int Q>
+inline Neon::set::Container coalescence(Neon::domain::mGrid&                   grid,
+                                        const bool                             fineInitStore,
+                                        const int                              level,
+                                        const Neon::domain::mGrid::Field<int>& sumStore,
+                                        const Neon::domain::mGrid::Field<T>&   fout,
+                                        Neon::domain::mGrid::Field<T>&         fin)
+{
+    return grid.newContainer(
+        "Coalescence_" + std::to_string(level), level,
+        [&, level, fineInitStore](Neon::set::Loader& loader) {
+            const auto& pout = fout.load(loader, level, Neon::MultiResCompute::STENCIL);
+            const auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::STENCIL);
+            auto&       pin = fin.load(loader, level, Neon::MultiResCompute::MAP);
+
+            return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
+                const int refFactor = pout.getRefFactor(level);
+
+                // A refined cell is only there to be queried; nothing is written to it here.
+                if (pin.hasChildren(cell)) {
+                    return;
+                }
+
+                for (int q = 0; q < Q; ++q) {
+                    const Neon::int8_3d dir = -getDir(q);
+                    const bool          isCenter = (dir.x == 0 && dir.y == 0 && dir.z == 0);
+                    // Only a same-level neighbor that has been refined puts `cell` on the interface.
+                    if (isCenter || !pin.hasChildren(cell, dir)) {
+                        continue;
+                    }
+                    auto neighbor = pout.getNghData(cell, dir, q);
+                    if (!neighbor.mIsValid) {
+                        continue;
+                    }
+
+                    if (fineInitStore) {
+                        auto ssVal = ss.getNghData(cell, dir, q);
+                        assert(ssVal.mData != 0);
+                        pin(cell, q) = neighbor.mData / static_cast<T>(ssVal.mData * refFactor);
+                    } else {
+                        pin(cell, q) = neighbor.mData / static_cast<T>(refFactor);
+                    }
+                }
+            };
+        });
+}
diff --git a/apps/lbmMultiRes/collide.h b/apps/lbmMultiRes/collide.h
diff --git a/apps/lbmMultiRes/explosion.h b/apps/lbmMultiRes/explosion.h
@@ -0,0 +1,57 @@
+#pragma once
+/// Builds the container performing the coarse (level+1) to fine (level) communication, or "explosion".
+/// Initiated by the fine level (a "pull"): an unrefined fine cell that cannot stream a population from a
+/// same-level neighbor copies that population from its uncle (the neighbor of the cell's parent).
+/// @param grid  multi-resolution grid used to create the container
+/// @param level fine level on which the container is created
+/// @param fout  populations read through the uncle access (loaded with STENCIL_UP)
+/// @param fin   populations written at `level` (loaded with MAP)
+/// @return the container; this function creates it but does not run it
+template <typename T, int Q>
+inline Neon::set::Container explosion(Neon::domain::mGrid&                 grid,
+                                      int                                  level,
+                                      const Neon::domain::mGrid::Field<T>& fout,
+                                      Neon::domain::mGrid::Field<T>&       fin)
+{
+    return grid.newContainer(
+        "Explosion_" + std::to_string(level), level,
+        [&, level](Neon::set::Loader& loader) {
+            const auto& pout = fout.load(loader, level, Neon::MultiResCompute::STENCIL_UP);
+            auto        pin = fin.load(loader, level, Neon::MultiResCompute::MAP);
+
+            return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
+                //If this cell has children i.e., it has been refined, then we should not work on it
+                //because this cell is only there to allow query and not to operate on
+                if (!pin.hasChildren(cell)) {
+                    for (int8_t q = 0; q < Q; ++q) {
+                        const Neon::int8_3d dir = -getDir(q);
+                        if (dir.x == 0 && dir.y == 0 && dir.z == 0) {
+                            continue;
+                        }
+
+                        //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction
+                        //we want to only work on cells that interface with L+1 (coarse) cell along q
+                        if (!pin.hasChildren(cell, dir)) {
+                            //try to query the cell along this direction (opposite of the population direction) as we do
+                            //in 'normal' streaming
+                            auto neighborCell = pout.helpGetNghIdx(cell, dir);
+                            if (!neighborCell.isActive()) {
+                                //only if we can not do normal streaming, then we may have a coarser neighbor from which
+                                //we can read this pop
+
+                                //get the uncle direction/offset i.e., the neighbor of the cell's parent
+                                //this direction/offset is wrt the cell's parent
+                                Neon::int8_3d uncleDir = uncleOffset(cell.mInDataBlockIdx, dir);
+
+                                //uncleVal() reports validity through mIsValid, so no separate getUncle() lookup is needed
+                                auto uncle = pout.uncleVal(cell, uncleDir, q, T(0));
+                                if (uncle.mIsValid) {
+                                    pin(cell, q) = uncle.mData;
+                                }
+                            }
+                        }
+                    }
+                }
+            };
+        });
+}
diff --git a/apps/lbmMultiRes/init.h b/apps/lbmMultiRes/init.h
@@ -0,0 +1,167 @@
+#pragma once
+#include "Neon/Neon.h"
+#include "Neon/domain/mGrid.h"
+
+#include "lattice.h"
+
+
+/// Initializes all fields of the multi-resolution LBM setup and counts the visited cells.
+///
+/// Pass 1 (every level): zeroes vel, rho, sumStore, fin and fout, tags each cell as bulk, then seeds the
+/// populations of unrefined cells. On level 0, cells on the domain faces become bounceBack, except the
+/// y-max face which becomes movingWall with populations derived from `ulid`.
+/// Pass 2 (every level but the last): increments sumStore on the coarse cells across the interface.
+///
+/// @return the number of cells visited by pass 1, summed over all levels
+template <typename T, int Q>
+uint32_t init(Neon::domain::mGrid&                  grid,
+              Neon::domain::mGrid::Field<int>&      sumStore,
+              Neon::domain::mGrid::Field<T>&        fin,
+              Neon::domain::mGrid::Field<T>&        fout,
+              Neon::domain::mGrid::Field<CellType>& cellType,
+              Neon::domain::mGrid::Field<T>&        vel,
+              Neon::domain::mGrid::Field<T>&        rho,
+              const Neon::double_3d                 ulid)
+{
+    const bool isStream = grid(0).getBackend().runtime() == Neon::Runtime::stream;
+
+    //one counter incremented atomically by every visited cell; allocated with cudaMalloc on the stream runtime
+    uint32_t* dNumActiveVoxels = nullptr;
+    if (isStream) {
+        cudaMalloc(reinterpret_cast<void**>(&dNumActiveVoxels), sizeof(uint32_t));
+        cudaMemset(dNumActiveVoxels, 0, sizeof(uint32_t));
+    } else {
+        dNumActiveVoxels = static_cast<uint32_t*>(malloc(sizeof(uint32_t)));
+        //malloc does not clear memory: start the host counter from zero just like the cudaMemset above
+        dNumActiveVoxels[0] = 0;
+    }
+
+    const Neon::index_3d gridDim = grid.getDimension();
+
+    //init fields
+    for (int level = 0; level < grid.getDescriptor().getDepth(); ++level) {
+        auto container =
+            grid.newContainer(
+                "Init_" + std::to_string(level), level,
+                [&fin, &fout, &cellType, &vel, &rho, &sumStore, level, gridDim, ulid, dNumActiveVoxels](Neon::set::Loader& loader) {
+                    auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP);
+                    auto& out = fout.load(loader, level, Neon::MultiResCompute::MAP);
+                    auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP);
+                    auto& u = vel.load(loader, level, Neon::MultiResCompute::MAP);
+                    auto& rh = rho.load(loader, level, Neon::MultiResCompute::MAP);
+                    auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::MAP);
+
+                    return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
+                        //velocity and density
+                        u(cell, 0) = 0;
+                        u(cell, 1) = 0;
+                        u(cell, 2) = 0;
+                        rh(cell, 0) = 0;
+                        type(cell, 0) = CellType::bulk;
+
+                        for (int q = 0; q < Q; ++q) {
+                            ss(cell, q) = 0;
+                            in(cell, q) = 0;
+                            out(cell, q) = 0;
+                        }
+
+#ifdef NEON_PLACE_CUDA_DEVICE
+                        atomicAdd(dNumActiveVoxels, 1);
+#else
+#pragma omp atomic
+                        dNumActiveVoxels[0] += 1;
+#endif
+
+                        if (!in.hasChildren(cell)) {
+                            const Neon::index_3d idx = in.getGlobalIndex(cell);
+
+                            //pop
+                            for (int q = 0; q < Q; ++q) {
+                                T pop_init_val = latticeWeights[q];
+                                if (level == 0) {
+                                    if (idx.x == 0 || idx.x == gridDim.x - 1 ||
+                                        idx.y == 0 || idx.y == gridDim.y - 1 ||
+                                        idx.z == 0 || idx.z == gridDim.z - 1) {
+                                        type(cell, 0) = CellType::bounceBack;
+                                        if (idx.y == gridDim.y - 1) {
+                                            type(cell, 0) = CellType::movingWall;
+                                            pop_init_val = 0;
+                                            for (int d = 0; d < 3; ++d) {
+                                                pop_init_val += latticeVelocity[q][d] * ulid.v[d];
+                                            }
+                                            pop_init_val *= -6. * latticeWeights[q];
+                                        } else {
+                                            pop_init_val = 0;
+                                        }
+                                    }
+                                }
+
+                                out(cell, q) = pop_init_val;
+                                in(cell, q) = pop_init_val;
+                            }
+                        } else {
+                            in(cell, 0) = 0;
+                            out(cell, 0) = 0;
+                        }
+                    };
+                });
+
+        container.run(0);
+    }
+
+    //init sumStore
+    for (int level = 0; level < grid.getDescriptor().getDepth() - 1; ++level) {
+        auto container =
+            grid.newContainer(
+                "InitSumStore_" + std::to_string(level), level,
+                [&sumStore, level](Neon::set::Loader& loader) {
+                    auto& ss = sumStore.load(loader, level, Neon::MultiResCompute::STENCIL_UP);
+
+                    return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::mGrid::Idx& cell) mutable {
+                        if (ss.hasParent(cell)) {
+                            for (int8_t q = 0; q < Q; ++q) {
+                                const Neon::int8_3d qDir = getDir(q);
+                                if (qDir.x == 0 && qDir.y == 0 && qDir.z == 0) {
+                                    continue;
+                                }
+
+                                const Neon::int8_3d uncleDir = uncleOffset(cell.mInDataBlockIdx, qDir);
+                                const auto cn = ss.helpGetNghIdx(cell, uncleDir);
+                                if (!cn.isActive()) {
+                                    const auto uncle = ss.getUncle(cell, uncleDir);
+                                    if (uncle.isActive()) {
+                                        //locate the coarse cell where we should store this cell info
+                                        const Neon::int8_3d CsDir = uncleDir - qDir;
+                                        const auto cs = ss.getUncle(cell, CsDir);
+                                        if (cs.isActive()) {
+#ifdef NEON_PLACE_CUDA_DEVICE
+                                            atomicAdd(&ss.uncleVal(cell, CsDir, q), int(1));
+#else
+#pragma omp atomic
+                                            ss.uncleVal(cell, CsDir, q) += 1;
+#endif
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    };
+                });
+
+        container.run(0);
+    }
+
+    grid.getBackend().syncAll();
+
+    uint32_t hNumActiveVoxels = 0;
+    if (isStream) {
+        cudaMemcpy(&hNumActiveVoxels, dNumActiveVoxels, sizeof(uint32_t), cudaMemcpyDeviceToHost);
+        cudaFree(dNumActiveVoxels);
+    } else {
+        hNumActiveVoxels = dNumActiveVoxels[0];
+        //the host counter was obtained from malloc above; release it to avoid leaking it on every call
+        free(dNumActiveVoxels);
+    }
+
+    return hNumActiveVoxels;
+}