Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dgridSoA #41

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
aacf68e
Adding exception for arrayOfStructure option for bGrid.
massimim Jun 9, 2023
18f2d72
Some documentation to bGrid.
massimim Jun 15, 2023
b81c423
bGrid: API documentation and refactoring of the template API.
massimim Jun 15, 2023
d82e985
Cleaning up naming for the BlockViewGrid
massimim Jun 15, 2023
9e29f8e
bGrid - introducing the concept of BlockView and refactoring the bitm…
massimim Jun 15, 2023
cdcdc0d
bGrid - fixing multi-GPU
massimim Jun 16, 2023
54b508d
Merge branch 'bGrid' into bGrid-newTemaplateAPI
massimim Jun 16, 2023
ea82dfc
Adding scripts
massimim Jun 16, 2023
cc536e8
Merge branch 'bGrid-newTemaplateAPI' into bGrid
massimim Jun 16, 2023
55af708
Benchmarks and scripts
massimim Jun 19, 2023
90a4ba9
Code documentation
massimim Jun 19, 2023
019db4d
Fixing grid spacing in bGrid.
massimim Jun 19, 2023
1790087
Merge remote-tracking branch 'origin/develop' into bGrid
massimim Jun 22, 2023
588b746
WIP
massimim Jun 22, 2023
9a87088
Fixing report filename for benchmarks scripts
massimim Jun 22, 2023
1168cc2
Adding halo option.
massimim Jun 23, 2023
0bdce94
Adding halo option.
massimim Jun 23, 2023
3dc808e
WIP
massimim Jun 23, 2023
ceab2a6
domain_neighbour_globalIdx for dGridSoA
massimim Jun 26, 2023
8197006
Merge branch 'lattice-benchmark-lbm' into dGridSOA
massimim Jun 26, 2023
13377a4
Testing block sizes on bGrid
massimim Jun 27, 2023
3a36f0c
Adding dGridSoA to the stencil tests
massimim Jun 28, 2023
a49b27a
WIP
massimim Jun 29, 2023
fde014d
Extending unit test for stencil to dGridSoA
massimim Jun 29, 2023
b0e74e6
WIP
massimim Jun 29, 2023
1dd5abc
WIP
massimim Jun 30, 2023
81b3526
WIP
massimim Jun 30, 2023
2a2caf7
WIP
massimim Jun 30, 2023
b63b90b
WIP
massimim Jun 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 50 additions & 40 deletions benchmarks/lbm-lid-driven-cavity-flow/lbm-lid-driven-cavity-flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
GRID_LIST = "dGrid bGrid eGrid".split()
STORAGE_FP_LIST = "double float".split()
COMPUTE_FP_LIST = "double float".split()
OCC_LIST = "nOCC".split()
OCC_LIST = "nOCC sOCC".split()
HU_LIST = "huGrid huLattice".split()
WARM_UP_ITER = 10
MAX_ITER = 100
REPETITIONS = 5
Expand Down Expand Up @@ -38,60 +39,69 @@
for COMPUTE_FP in COMPUTE_FP_LIST:
for DEVICE_SET in DEVICE_SET_LIST:
for GRID in GRID_LIST:
if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
continue
for HU in HU_LIST:
if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
continue

counter += 1
counter += 1
return counter


SAMPLES = countAll()
counter = 0
command = './lbm-lid-driven-cavity-flow'
# command = 'echo'
with open(command + '.log', 'w') as fp:
for DEVICE_TYPE in DEVICE_TYPE_LIST:
DEVICE_SET_LIST = [DEVICE_ID_LIST[0]]
if DEVICE_TYPE == 'gpu':
for DEVICE in DEVICE_ID_LIST[1:]:
DEVICE_SET_LIST.append(DEVICE_SET_LIST[-1] + ' ' + DEVICE)
for OCC in OCC_LIST:
for DOMAIN_SIZE in DOMAIN_SIZE_LIST:
for STORAGE_FP in STORAGE_FP_LIST:
for COMPUTE_FP in COMPUTE_FP_LIST:
for DEVICE_SET in DEVICE_SET_LIST:
for DEVICE_SET in DEVICE_SET_LIST:
for OCC in OCC_LIST:
for DOMAIN_SIZE in DOMAIN_SIZE_LIST:
for STORAGE_FP in STORAGE_FP_LIST:
for COMPUTE_FP in COMPUTE_FP_LIST:
for GRID in GRID_LIST:
if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
continue
for HU in HU_LIST:

if STORAGE_FP == 'double' and COMPUTE_FP == 'float':
continue

parameters = []
parameters.append('--deviceType ' + DEVICE_TYPE)
parameters.append('--deviceIds ' + DEVICE_SET)
parameters.append('--grid ' + GRID)
parameters.append('--domain-size ' + DOMAIN_SIZE)
parameters.append('--warmup-iter ' + str(WARM_UP_ITER))
parameters.append('--repetitions ' + str(REPETITIONS))
parameters.append('--max-iter ' + str(MAX_ITER))
parameters.append(
'--report-filename ' + 'lbm-lid-driven-cavity-flow___' +
DEVICE_TYPE + '_' + DOMAIN_SIZE + '_' +
STORAGE_FP + '_' + COMPUTE_FP + '_' +
DEVICE_SET.replace(' ', '_') + '_' + OCC)
parameters.append('--computeFP ' + COMPUTE_FP)
parameters.append('--storageFP ' + STORAGE_FP)
parameters.append('--benchmark')
parameters.append('--' + OCC)
# Command-line options for one benchmark run. Each entry is an
# "--option value" string; the code that follows splits every entry on
# whitespace to build the argv list.
parameters = []
parameters.append('--deviceType ' + DEVICE_TYPE)
parameters.append('--deviceIds ' + DEVICE_SET)
parameters.append('--grid ' + GRID)
parameters.append('--domain-size ' + DOMAIN_SIZE)
parameters.append('--warmup-iter ' + str(WARM_UP_ITER))
parameters.append('--repetitions ' + str(REPETITIONS))
parameters.append('--max-iter ' + str(MAX_ITER))
# The report name encodes every swept variable. HU is included so that the
# huGrid and huLattice runs of the same configuration do not share one
# report name.
parameters.append(
    '--report-filename ' + 'lbm-lid-driven-cavity-flow___' +
    DEVICE_TYPE + '_' +
    DEVICE_SET.replace(' ', '_') + '-' +
    GRID + '_' +
    DOMAIN_SIZE + '-' +
    STORAGE_FP + '-' + COMPUTE_FP + '-' +
    OCC + '-' + HU)
parameters.append('--computeFP ' + COMPUTE_FP)
parameters.append('--storageFP ' + STORAGE_FP)
parameters.append('--benchmark')
# OCC and HU are passed as bare flags, e.g. "--nOCC" and "--huGrid".
parameters.append('--' + OCC)
parameters.append('--' + HU)

commandList = []
commandList.append(command)
for el in parameters:
for s in el.split():
commandList.append(s)
commandList = []
commandList.append(command)
for el in parameters:
for s in el.split():
commandList.append(s)

fp.write("\n-------------------------------------------\n")
fp.write(' '.join(commandList))
fp.write("\n-------------------------------------------\n")
fp.flush()
subprocess.run(commandList, text=True, stdout=fp)
fp.write("\n-------------------------------------------\n")
fp.write(' '.join(commandList))
fp.write("\n-------------------------------------------\n")
fp.flush()
print(' '.join(commandList))
subprocess.run(commandList, text=True, stdout=fp)

Check notice on line 104 in benchmarks/lbm-lid-driven-cavity-flow/lbm-lid-driven-cavity-flow.py

View check run for this annotation

Autodesk Chorus / security/bandit

B603: subprocess_without_shell_equals_true

subprocess call - check for execution of untrusted input. secure coding id: PYTH-INJC-30.

counter += 1
printProgressBar(counter * 100.0 / SAMPLES, 'Progress')
counter += 1
printProgressBar(counter * 100.0 / SAMPLES, 'Progress')
125 changes: 64 additions & 61 deletions benchmarks/lbm-lid-driven-cavity-flow/src/LbmTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,22 @@ struct LbmContainers<D3Q19Template<typename PopulationField::Type, LbmComputeTyp
using Rho = typename Grid::template Field<LbmStoreType, 1>;
using U = typename Grid::template Field<LbmStoreType, 3>;

#define LOADPOP(GOx, GOy, GOz, GOid, BKx, BKy, BKz, BKid) \
{ \
{ /*GO*/ \
if (wallBitFlag & (uint32_t(1) << GOid)) { \
/*std::cout << "cell " << i.mLocation << " direction " << GOid << " opposite " << BKid << std::endl; */ \
popIn[GOid] = fin(i, BKid); \
} else { \
popIn[GOid] = fin.template nghVal<BKx, BKy, BKz>(i, GOid, 0.0).value; \
} \
} \
{ /*BK*/ \
if (wallBitFlag & (uint32_t(1) << BKid)) { \
popIn[BKid] = fin(i, GOid); \
} else { \
popIn[BKid] = fin.template nghVal<GOx, GOy, GOz>(i, BKid, 0.0).value; \
} \
} \
/*
 * LOADPOP: fills popIn[GOid] and popIn[BKid] for one pair of directions.
 *
 * Parameters:
 *   GOx, GOy, GOz, GOid - offset and population index of the "go" direction.
 *   BKx, BKy, BKz, BKid - offset and population index of the "back" direction.
 *
 * The macro is not self-contained: it uses `wallBitFlag`, `popIn`, `fin`
 * and `i`, which must be in scope where it is expanded.
 *
 * For each of the two directions:
 *   - if the bit for that direction is set in wallBitFlag, the value is taken
 *     from the current cell `i`, using the index of the other direction of
 *     the pair (presumably a bounce-back wall treatment - confirm);
 *   - otherwise the value is read through nghVal from the neighbour at the
 *     offset of the other direction of the pair. The 0.0 argument is
 *     presumably the value used when that neighbour does not exist - confirm
 *     against the nghVal API.
 */
#define LOADPOP(GOx, GOy, GOz, GOid, BKx, BKy, BKz, BKid) \
{ \
{ /*GO*/ \
if (wallBitFlag & (uint32_t(1) << GOid)) { \
popIn[GOid] = fin(i, BKid); \
} else { \
popIn[GOid] = fin.template nghVal<BKx, BKy, BKz>(i, GOid, 0.0).value; \
} \
} \
{ /*BK*/ \
if (wallBitFlag & (uint32_t(1) << BKid)) { \
popIn[BKid] = fin(i, GOid); \
} else { \
popIn[BKid] = fin.template nghVal<GOx, GOy, GOz>(i, BKid, 0.0).value; \
} \
} \
}
static inline NEON_CUDA_HOST_DEVICE auto
loadPopulation(Idx const& i,
Expand Down Expand Up @@ -101,8 +100,6 @@ struct LbmContainers<D3Q19Template<typename PopulationField::Type, LbmComputeTyp
typename PopulationField::Partition const& fin,
NEON_OUT LbmStoreType popIn[19])
{
// #pragma omp critical
// {
#if 0
using TopologyByDirection = std::tuple<Neon::int32_3d, int, Neon::int32_3d, int>;
constexpr std::array<TopologyByDirection, 9> stencil{
Expand Down Expand Up @@ -160,7 +157,6 @@ struct LbmContainers<D3Q19Template<typename PopulationField::Type, LbmComputeTyp
PULL_STREAM(0, -1, -1, /* GOid */ 7, /* --- */ 0, 1, 1, /* BKid */ 17);
PULL_STREAM(0, -1, 1, /* GOid */ 8, /* --- */ 0, 1, -1, /* BKid */ 18);

// }
// Treat the case of the center (c[k] = {0, 0, 0,}).
{
popIn[Lattice::centerDirection] = fin(gidx, Lattice::centerDirection);
Expand Down Expand Up @@ -213,45 +209,52 @@ struct LbmContainers<D3Q19Template<typename PopulationField::Type, LbmComputeTyp
const LbmComputeType ck_u07 = u[1] + u[2];
const LbmComputeType ck_u08 = u[1] - u[2];

const LbmComputeType eq_00 = rho * (1. / 18.) * (1. - 3. * u[0] + 4.5 * u[0] * u[0] - usqr);
const LbmComputeType eq_01 = rho * (1. / 18.) * (1. - 3. * u[1] + 4.5 * u[1] * u[1] - usqr);
const LbmComputeType eq_02 = rho * (1. / 18.) * (1. - 3. * u[2] + 4.5 * u[2] * u[2] - usqr);
const LbmComputeType eq_03 = rho * (1. / 36.) * (1. - 3. * ck_u03 + 4.5 * ck_u03 * ck_u03 - usqr);
const LbmComputeType eq_04 = rho * (1. / 36.) * (1. - 3. * ck_u04 + 4.5 * ck_u04 * ck_u04 - usqr);
const LbmComputeType eq_05 = rho * (1. / 36.) * (1. - 3. * ck_u05 + 4.5 * ck_u05 * ck_u05 - usqr);
const LbmComputeType eq_06 = rho * (1. / 36.) * (1. - 3. * ck_u06 + 4.5 * ck_u06 * ck_u06 - usqr);
const LbmComputeType eq_07 = rho * (1. / 36.) * (1. - 3. * ck_u07 + 4.5 * ck_u07 * ck_u07 - usqr);
const LbmComputeType eq_08 = rho * (1. / 36.) * (1. - 3. * ck_u08 + 4.5 * ck_u08 * ck_u08 - usqr);

const LbmComputeType eqopp_00 = eq_00 + rho * (1. / 18.) * 6. * u[0];
const LbmComputeType eqopp_01 = eq_01 + rho * (1. / 18.) * 6. * u[1];
const LbmComputeType eqopp_02 = eq_02 + rho * (1. / 18.) * 6. * u[2];
const LbmComputeType eqopp_03 = eq_03 + rho * (1. / 36.) * 6. * ck_u03;
const LbmComputeType eqopp_04 = eq_04 + rho * (1. / 36.) * 6. * ck_u04;
const LbmComputeType eqopp_05 = eq_05 + rho * (1. / 36.) * 6. * ck_u05;
const LbmComputeType eqopp_06 = eq_06 + rho * (1. / 36.) * 6. * ck_u06;
const LbmComputeType eqopp_07 = eq_07 + rho * (1. / 36.) * 6. * ck_u07;
const LbmComputeType eqopp_08 = eq_08 + rho * (1. / 36.) * 6. * ck_u08;

const LbmComputeType pop_out_00 = (1. - omega) * static_cast<LbmComputeType>(pop[0]) + omega * eq_00;
const LbmComputeType pop_out_01 = (1. - omega) * static_cast<LbmComputeType>(pop[1]) + omega * eq_01;
const LbmComputeType pop_out_02 = (1. - omega) * static_cast<LbmComputeType>(pop[2]) + omega * eq_02;
const LbmComputeType pop_out_03 = (1. - omega) * static_cast<LbmComputeType>(pop[3]) + omega * eq_03;
const LbmComputeType pop_out_04 = (1. - omega) * static_cast<LbmComputeType>(pop[4]) + omega * eq_04;
const LbmComputeType pop_out_05 = (1. - omega) * static_cast<LbmComputeType>(pop[5]) + omega * eq_05;
const LbmComputeType pop_out_06 = (1. - omega) * static_cast<LbmComputeType>(pop[6]) + omega * eq_06;
const LbmComputeType pop_out_07 = (1. - omega) * static_cast<LbmComputeType>(pop[7]) + omega * eq_07;
const LbmComputeType pop_out_08 = (1. - omega) * static_cast<LbmComputeType>(pop[8]) + omega * eq_08;

const LbmComputeType pop_out_opp_00 = (1. - omega) * static_cast<LbmComputeType>(pop[10]) + omega * eqopp_00;
const LbmComputeType pop_out_opp_01 = (1. - omega) * static_cast<LbmComputeType>(pop[11]) + omega * eqopp_01;
const LbmComputeType pop_out_opp_02 = (1. - omega) * static_cast<LbmComputeType>(pop[12]) + omega * eqopp_02;
const LbmComputeType pop_out_opp_03 = (1. - omega) * static_cast<LbmComputeType>(pop[13]) + omega * eqopp_03;
const LbmComputeType pop_out_opp_04 = (1. - omega) * static_cast<LbmComputeType>(pop[14]) + omega * eqopp_04;
const LbmComputeType pop_out_opp_05 = (1. - omega) * static_cast<LbmComputeType>(pop[15]) + omega * eqopp_05;
const LbmComputeType pop_out_opp_06 = (1. - omega) * static_cast<LbmComputeType>(pop[16]) + omega * eqopp_06;
const LbmComputeType pop_out_opp_07 = (1. - omega) * static_cast<LbmComputeType>(pop[17]) + omega * eqopp_07;
const LbmComputeType pop_out_opp_08 = (1. - omega) * static_cast<LbmComputeType>(pop[18]) + omega * eqopp_08;
// Numeric constants typed as LbmComputeType, so the expressions below contain
// no double literals and are evaluated in the compute precision.
constexpr LbmComputeType c1over18 = 1. / 18.;
constexpr LbmComputeType c1over36 = 1. / 36.;
constexpr LbmComputeType c4dot5 = 4.5;
constexpr LbmComputeType c3 = 3.;
constexpr LbmComputeType c1 = 1.;
constexpr LbmComputeType c6 = 6.;

// Equilibrium for directions 0..8. The linear velocity term must use c3:
// eqopp_k below is obtained by adding 6 * (c_k . u) to eq_k, which turns
// -3 * (c_k . u) into +3 * (c_k . u) for the opposite direction. Using c6
// here would double the linear term of eq_k and cancel it in eqopp_k.
const LbmComputeType eq_00 = rho * c1over18 * (c1 - c3 * u[0] + c4dot5 * u[0] * u[0] - usqr);
const LbmComputeType eq_01 = rho * c1over18 * (c1 - c3 * u[1] + c4dot5 * u[1] * u[1] - usqr);
const LbmComputeType eq_02 = rho * c1over18 * (c1 - c3 * u[2] + c4dot5 * u[2] * u[2] - usqr);
const LbmComputeType eq_03 = rho * c1over36 * (c1 - c3 * ck_u03 + c4dot5 * ck_u03 * ck_u03 - usqr);
const LbmComputeType eq_04 = rho * c1over36 * (c1 - c3 * ck_u04 + c4dot5 * ck_u04 * ck_u04 - usqr);
const LbmComputeType eq_05 = rho * c1over36 * (c1 - c3 * ck_u05 + c4dot5 * ck_u05 * ck_u05 - usqr);
const LbmComputeType eq_06 = rho * c1over36 * (c1 - c3 * ck_u06 + c4dot5 * ck_u06 * ck_u06 - usqr);
const LbmComputeType eq_07 = rho * c1over36 * (c1 - c3 * ck_u07 + c4dot5 * ck_u07 * ck_u07 - usqr);
const LbmComputeType eq_08 = rho * c1over36 * (c1 - c3 * ck_u08 + c4dot5 * ck_u08 * ck_u08 - usqr);

// Equilibrium for the opposite directions: same even terms as eq_k, with the
// sign of the linear term flipped.
const LbmComputeType eqopp_00 = eq_00 + rho * c1over18 * c6 * u[0];
const LbmComputeType eqopp_01 = eq_01 + rho * c1over18 * c6 * u[1];
const LbmComputeType eqopp_02 = eq_02 + rho * c1over18 * c6 * u[2];
const LbmComputeType eqopp_03 = eq_03 + rho * c1over36 * c6 * ck_u03;
const LbmComputeType eqopp_04 = eq_04 + rho * c1over36 * c6 * ck_u04;
const LbmComputeType eqopp_05 = eq_05 + rho * c1over36 * c6 * ck_u05;
const LbmComputeType eqopp_06 = eq_06 + rho * c1over36 * c6 * ck_u06;
const LbmComputeType eqopp_07 = eq_07 + rho * c1over36 * c6 * ck_u07;
const LbmComputeType eqopp_08 = eq_08 + rho * c1over36 * c6 * ck_u08;

// Relaxation of populations 0..8 towards eq_k with weight omega.
const LbmComputeType pop_out_00 = (c1 - omega) * static_cast<LbmComputeType>(pop[0]) + omega * eq_00;
const LbmComputeType pop_out_01 = (c1 - omega) * static_cast<LbmComputeType>(pop[1]) + omega * eq_01;
const LbmComputeType pop_out_02 = (c1 - omega) * static_cast<LbmComputeType>(pop[2]) + omega * eq_02;
const LbmComputeType pop_out_03 = (c1 - omega) * static_cast<LbmComputeType>(pop[3]) + omega * eq_03;
const LbmComputeType pop_out_04 = (c1 - omega) * static_cast<LbmComputeType>(pop[4]) + omega * eq_04;
const LbmComputeType pop_out_05 = (c1 - omega) * static_cast<LbmComputeType>(pop[5]) + omega * eq_05;
const LbmComputeType pop_out_06 = (c1 - omega) * static_cast<LbmComputeType>(pop[6]) + omega * eq_06;
const LbmComputeType pop_out_07 = (c1 - omega) * static_cast<LbmComputeType>(pop[7]) + omega * eq_07;
const LbmComputeType pop_out_08 = (c1 - omega) * static_cast<LbmComputeType>(pop[8]) + omega * eq_08;

// Same relaxation for the opposite directions; population k pairs with
// population k + 10.
const LbmComputeType pop_out_opp_00 = (c1 - omega) * static_cast<LbmComputeType>(pop[10]) + omega * eqopp_00;
const LbmComputeType pop_out_opp_01 = (c1 - omega) * static_cast<LbmComputeType>(pop[11]) + omega * eqopp_01;
const LbmComputeType pop_out_opp_02 = (c1 - omega) * static_cast<LbmComputeType>(pop[12]) + omega * eqopp_02;
const LbmComputeType pop_out_opp_03 = (c1 - omega) * static_cast<LbmComputeType>(pop[13]) + omega * eqopp_03;
const LbmComputeType pop_out_opp_04 = (c1 - omega) * static_cast<LbmComputeType>(pop[14]) + omega * eqopp_04;
const LbmComputeType pop_out_opp_05 = (c1 - omega) * static_cast<LbmComputeType>(pop[15]) + omega * eqopp_05;
const LbmComputeType pop_out_opp_06 = (c1 - omega) * static_cast<LbmComputeType>(pop[16]) + omega * eqopp_06;
const LbmComputeType pop_out_opp_07 = (c1 - omega) * static_cast<LbmComputeType>(pop[17]) + omega * eqopp_07;
const LbmComputeType pop_out_opp_08 = (c1 - omega) * static_cast<LbmComputeType>(pop[18]) + omega * eqopp_08;


#define COMPUTE_GO_AND_BACK(GOid, BKid) \
Expand All @@ -273,8 +276,8 @@ struct LbmContainers<D3Q19Template<typename PopulationField::Type, LbmComputeTyp
#undef COMPUTE_GO_AND_BACK

{
const LbmComputeType eq_09 = rho * (1. / 3.) * (1. - usqr);
const LbmComputeType pop_out_09 = (1. - omega) *
const LbmComputeType eq_09 = rho * (c1 / c3) * (c1 - usqr);
const LbmComputeType pop_out_09 = (c1 - omega) *
static_cast<LbmComputeType>(pop[Lattice::centerDirection]) +
omega * eq_09;
fOut(i, Lattice::centerDirection) = static_cast<LbmStoreType>(pop_out_09);
Expand Down
Loading
Loading