Skip to content

Commit

Permalink
LB: Re-generate kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
RudolfWeeber committed Oct 14, 2024
1 parent 92021b4 commit 334b707
Show file tree
Hide file tree
Showing 58 changed files with 4,546 additions and 29,880 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class CollideSweepDoublePrecisionLeesEdwards {
BlockDataID pdfsID_, double grid_size,
double omega_shear, double v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class CollideSweepDoublePrecisionLeesEdwardsAVX {
double grid_size,
double omega_shear, double v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class CollideSweepDoublePrecisionLeesEdwardsCUDA {
double grid_size,
double omega_shear, double v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block, gpuStream_t stream = nullptr);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class CollideSweepDoublePrecisionThermalized {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class CollideSweepDoublePrecisionThermalizedAVX {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class CollideSweepDoublePrecisionThermalizedCUDA {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block, gpuStream_t stream = nullptr);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class CollideSweepSinglePrecisionLeesEdwards {
BlockDataID pdfsID_, float grid_size,
float omega_shear, float v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class CollideSweepSinglePrecisionLeesEdwardsAVX {
float grid_size, float omega_shear,
float v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class CollideSweepSinglePrecisionLeesEdwardsCUDA {
float grid_size, float omega_shear,
float v_s)
: forceID(forceID_), pdfsID(pdfsID_), grid_size_(grid_size),
omega_shear_(omega_shear), v_s_(v_s){};
omega_shear_(omega_shear), v_s_(v_s) {};

void run(IBlock *block, gpuStream_t stream = nullptr);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class CollideSweepSinglePrecisionThermalized {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class CollideSweepSinglePrecisionThermalizedAVX {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block);

Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class CollideSweepSinglePrecisionThermalizedCUDA {
: forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
omega_even_(omega_even), omega_odd_(omega_odd),
omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
configured_(false){};
configured_(false) {};

void run(IBlock *block, gpuStream_t stream = nullptr);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
//! \\author pystencils
//======================================================================================================================

// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
// lbmpy_walberla/pystencils_walberla from waLBerla commit
// 04f4adbdfc0af983e2d9b72e244d775f37d77034
// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit 04f4adbdfc0af983e2d9b72e244d775f37d77034

#include "Dynamic_UBB_double_precision.h"
#include "core/DataTypes.h"
Expand Down Expand Up @@ -49,99 +47,29 @@ namespace lbm {
#endif
// NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_451fd042b8d7665063ea81b98853365b {
static FUNC_PREFIX void
dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(
uint8_t *RESTRICT const _data_indexVector, double *RESTRICT _data_pdfs,
int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1,
int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3,
int32_t indexVectorSize) {

const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8,
7, 16, 15, 18, 17, 12, 11, 14, 13};
const int32_t f_in_inv_offsets_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1,
1, 0, 0, -1, 1, 0, 0, -1, 1};
const int32_t f_in_inv_offsets_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1,
-1, 1, -1, 0, 0, 1, -1, 0, 0};
const int32_t f_in_inv_offsets_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0,
0, 1, 1, 1, 1, -1, -1, -1, -1};

const double weights[] = {
0.33333333333333333, 0.055555555555555556, 0.055555555555555556,
0.055555555555555556, 0.055555555555555556, 0.055555555555555556,
0.055555555555555556, 0.027777777777777778, 0.027777777777777778,
0.027777777777777778, 0.027777777777777778, 0.027777777777777778,
0.027777777777777778, 0.027777777777777778, 0.027777777777777778,
0.027777777777777778, 0.027777777777777778, 0.027777777777777778,
0.027777777777777778};

const int32_t neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1,
1, 0, 0, -1, 1, 0, 0, -1, 1};
const int32_t neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1,
-1, 1, -1, 0, 0, 1, -1, 0, 0};
const int32_t neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0,
0, 1, 1, 1, 1, -1, -1, -1, -1};
static FUNC_PREFIX void dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(uint8_t *RESTRICT const _data_indexVector, double *RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) {

const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};
const int32_t f_in_inv_offsets_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
const int32_t f_in_inv_offsets_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
const int32_t f_in_inv_offsets_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};

const double weights[] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778};

const int32_t neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
const int32_t neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
const int32_t neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};

for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) {
const int32_t x = *((int32_t *)(&_data_indexVector[40 * ctr_0]));
const int32_t y = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 4]));
const int32_t z = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 8]));
const int32_t dir = *((int32_t *)(&_data_indexVector[40 * ctr_0 + 12]));
const double vel0Term =
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 10 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 14 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 18 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 4 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 8 * _stride_pdfs_3];
const double vel1Term =
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 11 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 15 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 7 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + _stride_pdfs_3];
const double vel2Term =
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 12 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 13 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 5 * _stride_pdfs_3];
const double rho = vel0Term + vel1Term + vel2Term +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 16 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 17 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 2 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 3 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 6 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + 9 * _stride_pdfs_3] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z];
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 * f_in_inv_offsets_x[dir] +
_stride_pdfs_1 * y + _stride_pdfs_1 * f_in_inv_offsets_y[dir] +
_stride_pdfs_2 * z + _stride_pdfs_2 * f_in_inv_offsets_z[dir] +
_stride_pdfs_3 * f_in_inv_dir_idx[dir]] =
-rho *
(6.0 * ((double)(neighbour_offset_x[dir])) *
*((double *)(&_data_indexVector[40 * ctr_0 + 16])) +
6.0 * ((double)(neighbour_offset_y[dir])) *
*((double *)(&_data_indexVector[40 * ctr_0 + 24])) +
6.0 * ((double)(neighbour_offset_z[dir])) *
*((double *)(&_data_indexVector[40 * ctr_0 + 32]))) *
weights[dir] +
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y +
_stride_pdfs_2 * z + _stride_pdfs_3 * dir];
const double vel0Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 10 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 14 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 18 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 4 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 8 * _stride_pdfs_3];
const double vel1Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 11 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 15 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 7 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_3];
const double vel2Term = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 12 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 13 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 5 * _stride_pdfs_3];
const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 16 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 17 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 2 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 3 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 6 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 9 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z];
_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_0 * f_in_inv_offsets_x[dir] + _stride_pdfs_1 * y + _stride_pdfs_1 * f_in_inv_offsets_y[dir] + _stride_pdfs_2 * z + _stride_pdfs_2 * f_in_inv_offsets_z[dir] + _stride_pdfs_3 * f_in_inv_dir_idx[dir]] = -rho * (6.0 * ((double)(neighbour_offset_x[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 16])) + 6.0 * ((double)(neighbour_offset_y[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 24])) + 6.0 * ((double)(neighbour_offset_z[dir])) * *((double *)(&_data_indexVector[40 * ctr_0 + 32]))) * weights[dir] + _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + _stride_pdfs_3 * dir];
}
}
} // namespace internal_451fd042b8d7665063ea81b98853365b
Expand All @@ -155,8 +83,7 @@ dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(
#pragma pop
#endif

void Dynamic_UBB_double_precision::run_impl(IBlock *block,
IndexVectors::Type type) {
void Dynamic_UBB_double_precision::run_impl(IBlock *block, IndexVectors::Type type) {
auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
if (indexVectorSize == 0)
Expand All @@ -174,10 +101,7 @@ void Dynamic_UBB_double_precision::run_impl(IBlock *block,
const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
internal_451fd042b8d7665063ea81b98853365b::
dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(
_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1,
_stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
internal_451fd042b8d7665063ea81b98853365b::dynamic_ubb_double_precision_boundary_Dynamic_UBB_double_precision(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}

void Dynamic_UBB_double_precision::run(IBlock *block) {
Expand Down
Loading

0 comments on commit 334b707

Please sign in to comment.