Skip to content

Commit

Permalink
ocl: improved kernel configuration
Browse files Browse the repository at this point in the history
* Consolidate some settings into the workaround (WA) level.
* Update tuned parameters (PVC).
* Test tuned kernels (Daint-CI).
  • Loading branch information
hfp committed May 28, 2024
1 parent 208d53d commit 5b1f7fe
Show file tree
Hide file tree
Showing 5 changed files with 396 additions and 395 deletions.
2 changes: 1 addition & 1 deletion .ci/daint.cscs.ch/ocl.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export OMP_PROC_BIND=TRUE # set thread affinity
# OMP_NUM_THREADS is set by cmake

# use default parameters (omit loading tuned parameters)
export OPENCL_LIBSMM_SMM_PARAMS=0
#export OPENCL_LIBSMM_SMM_PARAMS=0

# document the current environment
env |& tee -a "${STAGE_NAME}.out"
Expand Down
11 changes: 6 additions & 5 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ int c_dbcsr_acc_init(void) {
const int nccs = (NULL == env_nccs ? ACC_OPENCL_NCCS : atoi(env_nccs));
# endif
const char *const env_neo = getenv("NEOReadDebugKeys"), *const env_wa = getenv("ACC_OPENCL_WA");
const int neo = (NULL == env_neo ? 1 : atoi(env_neo)), wa = neo * (NULL == env_wa ? 7 : atoi(env_wa));
const int neo = (NULL == env_neo ? 1 : atoi(env_neo));
# if defined(ACC_OPENCL_ASYNC)
const char* const env_async = (ACC_OPENCL_ASYNC);
const int async_default = 3;
Expand Down Expand Up @@ -267,6 +267,7 @@ int c_dbcsr_acc_init(void) {
c_dbcsr_acc_opencl_config.async = (NULL == env_async ? async_default : atoi(env_async));
c_dbcsr_acc_opencl_config.dump = (NULL == env_dump ? /*default*/ 0 : atoi(env_dump));
c_dbcsr_acc_opencl_config.debug = (NULL == env_debug ? c_dbcsr_acc_opencl_config.dump : atoi(env_debug));
c_dbcsr_acc_opencl_config.wa = neo * (NULL == env_wa ? 31 : atoi(env_wa));
if (EXIT_SUCCESS != c_dbcsr_acc_opencl_device_uid(NULL /*device*/, env_devmatch, &c_dbcsr_acc_opencl_config.devmatch)) {
c_dbcsr_acc_opencl_config.devmatch = 1;
}
Expand All @@ -283,7 +284,7 @@ int c_dbcsr_acc_init(void) {
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(ze_flat)); /* soft-error */
}
# if defined(ACC_OPENCL_NCCS)
if (NULL == getenv("ZEX_NUMBER_OF_CCS") && 0 != nccs && 0 == (1 & wa)) {
if (NULL == getenv("ZEX_NUMBER_OF_CCS") && 0 != nccs && 0 == (1 & c_dbcsr_acc_opencl_config.wa)) {
static char zex_nccs[ACC_OPENCL_MAXNDEVS * 8 + 32] = "ZEX_NUMBER_OF_CCS=";
int j = strlen(zex_nccs);
for (i = 0; i < ACC_OPENCL_MAXNDEVS; ++i) {
Expand All @@ -299,14 +300,14 @@ int c_dbcsr_acc_init(void) {
if (0 < j) ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(zex_nccs)); /* soft-error */
}
# endif
if (~1 & wa) { /* environment is populated before touching the compute runtime */
if (~1 & c_dbcsr_acc_opencl_config.wa) { /* environment is populated before touching the compute runtime */
static char* key_value[] = {
"NEOReadDebugKeys=1", "EnableRecoverablePageFaults=0", "DirectSubmissionOverrideBlitterSupport=0"};
if (NULL == env_neo) ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[0]));
if ((2 & wa) && NULL == getenv("EnableRecoverablePageFaults")) {
if ((2 & c_dbcsr_acc_opencl_config.wa) && NULL == getenv("EnableRecoverablePageFaults")) {
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[1]));
}
if ((4 & wa) && NULL == getenv("DirectSubmissionOverrideBlitterSupport")) {
if ((4 & c_dbcsr_acc_opencl_config.wa) && NULL == getenv("DirectSubmissionOverrideBlitterSupport")) {
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[2]));
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ typedef struct c_dbcsr_acc_opencl_config_t {
cl_int debug;
/** Dump level. */
cl_int dump;
/** WA level */
cl_int wa;
} c_dbcsr_acc_opencl_config_t;

/** Global configuration setup in c_dbcsr_acc_init. */
Expand Down
46 changes: 22 additions & 24 deletions src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,15 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
else { /* preserve kernels, performance counters, etc. */
memcpy(&new_config, config, sizeof(opencl_libsmm_smm_t));
}
if (NULL == env_xf || '\0' == *env_xf) {
if (0 == c_dbcsr_acc_opencl_config.device.intel || CL_DEVICE_TYPE_GPU != c_dbcsr_acc_opencl_config.device.type ||
NULL == env_cl || NULL == strstr(env_cl, intel_xf))
{
new_config.flags = (NULL == config ? /*default*/ 0 : config->flags);
}
else new_config.flags = 1;
}
else new_config.flags = atoi(env_xf);
new_config.lu = unroll;
/* two defaults for new_config parameters: 1st - regular, 2nd - BS=1 kernel */
new_config.bm = (0 >= blockm ? (0 == kernel_idx ? (NULL == config ? LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BM, m_max)
Expand All @@ -1110,8 +1119,9 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
: atoi(env_nz),
0, 1);
new_config.al = LIBXSMM_CLMP(/* bug: AL=1 */
(NULL == env_al || '\0' == *env_al) ? 0 /*(0 == kernel_idx ? (NULL == config ? 0 : config->al) : 0)*/
: atoi(env_al),
(NULL == env_al || '\0' == *env_al)
? (0 == (8 & c_dbcsr_acc_opencl_config.wa) ? (0 == kernel_idx ? (NULL == config ? 0 : config->al) : 0) : 0)
: atoi(env_al),
0, 1);
new_config.tb = LIBXSMM_CLMP((NULL == env_tb || '\0' == *env_tb)
? (0 == kernel_idx ? (NULL == config ? /*default*/ 0 : config->tb) : /*default*/ 0)
Expand All @@ -1125,30 +1135,18 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
? (0 == kernel_idx ? (NULL == config ? /*default*/ 0 : config->ap) : /*default*/ 0)
: atoi(env_ap),
0, 1);
new_config.aa = LIBXSMM_CLMP(
(NULL == env_aa || '\0' == *env_aa)
? (0 == kernel_idx ? (NULL == config ? /*default*/ default_aa : config->aa) : /*default*/ default_aa)
: atoi(env_aa),
0, 2);
new_config.ab = LIBXSMM_CLMP(
(NULL == env_ab || '\0' == *env_ab)
? (0 == kernel_idx ? (NULL == config ? /*default*/ default_ab : config->ab) : /*default*/ default_ab)
: atoi(env_ab),
new_config.aa = LIBXSMM_CLMP(/* bug: AA=2 XF=1 */
(NULL == env_aa || '\0' == *env_aa) ? (0 == kernel_idx ? (NULL == config ? default_aa : config->aa) : default_aa)
: atoi(env_aa),
0, (0 == (16 & c_dbcsr_acc_opencl_config.wa) || 0 == new_config.flags) ? 2 : 1);
new_config.ab = LIBXSMM_CLMP((NULL == env_ab || '\0' == *env_ab)
? (0 == kernel_idx ? (NULL == config ? default_ab : config->ab) : default_ab)
: atoi(env_ab),
0, 2);
new_config.ac = LIBXSMM_CLMP(
(NULL == env_ac || '\0' == *env_ac)
? (0 == kernel_idx ? (NULL == config ? /*default*/ default_ac : config->ac) : /*default*/ default_ac)
: atoi(env_ac),
new_config.ac = LIBXSMM_CLMP((NULL == env_ac || '\0' == *env_ac)
? (0 == kernel_idx ? (NULL == config ? default_ac : config->ac) : default_ac)
: atoi(env_ac),
0, 1);
if (NULL == env_xf || '\0' == *env_xf) {
if (0 == c_dbcsr_acc_opencl_config.device.intel || CL_DEVICE_TYPE_GPU != c_dbcsr_acc_opencl_config.device.type ||
NULL == env_cl || NULL == strstr(env_cl, intel_xf))
{
new_config.flags = (NULL == config ? /*default*/ 0 : config->flags);
}
else new_config.flags = 1;
}
else new_config.flags = atoi(env_xf);
if (0 >= new_config.s) new_config.s = stack_size;
if (0 == kernel_idx || 1 >= new_config.bs) new_config.bs = bs;
nbm = (m_max + new_config.bm - 1) / new_config.bm;
Expand Down
Loading

0 comments on commit 5b1f7fe

Please sign in to comment.