Skip to content

Commit

Permalink
Allow turning off DBCSR ACC with env variable
Browse files Browse the repository at this point in the history
  • Loading branch information
abussy committed May 30, 2024
1 parent a13ef3c commit 1213574
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 56 deletions.
42 changes: 38 additions & 4 deletions src/core/dbcsr_config.F
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ MODULE dbcsr_config
SET_PARAMETER_DEFAULT(COMM_THREAD_LOAD, CONF_PAR_INT, 100)
SET_PARAMETER_DEFAULT(MM_DENSE, CONF_PAR_LOGICAL,.NOT. has_acc)
SET_PARAMETER_DEFAULT(MULTREC_LIMIT, CONF_PAR_INT, 512)
SET_PARAMETER_DEFAULT(TURN_OFF_ACC, CONF_PAR_LOGICAL, .FALSE.)
SET_PARAMETER_DEFAULT(ACCDRV_THREAD_BUFFERS, CONF_PAR_INT, 8)
TYPE(CONF_PAR_LOGICAL) :: ACCDRV_AVOID_AFTER_BUSY = &
CONF_PAR_LOGICAL(name="ACCDRV_AVOID_AFTER_BUSY", val=.FALSE., defval=.FALSE.)
Expand Down Expand Up @@ -184,6 +185,7 @@ MODULE dbcsr_config
PUBLIC :: dbcsr_set_config, dbcsr_get_default_config, dbcsr_print_config
PUBLIC :: max_kernel_dim
PUBLIC :: get_accdrv_active_device_id, set_accdrv_active_device_id, reset_accdrv_active_device_id
PUBLIC :: use_acc

CONTAINS

Expand Down Expand Up @@ -340,6 +342,7 @@ SUBROUTINE dbcsr_set_config( &
comm_thread_load, &
mm_dense, &
multrec_limit, &
turn_off_acc, &
accdrv_thread_buffers, &
accdrv_avoid_after_busy, &
accdrv_min_flop_process, &
Expand Down Expand Up @@ -368,6 +371,7 @@ SUBROUTINE dbcsr_set_config( &
LOGICAL, INTENT(IN), OPTIONAL :: use_comm_thread
INTEGER, INTENT(IN), OPTIONAL :: comm_thread_load
LOGICAL, INTENT(IN), OPTIONAL :: mm_dense
LOGICAL, INTENT(IN), OPTIONAL :: turn_off_acc
INTEGER, INTENT(IN), OPTIONAL :: multrec_limit, accdrv_thread_buffers
LOGICAL, INTENT(IN), OPTIONAL :: accdrv_avoid_after_busy
INTEGER, INTENT(IN), OPTIONAL :: accdrv_min_flop_process
Expand All @@ -389,7 +393,7 @@ SUBROUTINE dbcsr_set_config( &
CALL dbcsr_cfg%num_layers_3D%set(num_layers_3D)
CALL dbcsr_cfg%use_comm_thread%set(use_comm_thread)
CALL dbcsr_cfg%multrec_limit%set(multrec_limit)
CALL dbcsr_cfg%mm_dense%set(mm_dense)
CALL dbcsr_cfg%turn_off_acc%set(turn_off_acc)
CALL dbcsr_cfg%accdrv_thread_buffers%set(accdrv_thread_buffers)
CALL dbcsr_cfg%accdrv_avoid_after_busy%set(accdrv_avoid_after_busy)
CALL dbcsr_cfg%accdrv_min_flop_process%set(accdrv_min_flop_process)
Expand Down Expand Up @@ -419,9 +423,22 @@ SUBROUTINE dbcsr_set_config( &
CALL dbcsr_cfg%comm_thread_load%set(comm_thread_load)

CALL dbcsr_cfg%n_stacks%set(nstacks)
CALL dbcsr_cfg%mm_stack_size%set(mm_stack_size)
CALL dbcsr_cfg%mm_driver%set(mm_driver)

! If ACC is turned-off, use the CPU defaults
IF (.NOT. PRESENT(mm_stack_size) .AND. dbcsr_cfg%turn_off_acc%val) THEN
CALL dbcsr_cfg%mm_stack_size%set(1000)
ELSE
CALL dbcsr_cfg%mm_stack_size%set(mm_stack_size)
END IF

IF (.NOT. PRESENT(mm_stack_size) .AND. dbcsr_cfg%turn_off_acc%val) THEN
CALL dbcsr_cfg%mm_dense%set(.TRUE.)
ELSE
CALL dbcsr_cfg%mm_dense%set(mm_dense)
END IF


END SUBROUTINE dbcsr_set_config

SUBROUTINE dbcsr_get_default_config( &
Expand All @@ -435,6 +452,7 @@ SUBROUTINE dbcsr_get_default_config( &
use_comm_thread, &
comm_thread_load, &
mm_dense, &
turn_off_acc, &
multrec_limit, &
accdrv_thread_buffers, &
accdrv_avoid_after_busy, &
Expand All @@ -455,7 +473,7 @@ SUBROUTINE dbcsr_get_default_config( &
INTEGER, INTENT(OUT), OPTIONAL :: num_layers_3D
LOGICAL, INTENT(OUT), OPTIONAL :: use_comm_thread
INTEGER, INTENT(OUT), OPTIONAL :: comm_thread_load
LOGICAL, INTENT(OUT), OPTIONAL :: mm_dense
LOGICAL, INTENT(OUT), OPTIONAL :: mm_dense, turn_off_acc
INTEGER, INTENT(OUT), OPTIONAL :: multrec_limit, accdrv_thread_buffers
LOGICAL, INTENT(OUT), OPTIONAL :: accdrv_avoid_after_busy
INTEGER, INTENT(OUT), OPTIONAL :: accdrv_min_flop_process
Expand All @@ -478,6 +496,7 @@ SUBROUTINE dbcsr_get_default_config( &
IF (PRESENT(comm_thread_load)) comm_thread_load = dbcsr_cfg%comm_thread_load%defval
IF (PRESENT(mm_dense)) mm_dense = dbcsr_cfg%mm_dense%defval
IF (PRESENT(multrec_limit)) multrec_limit = dbcsr_cfg%multrec_limit%defval
IF (PRESENT(turn_off_acc)) turn_off_acc = dbcsr_cfg%turn_off_acc%defval
IF (PRESENT(accdrv_thread_buffers)) accdrv_thread_buffers = dbcsr_cfg%accdrv_thread_buffers%defval
IF (PRESENT(accdrv_avoid_after_busy)) accdrv_avoid_after_busy = dbcsr_cfg%accdrv_avoid_after_busy%defval
IF (PRESENT(accdrv_min_flop_process)) accdrv_min_flop_process = dbcsr_cfg%accdrv_min_flop_process%defval
Expand Down Expand Up @@ -607,7 +626,12 @@ SUBROUTINE dbcsr_print_config(unit_nr)
END IF
END BLOCK

IF (has_acc) THEN
IF (dbcsr_cfg%turn_off_acc%val) THEN
WRITE (UNIT=unit_nr, FMT='(1X,A,T81,A4)') &
"DBCSR| ACC is turned off: only CPU is used", dbcsr_cfg%turn_off_acc%print_source()
END IF

IF (use_acc()) THEN
WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11)') &
"DBCSR| ACC: Number of devices/node", dbcsr_acc_get_ndevices()
WRITE (UNIT=unit_nr, FMT='(1X,A,T70,I11,A4)') &
Expand Down Expand Up @@ -671,4 +695,14 @@ SUBROUTINE reset_accdrv_active_device_id()
accdrv_active_device_id = default_accdrv_active_device_id
END SUBROUTINE reset_accdrv_active_device_id

FUNCTION use_acc() RESULT(acc_enabled)
   !! Tells whether the accelerator (ACC) code paths should be taken.
   !! The result is .TRUE. only when has_acc is set and the
   !! turn_off_acc entry of dbcsr_cfg currently holds .FALSE.;
   !! in every other case it is .FALSE.
   LOGICAL :: acc_enabled

   ! Single logical expression, equivalent to the explicit IF/ELSE form
   acc_enabled = has_acc .AND. (.NOT. dbcsr_cfg%turn_off_acc%val)
END FUNCTION use_acc

END MODULE dbcsr_config
6 changes: 3 additions & 3 deletions src/core/dbcsr_lib.F
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ MODULE dbcsr_lib
USE dbcsr_config, ONLY: set_accdrv_active_device_id, &
reset_accdrv_active_device_id, &
dbcsr_set_config, &
has_acc
use_acc
USE dbcsr_kinds, ONLY: int_1_size, &
int_2_size, &
int_4_size, &
Expand Down Expand Up @@ -213,7 +213,7 @@ SUBROUTINE dbcsr_init_lib_pre(mp_comm, io_unit, accdrv_active_device_id)
#endif

! Initialize Acc and set active device
IF (has_acc) THEN
IF (use_acc()) THEN
IF (PRESENT(accdrv_active_device_id)) THEN
CALL set_accdrv_active_device_id(accdrv_active_device_id)
ELSEIF (dbcsr_acc_get_ndevices() > 0) THEN
Expand Down Expand Up @@ -313,7 +313,7 @@ SUBROUTINE dbcsr_finalize_lib()
#endif
! Reset Acc ID
CALL reset_accdrv_active_device_id()
IF (has_acc) THEN
IF (use_acc()) THEN
CALL acc_finalize()
END IF
Expand Down
12 changes: 6 additions & 6 deletions src/mm/dbcsr_mm.F
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ MODULE dbcsr_mm
USE dbcsr_config, ONLY: dbcsr_cfg, &
dbcsr_set_config, &
default_resize_factor, &
has_acc
use_acc
USE dbcsr_data_methods, ONLY: dbcsr_data_set_size_referenced, &
dbcsr_scalar_are_equal, &
dbcsr_scalar_one, &
Expand Down Expand Up @@ -153,7 +153,7 @@ SUBROUTINE dbcsr_multiply_lib_init()

! Each thread has its own working-matrix and its own mempool
ALLOCATE (memtype_product_wm(ithread)%p)
CALL dbcsr_memtype_setup(memtype_product_wm(ithread)%p, has_pool=dbcsr_cfg%use_mempools_cpu%val .OR. has_acc)
CALL dbcsr_memtype_setup(memtype_product_wm(ithread)%p, has_pool=dbcsr_cfg%use_mempools_cpu%val .OR. use_acc())
CALL dbcsr_mempool_limit_capacity(memtype_product_wm(ithread)%p%pool, capacity=MAX(1, dbcsr_cfg%num_layers_3D%val))
END SUBROUTINE dbcsr_multiply_lib_init

Expand Down Expand Up @@ -438,7 +438,7 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
CALL array_nullify(dense_row_sizes)

! Reset GPU errors
IF (has_acc) THEN
IF (use_acc()) THEN
CALL dbcsr_acc_clear_errors()
END IF

Expand All @@ -447,12 +447,12 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
! give any performance benefit)
CALL check_openmpi_rma()

use_mempools = dbcsr_cfg%use_mempools_cpu%val .OR. has_acc
use_mempools = dbcsr_cfg%use_mempools_cpu%val .OR. use_acc()

! setup driver-dependent memory-types and their memory-pools ---------------

! the ab_buffers are shared by all threads
IF (has_acc) THEN
IF (use_acc()) THEN
IF (.NOT. acc_stream_associated(stream_1)) THEN
CALL acc_stream_create(stream_1, "MemCpy (odd ticks)")
CALL acc_stream_create(stream_2, "MemCpy (even ticks)")
Expand Down Expand Up @@ -616,7 +616,7 @@ SUBROUTINE dbcsr_multiply_generic(transa, transb, &
END IF
ab_dense = use_dense_mult
! Use memory pools when no dense
IF (.NOT. has_acc) THEN
IF (.NOT. use_acc()) THEN
CALL dbcsr_memtype_setup(memtype_abpanel_1, has_pool=.NOT. ab_dense .AND. use_mempools, mpi=.TRUE.)
CALL dbcsr_memtype_setup(memtype_abpanel_2, has_pool=.NOT. ab_dense .AND. use_mempools, mpi=.TRUE.)
END IF
Expand Down
24 changes: 12 additions & 12 deletions src/mm/dbcsr_mm_3d.F
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ MODULE dbcsr_mm_3d
dbcsr_data_clear, &
dbcsr_data_set
USE dbcsr_config, ONLY: dbcsr_cfg, &
has_acc
use_acc
USE dbcsr_data_methods, ONLY: &
dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_exists, &
dbcsr_data_get_memory_type, dbcsr_data_get_size, dbcsr_data_get_size_referenced, &
Expand Down Expand Up @@ -1436,7 +1436,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
IF (ASSOCIATED(memtype_abpanel_2%pool)) &
CALL dbcsr_mempool_limit_capacity(memtype_abpanel_2%pool, &
capacity=2)
IF (has_acc) THEN
IF (use_acc()) THEN
! enumerate the blocksizes to keep the following 2D-arrays small.
CALL enumerate_blk_sizes(matrix_right%row_blk_size%low%data, &
dbcsr_max_row_size(matrix_right), &
Expand Down Expand Up @@ -1544,7 +1544,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
!
! Evaluate sizes for workspaces
size_guess_init = 1
IF (.NOT. keep_sparsity .AND. has_acc) THEN
IF (.NOT. keep_sparsity .AND. use_acc()) THEN
size_guess_init = product_matrix_size_guess(matrix_left, matrix_right, product_matrix, &
left_max_data_size, right_max_data_size, &
left_col_nimages, right_row_nimages, &
Expand Down Expand Up @@ -1733,7 +1733,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
IF (is_not_comm) THEN
! Right
IF (do_comm_right(icol3D)) THEN
IF (has_acc) THEN
IF (use_acc()) THEN
CALL timeset(routineN//"_acc_sync_right", handle2)
CALL acc_event_synchronize(right_buffer_p%data%d%acc_ready)
CALL timestop(handle2)
Expand All @@ -1749,7 +1749,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
END IF
! Left
IF (do_comm_left(irow3D)) THEN
IF (has_acc) THEN
IF (use_acc()) THEN
CALL timeset(routineN//"_acc_sync_left", handle2)
CALL acc_event_synchronize(left_buffer_p%data%d%acc_ready)
CALL timestop(handle2)
Expand Down Expand Up @@ -1967,7 +1967,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
ileft_buffer_calc = MIN(ileft_buffer_calc, nbuffers_norms)
! check if right matrix was already initialized
IF (.NOT. right_buffer_p%matrix%valid) THEN
IF (has_acc) CALL dbcsr_data_host2dev(right_buffer_p%data)
IF (use_acc()) CALL dbcsr_data_host2dev(right_buffer_p%data)
! Repoint indices of matrices
CALL make_meta(right_buffer_p, &
right_row_total_nimages, &
Expand All @@ -1985,7 +1985,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
right_norms(:, iright_buffer_calc), &
k_sizes, n_sizes(icol3D)%sizes)
END IF
IF (has_acc) THEN
IF (use_acc()) THEN
CALL acc_transpose_blocks(right_buffer_p%matrix, &
right_buffer_p%trs_stackbuf, &
k_sizes, n_sizes(icol3D)%sizes, &
Expand All @@ -1996,7 +1996,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
END IF
! check if left matrix was already initialized
IF (.NOT. left_buffer_p%matrix%valid) THEN
IF (has_acc) CALL dbcsr_data_host2dev(left_buffer_p%data)
IF (use_acc()) CALL dbcsr_data_host2dev(left_buffer_p%data)
! Repoint indices of matrices
CALL make_meta(left_buffer_p, &
left_col_total_nimages, &
Expand All @@ -2012,7 +2012,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
END IF
END IF
! Wait for left and right buffers transfer to device before proceeding
IF (has_acc) THEN
IF (use_acc()) THEN
CALL timeset(routineN//"_sync_h2d", handle2)
CALL acc_device_synchronize()
CALL timestop(handle2)
Expand Down Expand Up @@ -2224,7 +2224,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
CALL dbcsr_data_release(right_buffers(v_ki)%data)
NULLIFY (right_buffers(v_ki)%matrix%index)
CALL dbcsr_release(right_buffers(v_ki)%matrix)
IF (has_acc) THEN
IF (use_acc()) THEN
CALL dbcsr_data_clear_pointer(right_buffers(v_ki)%trs_stackbuf)
IF (right_buffers(v_ki)%trs_stackbuf%d%memory_type%acc_devalloc) THEN
CALL acc_event_destroy(right_buffers(v_ki)%trs_stackbuf%d%acc_ready)
Expand All @@ -2235,7 +2235,7 @@ SUBROUTINE multiply_3D(imgdist_left, imgdist_right, &
DEALLOCATE (left_buffers, right_buffers)
DEALLOCATE (do_comm_left, do_comm_right)
DEALLOCATE (right_vcol, left_vrow)
IF (has_acc) THEN
IF (use_acc()) THEN
DEALLOCATE (row_blk_sizes2enum, enum2row_blk_sizes)
DEALLOCATE (col_blk_sizes2enum, enum2col_blk_sizes)
END IF
Expand Down Expand Up @@ -2544,7 +2544,7 @@ SUBROUTINE buffer_init(buffer, data_type, &
CALL dbcsr_data_init(buffer%data_before_resize)
CALL dbcsr_data_new(buffer%data_before_resize, data_type, memory_type=data_memory_type)
END IF
new_trs_stackbuf = PRESENT(trs_memory_type) .AND. has_acc
new_trs_stackbuf = PRESENT(trs_memory_type) .AND. use_acc()
!
IF (buffer%is_valid) THEN
! Invalid buffers if data_type is different
Expand Down
Loading

0 comments on commit 1213574

Please sign in to comment.