Skip to content

Commit

Permalink
Fix: Potential overflow in number of thread-blocks in periodicity pea…
Browse files Browse the repository at this point in the history
…k-find/thresholding

Former-commit-id: 5481df6
  • Loading branch information
KAdamek committed Mar 27, 2018
1 parent 6772d10 commit 4110082
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 12 deletions.
53 changes: 50 additions & 3 deletions lib/device_peak_find.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// James Sharpe's peak finding code
//#define PEAK_FIND_DEBUG

#include "headers/device_BC_plan.h"

Expand Down Expand Up @@ -45,9 +46,55 @@ void Peak_find_for_periodicity_search_old(float *d_input_SNR, ushort *d_input_ha
dilate_peak_find_for_periods_old<<<gridSize, blockDim>>>(d_input_SNR, d_input_harmonics, d_peak_list, nTimesamples, nDMs, 0, threshold, max_peak_size, gmem_peak_pos, d_MSD, DM_shift, inBin);
}

void Peak_find_for_periodicity_search(float *d_input_SNR, ushort *d_input_harmonics, float *d_peak_list, int nDMs, int nTimesamples, float threshold, int max_peak_size, int *gmem_peak_pos, float *d_MSD, int DM_shift, int inBin){
void Peak_find_for_periodicity_search(float *d_input_SNR, ushort *d_input_harmonics, float *d_peak_list, int secondary_size, int primary_size, float threshold, int max_peak_size, int *gmem_peak_pos, float *d_MSD, int DM_shift, int inBin){
// nDMs = secondary_size
// nTimesamples = primary_size
//---------> Nvidia stuff
// find maximum values for maximum grid sizes in x and y
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, CARD);
size_t max_x = deviceProp.maxGridSize[0], max_y = deviceProp.maxGridSize[1];

//---------> Task specific
int nBlocks_p, nBlocks_s, nRepeats;
size_t sec_per_chunk, sec_tail;
dim3 blockDim(32, 32, 1);
dim3 gridSize(1 + ((nTimesamples-1)/blockDim.x), 1 + ((nDMs-1)/blockDim.y), 1);

dilate_peak_find_for_periods<<<gridSize, blockDim>>>(d_input_SNR, d_input_harmonics, d_peak_list, nTimesamples, nDMs, 0, threshold, max_peak_size, gmem_peak_pos, d_MSD, DM_shift, inBin);
// number of thread-blocks required for the task
nBlocks_p = 1 + ((primary_size-1)/blockDim.x);
if((size_t) nBlocks_p > max_x) {printf("Too many DM trials!\n"); exit(1);}
nBlocks_s = 1 + ((secondary_size-1)/blockDim.y);

// number of kernel launches required for the task
nRepeats = ceil( (float) ((float) nBlocks_s)/((float) max_y));
sec_per_chunk = (int) (secondary_size/nRepeats);
sec_tail = secondary_size - sec_per_chunk*(nRepeats-1);

// creating vector with y dim processed by each kernel launch
std::vector<size_t> secondary_size_per_chunk;
for(int f=0; f<(nRepeats-1); f++){
secondary_size_per_chunk.push_back(sec_per_chunk);
}
secondary_size_per_chunk.push_back(sec_tail);

#ifdef PEAK_FIND_DEBUG
printf("Primary:%d; Secondary:%d\n", primary_size, secondary_size);
printf("gridSize: [%d; %d; %d]\n", nBlocks_p, nBlocks_s, 1);
printf("blockSize: [%d; %d; %d]\n", blockDim.x, blockDim.y, blockDim.z);
printf("nRepeats: %d; sec_per_chunk: %zu; sec_tail: %zu;\n", nRepeats, sec_per_chunk, sec_tail);
printf("Secondary dimensions per chunk: ");
for(size_t f=0; f<secondary_size_per_chunk.size(); f++) printf("%zu ", secondary_size_per_chunk[f]); printf("\n");
#endif

// launching GPU kernels
size_t shift = 0;
for(int f=0; f<(int) secondary_size_per_chunk.size(); f++){
nBlocks_s = 1 + ((secondary_size_per_chunk[f]-1)/blockDim.y);
dim3 gridSize(nBlocks_p, nBlocks_s, 1);

dilate_peak_find_for_periods<<<gridSize, blockDim>>>(&d_input_SNR[shift*primary_size], d_input_harmonics, d_peak_list, primary_size, secondary_size, 0, threshold, max_peak_size, gmem_peak_pos, d_MSD, DM_shift, inBin);

shift = shift + secondary_size_per_chunk[f];
}

}
46 changes: 37 additions & 9 deletions lib/device_threshold.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,32 +85,60 @@ int Threshold_for_periodicity_old(float *d_input, ushort *d_input_harms, float *
}

int Threshold_for_periodicity(float *d_input, ushort *d_input_harms, float *d_output_list, int *gmem_pos, float *d_MSD, float threshold, int primary_size, int secondary_size, int DM_shift, int inBin, int max_list_size) {
//---------> Nvidia stuff
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, CARD);
size_t max_x = deviceProp.maxGridSize[0], max_y = deviceProp.maxGridSize[1];
THR_init();

//---------> Task specific
int nBlocks_p, nBlocks_s;
int nBlocks_p, nBlocks_s, nRepeats;
size_t sec_per_chunk, sec_tail;

dim3 gridSize(1, 1, 1);
dim3 blockSize(WARP, WARP/2, 1);
dim3 blockSize(WARP, WARP, 1);

nBlocks_p = (int) (primary_size/(blockSize.x*THR_ELEM_PER_THREAD));
if( (primary_size%(blockSize.x*THR_ELEM_PER_THREAD))!=0 ) nBlocks_p++;
if((size_t) nBlocks_p > max_x) {printf("Too many DM trials!\n"); exit(1);}

nBlocks_s = (int) (secondary_size/blockSize.y);
if( (secondary_size%blockSize.y)!=0 ) nBlocks_s++;

gridSize.x = nBlocks_p;
gridSize.y = nBlocks_s;
gridSize.z = 1;
nRepeats = ceil( (float) ((float) nBlocks_s)/((float) max_y));

sec_per_chunk = (int) (secondary_size/nRepeats);
sec_tail = secondary_size - sec_per_chunk*(nRepeats-1);

std::vector<size_t> secondary_size_per_chunk;
for(int f=0; f<(nRepeats-1); f++){
secondary_size_per_chunk.push_back(sec_per_chunk);
}
secondary_size_per_chunk.push_back(sec_tail);

#ifdef THRESHOLD_DEBUG
printf("Primary:%d; Secondary:%d\n", primary_size, secondary_size);
printf("gridSize: [%d; %d; %d]\n", gridSize.x, gridSize.y, gridSize.z);
printf("gridSize: [%d; %d; %d]\n", nBlocks_p, nBlocks_s, gridSize.z);
printf("blockSize: [%d; %d; %d]\n", blockSize.x, blockSize.y, blockSize.z);
printf("nRepeats: %d; sec_per_chunk: %zu; sec_tail: %zu;\n", nRepeats, sec_per_chunk, sec_tail);
printf("Secondary dimensions per chunk: ");
for(size_t f=0; f<secondary_size_per_chunk.size(); f++) printf("%zu ", secondary_size_per_chunk[f]); printf("\n");
#endif

size_t shift = 0;
for(int f=0; f<(int) secondary_size_per_chunk.size(); f++){
nBlocks_s = (int) (secondary_size_per_chunk[f]/blockSize.y);
if( (secondary_size_per_chunk[f]%blockSize.y)!=0 ) nBlocks_s++;

THR_init();
GPU_Threshold_for_periodicity_kernel<<<gridSize, blockSize>>>(d_input, d_input_harms, d_output_list, gmem_pos, d_MSD, threshold, primary_size, secondary_size, DM_shift, max_list_size, inBin);
gridSize.x = nBlocks_p;
gridSize.y = nBlocks_s;
gridSize.z = 1;

checkCudaErrors(cudaGetLastError());
GPU_Threshold_for_periodicity_kernel<<<gridSize, blockSize>>>(&d_input[shift*primary_size], d_input_harms, d_output_list, gmem_pos, d_MSD, threshold, primary_size, secondary_size_per_chunk[f], DM_shift, max_list_size, inBin);

checkCudaErrors(cudaGetLastError());
shift = shift + secondary_size_per_chunk[f];
}

return (0);
}

0 comments on commit 4110082

Please sign in to comment.