-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcudakernel.cu
44 lines (43 loc) · 1.52 KB
/
cudakernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/**
* This file is part of the CALDGEMM library.
*
* Copyright 2015:
* - David Rohr ([email protected])
* - Matthias Bach ([email protected])
* - Matthias Kretz ([email protected])
*
* This file is part of CALDGEMM.
*
* CALDGEMM is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CALDGEMM is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with CALDGEMM. If not, see <http://www.gnu.org/licenses/>.
*/
__global__ void CUDAKernelName(double* C, double* A, double* B, size_t height1, size_t height2, size_t width, double Alpha, double Beta, size_t pitch)
{
for (int j = blockIdx.y * blockDim.y + threadIdx.y;j < height2;j += blockDim.y * gridDim.y)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;i < height1;i += blockDim.x * gridDim.x)
{
double addval = 0;
#ifdef CALDGEMM_FORCE_K
for (int k = 0;k < CALDGEMM_FORCE_K;k++)
#else
for (int k = 0;k < width;k++)
#endif
{
addval += A[j * width + k] * B[i * width + k];
}
double* destptr = &C[j * pitch + i];
*destptr = Alpha * addval + Beta * *destptr;
}
}
}