forked from NVIDIA/nv-wavenet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nv_wavenet_conversions.cuh
116 lines (92 loc) · 3.92 KB
/
nv_wavenet_conversions.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/******************************************************************************
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
__global__ void convert_float2half_kernel(half* dst, float* src, size_t size) {
int totalThreads = gridDim.x * blockDim.x;
int blockStart = blockDim.x * blockIdx.x;
for (int i = blockStart + threadIdx.x; i < size; i += totalThreads) {
dst[i] = __float2half(src[i]);
}
}
bool isDevicePtr(const void* ptr) {
cudaPointerAttributes attributes;
cudaError_t result = cudaPointerGetAttributes(&attributes, ptr);
return (result == cudaSuccess) && (attributes.memoryType == cudaMemoryTypeDevice);
}
void convert_float2half(half* dst, float* src, size_t size) {
float* tmp;
if (isDevicePtr(src)) {
tmp = src;
}
else {
gpuErrChk(cudaMallocHost(&tmp, size*sizeof(float)));
memcpy(tmp, src, size*sizeof(float));
}
dim3 block(256,1,1);
dim3 grid(256,1,1);
convert_float2half_kernel<<<grid,block>>>(dst, tmp, size);
gpuErrChk(cudaDeviceSynchronize());
if (!isDevicePtr(src)) gpuErrChk(cudaFreeHost(tmp));
}
void convert_float2half2_vectorized(half2* dst, float* src, int M, int K) {
float* tmp;
if (isDevicePtr(src)) {
tmp = src;
}
else {
gpuErrChk(cudaMallocHost(&tmp, M*K*sizeof(float)));
memcpy(tmp, src, M*K*sizeof(float));
}
dim3 block(32);
assert((M%block.x)==0);
dim3 grid(M/block.x);
vectorizeWeights<float><<<grid,block>>>(M,K,dst, tmp);
gpuErrChk(cudaDeviceSynchronize());
gpuErrChk(cudaDeviceSynchronize());
if (!isDevicePtr(src)) gpuErrChk(cudaFreeHost(tmp));
}
__global__ void convert_half2float_kernel(float* dst, half* src, size_t size) {
int offset = blockDim.x*blockIdx.x + threadIdx.x;
if (offset < size)
dst[offset] = __half2float(src[offset]);
}
void convert_half2float(float* dst, half* dSrc, size_t size) {
float* tmp;
if (isDevicePtr(dst)) {
tmp = dst;
}
else {
gpuErrChk(cudaMallocHost(&tmp, size*sizeof(float)));
}
dim3 block(256,1,1);
dim3 grid((size + block.x - 1)/block.x, 1, 1);
convert_half2float_kernel<<<grid,block>>>(tmp, dSrc, size);
gpuErrChk(cudaDeviceSynchronize());
if (!isDevicePtr(dst)) {
memcpy(dst, tmp, size*sizeof(float));
gpuErrChk(cudaFreeHost(tmp));
}
}