forked from facebookarchive/fbcuda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ShuffleTypes.cuh
68 lines (54 loc) · 1.84 KB
/
ShuffleTypes.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "cuda/ComputeCapabilities.cuh"
#include "cuda/Pair.cuh"
#include <cuda_runtime.h>
namespace facebook { namespace cuda {
/** @file
Templated warp shuffles that work for basic and pair types
*/
/**
 * Warp shuffle: returns `val` as held by lane `srcLane`.
 *
 * When built with CUDA 9 or newer this forwards to `__shfl_sync`, because
 * the mask-less `__shfl` is deprecated there and is not available when
 * compiling for compute capability 7.0 and above. Older toolkits, which do
 * not provide the `_sync` variants, keep using `__shfl`.
 *
 * @param val     value contributed by the calling lane
 * @param srcLane lane to read from, forwarded unchanged to the intrinsic
 * @param width   shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask    lanes taking part in the exchange (defaults to all 32).
 *                Every lane named in `mask` must execute this call with the
 *                same mask; pass a narrower mask when only part of the warp
 *                reaches the call. Ignored on the pre-CUDA 9 path.
 * @return the value produced by the underlying shuffle intrinsic
 */
template <typename T>
__device__ __forceinline__ T
shfl(const T val, int srcLane, int width = WARP_SIZE,
     unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return __shfl_sync(mask, val, srcLane, width);
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return __shfl(val, srcLane, width);
#endif
}
/**
 * Warp shuffle-up: returns `val` as held by the lane `delta` positions
 * below the caller, as defined by the underlying intrinsic.
 *
 * When built with CUDA 9 or newer this forwards to `__shfl_up_sync`,
 * because the mask-less `__shfl_up` is deprecated there and is not
 * available when compiling for compute capability 7.0 and above. Older
 * toolkits keep using `__shfl_up`.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset, forwarded unchanged to the intrinsic
 * @param width shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask  lanes taking part in the exchange (defaults to all 32).
 *              Every lane named in `mask` must execute this call with the
 *              same mask. Ignored on the pre-CUDA 9 path.
 * @return the value produced by the underlying shuffle intrinsic
 */
template <typename T>
__device__ __forceinline__ T
shfl_up(const T val, int delta, int width = WARP_SIZE,
        unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return __shfl_up_sync(mask, val, delta, width);
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return __shfl_up(val, delta, width);
#endif
}
/**
 * Warp shuffle-down: returns `val` as held by the lane `delta` positions
 * above the caller, as defined by the underlying intrinsic.
 *
 * When built with CUDA 9 or newer this forwards to `__shfl_down_sync`,
 * because the mask-less `__shfl_down` is deprecated there and is not
 * available when compiling for compute capability 7.0 and above. Older
 * toolkits keep using `__shfl_down`.
 *
 * @param val   value contributed by the calling lane
 * @param delta lane offset, forwarded unchanged to the intrinsic
 * @param width shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask  lanes taking part in the exchange (defaults to all 32).
 *              Every lane named in `mask` must execute this call with the
 *              same mask. Ignored on the pre-CUDA 9 path.
 * @return the value produced by the underlying shuffle intrinsic
 */
template <typename T>
__device__ __forceinline__ T
shfl_down(const T val, int delta, int width = WARP_SIZE,
          unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return __shfl_down_sync(mask, val, delta, width);
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return __shfl_down(val, delta, width);
#endif
}
/**
 * Warp butterfly shuffle: returns `val` as held by the lane selected by
 * XOR-ing the caller's lane with `laneMask`, as defined by the underlying
 * intrinsic.
 *
 * When built with CUDA 9 or newer this forwards to `__shfl_xor_sync`,
 * because the mask-less `__shfl_xor` is deprecated there and is not
 * available when compiling for compute capability 7.0 and above. Older
 * toolkits keep using `__shfl_xor`.
 *
 * @param val      value contributed by the calling lane
 * @param laneMask XOR pattern, forwarded unchanged to the intrinsic
 * @param width    shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask     lanes taking part in the exchange (defaults to all 32);
 *                 not to be confused with `laneMask`. Every lane named in
 *                 `mask` must execute this call with the same mask.
 *                 Ignored on the pre-CUDA 9 path.
 * @return the value produced by the underlying shuffle intrinsic
 */
template <typename T>
__device__ __forceinline__ T
shfl_xor(const T val, int laneMask, int width = WARP_SIZE,
         unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return __shfl_xor_sync(mask, val, laneMask, width);
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return __shfl_xor(val, laneMask, width);
#endif
}
/**
 * Warp shuffle for `Pair<K, V>`: shuffles `p.k` and `p.v` separately from
 * lane `srcLane` and returns them as a new pair.
 *
 * When built with CUDA 9 or newer both members go through `__shfl_sync`,
 * because the mask-less `__shfl` is deprecated there and is not available
 * when compiling for compute capability 7.0 and above. Older toolkits keep
 * using `__shfl`.
 *
 * @param p       pair contributed by the calling lane
 * @param srcLane lane to read from, forwarded unchanged to the intrinsic
 * @param width   shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask    lanes taking part in the exchange (defaults to all 32).
 *                Every lane named in `mask` must execute this call with the
 *                same mask. Ignored on the pre-CUDA 9 path.
 * @return a pair built from the two shuffled members
 */
template <typename K, typename V>
__device__ __forceinline__ Pair<K, V>
shfl(const Pair<K, V>& p, int srcLane, int width = WARP_SIZE,
     unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return Pair<K, V>(__shfl_sync(mask, p.k, srcLane, width),
                    __shfl_sync(mask, p.v, srcLane, width));
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return Pair<K, V>(__shfl(p.k, srcLane, width),
                    __shfl(p.v, srcLane, width));
#endif
}
/**
 * Warp shuffle-up for `Pair<K, V>`: shuffles `p.k` and `p.v` separately by
 * `delta` lanes and returns them as a new pair.
 *
 * When built with CUDA 9 or newer both members go through
 * `__shfl_up_sync`, because the mask-less `__shfl_up` is deprecated there
 * and is not available when compiling for compute capability 7.0 and
 * above. Older toolkits keep using `__shfl_up`.
 *
 * @param p     pair contributed by the calling lane
 * @param delta lane offset, forwarded unchanged to the intrinsic
 * @param width shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask  lanes taking part in the exchange (defaults to all 32).
 *              Every lane named in `mask` must execute this call with the
 *              same mask. Ignored on the pre-CUDA 9 path.
 * @return a pair built from the two shuffled members
 */
template <typename K, typename V>
__device__ __forceinline__ Pair<K, V>
shfl_up(const Pair<K, V>& p, int delta, int width = WARP_SIZE,
        unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return Pair<K, V>(__shfl_up_sync(mask, p.k, delta, width),
                    __shfl_up_sync(mask, p.v, delta, width));
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return Pair<K, V>(__shfl_up(p.k, delta, width),
                    __shfl_up(p.v, delta, width));
#endif
}
/**
 * Warp shuffle-down for `Pair<K, V>`: shuffles `p.k` and `p.v` separately
 * by `delta` lanes and returns them as a new pair.
 *
 * When built with CUDA 9 or newer both members go through
 * `__shfl_down_sync`, because the mask-less `__shfl_down` is deprecated
 * there and is not available when compiling for compute capability 7.0 and
 * above. Older toolkits keep using `__shfl_down`.
 *
 * @param p     pair contributed by the calling lane
 * @param delta lane offset, forwarded unchanged to the intrinsic
 * @param width shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask  lanes taking part in the exchange (defaults to all 32).
 *              Every lane named in `mask` must execute this call with the
 *              same mask. Ignored on the pre-CUDA 9 path.
 * @return a pair built from the two shuffled members
 */
template <typename K, typename V>
__device__ __forceinline__ Pair<K, V>
shfl_down(const Pair<K, V>& p, int delta, int width = WARP_SIZE,
          unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return Pair<K, V>(__shfl_down_sync(mask, p.k, delta, width),
                    __shfl_down_sync(mask, p.v, delta, width));
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return Pair<K, V>(__shfl_down(p.k, delta, width),
                    __shfl_down(p.v, delta, width));
#endif
}
/**
 * Warp butterfly shuffle for `Pair<K, V>`: shuffles `p.k` and `p.v`
 * separately using the XOR pattern `laneMask` and returns them as a new
 * pair.
 *
 * When built with CUDA 9 or newer both members go through
 * `__shfl_xor_sync`, because the mask-less `__shfl_xor` is deprecated there
 * and is not available when compiling for compute capability 7.0 and
 * above. Older toolkits keep using `__shfl_xor`.
 *
 * @param p        pair contributed by the calling lane
 * @param laneMask XOR pattern, forwarded unchanged to the intrinsic
 * @param width    shuffle width, forwarded unchanged (defaults to WARP_SIZE)
 * @param mask     lanes taking part in the exchange (defaults to all 32);
 *                 not to be confused with `laneMask`. Every lane named in
 *                 `mask` must execute this call with the same mask.
 *                 Ignored on the pre-CUDA 9 path.
 * @return a pair built from the two shuffled members
 */
template <typename K, typename V>
__device__ __forceinline__ Pair<K, V>
shfl_xor(const Pair<K, V>& p, int laneMask, int width = WARP_SIZE,
         unsigned mask = 0xffffffffu) {
#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
  return Pair<K, V>(__shfl_xor_sync(mask, p.k, laneMask, width),
                    __shfl_xor_sync(mask, p.v, laneMask, width));
#else
  (void)mask; // the legacy intrinsic takes no participant mask
  return Pair<K, V>(__shfl_xor(p.k, laneMask, width),
                    __shfl_xor(p.v, laneMask, width));
#endif
}
} } // namespace