Skip to content

Commit

Permalink
updated bias add to remove templated data type
Browse files Browse the repository at this point in the history
Now only the device type is templated. (#14)
  • Loading branch information
Dando18 committed Jul 25, 2019
1 parent bcccdf7 commit 7e1d8dd
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 27 deletions.
7 changes: 7 additions & 0 deletions include/math/bias_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,26 @@
*/
#pragma once

#include "mdnn_device_types.h"
#include "tensor/tensor.h"
#include "tensor/tensor_utilities.h"
#include "utilities_internal.h"

namespace magmadnn {
namespace math {

/** Element-wise bias addition: out = x + bias (bias broadcast across each row).
 *  The element type is resolved from the tensors' runtime dtype; only the
 *  device type is templated (the old dtype-templated interface was removed).
 *  @tparam dev  device the tensors live on (CPU or GPU)
 *  @param x     input tensor; assumed 2D, shape(0) x shape(1) -- TODO confirm with callers
 *  @param bias  bias tensor; its r-th element is added to every entry of row r
 *               of x (see the CPU implementation)
 *  @param out   output tensor; must match x/bias in dtype and memory type
 */
template <DeviceType dev>
void bias_add(const Tensor &x, const Tensor &bias, Tensor &out);

} // namespace math
} // namespace magmadnn
24 changes: 7 additions & 17 deletions src/math/bias_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,31 @@
namespace magmadnn {
namespace math {

template <typename T>
void bias_add(const Tensor &x, const Tensor &bias, Tensor &out) {
// assert(T_IS_SAME_MEMORY_TYPE(x, bias) && T_IS_SAME_MEMORY_TYPE(bias, out));
MAGMADNN_ASSERT(TYPES_MATCH(T, x.dtype()) && TYPES_MATCH(T, bias.dtype()) && TYPES_MATCH(T, out.dtype()),
"invalid tensor types");
template <>
void bias_add<CPU>(const Tensor &x, const Tensor &bias, Tensor &out) {
MAGMADNN_ASSERT(::magmadnn::utilities::do_tensors_match(out.dtype(), GetMemoryType<CPU>::value, {x, bias, out}),
"bias_add: tensors must have same dtype and memory type.");

if (out.get_memory_type() == HOST) {
FOR_ALL_DTYPES(out.dtype(), T, {
const T *x_ptr = x.get_ptr<T>();
const T *bias_ptr = bias.get_ptr<T>();
T *out_ptr = out.get_ptr<T>();

index_t x_rows = x.shape(0);
index_t x_cols = x.shape(1);
// unsigned int x_size = x_rows*x_cols;

/* TODO -- test openmp here */
for (unsigned int r = 0; r < x_rows; r++) {
for (unsigned int c = 0; c < x_cols; c++) {
out_ptr[r * x_cols + c] = x_ptr[r * x_cols + c] + bias_ptr[r];
}
}
}
#if defined(_HAS_CUDA_)
else {
bias_add_device<T>(x, bias, out);
}
#endif
})
}
#define COMPILE_BIASADD(type) template void bias_add<type>(const Tensor &, const Tensor &, Tensor &out);
CALL_FOR_ALL_TYPES(COMPILE_BIASADD)
#undef COMPILE_BIASADD

#if defined(_USE_CUDNN_BIAS_)
/* temporarily undefined this until cudnn works */

/* TODO -- investigate using CuDNN for this */
#if defined(_HAS_CUDA_)
template <typename T>
void bias_add_device(const Tensor &x, const Tensor &bias, Tensor &out) {
Expand Down
19 changes: 9 additions & 10 deletions src/math/bias_add_device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace magmadnn {
namespace math {

template <typename T>
__global__ void kernel_bias_add_device(const T *x, const T *bias, T *out, unsigned int x_rows, unsigned int x_cols) {
__global__ void kernel_bias_add(const T *x, const T *bias, T *out, unsigned int x_rows, unsigned int x_cols) {
unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int stride = blockDim.x * gridDim.x;

Expand All @@ -23,17 +23,16 @@ __global__ void kernel_bias_add_device(const T *x, const T *bias, T *out, unsign
}
}

/** GPU specialization of bias_add: launches kernel_bias_add for the tensors'
 *  runtime dtype. The grid is sized so that x_rows * x_cols total threads
 *  cover every element of x/out.
 *  NOTE(review): kernel_bias_add takes `unsigned int` extents, so x_rows and
 *  x_cols narrow at the call boundary -- confirm tensor sizes stay below
 *  UINT_MAX before relying on this for very large tensors.
 *  NOTE(review): no cudaGetLastError()/sync check after the launch -- launch
 *  failures are silent here; verify error handling happens at a higher level.
 */
template <>
void bias_add<GPU>(const Tensor &x, const Tensor &bias, Tensor &out) {
    const size_t x_rows = x.shape(0);
    const size_t x_cols = x.shape(1);

    FOR_ALL_DTYPES(out.dtype(), T, {
        kernel_bias_add<<<(x_rows * x_cols + BLK_SIZE - 1) / BLK_SIZE, BLK_SIZE>>>(
            x.get_ptr<T>(), bias.get_ptr<T>(), out.get_ptr<T>(), x_rows, x_cols);
    })
}

} // namespace math
} // namespace magmadnn
Expand Down

0 comments on commit 7e1d8dd

Please sign in to comment.