Skip to content

Commit

Permalink
updated bias add to remove templated data type
Browse files Browse the repository at this point in the history
Now only the device type is templated. (#14)
  • Loading branch information
Dando18 committed Jul 25, 2019
1 parent bcccdf7 commit 7e1d8dd
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 27 deletions.
7 changes: 7 additions & 0 deletions include/math/bias_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,26 @@
*/
#pragma once

#include "mdnn_device_types.h"
#include "tensor/tensor.h"
#include "tensor/tensor_utilities.h"
#include "utilities_internal.h"

namespace magmadnn {
namespace math {

/** Element-wise bias addition: out = x + bias (bias broadcast across each row).
 *  The element type is resolved from the tensors' runtime dtype; only the
 *  device type is templated (the old dtype-templated interface was removed).
 *  @tparam dev  device the tensors live on (CPU or GPU)
 *  @param x     input tensor; assumed 2D, shape(0) x shape(1) -- TODO confirm with callers
 *  @param bias  bias tensor; its r-th element is added to every entry of row r
 *               of x (see the CPU implementation)
 *  @param out   output tensor; must match x/bias in dtype and memory type
 */
template <DeviceType dev>
void bias_add(const Tensor &x, const Tensor &bias, Tensor &out);

} // namespace math
} // namespace magmadnn
24 changes: 7 additions & 17 deletions src/math/bias_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,31 @@
namespace magmadnn {
namespace math {

template <typename T>
void bias_add(const Tensor &x, const Tensor &bias, Tensor &out) {
// assert(T_IS_SAME_MEMORY_TYPE(x, bias) && T_IS_SAME_MEMORY_TYPE(bias, out));
MAGMADNN_ASSERT(TYPES_MATCH(T, x.dtype()) && TYPES_MATCH(T, bias.dtype()) && TYPES_MATCH(T, out.dtype()),
"invalid tensor types");
template <>
void bias_add<CPU>(const Tensor &x, const Tensor &bias, Tensor &out) {
MAGMADNN_ASSERT(::magmadnn::utilities::do_tensors_match(out.dtype(), GetMemoryType<CPU>::value, {x, bias, out}),
"bias_add: tensors must have same dtype and memory type.");

if (out.get_memory_type() == HOST) {
FOR_ALL_DTYPES(out.dtype(), T, {
const T *x_ptr = x.get_ptr<T>();
const T *bias_ptr = bias.get_ptr<T>();
T *out_ptr = out.get_ptr<T>();

index_t x_rows = x.shape(0);
index_t x_cols = x.shape(1);
// unsigned int x_size = x_rows*x_cols;

/* TODO -- test openmp here */
for (unsigned int r = 0; r < x_rows; r++) {
for (unsigned int c = 0; c < x_cols; c++) {
out_ptr[r * x_cols + c] = x_ptr[r * x_cols + c] + bias_ptr[r];
}
}
}
#if defined(_HAS_CUDA_)
else {
bias_add_device<T>(x, bias, out);
}
#endif
})
}
#define COMPILE_BIASADD(type) template void bias_add<type>(const Tensor &, const Tensor &, Tensor &out);
CALL_FOR_ALL_TYPES(COMPILE_BIASADD)
#undef COMPILE_BIASADD

#if defined(_USE_CUDNN_BIAS_)
/* temporarily undefined this until cudnn works */

/* TODO -- investigate using CuDNN for this */
#if defined(_HAS_CUDA_)
template <typename T>
void bias_add_device(const Tensor &x, const Tensor &bias, Tensor &out) {
Expand Down
19 changes: 9 additions & 10 deletions src/math/bias_add_device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace magmadnn {
namespace math {

template <typename T>
__global__ void kernel_bias_add_device(const T *x, const T *bias, T *out, unsigned int x_rows, unsigned int x_cols) {
__global__ void kernel_bias_add(const T *x, const T *bias, T *out, unsigned int x_rows, unsigned int x_cols) {
unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int stride = blockDim.x * gridDim.x;

Expand All @@ -23,17 +23,16 @@ __global__ void kernel_bias_add_device(const T *x, const T *bias, T *out, unsign
}
}

/** GPU specialization of bias_add: launches kernel_bias_add for the tensors'
 *  runtime dtype. The grid is sized so that x_rows * x_cols total threads
 *  cover every element of x/out.
 *  NOTE(review): kernel_bias_add takes `unsigned int` extents, so x_rows and
 *  x_cols narrow at the call boundary -- confirm tensor sizes stay below
 *  UINT_MAX before relying on this for very large tensors.
 *  NOTE(review): no cudaGetLastError()/sync check after the launch -- launch
 *  failures are silent here; verify error handling happens at a higher level.
 */
template <>
void bias_add<GPU>(const Tensor &x, const Tensor &bias, Tensor &out) {
    const size_t x_rows = x.shape(0);
    const size_t x_cols = x.shape(1);

    FOR_ALL_DTYPES(out.dtype(), T, {
        kernel_bias_add<<<(x_rows * x_cols + BLK_SIZE - 1) / BLK_SIZE, BLK_SIZE>>>(
            x.get_ptr<T>(), bias.get_ptr<T>(), out.get_ptr<T>(), x_rows, x_cols);
    })
}

} // namespace math
} // namespace magmadnn
Expand Down

0 comments on commit 7e1d8dd

Please sign in to comment.