Merge pull request #679 from clab/nodes-separate-final

Separate nodes into different files Former-commit-id: 9606e2a
clab · Jul 10, 2017 · 1241cfc · 1241cfc
2 parents 693c5e4 + 786d3a2
commit 1241cfc
Show file tree

Hide file tree

Showing 92 changed files with 5,270 additions and 4,430 deletions.
diff --git a/doc/source/code_style.rst b/doc/source/code_style.rst
@@ -8,7 +8,29 @@ Coding Tips
 One of the most common things that one will want to do to modify DyNet is to add a new operation
 to calculate a new function.
 You can find more information on how to do so at the end of the tutorial slides
-`here <http://phontron.com/slides/emnlp2016-dynet-tutorial-part1.pdf>`_.
+`here <http://phontron.com/slides/emnlp2016-dynet-tutorial-part1.pdf>`_ (note that some of
+the file names mentioned in the slides are out of date).
+
+Taking a look at the existing operations in the ``nodes-XXX.h`` and ``nodes-XXX.cc`` files
+will be the best guide in creating new operations. Here are some fine-grained tips for
+those that want to dive into the process.
+
+1. ``fx`` is a pointer to the (preallocated) location for the result
+   of forward to be stored
+2. ``fx`` is not initialized, so after calling forward ``fx`` must contain the correct answer
+3. dEdxi MUST **ACCUMULATE** a result since multiple calls to forward may depend on
+   the same ``x_i``. Even a trivial operation such as Identity must be implemented as
+   ``dEdx1 += dEdf``.
+4. Scalar results of forward are placed in ``fx.v[0]``
+5. DyNet manages its own memory, not Eigen, and it is configured with the
+   EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate
+   memory, it is (probably) because of an implicit creation of a temporary variable.
+   If you really do need a temporary variable, its capacity must be requested by
+   ``Node::aux_storage_size``.
+
+And here are some notes on debugging problems with new operations:
+
+1. ``fx`` is uninitialized when forward is called. Are you relying on it being 0?
+2. dEdxi must accumulate (see point 3 above!)
 
 Coding Practices
 ----------------

diff --git a/dynet/CMakeLists.txt b/dynet/CMakeLists.txt
@@ -3,11 +3,11 @@
 set(dynet_library_SRCS
     aligned-mem-pool.cc
     cfsm-builder.cc
-    dynet.cc
     deep-lstm.cc
     devices.cc
     dict.cc
     dim.cc
+    dynet.cc
     exec.cc
     expr.cc
     fast-lstm.cc
@@ -17,34 +17,51 @@ set(dynet_library_SRCS
     gru.cc
     hsm-builder.cc
     init.cc
+    io.cc
     lstm.cc
     mem.cc
     model.cc
-    nodes.cc
-    nodes-common.cc
+    nodes-activations.cc
+    nodes-affinetransform.cc
+    nodes-arith-const.cc
+    nodes-arith-cwise.cc
+    nodes-arith-scalar.cc
+    nodes-arith-sum.cc
+    nodes-arith-unary.cc
+    nodes-concat.cc
+    nodes-const.cc
     nodes-contract.cc
     nodes-conv.cc
     nodes-conv2d.cc
+    nodes-dropout.cc
+    nodes-flow.cc
+    nodes-hinge.cc
+    nodes-linalg.cc
+    nodes-logsumexp.cc
+    nodes-losses.cc
+    nodes-matrixmultiply.cc
     nodes-maxpooling2d.cc
+    nodes-minmax.cc
+    nodes-moments.cc
+    nodes-normalization.cc
+    nodes-norms.cc
     nodes-pickneglogsoftmax.cc
-    nodes-matrixmultiply.cc
-    nodes-hinge.cc
-    nodes-affinetransform.cc
+    nodes-random.cc
+    nodes-select.cc
     nodes-similarities.cc
-    nodes-norms.cc
-    nodes-unary-arith.cc
+    nodes-softmaxes.cc
+    nodes-trig.cc
     param-init.cc
     param-nodes.cc
     pretrain.cc
-    rnn.cc
     rnn-state-machine.cc
+    rnn.cc
     saxe-init.cc
     shadow-params.cc
     tensor.cc
     training.cc
     treelstm.cc
     weight-decay.cc
-    io.cc
 )
 if(ENABLE_BOOST)
   list(APPEND dynet_library_SRCS mp.cc)
@@ -53,51 +70,99 @@ endif()
 # Headers:
 set(dynet_library_HDRS
     aligned-mem-pool.h
-    cfsm-builder.h
-    cudnn-ops.h
     c2w.h
-    dynet.h
+    cfsm-builder.h
+    cuda-matrix-multiply.h
     cuda.h
+    cudnn-ops.h
+    deep-lstm.h
     devices.h
     dict.h
     dim.h
+    dynet-helper.h
+    dynet.h
+    except.h
     exec.h
     expr.h
     fast-lstm.h
     functors.h
     globals.h
     gpu-kernels.h
     gpu-ops.h
+    grad-check.h
     graph.h
     gru.h
     hsm-builder.h
     init.h
+    io.h
     lstm.h
     mem.h
     model.h
-    nodes.h
     nodes-contract.h
     nodes-conv.h
+    nodes-macros.h
+    nodes.h
     op-helper.h
+    param-init.h
     param-nodes.h
+    pretrain.h
     rnn-state-machine.h
     rnn.h
     saxe-init.h
     shadow-params.h
+    sig.h
     simd-functors.h
+    str-util.h
     tensor.h
     timing.h
     training.h
     treelstm.h
-    except.h
-    nodes-macros.h
     weight-decay.h
-    io.h
 )
 if(ENABLE_BOOST)
   list(APPEND dynet_library_HDRS mp.h)
 endif()
 
+set(dynet_gpu_SRCS
+    cuda.cc
+    cudnn-ops.cu
+    gpu-ops.cu 
+    gpu-nodes-activations.cu
+    gpu-nodes-affinetransform.cu
+    gpu-nodes-arith-const.cu
+    gpu-nodes-arith-cwise.cu
+    gpu-nodes-arith-scalar.cu
+    gpu-nodes-arith-sum.cu
+    gpu-nodes-arith-unary.cu
+    gpu-nodes-concat.cu
+    gpu-nodes-const.cu
+    gpu-nodes-contract.cu
+    gpu-nodes-conv2d.cu
+    gpu-nodes-conv.cu
+    gpu-nodes-dropout.cu
+    gpu-nodes-flow.cu
+    gpu-nodes-hinge.cu
+    gpu-nodes-linalg.cu
+    gpu-nodes-logsumexp.cu
+    gpu-nodes-losses.cu
+    gpu-nodes-matrixmultiply.cu
+    gpu-nodes-maxpooling2d.cu
+    gpu-nodes-minmax.cu
+    gpu-nodes-moments.cu
+    gpu-nodes-normalization.cu
+    gpu-nodes-norms.cu
+    gpu-nodes-pickneglogsoftmax.cu
+    gpu-nodes-random.cu
+    gpu-nodes-select.cu
+    gpu-nodes-similarities.cu
+    gpu-nodes-softmaxes.cu
+    gpu-nodes-trig.cu
+    gpu-param-nodes.cu
+    gpu-tensor.cu
+    gpu-training.cu
+    gpu-model.cu
+)
+
 file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc)
 if (NOT MSVC)
   set(BUILD_SHARED_LIBS ON)
@@ -150,10 +215,10 @@ if(WITH_CUDA_BACKEND)
     list(APPEND CUDA_NVCC_FLAGS_DEBUG "--compiler-options \"/MDd\"")
     list(APPEND CUDA_NVCC_FLAGS_RELEASE "--compiler-options \"/MD\"")
     SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu)
+    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS})
   else()
     SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu OPTIONS --compiler-options "-fPIC")
+    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS} OPTIONS --compiler-options "-fPIC")
   endif()
   set_target_properties(gdynet PROPERTIES
                         COMPILE_DEFINITIONS HAVE_CUDA)
@@ -172,4 +237,3 @@ if(WITH_CUDA_BACKEND)
 endif(WITH_CUDA_BACKEND)
 
 # target_compile_features(dynet PRIVATE cxx_range_for)
-
diff --git a/dynet/dynet.cc b/dynet/dynet.cc
@@ -1,7 +1,6 @@
 #include "dynet/dynet.h"
 
 #include "dynet/exec.h"
-#include "dynet/nodes.h"
 #include "dynet/param-nodes.h"
 #include "dynet/aligned-mem-pool.h"
 #include "dynet/dynet-helper.h"

diff --git a/dynet/gpu-nodes-activations.cu b/dynet/gpu-nodes-activations.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-activations.cc but compiled
+// on CUDA
+#include "nodes-activations.cc"
diff --git a/dynet/gpu-nodes-arith-const.cu b/dynet/gpu-nodes-arith-const.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-const.cc but compiled
+// on CUDA
+#include "nodes-arith-const.cc"
diff --git a/dynet/gpu-nodes-arith-cwise.cu b/dynet/gpu-nodes-arith-cwise.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-cwise.cc but compiled
+// on CUDA
+#include "nodes-arith-cwise.cc"
diff --git a/dynet/gpu-nodes-arith-scalar.cu b/dynet/gpu-nodes-arith-scalar.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-scalar.cc but compiled
+// on CUDA
+#include "nodes-arith-scalar.cc"
diff --git a/dynet/gpu-nodes-arith-sum.cu b/dynet/gpu-nodes-arith-sum.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-sum.cc but compiled
+// on CUDA
+#include "nodes-arith-sum.cc"
diff --git a/dynet/gpu-nodes-arith-unary.cu b/dynet/gpu-nodes-arith-unary.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-unary.cc but compiled
+// on CUDA
+#include "nodes-arith-unary.cc"
diff --git a/dynet/gpu-nodes-concat.cu b/dynet/gpu-nodes-concat.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-concat.cc but compiled
+// on CUDA
+#include "nodes-concat.cc"
diff --git a/dynet/gpu-nodes-const.cu b/dynet/gpu-nodes-const.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-const.cc but compiled
+// on CUDA
+#include "nodes-const.cc"
diff --git a/dynet/gpu-nodes-conv.cu b/dynet/gpu-nodes-conv.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-conv.cc but compiled
+// This is a dummy file that contains the same content as nodes.cc but compiled
 // on CUDA
 #include "nodes-conv.cc"
diff --git a/dynet/gpu-nodes-conv2d.cu b/dynet/gpu-nodes-conv2d.cu
@@ -1 +1,3 @@
+// This is a dummy file that contains the same content as nodes-conv2d.cc but compiled
+// on CUDA
 #include "nodes-conv2d.cc"
diff --git a/dynet/gpu-nodes-dropout.cu b/dynet/gpu-nodes-dropout.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-dropout.cc but compiled
+// on CUDA
+#include "nodes-dropout.cc"
diff --git a/dynet/gpu-nodes-flow.cu b/dynet/gpu-nodes-flow.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-flow.cc but compiled
+// on CUDA
+#include "nodes-flow.cc"
diff --git a/dynet/gpu-nodes-linalg.cu b/dynet/gpu-nodes-linalg.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-linalg.cc but compiled
+// on CUDA
+#include "nodes-linalg.cc"
diff --git a/dynet/gpu-nodes-logsumexp.cu b/dynet/gpu-nodes-logsumexp.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-logsumexp.cc but compiled
+// on CUDA
+#include "nodes-logsumexp.cc"
diff --git a/dynet/gpu-nodes-losses.cu b/dynet/gpu-nodes-losses.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-losses.cc but compiled
+// on CUDA
+#include "nodes-losses.cc"
diff --git a/dynet/gpu-nodes-maxpooling2d.cu b/dynet/gpu-nodes-maxpooling2d.cu
@@ -1 +1,3 @@
+// This is a dummy file that contains the same content as nodes-maxpooling2d.cc but compiled
+// on CUDA
 #include "nodes-maxpooling2d.cc"
diff --git a/dynet/gpu-nodes-minmax.cu b/dynet/gpu-nodes-minmax.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-minmax.cc but compiled
+// on CUDA
+#include "nodes-minmax.cc"
diff --git a/dynet/gpu-nodes-moments.cu b/dynet/gpu-nodes-moments.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-moments.cc but compiled
+// on CUDA
+#include "nodes-moments.cc"
diff --git a/dynet/gpu-nodes-normalization.cu b/dynet/gpu-nodes-normalization.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-normalization.cc but compiled
+// on CUDA
+#include "nodes-normalization.cc"
diff --git a/dynet/gpu-nodes-norms.cu b/dynet/gpu-nodes-norms.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-norms.cc but compiled
+// This is a dummy file that contains the same content as nodes.cc but compiled
 // on CUDA
 #include "nodes-norms.cc"
diff --git a/dynet/gpu-nodes-random.cu b/dynet/gpu-nodes-random.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-random.cc but compiled
+// on CUDA
+#include "nodes-random.cc"
diff --git a/dynet/gpu-nodes-select.cu b/dynet/gpu-nodes-select.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-select.cc but compiled
+// on CUDA
+#include "nodes-select.cc"
diff --git a/dynet/gpu-nodes-similarities.cu b/dynet/gpu-nodes-similarities.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-similarities.cc but compiled
+// This is a dummy file that contains the same content as nodes.cc but compiled
 // on CUDA
 #include "nodes-similarities.cc"
diff --git a/dynet/gpu-nodes-softmaxes.cu b/dynet/gpu-nodes-softmaxes.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-softmaxes.cc but compiled
+// on CUDA
+#include "nodes-softmaxes.cc"
diff --git a/dynet/gpu-nodes-trig.cu b/dynet/gpu-nodes-trig.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-trig.cc but compiled
+// on CUDA
+#include "nodes-trig.cc"
diff --git a/dynet/gpu-nodes-unary-arith.cu b/dynet/gpu-nodes-unary-arith.cu