#22 add skeletons for Conv1D, Pooling1D and ZeroPadding1D implementations
bhuman · May 23, 2024 · 79a51e6 · 79a51e6
1 parent a35fb5f
commit 79a51e6
Show file tree

Hide file tree

Showing 11 changed files with 430 additions and 19 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -34,6 +34,8 @@ add_library(CompiledNN
     Src/CompiledNN/CompiledNN/Operations/BatchNormalization.h
     Src/CompiledNN/CompiledNN/Operations/Concatenate.cpp
     Src/CompiledNN/CompiledNN/Operations/Concatenate.h
+    Src/CompiledNN/CompiledNN/Operations/Conv1D.cpp
+    Src/CompiledNN/CompiledNN/Operations/Conv1D.h
     Src/CompiledNN/CompiledNN/Operations/Conv2D.cpp
     Src/CompiledNN/CompiledNN/Operations/Conv2D.h
     Src/CompiledNN/CompiledNN/Operations/Cropping2D.cpp
@@ -46,6 +48,8 @@ add_library(CompiledNN
     Src/CompiledNN/CompiledNN/Operations/GlobalPooling2D.h
     Src/CompiledNN/CompiledNN/Operations/Im2Col2D.cpp
     Src/CompiledNN/CompiledNN/Operations/Im2Col2D.h
+    Src/CompiledNN/CompiledNN/Operations/Pooling1D.cpp
+    Src/CompiledNN/CompiledNN/Operations/Pooling1D.h
     Src/CompiledNN/CompiledNN/Operations/Pooling2D.cpp
     Src/CompiledNN/CompiledNN/Operations/Pooling2D.h
     Src/CompiledNN/CompiledNN/Operations/Softmax.cpp
@@ -54,6 +58,8 @@ add_library(CompiledNN
     Src/CompiledNN/CompiledNN/Operations/UInt8Input.h
     Src/CompiledNN/CompiledNN/Operations/UpSampling2D.cpp
     Src/CompiledNN/CompiledNN/Operations/UpSampling2D.h
+    Src/CompiledNN/CompiledNN/Operations/ZeroPadding1D.cpp
+    Src/CompiledNN/CompiledNN/Operations/ZeroPadding1D.h
     Src/CompiledNN/CompiledNN/Operations/ZeroPadding2D.cpp
     Src/CompiledNN/CompiledNN/Operations/ZeroPadding2D.h
     Src/CompiledNN/CompiledNN/Util/ExpApprox.cpp

diff --git a/Src/CompiledNN/CompiledNN.cpp b/Src/CompiledNN/CompiledNN.cpp
@@ -87,25 +87,40 @@ namespace NeuralNetwork
       return CompiledActivationFunctionId::linear;
     };
 
-    auto getPadding = [&compilers, &node, &settings](const std::array<unsigned int, 2>& kernelSize, const std::array<unsigned int, 2>& strides) -> OperationCompiler*
+    // Creates the zero padding operation that has to precede a layer with "same" padding.
+    // kernelSize and strides hold one entry per padded dimension (1 entry -> ZeroPadding1D,
+    // 2 entries -> ZeroPadding2D). Returns nullptr if no padding is needed at all.
+    auto getPadding = [&compilers, &node, &settings](const std::vector<unsigned int>& kernelSize, const std::vector<unsigned int>& strides) -> OperationCompiler*
     {
       ASSERT(node.inputDimensions.size() == 1);
-      ASSERT(node.inputDimensions[0].size() == 3);
-      const unsigned int verticalRemainder = node.inputDimensions[0][0] % strides[0];
-      const unsigned int horizontalRemainder = node.inputDimensions[0][1] % strides[1];
-      const unsigned int verticalPadding = std::max<int>(0, static_cast<int>(kernelSize[0]) -
-                                                         (verticalRemainder ? verticalRemainder : strides[0]));
-      const unsigned int horizontalPadding = std::max<int>(0, static_cast<int>(kernelSize[1]) -
-                                                           (horizontalRemainder ? horizontalRemainder : strides[1]));
-      if(!verticalPadding && !horizontalPadding)
+      ASSERT(kernelSize.size() == strides.size());
+      // The input must have one more dimension than there are padded dimensions.
+      // Check this before the loop below indexes into the input dimensions.
+      ASSERT(node.inputDimensions[0].size() == kernelSize.size() + 1);
+
+      // Total padding per dimension: kernel size minus the part of the last window that is already covered by the input.
+      std::vector<unsigned int> padding(kernelSize.size());
+      for(std::size_t i = 0; i < padding.size(); i++)
+      {
+        ASSERT(strides[i] > 0);
+        const unsigned int remainder = node.inputDimensions[0][i] % strides[i];
+        // Subtract in signed arithmetic so that a kernel smaller than the stride clamps to 0 without relying on unsigned wrap-around.
+        padding[i] = std::max<int>(0, static_cast<int>(kernelSize[i]) - static_cast<int>(remainder ? remainder : strides[i]));
+      }
+
+      if(std::accumulate<>(padding.cbegin(), padding.cend(), 0u) == 0u)
         return nullptr;
 
-      ZeroPadding2DCompiler::Parameters p;
-      p.padding[ZeroPadding2DLayer::TOP] = verticalPadding / 2;
-      p.padding[ZeroPadding2DLayer::BOTTOM] = (verticalPadding + 1) / 2;
-      p.padding[ZeroPadding2DLayer::LEFT] = horizontalPadding / 2;
-      p.padding[ZeroPadding2DLayer::RIGHT] = (horizontalPadding + 1) / 2;
-      return getCompiler<ZeroPadding2DCompiler>(settings, p, compilers);
+      // An odd total padding puts the extra element at the end (right / bottom).
+      if(padding.size() == 1)
+      {
+        ZeroPadding1DCompiler::Parameters p;
+        p.padding[ZeroPadding1DLayer::LEFT] = padding[0] / 2;
+        p.padding[ZeroPadding1DLayer::RIGHT] = (padding[0] + 1) / 2;
+        return getCompiler<ZeroPadding1DCompiler>(settings, p, compilers);
+      }
+      else if(padding.size() == 2)
+      {
+        ZeroPadding2DCompiler::Parameters p;
+        p.padding[ZeroPadding2DLayer::TOP] = padding[0] / 2;
+        p.padding[ZeroPadding2DLayer::BOTTOM] = (padding[0] + 1) / 2;
+        p.padding[ZeroPadding2DLayer::LEFT] = padding[1] / 2;
+        p.padding[ZeroPadding2DLayer::RIGHT] = (padding[1] + 1) / 2;
+        return getCompiler<ZeroPadding2DCompiler>(settings, p, compilers);
+      }
+      FAIL("Padding not supported");
+      return nullptr;
     };
 
     std::vector<OperationCompiler*> result;
@@ -147,12 +162,32 @@ namespace NeuralNetwork
         break;
       case LayerType::reshape:
         break;
+      case LayerType::conv1D:
+      {
+        const Conv1DLayer& layer = *static_cast<const Conv1DLayer*>(node.layer);
+        if(layer.padding == PaddingType::same)
+        {
+          OperationCompiler* extPadding = getPadding({layer.weights.dims(0)}, {layer.stride});
+          if(extPadding)
+            result.push_back(extPadding);
+        }
+        Conv1DCompiler::Parameters p;
+        p.weights = &layer.weights;
+        p.biases = layer.hasBiases ? &layer.biases : nullptr;
+        p.stride = layer.stride;
+        OperationCompiler* extActivation;
+        p.postActivation = activationToCompiled(layer.activationId, extActivation);
+        result.push_back(getCompiler<Conv1DCompiler>(settings, p, compilers));
+        if(extActivation)
+          result.push_back(extActivation);
+        break;
+      }
       case LayerType::conv2D:
       {
         const Conv2DLayer& layer = *static_cast<const Conv2DLayer*>(node.layer);
         if(layer.padding == PaddingType::same)
         {
-          OperationCompiler* extPadding = getPadding({{layer.weights.dims(0), layer.weights.dims(1)}}, layer.strides);
+          OperationCompiler* extPadding = getPadding({{layer.weights.dims(0), layer.weights.dims(1)}}, {{layer.strides[0], layer.strides[1]}});
           if(extPadding)
             result.push_back(extPadding);
         }
@@ -172,7 +207,7 @@ namespace NeuralNetwork
         const SeparableConv2DLayer& layer = *static_cast<const SeparableConv2DLayer*>(node.layer);
         if(layer.padding == PaddingType::same)
         {
-          OperationCompiler* extPadding = getPadding({{layer.depthwiseWeights.dims(0), layer.depthwiseWeights.dims(1)}}, layer.strides);
+          OperationCompiler* extPadding = getPadding({{layer.depthwiseWeights.dims(0), layer.depthwiseWeights.dims(1)}}, {{layer.strides[0], layer.strides[1]}});
           if(extPadding)
             result.push_back(extPadding);
         }
@@ -198,7 +233,7 @@ namespace NeuralNetwork
         const DepthwiseConv2DLayer& layer = *static_cast<const DepthwiseConv2DLayer*>(node.layer);
         if(layer.padding == PaddingType::same)
         {
-          OperationCompiler* extPadding = getPadding({{layer.weights.dims(0), layer.weights.dims(1)}}, layer.strides);
+          OperationCompiler* extPadding = getPadding({{layer.weights.dims(0), layer.weights.dims(1)}}, {{layer.strides[0], layer.strides[1]}});
           if(extPadding)
             result.push_back(extPadding);
         }
@@ -231,6 +266,14 @@ namespace NeuralNetwork
         result.push_back(getCompiler<UpSampling2DCompiler>(settings, p, compilers));
         break;
       }
+      case LayerType::zeroPadding1D:
+      {
+        const ZeroPadding1DLayer& layer = *static_cast<const ZeroPadding1DLayer*>(node.layer);
+        ZeroPadding1DCompiler::Parameters p;
+        p.padding = layer.padding;
+        result.push_back(getCompiler<ZeroPadding1DCompiler>(settings, p, compilers));
+        break;
+      }
       case LayerType::zeroPadding2D:
       {
         const ZeroPadding2DLayer& layer = *static_cast<const ZeroPadding2DLayer*>(node.layer);
@@ -239,6 +282,18 @@ namespace NeuralNetwork
         result.push_back(getCompiler<ZeroPadding2DCompiler>(settings, p, compilers));
         break;
       }
+      case LayerType::maxPooling1D:
+      case LayerType::averagePooling1D:
+      {
+        const Pooling1DLayer& layer = *static_cast<const Pooling1DLayer*>(node.layer);
+        Pooling1DCompiler::Parameters p;
+        p.padding = layer.padding;
+        p.kernelSize = layer.kernelSize;
+        p.stride = layer.stride;
+        p.method = layer.method;
+        result.push_back(getCompiler<Pooling1DCompiler>(settings, p, compilers));
+        break;
+      }
       case LayerType::maxPooling2D:
       case LayerType::averagePooling2D:
       {
@@ -800,6 +855,16 @@ namespace NeuralNetwork
             nodeInputs[0].provider->compiler = getCompiler<DenseCompiler>(effSettings, p, compilers);
             continue;
           }
+          const Conv1DCompiler* conv1DCompiler = dynamic_cast<const Conv1DCompiler*>(nodeInputs[0].provider->compiler);
+          if(conv1DCompiler && !conv1DCompiler->p.batchNormalization && conv1DCompiler->p.postActivation.id == CompiledActivationFunctionId::linear && bnCompiler->p.dimension == 1)
+          {
+            --bnCompiler->refCount;
+            --conv1DCompiler->refCount;
+            Conv1DCompiler::Parameters p = conv1DCompiler->p;
+            p.batchNormalization = &bnCompiler->p;
+            nodeInputs[0].provider->compiler = getCompiler<Conv1DCompiler>(effSettings, p, compilers);
+            continue;
+          }
           const Conv2DCompiler* conv2DCompiler = dynamic_cast<const Conv2DCompiler*>(nodeInputs[0].provider->compiler);
           if(conv2DCompiler && !conv2DCompiler->p.batchNormalization && bnCompiler->p.dimension == 2)
           {
@@ -835,6 +900,16 @@ namespace NeuralNetwork
             nodeInputs[0].provider->compiler = getCompiler<DenseCompiler>(effSettings, p, compilers);
             continue;
           }
+          const Conv1DCompiler* conv1DCompiler = dynamic_cast<const Conv1DCompiler*>(nodeInputs[0].provider->compiler);
+          if(conv1DCompiler && conv1DCompiler->p.postActivation.id == CompiledActivationFunctionId::linear)
+          {
+            --activationCompiler->refCount;
+            --conv1DCompiler->refCount;
+            Conv1DCompiler::Parameters p = conv1DCompiler->p;
+            p.postActivation = activationCompiler->p.activationDesc;
+            nodeInputs[0].provider->compiler = getCompiler<Conv1DCompiler>(effSettings, p, compilers);
+            continue;
+          }
           const Conv2DCompiler* conv2DCompiler = dynamic_cast<const Conv2DCompiler*>(nodeInputs[0].provider->compiler);
           if(conv2DCompiler && conv2DCompiler->p.postActivation.id == CompiledActivationFunctionId::linear)
           {
@@ -976,4 +1051,3 @@ namespace NeuralNetwork
     compilerBackend(operations, compilers, inputLocations, outputLocations, settings);
   }
 }
-
diff --git a/Src/CompiledNN/CompiledNN/CompiledNNImpl.h b/Src/CompiledNN/CompiledNN/CompiledNNImpl.h
@@ -9,13 +9,16 @@
 #include "Operations/Arithmetic.h"
 #include "Operations/BatchNormalization.h"
 #include "Operations/Concatenate.h"
+#include "Operations/Conv1D.h"
 #include "Operations/Conv2D.h"
 #include "Operations/Cropping2D.h"
 #include "Operations/DConv2D.h"
 #include "Operations/Dense.h"
 #include "Operations/GlobalPooling2D.h"
+#include "Operations/Pooling1D.h"
 #include "Operations/Pooling2D.h"
 #include "Operations/Softmax.h"
 #include "Operations/UInt8Input.h"
 #include "Operations/UpSampling2D.h"
+#include "Operations/ZeroPadding1D.h"
 #include "Operations/ZeroPadding2D.h"
diff --git a/Src/CompiledNN/CompiledNN/Operations/Conv1D.cpp b/Src/CompiledNN/CompiledNN/Operations/Conv1D.cpp
@@ -0,0 +1,84 @@
+/**
+ * @author Felix Thielke
+ */
+
+#include "Conv1D.h"
+#include "Platform/BHAssert.h"
+
+namespace NeuralNetwork
+{
+  namespace CompiledNNImpl
+  {
+    /**
+     * Builds the constant tables of the convolution from the parameters:
+     * constants[0] receives the weights in a rearranged, zero-padded order,
+     * and a second entry receives the biases if biases or a merged batch
+     * normalization are present. A merged batch normalization is folded
+     * into both tables.
+     */
+    void Conv1DCompiler::initialize()
+    {
+      // Declare constants
+      constants.clear();
+
+      // Store weights
+      constants.emplace_back();
+      NetworkConstants& weights = constants.back();
+      weights.data.clear();
+      ASSERT(p.weights->rank() == 3);
+      // Number of output channels per batch: 4 per register (presumably the floats in one XMM register - TODO confirm),
+      // with at least 2 registers (or as many spares as the post-activation needs) held back.
+      unsigned int outputBatchSize = 4 * (settings.xmmRegs() - std::max(2u, ActivationFunctionHandler::neededSpares(p.postActivation)));
+      for(unsigned int outputOffset = 0; outputOffset < p.weights->dims(2); outputOffset += outputBatchSize)
+      {
+        const unsigned int outputBatchEnd = std::min(outputOffset + outputBatchSize, p.weights->dims(2));
+
+        // The first two weight dimensions are walked as one flattened input index, 4 inputs at a time.
+        for(unsigned int input = 0; input < p.weights->dims(0) * p.weights->dims(1); input += 4)
+        {
+          const unsigned int remainingInputs = std::min(4u, p.weights->dims(0) * p.weights->dims(1) - input);
+
+          // One pass per input of the current group; each pass rotates the inputs by one further position.
+          for(unsigned int shuffle = remainingInputs; shuffle; --shuffle)
+          {
+            for(unsigned int output = outputOffset; output < outputBatchEnd; output += 4)
+            {
+              const unsigned int remainingOutputs = std::min(4u, outputBatchEnd - output);
+
+              for(unsigned int i = 0; i < remainingOutputs; i++)
+              {
+                // Lane i gets the weight of output channel (output + i) for the input rotated by i within the group.
+                const float w = (*p.weights)[(input + ((remainingInputs - shuffle + i) % remainingInputs)) * p.weights->dims(2) + output + i];
+                if(p.batchNormalization)
+                  weights.data.emplace_back(w * (*p.batchNormalization->factor)[output + i]);
+                else
+                  weights.data.emplace_back(w);
+              }
+              // Fill incomplete groups of 4 with zeros.
+              for(unsigned int i = remainingOutputs; i < 4; i++)
+                weights.data.emplace_back(0.f);
+            }
+          }
+        }
+      }
+
+      // Store biases
+      if(p.biases || p.batchNormalization) {
+        constants.emplace_back();
+        NetworkConstants& biases = constants.back();
+        if(p.biases)
+          biases.data = *p.biases;
+        else
+          biases.data.resize(p.weights->dims(2), 0.f);
+        if(p.batchNormalization)
+        {
+          // Fold the batch normalization into the biases: b' = b * factor + offset.
+          for(size_t i = 0; i < biases.data.size(); i++)
+            biases.data[i] = biases.data[i] * (*p.batchNormalization->factor)[i] + (*p.batchNormalization->offset)[i];
+        }
+      }
+    }
+
+    /**
+     * Emits the machine code of the convolution (skeleton, not implemented yet).
+     * @param a The assembler that receives the code.
+     * @param input The input tensor, expected as rank 2 with dims(1) matching the weights' dims(1).
+     * @param output The output tensor, expected as rank 2 with dims(1) matching the weights' dims(2).
+     */
+    void Conv1DCompiler::compile(x86::Assembler& a, ActivationFunctionHandler&, const TensorPointerXf& input, const TensorPointerXf& output) const
+    {
+      ASSERT(input.rank() == 2);
+      ASSERT(output.rank() == 2);
+      ASSERT(input.dims(1) == p.weights->dims(1));
+      ASSERT(output.dims(1) == p.weights->dims(2));
+
+      //const NetworkConstants& weights = constants[0];
+
+      // Load input/output base addresses
+      // The destination register must hold the output address, not a copy of the input address.
+      a.mov(a.zsi(), imm(input.data()));
+      a.mov(a.zdi(), imm(output.data()));
+
+      FAIL("Not implemented");
+    }
+  }
+}
diff --git a/Src/CompiledNN/CompiledNN/Operations/Conv1D.h b/Src/CompiledNN/CompiledNN/Operations/Conv1D.h
@@ -0,0 +1,53 @@
+/**
+ * @author Felix Thielke
+ */
+
+#pragma once
+
+#include "../ActivationFunctions.h"
+#include "../CompiledNNImplBase.h"
+#include "BatchNormalization.h"
+
+namespace NeuralNetwork
+{
+  namespace CompiledNNImpl
+  {
+    /**
+     * Operation compiler for a 1D convolution with optional biases, an
+     * optional merged batch normalization and a post-activation.
+     */
+    struct Conv1DCompiler : public SISOOperationCompiler
+    {
+      /**
+       * The parameters of the operation. Two parameter sets are equal if they
+       * refer to the same weight/bias/batch normalization objects (pointer
+       * comparison) and agree in stride and post-activation.
+       */
+      struct Parameters final
+      {
+        const BatchNormalizationCompiler::Parameters* batchNormalization = nullptr; /**< A merged batch normalization (or nullptr). */
+        const Tensor<float, 1>* weights = nullptr; /**< The kernel weights (must be set before use). */
+        const std::vector<float>* biases = nullptr; /**< The biases (or nullptr if the layer has none). */
+        unsigned int stride = 1; /**< The step between two applications of the kernel. */
+        ActivationFunctionDescriptor postActivation; /**< The activation function applied to the result. */
+
+        bool operator==(const Parameters& other) const
+        {
+          return batchNormalization == other.batchNormalization &&
+                 weights == other.weights &&
+                 biases == other.biases &&
+                 stride == other.stride &&
+                 postActivation == other.postActivation;
+        }
+      };
+      const Parameters p;
+
+      Conv1DCompiler(const CompilationSettings& settings, const Parameters& p) : SISOOperationCompiler(settings), p(p) {}
+
+      void initialize() override;
+      void compile(x86::Assembler& a, ActivationFunctionHandler& afHandler, const TensorPointerXf& input, const TensorPointerXf& output) const override;
+
+      /** The convolution always writes to a separate output buffer. */
+      inline bool canBeInplace() const override
+      {
+        return false;
+      }
+
+      /**
+       * Calculates the output dimensions for given input dimensions.
+       * @param inputDimensions The two input dimensions; the first one is the one the kernel moves along.
+       * @return floor((length - kernel size) / stride) + 1 positions, each with weights->dims(2) values.
+       */
+      inline std::vector<unsigned int> calcOutputDimensions(const std::vector<unsigned int>& inputDimensions) const override
+      {
+        ASSERT(inputDimensions.size() == 2);
+        ASSERT(p.stride > 0);
+        // The subtraction below is unsigned and would wrap around for an input shorter than the kernel.
+        ASSERT(inputDimensions[0] >= p.weights->dims(0));
+        return {{(inputDimensions[0] - p.weights->dims(0) + p.stride) / p.stride, p.weights->dims(2)}};
+      }
+    };
+  }
+}