Merge pull request #43 from majianjia/dev

Add ZeroPadding & Cropping Layers

majianjia authored Jun 7, 2019
2 parents 2d6675f + c1a9979 commit e04f20b
Showing 13 changed files with 347 additions and 40 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -81,6 +81,9 @@ Please check [examples](https://github.com/majianjia/nnom/tree/master/examples)
| SoftMax|Beta | SoftMax()| Softmax only has layer API|
| Activation|Beta| Activation()|A layer instance for activation|
| Input/Output |Beta | Input()/Output()| |
+| Up Sampling | Beta|UpSample()||
+| Zero Padding | Beta |ZeroPadding()||
+| Cropping | Beta |Cropping()||

**RNN Layers**

@@ -90,7 +93,6 @@ Please check [examples](https://github.com/majianjia/nnom/tree/master/examples)
| Simple RNN | Under Dev. | SimpleCell()| Under Development |
| Gated Recurrent Network (GRU)| Under Dev. | GRUCell()| Under Development |


**Activations**

Activation can be used by itself as a layer, or can be attached to the previous layer as an ["actail"](docs/A_Temporary_Guide_to_NNoM.md#addictionlly-activation-apis) to reduce memory cost.
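
For example, a sketch of both styles; `model.active()`, `model.hook()` and `act_relu()` here are assumed from the block API used in the NNoM examples, not from this commit:

~~~C
// attached to the previous layer as an "actail":
x = model.active(act_relu(), x);
// or used as a standalone layer instance:
x = model.hook(Activation(act_relu()), x);
~~~
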
@@ -111,7 +113,6 @@ Activation can be used by itself as a layer, or can be attached to the previous layer…
| Global Max Pooling | Beta|GlobalMaxPool()||
| Global Average Pooling | Beta|GlobalAvgPool()||
| Global Sum Pooling | Beta|GlobalSumPool()|A better alternative to global average pooling on MCUs, placed before Softmax|
-| Up Sampling | Beta|UpSample()||

**Matrix Operations Layers**

@@ -122,7 +123,6 @@ Activation can be used by itself as a layer, or can be attached to the previous layer…
| Addition | Beta|Add()||
| Subtraction | Beta|Sub()||


## Dependencies

NNoM now uses its local, pure-C backend implementation by default, so no special dependencies are needed.
49 changes: 41 additions & 8 deletions docs/api_layers.md
@@ -104,7 +104,6 @@ This function is for 1D or 2D, multiple-channel depthwise convolution.

When it is used for 1D convolution, H should always be set to 1 in both kernel and stride.


---

## Dense()
@@ -124,7 +123,6 @@ A fully connected layer. It will flatten the data if the last output is multiple…
- The layer instance
---
## UpSample()
@@ -137,21 +135,55 @@ A basic up sampling, using nearest interpolation

**Arguments**

-- **kernel:** a shape subject return by `kernel()`, the interpolation size.
+- **kernel:** a shape object returned by `kernel()`, the interpolation size.

**Return**

- The layer instance
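
For example, a minimal sketch (the feature-map size is illustrative):

~~~C
// nearest up-sampling by a factor of 2 in both H and W;
// hooked after a 14x14x8 feature map, the output becomes 28x28x8.
nnom_layer_t *up = UpSample(kernel(2, 2));
~~~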

---

## ZeroPadding()

~~~C
nnom_layer_t *ZeroPadding(nnom_border_t pad);
~~~
Pads zeros onto each edge of the image (top, bottom, left, right).

**Arguments**

- **pad:** a border object returned by `border()`, containing the top, bottom, left and right padding.

**Return**

- The layer instance
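
For example, a minimal sketch (the input size is illustrative):

~~~C
// pad 1 row of zeros at top and bottom, 2 columns at left and right;
// a 28x28x4 input then becomes (28+1+1) x (28+2+2) x 4 = 30x32x4.
nnom_layer_t *pad = ZeroPadding(border(1, 1, 2, 2));
~~~
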
---
## Cropping()
~~~C
nnom_layer_t *Cropping(nnom_border_t pad);
~~~

It crops the input along the spatial dimensions (H and W).

**Arguments**

- **pad:** a border object returned by `border()`, containing the top, bottom, left and right cropping sizes.

**Return**

- The layer instance
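
A matching sketch, mirroring the padding example above (sizes illustrative):

~~~C
// cut 1 row from top and bottom, 2 columns from left and right;
// a 30x32x4 input then shrinks back to 28x28x4.
nnom_layer_t *crop = Cropping(border(1, 1, 2, 2));
~~~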

---

## Lambda()

~~~C
// Lambda Layers
-// layer.run() , required
+// layer.run() , compulsory
// layer.oshape(), optional, call default_output_shape() if left NULL
// layer.free() , optional, called while model is deleting, to free private resources
// parameters , private parameters for run method, left NULL if not needed.
@@ -165,9 +197,9 @@ Lambda layer is an anonymous layer (interface), which allows users to do customized…
**Arguments**
-- **(*run)(nnom_layer_t *):** or so called run method, is the method to do the customized operation.
-- **(*oshape)(nnom_layer_t *):** is to calculate the output shape according to the input shape during compiling. If this method is not presented, the input shape will be passed to the output shape.
-- **(*free)(nnom_layer_t *):** is to free the resources allocated by users. this will be called when deleting models. Leave it NULL if no resources need to be released.
+- **`(*run)(nnom_layer_t *)`:** the so-called run method, which performs the customized operation.
+- **`(*oshape)(nnom_layer_t *)`:** calculates the output shape from the input shape during compiling. If this method is not present, the input shape is passed through to the output shape.
+- **`(*free)(nnom_layer_t *)`:** frees the resources allocated by the user. This method is called when the model is deleted. Leave it NULL if no resources need to be released.
- **parameters:** the pointer to user configurations, which can be accessed in all three methods above.
**Return**
@@ -176,6 +208,7 @@ Lambda layer is an anonymous layer (interface), which allows users to do customized…
**Notes**
- All methods with type `nnom_status_t` must return `NN_SUCCESS` to allow the inference process; any other return value stops the inference of the model.
- When `oshape()` is present, please refer to examples of other similar layers; the shape passing must be handled carefully.
- This method is called during compiling, so it can also do work beyond calculating the output shape. An example is `global_pooling_output_shape()`, which fills in the parameters left by `GlobalXXXPool()`.
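
As an illustration, below is a minimal sketch of a custom absolute-value layer built with `Lambda()`. The I/O access pattern (`layer->in->mem->blk`, `layer->out->mem->blk`) and the `model.hook()` call are assumptions based on the structs in `nnom.h` and the block API used elsewhere in the NNoM docs; verify them against your revision.

~~~C
#include "nnom.h"

// run method: write the absolute value of every q7 input element to the output
static nnom_status_t abs_run(nnom_layer_t *layer)
{
    q7_t *in = layer->in->mem->blk;   // input buffer (assumed field names)
    q7_t *out = layer->out->mem->blk; // output buffer
    size_t size = shape_size(&layer->in->shape);
    for (size_t i = 0; i < size; i++)
        out[i] = (in[i] < 0) ? -in[i] : in[i];
    return NN_SUCCESS;
}

// oshape and free are left NULL: the output shape defaults to the input shape,
// and there are no private resources to release. In the model-building code:
// x = model.hook(Lambda(abs_run, NULL, NULL, NULL), x);
~~~
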
37 changes: 33 additions & 4 deletions docs/api_properties.md
@@ -5,8 +5,11 @@ Properties include some basic properties such as shape of the data buffer, Q-format…

---

-## typesdef
+## Typedef

~~~C
#define nnom_shape_data_t uint16_t

typedef struct _nnom_shape
{
nnom_shape_data_t h, w, c;
Expand All @@ -29,6 +32,11 @@ typedef struct _nnom_qformat
int8_t n, m;
} nnom_qformat_t;

typedef struct _nnom_border_t
{
nnom_shape_data_t top, bottom, left, right;
} nnom_border_t;

~~~

---
@@ -46,7 +54,7 @@ nnom_shape_t shape(size_t h, size_t w, size_t c);
**Arguments**
- **h:** size of H, or number of rows, or the y axis of an image.
-- ** w:** size of W, or number of row, or x axis in image.
+- **w:** size of W, or number of columns, or the x axis of an image.
- **c:** size of channels.
**Return**
@@ -66,7 +74,7 @@ Used in pooling or convolutional layers to specify the kernel size.
**Arguments**

- **h:** size of kernel in H, or number of rows, or the y axis of an image.
-- ** w:** size of kernel in W, or number of row, or x axis in image.
+- **w:** size of kernel in W, or number of columns, or the x axis of an image.

**Return**

@@ -85,7 +93,28 @@ Used in pooling or convolutional layers to specify the stride size.
**Arguments**
- **h:** size of stride in H, or number of rows, or the y axis of an image.
-- ** w:** size of stride in W, or number of row, or x axis in image.
+- **w:** size of stride in W, or number of columns, or the x axis of an image.
**Return**
- A shape instance.
---
## border()
~~~C
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right);
~~~

It packs the four padding/cropping values into a border object.

**Arguments**

- **top:** the padding/cropping at the top edge of the image.
- **bottom:** the padding/cropping at the bottom edge of the image.
- **left:** the padding/cropping at the left edge of the image.
- **right:** the padding/cropping at the right edge of the image.

**Return**
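
- A border instance.

A short sketch of how the arguments map to the struct fields (this follows directly from the `nnom_border_t` typedef above):

~~~C
nnom_border_t b = border(1, 2, 3, 4);
// b.top == 1, b.bottom == 2, b.left == 3, b.right == 4
nnom_layer_t *pad = ZeroPadding(b); // or Cropping(b)
~~~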

11 changes: 5 additions & 6 deletions docs/index.md
@@ -104,12 +104,15 @@ Check [Porting and optimising Guide](Porting_and_Optimisation_Guide.md) for details.
| Fully-connected | Beta| Dense()| |
| Lambda |Alpha| Lambda() |single input / single output anonymous operation|
| Batch Normalization |Beta | N/A| This layer is merged to the last Conv by the script|
-| Input/Output |Beta | Input()/Output()| |
| Flatten|Beta | Flatten()| |
| SoftMax|Beta | SoftMax()| Softmax only has layer API|
| Activation|Beta| Activation()|A layer instance for activation|
+| Input/Output |Beta | Input()/Output()| |
+| Up Sampling | Beta|UpSample()||
+| Zero Padding | Beta |ZeroPadding()||
+| Cropping | Beta |Cropping()||

-** RNN Layers **
+**RNN Layers**

| Layers | Status |Layer API|Comments|
| ------ |-- |--|--|
Expand Down Expand Up @@ -137,7 +140,6 @@ Activation can be used by itself as layer, or can be attached to the previous la
| Global Max Pooling | Beta|GlobalMaxPool()||
| Global Average Pooling | Beta|GlobalAvgPool()||
| Global Sum Pooling | Beta|GlobalSumPool()|A better alternative to global average pooling on MCUs, placed before Softmax|
-| Up Sampling | Beta|UpSample()||

**Matrix Operations Layers**

@@ -147,6 +149,3 @@ Activation can be used by itself as a layer, or can be attached to the previous layer…
| Multiplication |Beta |Mult()||
| Addition | Beta|Add()||
| Subtraction | Beta|Sub()||



11 changes: 10 additions & 1 deletion inc/nnom.h
@@ -47,6 +47,8 @@ typedef enum
NNOM_DW_CONV_2D,
NNOM_BATCHNORM,
NNOM_DENSE,
NNOM_ZERO_PADDING,
NNOM_CROPPING,
NNOM_RNN,
NNOM_ACTIVATION,
NNOM_RELU,
@@ -79,6 +81,8 @@ typedef enum
"DW_Conv2D", \
"BatchNorm", \
"Dense", \
"ZeroPad", \
"Cropping", \
"RNN", \
"Activation", \
"ReLU", \
@@ -134,11 +138,16 @@ typedef enum

// basic types
#define nnom_shape_data_t uint16_t
-typedef struct _nnom_shape
+typedef struct _nnom_shape_t
{
nnom_shape_data_t h, w, c;
} nnom_shape_t;

typedef struct _nnom_border_t
{
nnom_shape_data_t top, bottom, left, right;
} nnom_border_t;

// the nnom_shape_axis_t type provides axis[]-style access to nnom_shape_t
typedef union {
nnom_shape_t s;
17 changes: 16 additions & 1 deletion inc/nnom_layers.h
@@ -53,6 +53,16 @@ typedef struct _nnom_dense_layer_t

} nnom_dense_layer_t;

// zero padding
typedef struct _nnom_zero_padding_layer_t
{
nnom_layer_t super;
nnom_border_t pad;
} nnom_zero_padding_layer_t;

// cropping, same structure as zero padding
typedef nnom_zero_padding_layer_t nnom_cropping_layer_t;

// lambda layer
typedef struct _nnom_lambda_layer_t
{
@@ -158,6 +168,7 @@ typedef struct _nnom_concat_layer
nnom_shape_t shape(size_t h, size_t w, size_t c);
nnom_shape_t kernel(size_t h, size_t w);
nnom_shape_t stride(size_t h, size_t w);
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right);
nnom_qformat_t qformat(int8_t m, int8_t n);
size_t shape_size(nnom_shape_t *s);

@@ -180,7 +191,11 @@ nnom_layer_t *SumPool(nnom_shape_t k, nnom_shape_t s, nnom_padding_t pad);
nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);
-nnom_layer_t *UpSample(nnom_shape_t kernel); // UpSampling, whcih is acturally the unpooling

+// padding, cropping, upsample
+nnom_layer_t *UpSample(nnom_shape_t kernel);
+nnom_layer_t *ZeroPadding(nnom_border_t pad);
+nnom_layer_t *Cropping(nnom_border_t pad);

// Activation
nnom_layer_t *Activation(nnom_activation_t *act);
42 changes: 32 additions & 10 deletions inc/nnom_local.h
@@ -116,25 +116,24 @@ void local_up_sampling_q7_HWC(const q7_t *Im_in, // input image
q7_t *bufferA, // NULL
q7_t *Im_out);

void local_convolve_HWC_q7_nonsquare(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t * wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const q7_t * bias, // bias
const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t * bufferA, //buffer space for input
q7_t * bufferB); //buffer space for output

void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
@@ -156,7 +155,30 @@ void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, // input image
const uint16_t dim_im_out_y, // output image dimension y
q15_t * bufferA, //buffer space for input
q7_t * bufferB); //buffer space for output


void local_zero_padding_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y

void local_cropping_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
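
For reference, a sketch of calling the padding backend directly, outside the layer API; buffer sizes here are illustrative, and in normal use the `ZeroPadding()`/`Cropping()` layers are the entry point:

~~~C
// pad a 4x4 single-channel q7 image by 1 pixel on each edge into a 6x6 buffer
q7_t img_in[4 * 4 * 1];   // source image (fill before calling)
q7_t img_out[6 * 6 * 1];  // (4+1+1) x (4+1+1) result
local_zero_padding_q7(img_in, 4, 4, 1, // input and its x/y dimensions, channels
                      1, 1, 1, 1,     // top, bottom, left, right padding
                      img_out, 6, 6); // output and its x/y dimensions
~~~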

void local_fully_connected_q7_opt(const q7_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
6 changes: 5 additions & 1 deletion inc/nnom_out_shape.h
@@ -32,6 +32,11 @@ nnom_status_t dw_conv2d_out_shape(nnom_layer_t *layer);
nnom_status_t dense_out_shape(nnom_layer_t *layer);
nnom_status_t rnn_out_shape(nnom_layer_t *layer);

+// padding, cropping, upsample
+nnom_status_t upsample_out_shape(nnom_layer_t *layer);
+nnom_status_t zero_padding_out_shape(nnom_layer_t* layer);
+nnom_status_t cropping_out_shape(nnom_layer_t* layer);

// activation
nnom_status_t relu_out_shape(nnom_layer_t *layer);
nnom_status_t softmax_out_shape(nnom_layer_t *layer);
Expand All @@ -41,7 +46,6 @@ nnom_status_t maxpooling_out_shape(nnom_layer_t *layer);
nnom_status_t avgpooling_out_shape(nnom_layer_t *layer);
nnom_status_t sumpooling_out_shape(nnom_layer_t *layer);
nnom_status_t global_pooling_out_shape(nnom_layer_t *layer);
-nnom_status_t upsample_out_shape(nnom_layer_t *layer);

// utils
nnom_status_t flatten_out_shape(nnom_layer_t *layer);