diff --git a/README.md b/README.md index d792fca..0cd6362 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,9 @@ Please check [examples](https://github.com/majianjia/nnom/tree/master/examples) | SoftMax|Beta | SoftMax()| Softmax only has layer API| | Activation|Beta| Activation()|A layer instance for activation| | Input/Output |Beta | Input()/Output()| | +| Up Sampling | Beta|UpSample()|| +| Zero Padding | Beta |ZeroPadding()|| +| Cropping | Beta |Cropping()|| **RNN Layers** @@ -90,7 +93,6 @@ Please check [examples](https://github.com/majianjia/nnom/tree/master/examples) | Simple RNN | Under Dev. | SimpleCell()| Under Developpment | | Gated Recurrent Network (GRU)| Under Dev. | GRUCell()| Under Developpment | - **Activations** Activation can be used by itself as layer, or can be attached to the previous layer as ["actail"](docs/A_Temporary_Guide_to_NNoM.md#addictionlly-activation-apis) to reduce memory cost. @@ -111,7 +113,6 @@ Activation can be used by itself as layer, or can be attached to the previous la | Global Max Pooling | Beta|GlobalMaxPool()|| | Global Average Pooling | Beta|GlobalAvgPool()|| | Global Sum Pooling | Beta|GlobalSumPool()|A better alternative to Global average pooling in MCU before Softmax| -| Up Sampling | Beta|UpSample()|| **Matrix Operations Layers** @@ -122,7 +123,6 @@ Activation can be used by itself as layer, or can be attached to the previous la | Addition | Beta|Add()|| | Substraction | Beta|Sub()|| - ## Dependencies NNoM now use the local pure C backend implementation by default. Thus, there is no special dependency needed. diff --git a/docs/api_layers.md b/docs/api_layers.md index 6d77fa5..f31bef3 100644 --- a/docs/api_layers.md +++ b/docs/api_layers.md @@ -104,7 +104,6 @@ This funtion is for 1D or 2D, mutiple channels depthwise convolution. When it is used for 1D convolution, the H should be set to 1 constantly in kernel and stride. - --- ## Dense() @@ -124,7 +123,6 @@ A fully connected layer. It will flatten the data if the last output is mutiple- - The layer instance - --- ## UpSample() @@ -137,21 +135,55 @@ A basic up sampling, using nearest interpolation **Arguments** -- **kernel:** a shape subject return by `kernel()`, the interpolation size. +- **kernel:** a shape object returned by `kernel()`, the interpolation size. **Return** - The layer instance +--- + +## ZeroPadding() +~~~C +nnom_layer_t *ZeroPadding(nnom_border_t pad); +~~~ + +Pad zeros to the image for each edge (top, bottom, left, right) + +**Arguments** + +- **pad:** a border object returned by `border()`, contains top, bottom, left and right padding. + +**Return** + +- The layer instance + +--- + +## Cropping() + +~~~C +nnom_layer_t *Cropping(nnom_border_t pad); +~~~ + +It crops along spatial dimensions. + +**Arguments** + +- **pad:** a border object returned by `border()`, contains top, bottom, left and right size. + +**Return** + +- The layer instance --- ## Lambda() ~~~C -// Lambda Layers -// layer.run() , required + +// layer.run() , compulsory // layer.oshape(), optional, call default_output_shape() if left NULL // layer.free() , optional, called while model is deleting, to free private resources // parameters , private parameters for run method, left NULL if not needed. @@ -165,9 +197,9 @@ Lambda layer is an anonymous layer (interface), which allows user to do customiz **Arguments** -- **(*run)(nnom_layer_t *):** or so called run method, is the method to do the customized operation. 
-- **(*oshape)(nnom_layer_t *):** is to calculate the output shape according to the input shape during compiling. If this method is not presented, the input shape will be passed to the output shape. -- **(*free)(nnom_layer_t *):** is to free the resources allocated by users. this will be called when deleting models. Leave it NULL if no resources need to be released. +- **`(*run)(nnom_layer_t *)`:** or so called run method, is the method to do the customized operation. +- **`(*oshape)(nnom_layer_t *)`:** is to calculate the output shape according to the input shape during compiling. If this method is not presented, the input shape will be passed to the output shape. +- **`(*free)(nnom_layer_t *)`:** is to free the resources allocated by the users. This method will be called when the model is deleting. Leave it NULL if no resources need to be released. - **parameters:** is the pointer to user configurations. User can access to it in all three methods above. **Return** @@ -176,6 +208,7 @@ Lambda layer is an anonymous layer (interface), which allows user to do customiz **Notes** +- All methods with type `nnom_status_t` must return `NN_SUCCESS` to allow the inference process. Any return other than that will stop the inference of the model. - When `oshape()` is presented, please refer to examples of other similar layers. The shape passing must be handle carefully. - This method is called in compiling, thus it can also do works other than calculating output shape only. An exmaple is the `global_pooling_output_shape()` fills in the parameters left by `GlobalXXXPool()` diff --git a/docs/api_properties.md b/docs/api_properties.md index 1888ad4..cb4a2f5 100644 --- a/docs/api_properties.md +++ b/docs/api_properties.md @@ -5,8 +5,11 @@ Properties include some basic properties such as shape of the data buffer, Q-for --- -## typesdef +## Typedef + ~~~C +#define nnom_shape_data_t uint16_t + typedef struct _nnom_shape { nnom_shape_data_t h, w, c; @@ -29,6 +32,11 @@ typedef struct _nnom_qformat int8_t n, m; } nnom_qformat_t; +typedef struct _nnom_border_t +{ + nnom_shape_data_t top, bottom, left, right; +} nnom_border_t; + ~~~ --- @@ -46,7 +54,7 @@ nnom_shape_t shape(size_t h, size_t w, size_t c); **Arguments** - ** h:** size of H, or number of row, or y axis in image. -- ** w:** size of W, or number of row, or x axis in image. +- ** w:** size of W, or number of column, or x axis in image. - ** c:** size of channel. **Return** @@ -66,7 +74,7 @@ Use in pooling or convolutional layer to specified the kernel size. **Arguments** - ** h:** size of kernel in H, or number of row, or y axis in image. -- ** w:** size of kernel in W, or number of row, or x axis in image. +- ** w:** size of kernel in W, or number of column, or x axis in image. **Return** @@ -85,7 +93,28 @@ Use in pooling or convolutional layer to specified the stride size. **Arguments** - ** h:** size of stride in H, or number of row, or y axis in image. -- ** w:** size of stride in W, or number of row, or x axis in image. +- ** w:** size of stride in W, or number of column, or x axis in image. + +**Return** + +- A shape instance. + +--- + +## border() + +~~~C +nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right); +~~~ + +It pack the 4 padding/cropping value to a border object. + +**Arguments** + +- ** top:** the padding/cropping at the top edge of the image. +- ** bottom:** the padding/cropping at the bottom edge of the image. +- ** left:** the padding/cropping at the left edge of the image. 
+- ** right:** the padding/cropping at the right edge of the image. **Return** diff --git a/docs/index.md b/docs/index.md index 6dd7e3e..815322e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -104,12 +104,15 @@ Check [Porting and optimising Guide](Porting_and_Optimisation_Guide.md) for deta | Fully-connected | Beta| Dense()| | | Lambda |Alpha| Lambda() |single input / single output anonymous operation| | Batch Normalization |Beta | N/A| This layer is merged to the last Conv by the script| -| Input/Output |Beta | Input()/Output()| | | Flatten|Beta | Flatten()| | | SoftMax|Beta | SoftMax()| Softmax only has layer API| | Activation|Beta| Activation()|A layer instance for activation| +| Input/Output |Beta | Input()/Output()| | +| Up Sampling | Beta|UpSample()|| +| Zero Padding | Beta |ZeroPadding()|| +| Cropping | Beta |Cropping()|| -** RNN Layers ** +**RNN Layers** | Layers | Status |Layer API|Comments| | ------ |-- |--|--| @@ -137,7 +140,6 @@ Activation can be used by itself as layer, or can be attached to the previous la | Global Max Pooling | Beta|GlobalMaxPool()|| | Global Average Pooling | Beta|GlobalAvgPool()|| | Global Sum Pooling | Beta|GlobalSumPool()|A better alternative to Global average pooling in MCU before Softmax| -| Up Sampling | Beta|UpSample()|| **Matrix Operations Layers** @@ -147,6 +149,3 @@ Activation can be used by itself as layer, or can be attached to the previous la | Multiple |Beta |Mult()|| | Addition | Beta|Add()|| | Substraction | Beta|Sub()|| - - - diff --git a/inc/nnom.h b/inc/nnom.h index d4aa908..c2e9fd0 100644 --- a/inc/nnom.h +++ b/inc/nnom.h @@ -47,6 +47,8 @@ typedef enum NNOM_DW_CONV_2D, NNOM_BATCHNORM, NNOM_DENSE, + NNOM_ZERO_PADDING, + NNOM_CROPPING, NNOM_RNN, NNOM_ACTIVATION, NNOM_RELU, @@ -79,6 +81,8 @@ typedef enum "DW_Conv2D", \ "BatchNorm", \ "Dense", \ + "ZeroPad", \ + "Cropping", \ "RNN", \ "Activation", \ "ReLU", \ @@ -134,11 +138,16 @@ typedef enum // basic types #define nnom_shape_data_t uint16_t -typedef struct _nnom_shape +typedef struct _nnom_shape_t { nnom_shape_data_t h, w, c; } nnom_shape_t; +typedef struct _nnom_border_t +{ + nnom_shape_data_t top, bottom, left, right; +} nnom_border_t; + // nnom_shape_axis_t type provide the axis[] format access to nnom_shape_t typedef union { nnom_shape_t s; diff --git a/inc/nnom_layers.h b/inc/nnom_layers.h index f03d649..e95b79f 100644 --- a/inc/nnom_layers.h +++ b/inc/nnom_layers.h @@ -53,6 +53,16 @@ typedef struct _nnom_dense_layer_t } nnom_dense_layer_t; +// zero padding +typedef struct _nnom_zero_padding_layer_t +{ + nnom_layer_t super; + nnom_border_t pad; +} nnom_zero_padding_layer_t; + +// Cropping, same as zeropadding +typedef nnom_zero_padding_layer_t nnom_cropping_layer_t; + // lambda layer typedef struct _nnom_lambda_layer_t { @@ -158,6 +168,7 @@ typedef struct _nnom_concat_layer nnom_shape_t shape(size_t h, size_t w, size_t c); nnom_shape_t kernel(size_t h, size_t w); nnom_shape_t stride(size_t h, size_t w); +nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right); nnom_qformat_t qformat(int8_t m, int8_t n); size_t shape_size(nnom_shape_t *s); @@ -180,7 +191,11 @@ nnom_layer_t *SumPool(nnom_shape_t k, nnom_shape_t s, nnom_padding_t pad); nnom_layer_t *GlobalMaxPool(void); nnom_layer_t *GlobalAvgPool(void); nnom_layer_t *GlobalSumPool(void); -nnom_layer_t *UpSample(nnom_shape_t kernel); // UpSampling, whcih is acturally the unpooling + +// padding, cropping, upsample +nnom_layer_t *UpSample(nnom_shape_t kernel); +nnom_layer_t *ZeroPadding(nnom_border_t pad); 
+nnom_layer_t *Cropping(nnom_border_t pad); // Activation nnom_layer_t *Activation(nnom_activation_t *act); diff --git a/inc/nnom_local.h b/inc/nnom_local.h index 9815f62..8e4ca94 100644 --- a/inc/nnom_local.h +++ b/inc/nnom_local.h @@ -116,25 +116,24 @@ void local_up_sampling_q7_HWC(const q7_t *Im_in, // input image q7_t *bufferA, // NULL q7_t *Im_out); -void local_convolve_HWC_q7_nonsquare(const q7_t * Im_in, // input image +void local_convolve_HWC_q7_nonsquare(const q7_t * Im_in, // input image const uint16_t dim_im_in_x, // input image dimention x const uint16_t dim_im_in_y, // input image dimention y - const uint16_t ch_im_in, // number of input image channels - const q7_t * wt, // kernel weights + const uint16_t ch_im_in, // number of input image channels + const q7_t * wt, // kernel weights const uint16_t ch_im_out, // number of filters, i.e., output image channels const uint16_t dim_kernel_x, // filter kernel size x const uint16_t dim_kernel_y, // filter kernel size y const uint16_t padding_x, // padding sizes x const uint16_t padding_y, // padding sizes y - const uint16_t stride_x, // stride x - const uint16_t stride_y, // stride y - const q7_t * bias, // bias + const uint16_t stride_x, // stride x + const uint16_t stride_y, // stride y + const q7_t * bias, // bias const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // output image const uint16_t dim_im_out_x, // output image dimension x const uint16_t dim_im_out_y, // output image dimension y - q15_t * bufferA, //buffer space for input - q7_t * bufferB); //buffer space for output - + q15_t * bufferA, //buffer space for input + q7_t * bufferB); //buffer space for output void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, // input image const uint16_t dim_im_in_x, // input image dimention x @@ -156,7 +155,30 @@ void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, // inp const uint16_t dim_im_out_y, // output image dimension y q15_t * bufferA, //buffer space for input q7_t * bufferB); //buffer space for output - + +void local_zero_padding_q7(const q7_t *Im_in, // input image + const uint16_t dim_im_in_x, // input image dimention x + const uint16_t dim_im_in_y, // input image dimention y + const uint16_t ch_im_in, // number of input image channels + const uint16_t padding_top, // padding sizes y + const uint16_t padding_bottom, // padding sizes y + const uint16_t padding_left, // padding sizes x + const uint16_t padding_right, // padding sizes x + q7_t *Im_out, // output image + const uint16_t dim_im_out_x, // output image dimension x + const uint16_t dim_im_out_y); // output image dimension y + +void local_cropping_q7(const q7_t *Im_in, // input image + const uint16_t dim_im_in_x, // input image dimention x + const uint16_t dim_im_in_y, // input image dimention y + const uint16_t ch_im_in, // number of input image channels + const uint16_t padding_top, // padding sizes y + const uint16_t padding_bottom, // padding sizes y + const uint16_t padding_left, // padding sizes x + const uint16_t padding_right, // padding sizes x + q7_t *Im_out, // output image + const uint16_t dim_im_out_x, // output image dimension x + const uint16_t dim_im_out_y); // output image dimension y void local_fully_connected_q7_opt(const q7_t * pV, // pointer to vector const q7_t * pM, // pointer to matrix diff --git a/inc/nnom_out_shape.h b/inc/nnom_out_shape.h index 61c776d..68da92d 100644 --- a/inc/nnom_out_shape.h +++ b/inc/nnom_out_shape.h @@ -32,6 +32,11 @@ nnom_status_t 
dw_conv2d_out_shape(nnom_layer_t *layer); nnom_status_t dense_out_shape(nnom_layer_t *layer); nnom_status_t rnn_out_shape(nnom_layer_t *layer); +// padding, cropping, upsample +nnom_status_t upsample_out_shape(nnom_layer_t *layer); +nnom_status_t zero_padding_out_shape(nnom_layer_t* layer); +nnom_status_t cropping_out_shape(nnom_layer_t* layer); + // activation nnom_status_t relu_out_shape(nnom_layer_t *layer); nnom_status_t softmax_out_shape(nnom_layer_t *layer); @@ -41,7 +46,6 @@ nnom_status_t maxpooling_out_shape(nnom_layer_t *layer); nnom_status_t avgpooling_out_shape(nnom_layer_t *layer); nnom_status_t sumpooling_out_shape(nnom_layer_t *layer); nnom_status_t global_pooling_out_shape(nnom_layer_t *layer); -nnom_status_t upsample_out_shape(nnom_layer_t *layer); // utils nnom_status_t flatten_out_shape(nnom_layer_t *layer); diff --git a/inc/nnom_run.h b/inc/nnom_run.h index 8bb1e48..544af18 100644 --- a/inc/nnom_run.h +++ b/inc/nnom_run.h @@ -29,13 +29,16 @@ nnom_status_t dense_run(nnom_layer_t *layer); nnom_status_t rnn_run(nnom_layer_t *layer); nnom_status_t cell_simple_rnn_run(nnom_layer_t *layer); +nnom_status_t upsample_run(nnom_layer_t *layer); +nnom_status_t zero_padding_run(nnom_layer_t * layer); +nnom_status_t cropping_run(nnom_layer_t * layer); + nnom_status_t activation_run(nnom_layer_t *layer); nnom_status_t softmax_run(nnom_layer_t *layer); nnom_status_t maxpool_run(nnom_layer_t *layer); nnom_status_t avgpool_run(nnom_layer_t *layer); nnom_status_t sumpool_run(nnom_layer_t *layer); -nnom_status_t upsample_run(nnom_layer_t *layer); nnom_status_t concat_run(nnom_layer_t *layer); nnom_status_t add_run(nnom_layer_t *layer); diff --git a/src/nnom_layers.c b/src/nnom_layers.c index 59a529b..fdcf9da 100644 --- a/src/nnom_layers.c +++ b/src/nnom_layers.c @@ -43,6 +43,15 @@ nnom_shape_t stride(size_t h, size_t w) { return shape(h, w, 1); } +nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right) +{ + nnom_border_t b; + b.top = top; + b.bottom = bottom; + b.left = left; + b.right = right; + return b; +} nnom_qformat_t qformat(int8_t m, int8_t n) { nnom_qformat_t fmt; @@ -188,7 +197,55 @@ nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_ return (nnom_layer_t *)layer; } -// up sampling or unpooling layer +// Zero padding layer +nnom_layer_t *ZeroPadding(nnom_border_t pad) +{ + nnom_zero_padding_layer_t *layer; + nnom_layer_io_t *in, *out; + + // apply a block memory for all the sub handles. + size_t mem_size = sizeof(nnom_zero_padding_layer_t) + sizeof(nnom_layer_io_t) * 2; + layer = nnom_mem(mem_size); + if (layer == NULL) + return NULL; + + // distribut the memory to sub handles. + in = (void *)((unsigned long)layer + sizeof(nnom_zero_padding_layer_t)); + out = (void *)((unsigned long)in + sizeof(nnom_layer_io_t)); + + // set type in layer parent + layer->super.type = NNOM_ZERO_PADDING; + // set buf state + in->type = LAYER_BUF_TEMP; + out->type = LAYER_BUF_TEMP; + // put in & out on the layer. 
+	layer->super.in = io_init(layer, in);
+	layer->super.out = io_init(layer, out);
+	// set run and outshape methods
+	layer->super.run = zero_padding_run;
+	layer->super.comp_out_shape = zero_padding_out_shape;
+
+	// set parameters
+	layer->pad = pad;
+
+	return (nnom_layer_t*)layer;
+}
+
+// Cropping layer
+nnom_layer_t *Cropping(nnom_border_t pad)
+{
+	nnom_layer_t *layer;
+	// most settings are the same as zero padding
+	layer = ZeroPadding(pad);
+
+	layer->type = NNOM_CROPPING;
+	layer->run = cropping_run;
+	layer->comp_out_shape = cropping_out_shape;
+
+	return layer;
+}
+
+// up sampling layer
 nnom_layer_t *UpSample(nnom_shape_t kernel)
 {
 	nnom_upsample_layer_t *layer;
@@ -222,8 +279,6 @@ nnom_layer_t *UpSample(nnom_shape_t kernel)
 	return (nnom_layer_t*)layer;
 }
 
-
-
 // Simple RNN
 // unit = output shape
 // type of activation
diff --git a/src/nnom_local.c b/src/nnom_local.c
index e353cda..29a4c93 100644
--- a/src/nnom_local.c
+++ b/src/nnom_local.c
@@ -330,6 +330,79 @@ void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
 	}
 }
 
+void local_zero_padding_q7(const q7_t *Im_in,           // input image
+			const uint16_t dim_im_in_x,    // input image dimention x
+			const uint16_t dim_im_in_y,    // input image dimention y
+			const uint16_t ch_im_in,       // number of input image channels
+			const uint16_t padding_top,    // padding sizes y
+			const uint16_t padding_bottom, // padding sizes y
+			const uint16_t padding_left,   // padding sizes x
+			const uint16_t padding_right,  // padding sizes x
+			q7_t *Im_out,                  // output image
+			const uint16_t dim_im_out_x,   // output image dimension x
+			const uint16_t dim_im_out_y)   // output image dimension y
+{
+	int i, size;
+	q7_t *p_out = Im_out;
+
+	// top rows - all zero
+	size = dim_im_out_x * ch_im_in * padding_top;
+	memset(p_out, 0, size);
+	p_out += size;
+
+	// middle rows - zero the left border, copy one input row, zero the right border
+	for (i = 0; i < dim_im_in_y; i++)
+	{
+		memset(p_out, 0, padding_left * ch_im_in);
+		p_out += padding_left * ch_im_in;
+
+		memcpy(p_out, Im_in + i * dim_im_in_x * ch_im_in, dim_im_in_x * ch_im_in);
+		p_out += dim_im_in_x * ch_im_in;
+
+		memset(p_out, 0, padding_right * ch_im_in);
+		p_out += padding_right * ch_im_in;
+	}
+
+	// bottom rows - all zero
+	memset(p_out, 0, dim_im_out_x * ch_im_in * padding_bottom);
+}
+
+void local_cropping_q7(const q7_t *Im_in,               // input image
+			const uint16_t dim_im_in_x,    // input image dimention x
+			const uint16_t dim_im_in_y,    // input image dimention y
+			const uint16_t ch_im_in,       // number of input image channels
+			const uint16_t padding_top,    // cropping sizes y
+			const uint16_t padding_bottom, // cropping sizes y
+			const uint16_t padding_left,   // cropping sizes x
+			const uint16_t padding_right,  // cropping sizes x
+			q7_t *Im_out,                  // output image
+			const uint16_t dim_im_out_x,   // output image dimension x
+			const uint16_t dim_im_out_y)   // output image dimension y
+{
+	int i, row_size;
+	const q7_t *p_in = Im_in;
+	q7_t *p_out = Im_out;
+
+	// skip the top rows
+	p_in += dim_im_in_x * ch_im_in * padding_top;
+
+	// copy the kept part of each remaining row, skipping the left/right borders
+	row_size = dim_im_out_x * ch_im_in;
+	for (i = 0; i < dim_im_out_y; i++)
+	{
+		memcpy(p_out, p_in + padding_left * ch_im_in, row_size);
+		p_out += row_size;
+		p_in += dim_im_in_x * ch_im_in;
+	}
+}
+
diff --git a/src/nnom_out_shape.c b/src/nnom_out_shape.c
--- a/src/nnom_out_shape.c
+++ b/src/nnom_out_shape.c
+nnom_status_t zero_padding_out_shape(nnom_layer_t* layer)
+{
+	nnom_zero_padding_layer_t *cl = (nnom_zero_padding_layer_t *)layer;
+
+	// get the last layer's output as input shape
+	layer->in->shape = layer->in->hook.io->shape;
+	// output shape
+	layer->out->shape.w = layer->in->shape.w + cl->pad.left + cl->pad.right;
+	layer->out->shape.h = layer->in->shape.h + cl->pad.top + cl->pad.bottom;
+	layer->out->shape.c = layer->in->shape.c;
+	return NN_SUCCESS;
+}
+
+nnom_status_t cropping_out_shape(nnom_layer_t* layer)
+{
+	nnom_cropping_layer_t *cl = (nnom_cropping_layer_t *)layer;
+
+	// get the last layer's output as input shape
+	layer->in->shape = layer->in->hook.io->shape;
+
+	// output shape
+	if(layer->in->shape.w <= (cl->pad.left + cl->pad.right) ||
+		layer->in->shape.h <= (cl->pad.top + cl->pad.bottom))
+		return NN_ARGUMENT_ERROR;
+
+	layer->out->shape.w = layer->in->shape.w - (cl->pad.left + cl->pad.right);
+	layer->out->shape.h = layer->in->shape.h - (cl->pad.top + cl->pad.bottom);
+	layer->out->shape.c = layer->in->shape.c;
+	return NN_SUCCESS;
+}
+
+
 // the state buffer and computational buffer shape of the cell
 nnom_status_t simplecell_out_shape(nnom_layer_t* layer, nnom_rnn_cell_t* cell)
 {
diff --git a/src/nnom_run.c b/src/nnom_run.c
index 350fc60..5357a42 100644
--- a/src/nnom_run.c
+++ b/src/nnom_run.c
@@ -165,6 +165,38 @@ nnom_status_t conv2d_run(nnom_layer_t *layer)
 }
 
+nnom_status_t zero_padding_run(nnom_layer_t * layer)
+{
+	nnom_zero_padding_layer_t *cl = (nnom_zero_padding_layer_t*)layer;
+
+	local_zero_padding_q7(layer->in->mem->blk,
+			layer->in->shape.w, layer->in->shape.h, layer->in->shape.c,
+			cl->pad.top,
+			cl->pad.bottom,
+			cl->pad.left,
+			cl->pad.right,
+			layer->out->mem->blk,
+			layer->out->shape.w, layer->out->shape.h);
+
+	return NN_SUCCESS;
+}
+
+nnom_status_t cropping_run(nnom_layer_t * layer)
+{
+	nnom_cropping_layer_t *cl = (nnom_cropping_layer_t*)layer;
+ + local_cropping_q7(layer->in->mem->blk, + layer->in->shape.w, layer->in->shape.h, layer->in->shape.c, + cl->pad.top, + cl->pad.bottom, + cl->pad.left, + cl->pad.right, + layer->out->mem->blk, + layer->out->shape.w, layer->out->shape.h); + + return NN_SUCCESS; +} + nnom_status_t cell_simple_rnn_run(nnom_layer_t *layer) { /*
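
Usage sketch (illustrative only, not part of the patch): the snippet below relies only on `border()`, `ZeroPadding()`, `Cropping()`, `UpSample()` and `kernel()` as declared above in `inc/nnom_layers.h`. The wrapper function name and the step of hooking the returned layers into a model are assumptions based on the existing NNoM examples, not APIs introduced by this change.

~~~C
#include "nnom.h"

/* Hypothetical helper: build the new layers with a 1-pixel border on every edge. */
void build_padding_layers(void)
{
	/* border(top, bottom, left, right) packs the per-edge sizes into one object */
	nnom_border_t b = border(1, 1, 1, 1);

	/* HxWxC -> (H+2)x(W+2)xC, the new border pixels are filled with zeros */
	nnom_layer_t *pad = ZeroPadding(b);

	/* (H+2)x(W+2)xC -> HxWxC, removes the same border again */
	nnom_layer_t *crop = Cropping(b);

	/* nearest-neighbour up sampling: HxWxC -> 2Hx2WxC */
	nnom_layer_t *up = UpSample(kernel(2, 2));

	/* hook these into a model as in the existing NNoM examples */
	(void)pad; (void)crop; (void)up;
}
~~~

Since `Cropping()` reuses the zero-padding layer structure, the same `nnom_border_t` value describes how much is added to or removed from each edge, and `cropping_out_shape()` rejects a border that would consume the whole input.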