From 01b60fa379f257d1c8bb6ae749f9077a1561013f Mon Sep 17 00:00:00 2001 From: snoopyisadog Date: Wed, 14 Apr 2021 03:52:03 +0200 Subject: [PATCH] quantizedConv computes on integer value --- src/caffe/layers/conv_layer.cpp | 68 ++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 782c2844..6f51249c 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -2,6 +2,8 @@ #include "caffe/layers/conv_layer.hpp" #include "caffe/util/math_functions.hpp" +#define W this->blobs_[0] +#define B this->blobs_[1] namespace caffe { @@ -52,34 +54,42 @@ void ConvolutionLayer::compute_output_shape() { template void ConvolutionLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - // set up quantization parameters: scale + zero_point const Dtype input_scale = this->input_scale_; const Dtype output_scale = this->output_scale_; const Dtype weight_scale = this->weight_scale_; - const Dtype bias_scale = this->bias_scale_; + const Dtype bias_scale = this->bias_scale_; // bias_scale = input_scale * weight_scale const int input_zero_point = this->input_zero_point_; const int output_zero_point = this->output_zero_point_; const int weight_zero_point = this->weight_zero_point_; const int bias_zero_point = this->bias_zero_point_; const Dtype saturate = this->saturate_; - const bool quant_in = (input_scale != Dtype(1.0) || input_zero_point != 0); - const bool quant_out = (output_scale != Dtype(1.0) || output_zero_point != 0); - const bool quant_w = (weight_scale != Dtype(1.0) || weight_zero_point != 0); - const bool quant_b = (this->bias_term_&& (bias_scale != Dtype(1.0) || bias_zero_point != 0)); - if (quant_w) { - Dtype *qw = this->blobs_[0]->mutable_cpu_data(); - caffe_cpu_dequantize(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point); + /*** Quantization Computation + (1) shift input/weight/bias w.r.t corresponding zero_point +
(2) compute Convolution+Bias on the integer value range + (3) scale the output by input_scale*weight_scale/output_scale, and + (4) shift the output by output_zero_point + *Assumption is that bias_scale = input_scale*weight_scale + For a floating-value model, only (2) is computed with floating values + ***/ + const bool shift_input = (input_zero_point != 0); + const bool shift_weight = (weight_zero_point != 0); + const bool shift_bias = (bias_zero_point != 0); + const bool scale_output = (input_scale != Dtype(1.0) || weight_scale != Dtype(1.0) || + output_scale != Dtype(1.0)); + const bool shift_output = (output_zero_point != 0); + + if (shift_weight) { // shift the quantized weight + caffe_add_scalar(W->count(), Dtype(-weight_zero_point), W->mutable_cpu_data()); } - if (quant_b) { - Dtype *qb = this->blobs_[1]->mutable_cpu_data(); - caffe_cpu_dequantize(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point); + if (shift_bias) { + caffe_add_scalar(B->count(), Dtype(-bias_zero_point), B->mutable_cpu_data()); } const Dtype* weight = this->blobs_[0]->cpu_data(); for (int i = 0; i < bottom.size(); ++i) { - if (quant_in) { - Dtype* qin = bottom[i]->mutable_cpu_data(); - caffe_cpu_dequantize(bottom[i]->count(), qin, input_scale, input_zero_point); + if (shift_input) { + caffe_add_scalar(bottom[i]->count(), + Dtype(-input_zero_point), bottom[i]->mutable_cpu_data()); } const Dtype* bottom_data = bottom[i]->cpu_data(); @@ -95,8 +105,14 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, } const int count_t = top[i]->count(); - if (quant_out) { - caffe_cpu_quantize(count_t, top_data, output_scale, output_zero_point); + if (scale_output) { + //Dtype out_scal = input_scale * weight_scale / output_scale; + Dtype out_scal = bias_scale / output_scale; + caffe_scal(count_t, out_scal, top_data); + caffe_cpu_round(count_t, top_data); + } + if (shift_output) { + caffe_add_scalar(count_t, Dtype(output_zero_point), top_data); } if (saturate ==
ConvolutionParameter_SaturateMethod_Signed) caffe_cpu_signed_saturate(count_t, top_data); @@ -107,19 +123,17 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit) caffe_cpu_unsigned_8bit_saturate(count_t, top_data); - if (quant_in) { // restore the quantized input blob - Dtype* qin = bottom[i]->mutable_cpu_data(); - caffe_cpu_quantize(bottom[i]->count(), qin, input_scale, input_zero_point); + if (shift_input) { // shift the quantized input blob back to correct range + caffe_add_scalar(bottom[i]->count(), + Dtype(input_zero_point), bottom[i]->mutable_cpu_data()); } } - // restore quantized weight/bias - if (quant_w) { - Dtype *qw = this->blobs_[0]->mutable_cpu_data(); - caffe_cpu_quantize(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point); + // shift quantized weight/bias back to correct range + if (shift_weight) { + caffe_add_scalar(W->count(), Dtype(weight_zero_point), W->mutable_cpu_data()); } - if (quant_b) { - Dtype *qb = this->blobs_[1]->mutable_cpu_data(); - caffe_cpu_quantize(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point); + if (shift_bias) { + caffe_add_scalar(B->count(), Dtype(bias_zero_point), B->mutable_cpu_data()); } }