Merge remote-tracking branch 'upstream/develop' into bump_scipy

openvinotoolkit · Sep 12, 2023 · 15d446d · 15d446d
2 parents f71dd3c + 8400793
commit 15d446d
Show file tree

Hide file tree

Showing 34 changed files with 1,338 additions and 458 deletions.
diff --git a/Makefile b/Makefile
@@ -10,6 +10,10 @@ ifdef DATA
 	DATA_ARG := --data $(DATA)
 endif
 
+ifdef WEEKLY_MODELS
+	WEEKLY_MODELS_ARG := --weekly-models $(WEEKLY_MODELS)
+endif
+
 install-pre-commit:
 	pip install pre-commit==3.2.2
 
@@ -124,7 +128,13 @@ install-torch-dev: install-torch-test install-pre-commit install-pylint
 	pip install -r examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt
 
 test-torch:
-	pytest ${COVERAGE_ARGS} tests/common tests/torch --junitxml ${JUNITXML_PATH} $(DATA_ARG)
+	pytest ${COVERAGE_ARGS} tests/common tests/torch -m "not weekly and not nightly" --junitxml ${JUNITXML_PATH} $(DATA_ARG)
+
+test-torch-nightly:
+	pytest ${COVERAGE_ARGS} tests/torch -m nightly --junitxml ${JUNITXML_PATH} $(DATA_ARG)
+
+test-torch-weekly:
+	pytest ${COVERAGE_ARGS} tests/torch -m weekly --junitxml ${JUNITXML_PATH} $(DATA_ARG) ${WEEKLY_MODELS_ARG}
 
 COMMON_PYFILES := $(shell python3 tools/collect_pylint_input_files_for_backend.py common)
 pylint-torch:

diff --git a/README.md b/README.md
@@ -280,13 +280,15 @@ A collection of ready-to-run Jupyter* notebooks are available to demonstrate how
 - [NNCF Post-Training Optimization of Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything)
 - [NNCF Post-Training Optimization of CLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification)
 - [NNCF Post-Training Optimization of ImageBind Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind)
+- [NNCF Post-Training Optimization of Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation)
 - [Quantize a Segmentation Model and Show Live Inference](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/110-ct-segmentation-quantize)
 - [Training to Deployment with TensorFlow and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/301-tensorflow-training-openvino)
 - [Migrate quantization from POT API to NNCF API](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/111-yolov5-quantization-migration)
 - [Post-Training Quantization of Pytorch model with NNCF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/112-pytorch-post-training-quantization-nncf)
 - [Optimizing PyTorch models with NNCF of OpenVINO by 8-bit quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/302-pytorch-quantization-aware-training)
 - [Optimizing TensorFlow models with NNCF of OpenVINO by 8-bit quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/305-tensorflow-quantization-aware-training)
 - [Accelerate Inference of Sparse Transformer Models with OpenVINO and 4th Gen Intel Xeon Scalable Processors](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/116-sparsity-optimization)
+- [Quantization with accuracy control using NNCF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control)
 
 ### Post-Training Quantization Samples
 

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
@@ -1,5 +1,65 @@
 # Release Notes
 
+## New in Release 2.6.0
+
+Post-training Quantization:
+
+- Features:
+  - Added `CPU_SPR` device type support.
+  - Added quantizers scales unification.
+  - Added quantization scheme for ReduceSum operation.
+  - Added new types (ReduceL2, ReduceSum, Maximum) to the ignored scope for `ModelType.Transformer`.
+  - (OpenVINO) Added SmoothQuant algorithm.
+  - (OpenVINO) Added ChannelAlignment algorithm.
+  - (OpenVINO) Added HyperparameterTuner algorithm.
+  - (PyTorch) Added FastBiasCorrection algorithm support.
+  - (OpenVINO, ONNX) Added embedding weights quantization.
+  - (OpenVINO, PyTorch) Added new `compress_weights` method that provides data-free [INT8 weights compression](docs/compression_algorithms/CompressWeights.md).
+- Fixes:
+  - Fixed detection of decomposed post-processing in models.
+  - Multiple fixes (new patterns, bugfixes, etc.) to solve [#1936](https://github.com/openvinotoolkit/nncf/issues/1936) issue.
+  - Fixed model reshaping during quantization to keep the original model shape.
+  - (OpenVINO) Added support for sequential models quantization.
+  - (OpenVINO) Fixed in-place statistics cast to support empty dimensions.
+  - (OpenVINO, ONNX) Fixed quantization of the MatMul operation with weights rank > 2.
+  - (OpenVINO, ONNX) Fixed BiasCorrection algorithm to enable [CLIP model quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification).
+- Improvements:
+  - Optimized `quantize(…)` pipeline (up to 4.3x speed up in total).
+  - Optimized `quantize_with_accuracy_control(…)` pipeline (up to 8x speed up for [122-quantizing-model-with-accuracy-control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control) notebook).
+  - Optimized general statistics collection (up to 1.2x speed up for ONNX backend).
+  - Ignored patterns separated from Fused patterns scheme (with multiple patterns addition).
+- Tutorials:
+  - [Post-Training Optimization of Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything).
+  - [Post-Training Optimization of CLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification).
+  - [Post-Training Optimization of ImageBind Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind).
+  - [Post-Training Optimization of Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation).
+  - [Post-Training Optimization with accuracy control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control).
+
+Compression-aware training:
+
+- Features:
+  - Added shape pruning processor for BootstrapNAS algorithm.
+  - Added KD loss for BootstrapNAS algorithm.
+  - Added `validate_scopes` parameter for NNCF configuration.
+  - (PyTorch) Added PyTorch 2.0 support.
+  - (PyTorch) Added `.strip()` option to API.
+  - (PyTorch) Enabled bfloat data type for quantization kernels.
+  - (PyTorch) Quantized models can now be `torch.jit.trace`d without calling `.strip()`.
+  - (PyTorch) Added support for overridden `forward` instance attribute on model objects passed into `create_compressed_model`.
+  - (Tensorflow) Added Tensorflow 2.12 support.
+- Fixes:
+  - (PyTorch) Fixed padding adjustment issue in the elastic kernel to work with the different active kernel sizes.
+  - (PyTorch) Fixed the torch graph tracing in the case where the tensors belonging to parallel edges are interleaved in the order of the tensor argument.
+  - (PyTorch) Fixed recurrent nodes matching (LSTM, GRU cells) condition with the strict rule to avoid adding unnecessary nodes to the ignored scope.
+  - (PyTorch) Fixed `torch.jit.script` wrapper so that user-side handling of exceptions during `torch.jit.script` invocation does not cause NNCF to be permanently disabled.
+  - (PyTorch, Tensorflow) Adjusted quantizer propagation algorithm to check if quantizer propagation will result in output quantization.
+  - (PyTorch) Added redefined `__class__` method for ProxyModule that avoids causing an error when calling `.super()` in the forward method.
+- Deprecations/Removals:
+  - (PyTorch) Removed deprecated `NNCFNetwork.__getattr__`, `NNCFNetwork.get_nncf_wrapped_model` methods.
+- Requirements:
+  - Updated PyTorch version (2.0.1).
+  - Updated Tensorflow version (2.12.0).
+
 ## New in Release 2.5.0
 
 Post-training Quantization:

diff --git a/nncf/common/hardware/configs/cpu.json b/nncf/common/hardware/configs/cpu.json
@@ -258,6 +258,12 @@
                 "activations": "q8_a"
             }
         },
+        {
+            "type": "GroupNormalization",
+            "quantization": {
+                "activations": "q8_a"
+            }
+        },
         {"type": "Flatten"},
         {"type": "Squeeze"},
         {"type": "Unsqueeze"},

diff --git a/nncf/common/hardware/opset.py b/nncf/common/hardware/opset.py
@@ -57,3 +57,4 @@ class HWConfigOpName:
     GELU = "Gelu"
     LSTMSEQUENCE = "LSTMSequence"
     GRUSEQUENCE = "GRUSequence"
+    GROUPNORMALIZATION = "GroupNormalization"
diff --git a/nncf/config/schemata/algo/quantization.py b/nncf/config/schemata/algo/quantization.py
@@ -526,7 +526,7 @@
         },
         "export_to_onnx_standard_ops": with_attributes(
             BOOLEAN,
-            description="Determines how should the additional quantization "
+            description="[Deprecated] Determines how should the additional quantization "
             "operations be exported into the ONNX format. Set "
             "this to true to export to ONNX "
             "standard QuantizeLinear-DequantizeLinear "

diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py
@@ -80,6 +80,7 @@
     ov_metatypes.OVSquaredDifferenceMetatype,
     ov_metatypes.OVLSTMSequenceMetatype,
     ov_metatypes.OVGRUSequenceMetatype,
+    ov_metatypes.OVGroupNormalizationMetatype,
 ]
 
 

diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py
@@ -673,6 +673,13 @@ class OVAbsMetatype(OVOpMetatype):
     op_names = ["Abs"]
 
 
+@OV_OPERATOR_METATYPES.register()
+class OVGroupNormalizationMetatype(OVOpMetatype):
+    name = "GroupNormalizationOp"
+    op_names = ["GroupNormalization"]
+    hw_config_names = [HWConfigOpName.GROUPNORMALIZATION]
+
+
 def get_operator_metatypes() -> List[Type[OperatorMetatype]]:
     """
     Returns a list of the operator metatypes.

diff --git a/nncf/openvino/graph/nncf_graph_builder.py b/nncf/openvino/graph/nncf_graph_builder.py
@@ -115,6 +115,7 @@ def _add_nncf_node(node: ov.Node, graph: NNCFGraph) -> None:
         metatype = get_node_metatype(node)
         graph.add_nncf_node(node_name=node.get_friendly_name(), node_type=node_type, node_metatype=metatype)
 
+    # pylint: disable=too-many-branches
     @staticmethod
     def create_nncf_graph(model: ov.Model) -> NNCFGraph:
         """
@@ -174,8 +175,10 @@ def create_nncf_graph(model: ov.Model) -> NNCFGraph:
                         node_attributes = node.get_attributes()
                         const_transpose_name = attribute_names[const_port_id]
                         const_attrs[const_port_id]["transpose"] = node_attributes[const_transpose_name]
-
                         act_attrs["transpose"] = node_attributes[attribute_names[act_port_id]]
+                    elif metatype == OVGRUSequenceMetatype:
+                        node_attributes = node.get_attributes()
+                        act_attrs["linear_before_reset"] = node_attributes["linear_before_reset"]
 
                     if const_attrs or act_attrs:
                         nncf_node = nncf_graph.get_node_by_name(node_name)

diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py
@@ -183,6 +183,10 @@ def native_quantize_with_accuracy_control_impl(
         copied_parameters,
     )
 
+    if advanced_accuracy_restorer_parameters.intermediate_model_dir:
+        quantized_model_path = f"{advanced_accuracy_restorer_parameters.intermediate_model_dir}/intermediate_model.xml"
+        ov.serialize(quantized_model, quantized_model_path)
+
     evaluator = Evaluator(validation_fn)
     evaluator.enable_iteration_count()
     initial_metric_results = evaluator.collect_metric_results(model, validation_dataset, model_name="initial")

diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py
@@ -193,12 +193,16 @@ class AdvancedAccuracyRestorerParameters:
     :param num_ranking_processes: The number of parallel processes that are used to rank
         quantization operations.
     :type num_ranking_processes: Optional[int]
+    :param intermediate_model_dir: Path to the folder where the model, which was fully
+        quantized with initial parameters, should be saved.
+    :type intermediate_model_dir: Optional[str]
     """
 
     max_num_iterations: int = sys.maxsize
     tune_hyperparams: bool = False
     ranking_subset_size: Optional[int] = None
     num_ranking_processes: Optional[int] = None
+    intermediate_model_dir: Optional[str] = None
 
 
 def changes_asdict(params: Any) -> Dict[str, Any]:

diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py
@@ -323,6 +323,11 @@ def _get_ignored_names(
 
         ignored_names = {name: IgnoreReason.AUTOGENERATED for name in autogenerated_ignored_names}
 
+        ignored_names_by_layer_attributes = self._backend_entity.get_ignored_names_by_layer_attributes(
+            inference_nncf_graph
+        )
+        ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_layer_attributes})
+
         # User ignored scope has higher priority
         ignored_names.update({name: IgnoreReason.USER_REQUESTED for name in user_ignored_names})
 

diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py
@@ -215,6 +215,16 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O
         :return: List of ignored metatypes.
         """
 
+    @staticmethod
+    @abstractmethod
+    def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]:
+        """
+        Returns names of ignored nodes based on layer_attributes.
+
+        :param nncf_graph: NNCFGraph instance.
+        :return: List of ignored names.
+        """
+
     @staticmethod
     @abstractmethod
     def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]:

diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py
@@ -195,6 +195,10 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O
                 types.append(om.ONNXMulLayerMetatype)
         return types
 
+    @staticmethod
+    def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]:
+        return []
+
     @staticmethod
     def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]:
         return [node for node in nncf_graph.get_all_nodes() if node.layer_attributes.has_weight()]

diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py
@@ -226,6 +226,16 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O
                 types.append(om.OVMultiplyMetatype)
         return types
 
+    @staticmethod
+    def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]:
+        ignored_names = []
+        target_nodes = nncf_graph.get_nodes_by_metatypes([om.OVGRUSequenceMetatype])
+        for node in target_nodes:
+            if isinstance(node.layer_attributes, OVLayerAttributes):
+                if node.layer_attributes.input_attributes["linear_before_reset"]:
+                    ignored_names.append(node.node_name)
+        return ignored_names
+
     @staticmethod
     def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]:
         return [

diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py
@@ -327,6 +327,10 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O
                 types.append(om.PTMulMetatype)
         return types
 
+    @staticmethod
+    def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]:
+        return []
+
     @staticmethod
     def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]:
         return [

diff --git a/nncf/torch/quantization/algo.py b/nncf/torch/quantization/algo.py
@@ -30,6 +30,7 @@
 from nncf.api.compression import CompressionLoss
 from nncf.api.compression import CompressionScheduler
 from nncf.api.compression import CompressionStage
+from nncf.common.deprecation import warning_deprecated
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
 from nncf.common.graph.definitions import MODEL_INPUT_OP_NAME
@@ -1361,6 +1362,11 @@ def __init__(
             "export_to_onnx_standard_ops", QUANTIZATION_EXPORT_TO_ONNX_STANDARD_OPS
         )
         if should_export_to_onnx_qdq:
+            warning_deprecated(
+                "The config option `export_to_onnx_standard_ops` is deprecated and will be removed "
+                "in a future version. Please use the `nncf.strip(quantized_model)` method before export to ONNX "
+                "to get model with QuantizeLinear-DequantizeLinear node pairs."
+            )
             export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
         else:
             export_mode = QuantizerExportMode.FAKE_QUANTIZE

diff --git a/nncf/torch/quantization/weights_compression.py b/nncf/torch/quantization/weights_compression.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import torch
 from torch import nn
@@ -39,20 +39,30 @@ def forward(self, layer, op_arg):
 
 
 def _insert_pre_compression_operations(
-    module: nn.Module, allowed_types: List, level_high: int = 255
+    module: nn.Module, allowed_types: List, level_high: int = 255, compression_hist: Dict = None
 ) -> Optional[nn.Module]:
     """
     Inserts weights compression with dequantization for layers in `allowed_types`.
 
     :param module: The module to insert the weights compression.
     :param allowed_types: list of allowed types for weights compression.
     :param level_high: highest possible value of compressed weights (lower is 0 in assymetric quantization).
+    :param compression_hist: mapping between layer weight and corresponding WeightsDecompressor for finding
+     shared weights.
     :return: The non-trainable module with inserted operations.
     """
+    if compression_hist is None:
+        compression_hist = {}
     for _, layer in module.named_children():
         if not type(layer) in allowed_types:
-            _insert_pre_compression_operations(layer, allowed_types, level_high)
+            _insert_pre_compression_operations(layer, allowed_types, level_high, compression_hist)
             continue
+
+        if layer.weight.dtype in [torch.uint8, torch.int8]:
+            if layer.weight in compression_hist:
+                layer.register_pre_forward_operation(compression_hist[layer.weight])
+            continue
+
         target_dim = layer.target_weight_dim_for_compression
         stat_dim = (target_dim + 1) % 2
         input_low = torch.min(layer.weight, dim=stat_dim).values.detach()
@@ -61,14 +71,16 @@ def _insert_pre_compression_operations(
 
         scale = scale.unsqueeze(stat_dim)
         zero_point = zero_point.unsqueeze(stat_dim)
-        layer.register_pre_forward_operation(WeightsDecompressor(zero_point, scale))
+        key = layer.register_pre_forward_operation(WeightsDecompressor(zero_point, scale))
 
         compressed_weight = layer.weight.data / scale + zero_point
         compressed_weight = torch.clamp(torch.round(compressed_weight), 0, level_high)
 
         layer.weight.requires_grad = False
         layer.weight.data = compressed_weight.type(dtype=torch.uint8)
 
+        compression_hist[layer.weight] = layer.get_pre_op(key)
+
 
 def insert_pre_compression_operations(module: nn.Module, bits: int = 8) -> Optional[nn.Module]:
     """

diff --git a/tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot b/tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot
@@ -0,0 +1,29 @@
+strict digraph  {
+"0 Input_1" [id=0, type=Parameter];
+"1 Conv" [id=1, type=Convolution];
+"2 Conv_Add" [id=2, type=Add];
+"3 GroupNormalization_169" [id=3, type=GroupNormalization];
+"4 Relu" [id=4, type=Relu];
+"5 Mul" [id=5, type=Multiply];
+"6 Add" [id=6, type=Add];
+"7 Result" [id=7, type=Result];
+"8 Constant_173" [id=8, type=Constant];
+"9 Constant_171" [id=9, type=Constant];
+"10 Constant_168" [id=10, type=Constant];
+"11 Constant_167" [id=11, type=Constant];
+"12 Bias" [id=12, type=Constant];
+"13 Constant_163" [id=13, type=Constant];
+"0 Input_1" -> "1 Conv"  [label="[1, 2, 3, 4, 4]", style=solid];
+"1 Conv" -> "2 Conv_Add"  [label="[1, 4, 1, 2, 2]", style=solid];
+"2 Conv_Add" -> "3 GroupNormalization_169"  [label="[1, 4, 3, 2, 2]", style=solid];
+"3 GroupNormalization_169" -> "4 Relu"  [label="[1, 4, 3, 2, 2]", style=solid];
+"4 Relu" -> "5 Mul"  [label="[1, 4, 3, 2, 2]", style=solid];
+"5 Mul" -> "6 Add"  [label="[1, 4, 3, 2, 2]", style=solid];
+"6 Add" -> "7 Result"  [label="[1, 4, 3, 2, 2]", style=solid];
+"8 Constant_173" -> "6 Add"  [label="[1, 4, 1, 1, 1]", style=solid];
+"9 Constant_171" -> "5 Mul"  [label="[1, 4, 1, 1, 1]", style=solid];
+"10 Constant_168" -> "3 GroupNormalization_169"  [label="[4]", style=solid];
+"11 Constant_167" -> "3 GroupNormalization_169"  [label="[4]", style=solid];
+"12 Bias" -> "2 Conv_Add"  [label="[1, 1, 3, 1, 1]", style=solid];
+"13 Constant_163" -> "1 Conv"  [label="[4, 2, 3, 3, 3]", style=solid];
+}