Merge remote-tracking branch 'origin/develop' into dl/quantization/passes_for_splitted_graphs
daniil-lyakhov committed Nov 30, 2023
2 parents 8dc09e8 + db786a8 commit 9c40a66
Showing 425 changed files with 66,798 additions and 1,724,875 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/model_hub.yml
@@ -0,0 +1,21 @@
name: Model Hub

on:
  workflow_dispatch:

jobs:
  torch:
    runs-on: ubuntu-20.04-16-cores
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: 3.8.10
      - name: Install NNCF and test requirements
        run: make install-models-hub-torch

      - name: Run models-hub-torch test scope
        run: make test-models-hub-torch
15 changes: 14 additions & 1 deletion Makefile
@@ -50,6 +50,7 @@ test-examples-onnx:
install-openvino-test:
	pip install -U pip
	pip install -e .[openvino]
	pip install tensorflow==2.12.0
	pip install -r tests/openvino/requirements.txt
	pip install -r tests/cross_fw/install/requirements.txt
	pip install -r tests/cross_fw/examples/requirements.txt
@@ -113,8 +114,17 @@ install-torch-dev: install-torch-test install-pre-commit
	pip install -r examples/post_training_quantization/torch/mobilenet_v2/requirements.txt
	pip install -r examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt

install-models-hub-torch:
	pip install -U pip
	pip install -e .
	pip install -r tests/torch/models_hub_test/requirements.txt
	# Install wheel to run pip with --no-build-isolation
	pip install wheel
	pip install --no-build-isolation -r tests/torch/models_hub_test/requirements_secondary.txt


test-torch:
	pytest ${COVERAGE_ARGS} tests/torch -m "not weekly and not nightly" --junitxml ${JUNITXML_PATH} $(DATA_ARG)
	pytest ${COVERAGE_ARGS} tests/torch -m "not weekly and not nightly and not models_hub" --junitxml ${JUNITXML_PATH} $(DATA_ARG)

test-torch-nightly:
	pytest ${COVERAGE_ARGS} tests/torch -m nightly --junitxml ${JUNITXML_PATH} $(DATA_ARG)
@@ -138,6 +148,9 @@ test-examples-torch:
		--backend torch \
		--junitxml ${JUNITXML_PATH}

test-models-hub-torch:
	pytest tests/torch/models_hub_test --junitxml ${JUNITXML_PATH}

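For context, a hedged sketch of how a test under tests/torch/models_hub_test might opt into the `models_hub` marker that the `-m "not models_hub"` filter above excludes. The marker name is taken from the Makefile filter; the test body and marker registration are assumptions, not part of this commit:

```python
import pytest

# Module-level marker: `make test-torch` excludes tests carrying it via
# -m "not models_hub", while `make test-models-hub-torch` runs this directory directly.
pytestmark = pytest.mark.models_hub


def test_placeholder_hub_model():
    # Placeholder body: a real test would load a model from a hub and trace it with NNCF.
    assert True
```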
###############################################################################
# Common part
install-common-test:
47 changes: 47 additions & 0 deletions ReleaseNotes.md
@@ -1,5 +1,52 @@
# Release Notes

## New in Release 2.7.0

Post-training Quantization:

- Features:
  - (OpenVINO) Added support for data-free 4-bit weights compression through NF4 and INT4 data types (`compress_weights(…)` pipeline).
  - (OpenVINO) Added support for [IF operation](https://docs.openvino.ai/latest/openvino_docs_ops_infrastructure_If_8.html) quantization.
  - (OpenVINO) Added `dump_intermediate_model` parameter support for AccuracyAwareAlgorithm (`quantize_with_accuracy_control(…)` pipeline).
  - (OpenVINO) Added support for the SmoothQuant and ChannelAlignment algorithms in the HyperparameterTuner algorithm (`quantize_with_tune_hyperparams(…)` pipeline).
  - (PyTorch) Post-training Quantization is now supported via the `quantize(…)` pipeline and the common implementation of quantization algorithms. The `create_compressed_model()` method is deprecated for Post-training Quantization.
  - Added new types (AvgPool, GroupNorm, LayerNorm) to the ignored scope for the `ModelType.Transformer` scheme.
  - `QuantizationPreset.Mixed` was set as the default for the `ModelType.Transformer` scheme.
- Fixes:
  - (OpenVINO, ONNX, PyTorch) Aligned/added patterns between backends (SE block, MVN layer, multiple activations, etc.) to restore performance/metrics.
  - Fixed patterns for `ModelType.Transformer` to align with the [quantization scheme](https://docs.openvino.ai/latest/openvino_docs_OV_UG_lpt.html).
- Improvements:
  - Improved UX with a new progress bar for the pipeline, new exceptions, and .dot graph visualization updates.
  - (OpenVINO) Optimized WeightsCompression algorithm (`compress_weights(…)` pipeline) execution time for LLM quantization; added ignored scope support.
  - (OpenVINO) Optimized AccuracyAwareQuantization algorithm execution time with a multi-threaded approach to calculating the ranking score (`quantize_with_accuracy_control(…)` pipeline).
  - (OpenVINO) Added the [extract_ov_subgraph tool](tools/extract_ov_subgraph.py) for large IR subgraph extraction.
  - (ONNX) Optimized the quantization pipeline (up to 1.15x speed-up).
- Tutorials:
  - [Post-Training Optimization of BLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/233-blip-visual-language-processing)
  - [Post-Training Optimization of DeepFloyd IF Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/238-deepfloyd-if)
  - [Post-Training Optimization of Grammatical Error Correction Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/214-grammar-correction)
  - [Post-Training Optimization of Dolly 2.0 Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/240-dolly-2-instruction-following)
  - [Post-Training Optimization of Massively Multilingual Speech Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/255-mms-massively-multilingual-speech)
  - [Post-Training Optimization of OneFormer Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/249-oneformer-segmentation)
  - [Post-Training Optimization of InstructPix2Pix Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/231-instruct-pix2pix-image-editing)
  - [Post-Training Optimization of LLaVA Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/257-llava-multimodal-chatbot)
  - [Post-Training Optimization of Latent Consistency Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/263-latent-consistency-models-image-generation)
  - [Post-Training Optimization of Distil-Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/267-distil-whisper-asr)
  - [Post-Training Optimization of FastSAM Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/261-fast-segment-anything)
- Known issues:
  - (ONNX) The `quantize(...)` method can produce inaccurate int8 results for models with a BatchNormalization layer that contains biases. To get the best accuracy, use the `do_constant_folding=True` option when exporting from PyTorch to ONNX.
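
    A minimal illustrative sketch of this workaround (the model and file names are placeholders, not taken from this release):

    ```python
    import torch
    import torchvision

    # do_constant_folding=True folds BatchNormalization parameters into constants at
    # export time, avoiding the int8 accuracy issue described above.
    model = torchvision.models.mobilenet_v2(weights=None).eval()
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(model, dummy_input, "mobilenet_v2.onnx", do_constant_folding=True)
    ```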

Compression-aware training:

- Fixes:
  - (PyTorch) Fixed Hessian trace calculation to solve [#2155](https://github.com/openvinotoolkit/nncf/issues/2155).
- Requirements:
  - Updated PyTorch version (2.1.0).
  - Updated numpy version (<1.27).
- Deprecations/Removals:
  - (PyTorch) Removed legacy external quantizer storage names.
  - (PyTorch) Removed support for torch < 2.0.

## New in Release 2.6.0

Post-training Quantization:
3 changes: 2 additions & 1 deletion docs/Installation.md
@@ -69,7 +69,8 @@ as well as the supported versions of Python:

| NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python |
|-----------|------------|----------|----------|------------|--------|
| `develop` | `2023.1.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `develop` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.5.0` | `2023.0.0` | `1.13.1` | `1.13.1` | `2.11.1` | `3.8` |
| `2.4.0` | `2022.1.0` | `1.12.1` | `1.12.0` | `2.8.2` | `3.8` |
4 changes: 2 additions & 2 deletions docs/compression_algorithms/CompressWeights.md
@@ -11,7 +11,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the model
By default, weights are compressed to the 8-bit integer data type - "INT8" mode.
The OpenVINO backend also supports 3 modes of mixed-precision weight quantization with a 4-bit data type as the primary precision - INT4_SYM, INT4_ASYM and NF4. In INT4_SYM mode the primary precision is an unsigned 4-bit integer, and weights are quantized to it [symmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization) with a fixed zero point equal to 8. In INT4_ASYM mode it is also an unsigned 4-bit integer, but weights are quantized to it [asymmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization) with a typical, non-fixed zero point. In NF4 mode it is the [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without a zero point.
All 4-bit modes support grouped quantization, where a small group of weights (e.g. 128) in the channel dimension shares quantization parameters (scale).
First embedding and last linear layers are always compressed to 8-bit integer data type.
All embeddings and last linear layers are always compressed to 8-bit integer data type.
The percentage of the remaining layers compressed to 4-bit can be configured by the "ratio" parameter. E.g. ratio=0.9 means 90% of layers are compressed to the corresponding 4-bit data type and the rest to the 8-bit integer data type.
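
For illustration, a minimal sketch of how the `ratio` and `group_size` parameters described above might be passed, assuming the `nncf.compress_weights`/`CompressWeightsMode` API; see the user guide below for the canonical calls:

```python
from nncf import CompressWeightsMode
from nncf import compress_weights

# `model` is the model to compress, as in the user guide examples below.
# Assumed usage: 90% of eligible layers go to symmetric 4-bit quantization with
# groups of 128 weights sharing one scale; the remaining layers stay in 8-bit integer.
compressed_model = compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    ratio=0.9,
    group_size=128,
)
```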

#### User guide
@@ -23,7 +23,7 @@ from nncf import compress_weights
compressed_model = compress_weights(model)
```

- Compress weights symmetrically to 4-bit integer data type with group size = 128, except first embedding and last linear layers - they are compressed to 8-bit integer data type.
- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings and last linear layers - they are compressed to 8-bit integer data type.

```python
from nncf import compress_weights
from nncf import CompressWeightsMode

# The diff view truncates this snippet; the call below is a reconstruction based on the
# description above (symmetric 4-bit weights; a group size of 128 is the default here).
compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM)
```
33 changes: 33 additions & 0 deletions docs/styleguide/PyGuide.md
@@ -775,6 +775,39 @@ Always use a `.py` filename extension. Never use dashes.
Python filenames must have a `.py` extension and must not contain dashes (`-`).
This allows them to be imported and unit tested.

Avoid `.py` files with names such as `utils` or `helpers` that become a "swiss army knife" holding many unrelated pieces of code used across the code base.
Instead, group new code into dedicated files/modules named explicitly after the purpose of the code.

Bad:

*utils.py*

```python3
def log_current_time(log_stream: LogStream):
    ...


def convert_checkpoint(ckpt: CheckpointType) -> AnotherCheckpointType:
    ...
```

Good:

*logger.py*

```python3
def log_current_time(log_stream: LogStream):
    ...
```

*checkpointing/converter.py*

```python3
class CheckpointConverter:
    # ...
    def convert(self, ckpt: CheckpointType) -> AnotherCheckpointType:
        pass
```

<a id="s4.8-main"></a>
<a id="4.8-main"></a>
<a id="main"></a>
4 changes: 2 additions & 2 deletions examples/post_training_quantization/onnx/mobilenet_v2/main.py
@@ -140,11 +140,11 @@ def transform_fn(data_item):
print("[4/7] Benchmark INT8 model:")
int8_fps = run_benchmark(int8_model_path, shape=[1, 3, 224, 224], verbose=True)

print("[5/7] Validate OpenVINO FP32 model:")
print("[5/7] Validate ONNX FP32 model in OpenVINO:")
fp32_top1 = validate(fp32_model_path, val_loader)
print(f"Accuracy @ top1: {fp32_top1:.3f}")

print("[6/7] Validate OpenVINO INT8 model:")
print("[6/7] Validate ONNX INT8 model in OpenVINO:")
int8_top1 = validate(int8_model_path, val_loader)
print(f"Accuracy @ top1: {int8_top1:.3f}")

@@ -4,4 +4,4 @@ scikit-learn
fastdownload
onnx~=1.13.1
onnxruntime~=1.14.1
openvino-dev==2023.1
openvino-dev==2023.2
@@ -39,8 +39,7 @@

DATASET_INFO = download.DownloadInfo(
    name="mvtec_capsule",
    url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/"
    "download/420937454-1629951595/capsule.tar.xz",
    url="https://huggingface.co/datasets/alexsu52/mvtec_capsule/resolve/main/capsule.tar.xz",
    hash="380afc46701c99cb7b9a928edbe16eb5",
)
DATASET_PATH = HOME_PATH / ".cache/nncf/datasets/mvtec_capsule"
@@ -1,2 +1,2 @@
anomalib==0.6.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -2,4 +2,4 @@ torchvision
tqdm
scikit-learn
fastdownload
openvino-dev==2023.1
openvino-dev==2023.2
@@ -1,3 +1,3 @@
ultralytics==8.0.170
onnx>=1.12.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -1,3 +1,3 @@
ultralytics==8.0.170
onnx>=1.12.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -15,10 +15,9 @@
from pathlib import Path
from typing import List, Optional

import openvino.runtime as ov
import openvino as ov
import tensorflow as tf
import tensorflow_datasets as tfds
from openvino.tools import mo
from tqdm import tqdm

import nncf
@@ -146,16 +145,16 @@ def transform_fn(data_item):
###############################################################################
# Benchmark performance, calculate compression rate and validate accuracy

ov_model = mo.convert_model(tf_model)
ov_quantized_model = mo.convert_model(tf_quantized_model)
ov_model = ov.convert_model(tf_model, share_weights=False)
ov_quantized_model = ov.convert_model(tf_quantized_model, share_weights=False)

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
ov.serialize(ov_model, fp32_ir_path)
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
ov.serialize(ov_quantized_model, int8_ir_path)
ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

@@ -1,4 +1,4 @@
tensorflow~=2.12.0
tensorflow-datasets
tqdm
openvino-dev==2023.0.2
openvino-dev==2023.2
29 changes: 12 additions & 17 deletions examples/post_training_quantization/torch/mobilenet_v2/main.py
@@ -12,14 +12,14 @@
import os
import re
import subprocess
from functools import partial
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import openvino as ov
import torch
from fastdownload import FastDownload
from openvino.tools import mo
from sklearn.metrics import accuracy_score
from torchvision import datasets
from torchvision import models
@@ -107,11 +107,13 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> float
]
),
)
val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, num_workers=4, shuffle=False)
val_data_loader = torch.utils.data.DataLoader(val_dataset)

torch_model = models.mobilenet_v2(num_classes=DATASET_CLASSES)
torch_model.eval()
torch_model = load_checkpoint(torch_model)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch_model.to(device)
torch_model.eval()

###############################################################################
# Quantize a PyTorch model
@@ -120,12 +122,12 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> float
#
# To validate the transform function use the following code:
# >> for data_item in val_loader:
# >> model(transform_fn(data_item))
# >> model(transform_fn(data_item, device))


def transform_fn(data_item: Tuple[torch.Tensor, int]) -> torch.Tensor:
def transform_fn(data_item: Tuple[torch.Tensor, int], device: torch.device) -> torch.Tensor:
    images, _ = data_item
    return images
    return images.to(device)


# The calibration dataset is a small, no label, representative dataset
@@ -138,22 +140,15 @@ def transform_fn(data_item: Tuple[torch.Tensor, int]) -> torch.Tensor:
# item and prepare model input data. The quantize method uses a small subset
# (default: 300 samples) of the calibration dataset.

calibration_dataset = nncf.Dataset(val_data_loader, transform_fn)
torch_quantized_model = nncf.quantize(
    torch_model,
    calibration_dataset,
    advanced_parameters=nncf.AdvancedQuantizationParameters(disable_bias_correction=True),
)
calibration_dataset = nncf.Dataset(val_data_loader, partial(transform_fn, device=device))
torch_quantized_model = nncf.quantize(torch_model, calibration_dataset)

###############################################################################
# Benchmark performance, calculate compression rate and validate accuracy

dummy_input = torch.randn(1, 3, 224, 224)
ov_input_shape = (-1, 3, 224, 224)
ov_model = mo.convert_model(torch_model.cpu(), example_input=dummy_input, input_shape=ov_input_shape)
ov_quantized_model = mo.convert_model(
    torch_quantized_model.cpu(), example_input=dummy_input, input_shape=ov_input_shape
)
ov_model = ov.convert_model(torch_model.cpu(), example_input=dummy_input)
ov_quantized_model = ov.convert_model(torch_quantized_model.cpu(), example_input=dummy_input)

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
@@ -1,5 +1,5 @@
fastdownload==0.0.7
openvino-dev==2023.1
openvino-dev==2023.2
scikit-learn
torch==2.1.0
torchvision==0.16.0
@@ -23,7 +23,6 @@
import torch
import torchvision
from fastdownload import FastDownload
from openvino.tools import mo
from PIL import Image
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection.ssd import SSD
@@ -158,11 +157,11 @@ def main():

fp32_onnx_path = f"{ROOT}/ssd300_vgg16_fp32.onnx"
torch.onnx.export(model.cpu(), dummy_input, fp32_onnx_path)
ov_model = mo.convert_model(fp32_onnx_path)
ov_model = ov.convert_model(fp32_onnx_path)

int8_onnx_path = f"{ROOT}/ssd300_vgg16_int8.onnx"
torch.onnx.export(quantized_model.cpu(), dummy_input, int8_onnx_path)
ov_quantized_model = mo.convert_model(int8_onnx_path)
ov_quantized_model = ov.convert_model(int8_onnx_path)

fp32_ir_path = f"{ROOT}/ssd300_vgg16_fp32.xml"
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
@@ -1,6 +1,6 @@
fastdownload==0.0.7
onnx==1.13.1
openvino-dev==2023.1
openvino-dev==2023.2
pycocotools==2.0.7
torch==2.0.1 # ssd300_vgg16 can not be exported with 2.1.0, reference: https://github.com/pytorch/pytorch/issues/113155
torchmetrics==1.0.1
2 changes: 1 addition & 1 deletion examples/torch/requirements.txt
@@ -3,7 +3,7 @@ pillow>=8.0.1
tensorboard>=2.1
matplotlib>=3.3.3
defusedxml>=0.7.0rc1
mlflow>=2.5.0,<2.7.0
mlflow==2.8.1
returns>0.14
opencv-python>=4.4.0.46
torchvision>=0.10.0,<0.17 # the minor version should always match the torch minor version that is installed via NNCF's `pip install nncf[torch]`; TV minor version is torch minor version +1