Merge remote-tracking branch 'origin/develop' into dl/quantization/passes_for_splitted_graphs
daniil-lyakhov committed Nov 30, 2023
2 parents 8dc09e8 + db786a8 commit 9c40a66
Showing 425 changed files with 66,798 additions and 1,724,875 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/model_hub.yml
@@ -0,0 +1,21 @@
name: Model Hub

on:
  workflow_dispatch:

jobs:
  torch:
    runs-on: ubuntu-20.04-16-cores
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: 3.8.10
      - name: Install NNCF and test requirements
        run: make install-models-hub-torch

      - name: Run models-hub-torch test scope
        run: make test-models-hub-torch
15 changes: 14 additions & 1 deletion Makefile
@@ -50,6 +50,7 @@ test-examples-onnx:
install-openvino-test:
	pip install -U pip
	pip install -e .[openvino]
	pip install tensorflow==2.12.0
	pip install -r tests/openvino/requirements.txt
	pip install -r tests/cross_fw/install/requirements.txt
	pip install -r tests/cross_fw/examples/requirements.txt
@@ -113,8 +114,17 @@ install-torch-dev: install-torch-test install-pre-commit
	pip install -r examples/post_training_quantization/torch/mobilenet_v2/requirements.txt
	pip install -r examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt

install-models-hub-torch:
	pip install -U pip
	pip install -e .
	pip install -r tests/torch/models_hub_test/requirements.txt
	# Install wheel to run pip with --no-build-isolation
	pip install wheel
	pip install --no-build-isolation -r tests/torch/models_hub_test/requirements_secondary.txt


test-torch:
	pytest ${COVERAGE_ARGS} tests/torch -m "not weekly and not nightly" --junitxml ${JUNITXML_PATH} $(DATA_ARG)
	pytest ${COVERAGE_ARGS} tests/torch -m "not weekly and not nightly and not models_hub" --junitxml ${JUNITXML_PATH} $(DATA_ARG)

test-torch-nightly:
	pytest ${COVERAGE_ARGS} tests/torch -m nightly --junitxml ${JUNITXML_PATH} $(DATA_ARG)
@@ -138,6 +148,9 @@ test-examples-torch:
		--backend torch \
		--junitxml ${JUNITXML_PATH}

test-models-hub-torch:
	pytest tests/torch/models_hub_test --junitxml ${JUNITXML_PATH}

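For context, a hedged sketch of how a test under tests/torch/models_hub_test might opt into the `models_hub` marker that the `-m "not models_hub"` filter above excludes. The marker name is taken from the Makefile filter; the test body and marker registration are assumptions, not part of this commit:

```python
import pytest

# Module-level marker: `make test-torch` excludes tests carrying it via
# -m "not models_hub", while `make test-models-hub-torch` runs this directory directly.
pytestmark = pytest.mark.models_hub


def test_placeholder_hub_model():
    # Placeholder body: a real test would load a model from a hub and trace it with NNCF.
    assert True
```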
###############################################################################
# Common part
install-common-test:
47 changes: 47 additions & 0 deletions ReleaseNotes.md
@@ -1,5 +1,52 @@
# Release Notes

## New in Release 2.7.0

Post-training Quantization:

- Features:
  - (OpenVINO) Added support for data-free 4-bit weights compression through NF4 and INT4 data types (`compress_weights(…)` pipeline).
  - (OpenVINO) Added support for [IF operation](https://docs.openvino.ai/latest/openvino_docs_ops_infrastructure_If_8.html) quantization.
  - (OpenVINO) Added `dump_intermediate_model` parameter support for AccuracyAwareAlgorithm (`quantize_with_accuracy_control(…)` pipeline).
  - (OpenVINO) Added support for the SmoothQuant and ChannelAlignment algorithms in the HyperparameterTuner algorithm (`quantize_with_tune_hyperparams(…)` pipeline).
  - (PyTorch) Post-training Quantization is now supported via the `quantize(…)` pipeline and the common implementation of quantization algorithms. The `create_compressed_model()` method is deprecated for Post-training Quantization.
  - Added new types (AvgPool, GroupNorm, LayerNorm) to the ignored scope for the `ModelType.Transformer` scheme.
  - `QuantizationPreset.Mixed` was set as the default for the `ModelType.Transformer` scheme.
- Fixes:
  - (OpenVINO, ONNX, PyTorch) Aligned/added patterns between backends (SE block, MVN layer, multiple activations, etc.) to restore performance/metrics.
  - Fixed patterns for `ModelType.Transformer` to align with the [quantization scheme](https://docs.openvino.ai/latest/openvino_docs_OV_UG_lpt.html).
- Improvements:
  - Improved UX with a new progress bar for the pipeline, new exceptions, and .dot graph visualization updates.
  - (OpenVINO) Optimized WeightsCompression algorithm (`compress_weights(…)` pipeline) execution time for LLM quantization; added ignored scope support.
  - (OpenVINO) Optimized AccuracyAwareQuantization algorithm execution time with a multi-threaded approach to calculating the ranking score (`quantize_with_accuracy_control(…)` pipeline).
  - (OpenVINO) Added the [extract_ov_subgraph tool](tools/extract_ov_subgraph.py) for large IR subgraph extraction.
  - (ONNX) Optimized the quantization pipeline (up to 1.15x speed-up).
- Tutorials:
  - [Post-Training Optimization of BLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/233-blip-visual-language-processing)
  - [Post-Training Optimization of DeepFloyd IF Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/238-deepfloyd-if)
  - [Post-Training Optimization of Grammatical Error Correction Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/214-grammar-correction)
  - [Post-Training Optimization of Dolly 2.0 Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/240-dolly-2-instruction-following)
  - [Post-Training Optimization of Massively Multilingual Speech Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/255-mms-massively-multilingual-speech)
  - [Post-Training Optimization of OneFormer Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/249-oneformer-segmentation)
  - [Post-Training Optimization of InstructPix2Pix Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/231-instruct-pix2pix-image-editing)
  - [Post-Training Optimization of LLaVA Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/257-llava-multimodal-chatbot)
  - [Post-Training Optimization of Latent Consistency Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/263-latent-consistency-models-image-generation)
  - [Post-Training Optimization of Distil-Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/267-distil-whisper-asr)
  - [Post-Training Optimization of FastSAM Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/261-fast-segment-anything)
- Known issues:
  - (ONNX) The `quantize(...)` method can produce inaccurate int8 results for models with a BatchNormalization layer that contains biases. To get the best accuracy, use the `do_constant_folding=True` option when exporting from PyTorch to ONNX.
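
    A minimal illustrative sketch of this workaround (the model and file names are placeholders, not taken from this release):

    ```python
    import torch
    import torchvision

    # do_constant_folding=True folds BatchNormalization parameters into constants at
    # export time, avoiding the int8 accuracy issue described above.
    model = torchvision.models.mobilenet_v2(weights=None).eval()
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(model, dummy_input, "mobilenet_v2.onnx", do_constant_folding=True)
    ```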

Compression-aware training:

- Fixes:
  - (PyTorch) Fixed Hessian trace calculation to solve [#2155](https://github.com/openvinotoolkit/nncf/issues/2155).
- Requirements:
  - Updated PyTorch version (2.1.0).
  - Updated numpy version (<1.27).
- Deprecations/Removals:
  - (PyTorch) Removed legacy external quantizer storage names.
  - (PyTorch) Removed support for torch < 2.0.

## New in Release 2.6.0

Post-training Quantization:
3 changes: 2 additions & 1 deletion docs/Installation.md
@@ -69,7 +69,8 @@ as well as the supported versions of Python:

| NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python |
|-----------|------------|----------|----------|------------|--------|
| `develop` | `2023.1.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `develop` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.6.0` | `2023.1.0` | `2.0.1` | `1.13.1` | `2.12.0` | `3.8` |
| `2.5.0` | `2023.0.0` | `1.13.1` | `1.13.1` | `2.11.1` | `3.8` |
| `2.4.0` | `2022.1.0` | `1.12.1` | `1.12.0` | `2.8.2` | `3.8` |
4 changes: 2 additions & 2 deletions docs/compression_algorithms/CompressWeights.md
@@ -11,7 +11,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the model
By default, weights are compressed to the 8-bit integer data type - "INT8" mode.
The OpenVINO backend also supports 3 modes of mixed-precision weight quantization with a 4-bit data type as the primary precision - INT4_SYM, INT4_ASYM and NF4. In INT4_SYM mode the primary precision is an unsigned 4-bit integer, and weights are quantized to it [symmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization) with a fixed zero point equal to 8. In INT4_ASYM mode it is also an unsigned 4-bit integer, but weights are quantized to it [asymmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization) with a typical, non-fixed zero point. In NF4 mode it is the [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without a zero point.
All 4-bit modes support grouped quantization, where a small group of weights (e.g. 128) in the channel dimension shares quantization parameters (scale).
First embedding and last linear layers are always compressed to 8-bit integer data type.
All embeddings and last linear layers are always compressed to 8-bit integer data type.
The percentage of the remaining layers compressed to 4-bit can be configured by the "ratio" parameter. E.g. ratio=0.9 means 90% of layers are compressed to the corresponding 4-bit data type and the rest to the 8-bit integer data type.
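
For illustration, a minimal sketch of how the `ratio` and `group_size` parameters described above might be passed, assuming the `nncf.compress_weights`/`CompressWeightsMode` API; see the user guide below for the canonical calls:

```python
from nncf import CompressWeightsMode
from nncf import compress_weights

# `model` is the model to compress, as in the user guide examples below.
# Assumed usage: 90% of eligible layers go to symmetric 4-bit quantization with
# groups of 128 weights sharing one scale; the remaining layers stay in 8-bit integer.
compressed_model = compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    ratio=0.9,
    group_size=128,
)
```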

#### User guide
@@ -23,7 +23,7 @@ from nncf import compress_weights
compressed_model = compress_weights(model)
```

- Compress weights symmetrically to 4-bit integer data type with group size = 128, except first embedding and last linear layers - they are compressed to 8-bit integer data type.
- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings and last linear layers - they are compressed to 8-bit integer data type.

```python
from nncf import compress_weights
from nncf import CompressWeightsMode

# The diff view truncates this snippet; the call below is a reconstruction based on the
# description above (symmetric 4-bit weights; a group size of 128 is the default here).
compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM)
```
33 changes: 33 additions & 0 deletions docs/styleguide/PyGuide.md
@@ -775,6 +775,39 @@ Always use a `.py` filename extension. Never use dashes.
Python filenames must have a `.py` extension and must not contain dashes (`-`).
This allows them to be imported and unit tested.

Avoid `.py` files with names such as `utils` or `helpers` that become a "swiss army knife" holding many unrelated pieces of code used across the code base.
Instead, group new code into dedicated files/modules named explicitly after the purpose of the code.

Bad:

*utils.py*

```python3
def log_current_time(log_stream: LogStream):
    ...


def convert_checkpoint(ckpt: CheckpointType) -> AnotherCheckpointType:
    ...
```

Good:

*logger.py*

```python3
def log_current_time(log_stream: LogStream):
    ...
```

*checkpointing/converter.py*

```python3
class CheckpointConverter:
    # ...
    def convert(self, ckpt: CheckpointType) -> AnotherCheckpointType:
        pass
```

<a id="s4.8-main"></a>
<a id="4.8-main"></a>
<a id="main"></a>
4 changes: 2 additions & 2 deletions examples/post_training_quantization/onnx/mobilenet_v2/main.py
@@ -140,11 +140,11 @@ def transform_fn(data_item):
print("[4/7] Benchmark INT8 model:")
int8_fps = run_benchmark(int8_model_path, shape=[1, 3, 224, 224], verbose=True)

print("[5/7] Validate OpenVINO FP32 model:")
print("[5/7] Validate ONNX FP32 model in OpenVINO:")
fp32_top1 = validate(fp32_model_path, val_loader)
print(f"Accuracy @ top1: {fp32_top1:.3f}")

print("[6/7] Validate OpenVINO INT8 model:")
print("[6/7] Validate ONNX INT8 model in OpenVINO:")
int8_top1 = validate(int8_model_path, val_loader)
print(f"Accuracy @ top1: {int8_top1:.3f}")

@@ -4,4 +4,4 @@ scikit-learn
fastdownload
onnx~=1.13.1
onnxruntime~=1.14.1
openvino-dev==2023.1
openvino-dev==2023.2
@@ -39,8 +39,7 @@

DATASET_INFO = download.DownloadInfo(
    name="mvtec_capsule",
    url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/"
    "download/420937454-1629951595/capsule.tar.xz",
    url="https://huggingface.co/datasets/alexsu52/mvtec_capsule/resolve/main/capsule.tar.xz",
    hash="380afc46701c99cb7b9a928edbe16eb5",
)
DATASET_PATH = HOME_PATH / ".cache/nncf/datasets/mvtec_capsule"
@@ -1,2 +1,2 @@
anomalib==0.6.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -2,4 +2,4 @@ torchvision
tqdm
scikit-learn
fastdownload
openvino-dev==2023.1
openvino-dev==2023.2
@@ -1,3 +1,3 @@
ultralytics==8.0.170
onnx>=1.12.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -1,3 +1,3 @@
ultralytics==8.0.170
onnx>=1.12.0
openvino-dev==2023.1
openvino-dev==2023.2
@@ -15,10 +15,9 @@
from pathlib import Path
from typing import List, Optional

import openvino.runtime as ov
import openvino as ov
import tensorflow as tf
import tensorflow_datasets as tfds
from openvino.tools import mo
from tqdm import tqdm

import nncf
@@ -146,16 +145,16 @@ def transform_fn(data_item):
###############################################################################
# Benchmark performance, calculate compression rate and validate accuracy

ov_model = mo.convert_model(tf_model)
ov_quantized_model = mo.convert_model(tf_quantized_model)
ov_model = ov.convert_model(tf_model, share_weights=False)
ov_quantized_model = ov.convert_model(tf_quantized_model, share_weights=False)

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
ov.serialize(ov_model, fp32_ir_path)
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
print(f"[1/7] Save FP32 model: {fp32_ir_path}")
fp32_model_size = get_model_size(fp32_ir_path, verbose=True)

int8_ir_path = f"{ROOT}/mobilenet_v2_int8.xml"
ov.serialize(ov_quantized_model, int8_ir_path)
ov.save_model(ov_quantized_model, int8_ir_path, compress_to_fp16=False)
print(f"[2/7] Save INT8 model: {int8_ir_path}")
int8_model_size = get_model_size(int8_ir_path, verbose=True)

@@ -1,4 +1,4 @@
tensorflow~=2.12.0
tensorflow-datasets
tqdm
openvino-dev==2023.0.2
openvino-dev==2023.2
29 changes: 12 additions & 17 deletions examples/post_training_quantization/torch/mobilenet_v2/main.py
@@ -12,14 +12,14 @@
import os
import re
import subprocess
from functools import partial
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import openvino as ov
import torch
from fastdownload import FastDownload
from openvino.tools import mo
from sklearn.metrics import accuracy_score
from torchvision import datasets
from torchvision import models
@@ -107,11 +107,13 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> float
]
),
)
val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, num_workers=4, shuffle=False)
val_data_loader = torch.utils.data.DataLoader(val_dataset)

torch_model = models.mobilenet_v2(num_classes=DATASET_CLASSES)
torch_model.eval()
torch_model = load_checkpoint(torch_model)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch_model.to(device)
torch_model.eval()

###############################################################################
# Quantize a PyTorch model
@@ -120,12 +122,12 @@ def get_model_size(ir_path: str, m_type: str = "Mb", verbose: bool = True) -> float
#
# To validate the transform function use the following code:
# >> for data_item in val_loader:
# >> model(transform_fn(data_item))
# >> model(transform_fn(data_item, device))


def transform_fn(data_item: Tuple[torch.Tensor, int]) -> torch.Tensor:
def transform_fn(data_item: Tuple[torch.Tensor, int], device: torch.device) -> torch.Tensor:
    images, _ = data_item
    return images
    return images.to(device)


# The calibration dataset is a small, no label, representative dataset
@@ -138,22 +140,15 @@ def transform_fn(data_item: Tuple[torch.Tensor, int]) -> torch.Tensor:
# item and prepare model input data. The quantize method uses a small subset
# (default: 300 samples) of the calibration dataset.

calibration_dataset = nncf.Dataset(val_data_loader, transform_fn)
torch_quantized_model = nncf.quantize(
    torch_model,
    calibration_dataset,
    advanced_parameters=nncf.AdvancedQuantizationParameters(disable_bias_correction=True),
)
calibration_dataset = nncf.Dataset(val_data_loader, partial(transform_fn, device=device))
torch_quantized_model = nncf.quantize(torch_model, calibration_dataset)

###############################################################################
# Benchmark performance, calculate compression rate and validate accuracy

dummy_input = torch.randn(1, 3, 224, 224)
ov_input_shape = (-1, 3, 224, 224)
ov_model = mo.convert_model(torch_model.cpu(), example_input=dummy_input, input_shape=ov_input_shape)
ov_quantized_model = mo.convert_model(
    torch_quantized_model.cpu(), example_input=dummy_input, input_shape=ov_input_shape
)
ov_model = ov.convert_model(torch_model.cpu(), example_input=dummy_input)
ov_quantized_model = ov.convert_model(torch_quantized_model.cpu(), example_input=dummy_input)

fp32_ir_path = f"{ROOT}/mobilenet_v2_fp32.xml"
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
@@ -1,5 +1,5 @@
fastdownload==0.0.7
openvino-dev==2023.1
openvino-dev==2023.2
scikit-learn
torch==2.1.0
torchvision==0.16.0
@@ -23,7 +23,6 @@
import torch
import torchvision
from fastdownload import FastDownload
from openvino.tools import mo
from PIL import Image
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection.ssd import SSD
@@ -158,11 +157,11 @@ def main():

fp32_onnx_path = f"{ROOT}/ssd300_vgg16_fp32.onnx"
torch.onnx.export(model.cpu(), dummy_input, fp32_onnx_path)
ov_model = mo.convert_model(fp32_onnx_path)
ov_model = ov.convert_model(fp32_onnx_path)

int8_onnx_path = f"{ROOT}/ssd300_vgg16_int8.onnx"
torch.onnx.export(quantized_model.cpu(), dummy_input, int8_onnx_path)
ov_quantized_model = mo.convert_model(int8_onnx_path)
ov_quantized_model = ov.convert_model(int8_onnx_path)

fp32_ir_path = f"{ROOT}/ssd300_vgg16_fp32.xml"
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False)
@@ -1,6 +1,6 @@
fastdownload==0.0.7
onnx==1.13.1
openvino-dev==2023.1
openvino-dev==2023.2
pycocotools==2.0.7
torch==2.0.1 # ssd300_vgg16 can not be exported with 2.1.0, reference: https://github.com/pytorch/pytorch/issues/113155
torchmetrics==1.0.1
2 changes: 1 addition & 1 deletion examples/torch/requirements.txt
@@ -3,7 +3,7 @@ pillow>=8.0.1
tensorboard>=2.1
matplotlib>=3.3.3
defusedxml>=0.7.0rc1
mlflow>=2.5.0,<2.7.0
mlflow==2.8.1
returns>0.14
opencv-python>=4.4.0.46
torchvision>=0.10.0,<0.17 # the minor version should always match the torch minor version that is installed via NNCF's `pip install nncf[torch]`; TV minor version is torch minor version +1