diff --git a/nncf/torch/__init__.py b/nncf/torch/__init__.py index 86afa33254c..0b74414cfce 100644 --- a/nncf/torch/__init__.py +++ b/nncf/torch/__init__.py @@ -68,4 +68,4 @@ from nncf.torch.extensions import force_build_cpu_extensions, force_build_cuda_extensions -# patch_torch_operators() +patch_torch_operators() diff --git a/nncf/torch/graph/pattern_operations.py b/nncf/torch/graph/pattern_operations.py index b860560221d..dc0e5b43af2 100644 --- a/nncf/torch/graph/pattern_operations.py +++ b/nncf/torch/graph/pattern_operations.py @@ -35,11 +35,6 @@ GraphPattern.LABEL_ATTR: "BATCH_NORMALIZATION", } -GETITEM_OPERATIONS = { - GraphPattern.METATYPE_ATTR: ["index_select", "__getitem__", "gather", "index_select", "where"], - GraphPattern.LABEL_ATTR: "GETITEM", -} - GROUP_NORMALIZATION_OPERATIONS = { GraphPattern.METATYPE_ATTR: ["group_norm"], GraphPattern.LABEL_ATTR: "GROUP_NORMALIZATION", diff --git a/nncf/torch/hardware/fused_patterns.py b/nncf/torch/hardware/fused_patterns.py index 128287f6287..3cff4d1ce98 100644 --- a/nncf/torch/hardware/fused_patterns.py +++ b/nncf/torch/hardware/fused_patterns.py @@ -16,7 +16,6 @@ from nncf.torch.graph.pattern_operations import ARITHMETIC_OPERATIONS from nncf.torch.graph.pattern_operations import ATOMIC_ACTIVATIONS_OPERATIONS from nncf.torch.graph.pattern_operations import BATCH_NORMALIZATION_OPERATIONS -from nncf.torch.graph.pattern_operations import GETITEM_OPERATIONS from nncf.torch.graph.pattern_operations import GROUP_NORMALIZATION_OPERATIONS from nncf.torch.graph.pattern_operations import LINEAR_OPERATIONS from nncf.torch.graph.pattern_operations import RELU_OPERATIONS @@ -200,12 +199,7 @@ def arithmetic_operations() -> GraphPattern: def batch_norm_operations() -> GraphPattern: pattern = GraphPattern() pattern.add_node(**BATCH_NORMALIZATION_OPERATIONS) - pattern_alt = GraphPattern() - bn = pattern_alt.add_node(**BATCH_NORMALIZATION_OPERATIONS) - get_item = pattern_alt.add_node(**GETITEM_OPERATIONS) - 
pattern_alt.add_edge(bn, get_item) - pattern.add_pattern_alternative(pattern_alt) - return pattern_alt + return pattern def activation_operations() -> GraphPattern: diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 0cee8945ff4..06c0ad1b64c 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -24,16 +24,16 @@ from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup -from nncf.experimental.tensor import Tensor -from nncf.experimental.tensor import functions as fn +from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters from nncf.quantization.fake_quantize import calculate_quantizer_parameters from nncf.quantization.fake_quantize import get_quantizer_narrow_range +from nncf.tensor import Tensor +from nncf.tensor import functions as fns from nncf.torch.model_creation import wrap_model from nncf.torch.statistics.aggregator import PTStatisticsAggregator -from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic from tests.post_training.test_templates.test_calculate_quantizer_parameters import TemplateTestFQParams from tests.torch.helpers import get_all_inputs_for_graph_node from tests.torch.helpers import get_nodes_by_type @@ -118,7 +118,7 @@ def test_quantizer_params_sym(case_to_test: CaseSymParams): target_type = ( TargetType.OPERATION_WITH_WEIGHTS if quant_group == QuantizerGroup.WEIGHTS else 
TargetType.PRE_LAYER_OPERATION ) - quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type) + quantizer = PTMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type) assert quantizer.levels == fq_params.levels scale = quantizer.scale.detach().numpy() @@ -208,10 +208,10 @@ def test_quantizer_params_asym(case_to_test: CaseSymParams): target_type = ( TargetType.OPERATION_WITH_WEIGHTS if quant_group == QuantizerGroup.WEIGHTS else TargetType.PRE_LAYER_OPERATION ) - quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type) + quantizer = PTMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type) assert quantizer.levels == fq_params.levels - assert fn.allclose(quantizer.input_low.data, case_to_test.ref_inp_low) - assert fn.allclose(quantizer.input_range.data, case_to_test.ref_inp_range) + assert fns.allclose(quantizer.input_low.data, case_to_test.ref_inp_low) + assert fns.allclose(quantizer.input_range.data, case_to_test.ref_inp_range) class LinearTestModel(nn.Module): @@ -268,7 +268,9 @@ def calculate_statistics(data, mode, qgroup, half_range=False): else: max_values = np.amax(data, axes) - statistics = PTMinMaxTensorStatistic(min_values=torch.tensor(min_values), max_values=torch.tensor(max_values)) + statistics = MinMaxTensorStatistic( + min_values=Tensor(torch.tensor(min_values)), max_values=Tensor(torch.tensor(max_values)) + ) signedness_to_force = True if qgroup == QuantizerGroup.WEIGHTS else None qconfig = QuantizerConfig(num_bits=8, mode=mode, per_channel=per_ch, signedness_to_force=signedness_to_force) narrow_range = get_quantizer_narrow_range(qconfig, qgroup) @@ -340,11 +342,10 @@ def test_quantizer_parameters_export(tmp_path: Path, _seed): for name, param in fq_params.items(): assert name in torch_ptq_params - assert fn.allclose(param["input_low"], torch_ptq_params[name]["input_low"]) - assert fn.allclose(param["input_high"], 
torch_ptq_params[name]["input_high"]) + assert fns.allclose(param["input_low"], torch_ptq_params[name]["input_low"]) + assert fns.allclose(param["input_high"], torch_ptq_params[name]["input_high"]) class TestFQParams(TemplateTestFQParams): - @property - def tensor_statistic(self): - return PTMinMaxTensorStatistic + def to_nncf_tensor(self, t): + return Tensor(torch.tensor(t)) diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index 93281435104..aa427735b53 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -15,6 +15,7 @@ import pytest import torch +from nncf import Dataset from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization @@ -22,7 +23,7 @@ from nncf.torch.layers import NNCF_RNN from nncf.torch.layers import LSTMCellNNCF from tests.post_training.test_templates.helpers import EmbeddingModel -from tests.post_training.test_templates.helpers import get_static_dataset +from tests.post_training.test_templates.helpers import ScaledDotProductAttentionModel from tests.torch import test_models from tests.torch.quantization.test_algo_quantization import SharedLayersModel from tests.torch.test_compressed_graph import ModelDesc @@ -49,6 +50,14 @@ def get_model_name(description): TEST_MODELS_DESC = [ (ModelDesc("embedding_model", EmbeddingModel, [1, 10]), {}), + ( + ModelDesc( + "scaled_dot_product_attention_model", + ScaledDotProductAttentionModel, + {"query": [1, 8, 16], "key": [1, 8, 16], "value": [1, 8, 16]}, + ), + {}, + ), (ModelDesc("shared_model", SharedLayersModel, [1, 1, 5, 6]), {}), (ModelDesc("alexnet", test_models.AlexNet, [1, 3, 32, 32]), {}), (ModelDesc("lenet", test_models.LeNet, [1, 3, 32, 32]), {}), @@ -96,18 +105,21 @@ def get_model_name(description): def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_parameters, 
graph_dir, mocker): model = desc.model_builder() - nncf_network = wrap_model(model, torch.ones(desc.input_sample_sizes), trace_parameters=True) + if isinstance(desc.input_sample_sizes, dict): + example_input = {} + for name, size in desc.input_sample_sizes.items(): + example_input[name] = torch.ones(size) + else: + example_input = torch.ones(desc.input_sample_sizes) + + nncf_network = wrap_model(model, example_input, trace_parameters=True) quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True) quantization_parameters["subset_size"] = 1 quantization_algorithm = PostTrainingQuantization(**quantization_parameters) - def transform_fn(input_) -> torch.Tensor: - return torch.tensor(input_[0]) - quantized_model = quantization_algorithm.apply( nncf_network, nncf_network.nncf.get_graph(), - dataset=get_static_dataset(desc.input_sample_sizes, transform_fn, None), + dataset=Dataset([example_input]), ) - check_graph(quantized_model.nncf.get_graph(), desc.dot_filename(), graph_dir) diff --git a/tests/torch/ptq/test_min_max.py b/tests/torch/ptq/test_min_max.py index 3433612f0fa..c57c82be429 100644 --- a/tests/torch/ptq/test_min_max.py +++ b/tests/torch/ptq/test_min_max.py @@ -17,7 +17,7 @@ from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.common.graph.transformations.commands import TargetType from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend -from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from nncf.torch.graph.graph import PTNNCFGraph from nncf.torch.graph.operator_metatypes import PTConv2dMetatype from nncf.torch.graph.operator_metatypes import PTDepthwiseConv2dSubtype @@ -32,7 +32,7 @@ class TestTorchMinMaxAlgorithm(TemplateTestMinMaxAlgorithm): @property def backend(self) -> MinMaxAlgoBackend: - return FXMinMaxAlgoBackend + return PTMinMaxAlgoBackend 
@property def conv_metatype(self): diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py index 329e23beae8..7404c211002 100644 --- a/tests/torch/ptq/test_ptq_params.py +++ b/tests/torch/ptq/test_ptq_params.py @@ -20,7 +20,7 @@ from nncf.common.utils.backend import BackendType from nncf.parameters import TargetDevice from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from nncf.scopes import IgnoredScope from nncf.torch.graph.graph import PTNNCFGraph from nncf.torch.graph.graph import PTTargetPoint @@ -88,13 +88,13 @@ def forward(self, x): @pytest.mark.parametrize("target_device", TargetDevice) def test_target_device(target_device): min_max_algo = MinMaxQuantization(target_device=target_device) - min_max_algo._backend_entity = FXMinMaxAlgoBackend() + min_max_algo._backend_entity = PTMinMaxAlgoBackend() assert min_max_algo._target_device == target_device class TestPTQParams(TemplateTestPTQParams): def get_algo_backend(self): - return FXMinMaxAlgoBackend() + return PTMinMaxAlgoBackend() def check_quantize_outputs_fq_num(self, quantize_outputs, act_num_q, weight_num_q): if quantize_outputs: diff --git a/tests/torch/ptq/test_quantizer_config.py b/tests/torch/ptq/test_quantizer_config.py index 9c17914a18d..2ce6fc4d177 100644 --- a/tests/torch/ptq/test_quantizer_config.py +++ b/tests/torch/ptq/test_quantizer_config.py @@ -11,7 +11,7 @@ import pytest -from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend +from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv from tests.post_training.test_templates.models import NNCFGraphToTestSumAggregation @@ 
-23,7 +23,7 @@ class TestQuantizerConfig(TemplateTestQuantizerConfig): def get_algo_backend(self): - return FXMinMaxAlgoBackend() + return PTMinMaxAlgoBackend() @pytest.fixture def single_conv_nncf_graph(self) -> NNCFGraphToTest: diff --git a/tests/torch/ptq/test_reducers_and_aggregators.py b/tests/torch/ptq/test_reducers_and_aggregators.py index 84cb20fb9ea..c657b222802 100644 --- a/tests/torch/ptq/test_reducers_and_aggregators.py +++ b/tests/torch/ptq/test_reducers_and_aggregators.py @@ -19,7 +19,8 @@ import nncf from nncf.common.graph.layer_attributes import Dtype from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.torch.tensor import PTNNCFTensor +from nncf.tensor import Tensor +from nncf.tensor import functions as fns from nncf.torch.tensor_statistics.algo import create_register_input_hook from nncf.torch.tensor_statistics.collectors import PTAbsMaxReducer from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer @@ -28,15 +29,11 @@ from nncf.torch.tensor_statistics.collectors import PTMeanPerChanelReducer from nncf.torch.tensor_statistics.collectors import PTMeanReducer from nncf.torch.tensor_statistics.collectors import PTMinReducer -from nncf.torch.tensor_statistics.collectors import PTNNCFCollectorTensorProcessor from nncf.torch.tensor_statistics.collectors import PTQuantileReducer -from tests.common.experimental.test_reducers_and_aggregators import TemplateTestReducersAggreagtors +from tests.common.experimental.test_reducers_and_aggregators import TemplateTestReducersAggregators -class BaseTestReducersAggregators(TemplateTestReducersAggreagtors, ABC): - @pytest.fixture - def tensor_processor(self): - return PTNNCFCollectorTensorProcessor +class BaseTestReducersAggregators(TemplateTestReducersAggregators, ABC): def _get_torch_tensor(self, x: np.ndarray, dtype: Optional[Dtype] = None): torch_tensor = torch.tensor(x) @@ -80,7 +77,7 @@ def cast_tensor(self, tensor, dtype: Dtype): class 
TestCPUReducersAggregators(BaseTestReducersAggregators): def get_nncf_tensor(self, x: np.array, dtype: Optional[Dtype] = None): - return PTNNCFTensor(self._get_torch_tensor(x, dtype=dtype).cpu()) + return Tensor(self._get_torch_tensor(x, dtype=dtype).cpu()) def all_close(self, val: torch.Tensor, ref) -> bool: assert not val.is_cuda @@ -91,23 +88,23 @@ def all_close(self, val: torch.Tensor, ref) -> bool: @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is not available in current environment") class TestCudaReducersAggregators(BaseTestReducersAggregators): def get_nncf_tensor(self, x: np.array, dtype: Optional[Dtype] = None): - return PTNNCFTensor(self._get_torch_tensor(x, dtype=dtype).cuda()) + return Tensor(self._get_torch_tensor(x, dtype=dtype).cuda()) def all_close(self, val: torch.Tensor, ref) -> bool: assert val.is_cuda return super().all_close(val, ref) -@pytest.mark.parametrize("size,ref", [(16_000_000, 1_600_000.8750), (17_000_000, 1_700_000.7500)]) +@pytest.mark.parametrize("size,ref", [(16_000_000, 1_600_000.8), (17_000_000, 1_700_000.8)]) def test_quantile_percentile_function(use_cuda, size, ref): if use_cuda and not torch.cuda.is_available(): pytest.skip("Cuda is not available in current environment") device = "cuda" if use_cuda else "cpu" - tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device)) - res_quantile = PTNNCFCollectorTensorProcessor.quantile(tensor, [0.1], axis=0) - res_percentile = PTNNCFCollectorTensorProcessor.percentile(tensor, [10], axis=0) - assert len(res_quantile) == len(res_percentile) == 1 - for tensor in [res_quantile[0].tensor, res_percentile[0].tensor]: + tensor = Tensor(torch.arange(1, size, 1).float().to(device)) + res_quantile = fns.quantile(tensor, [0.1], axis=0) + res_percentile = fns.percentile(tensor, [10], axis=0) + assert res_quantile.shape[0] == res_percentile.shape[0] == 1 + for tensor in [res_quantile[0].data, res_percentile[0].data]: assert tensor == ref assert tensor.is_cuda == (device == 
"cuda") @@ -117,10 +114,10 @@ def test_median_function(use_cuda, size, ref): if use_cuda and not torch.cuda.is_available(): pytest.skip("Cuda is not available in current environment") device = "cuda" if use_cuda else "cpu" - tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device)) - res = PTNNCFCollectorTensorProcessor.median(tensor, axis=0) - assert res.tensor == ref - assert res.tensor.is_cuda == (device == "cuda") + tensor = Tensor(torch.arange(1, size, 1).float().to(device)) + res = fns.median(tensor, axis=0) + assert res.data == ref + assert res.data.is_cuda == (device == "cuda") def test_create_register_input_hook_with_return_type(mocker): @@ -133,7 +130,5 @@ def test_create_register_input_hook_with_return_type(mocker): mocker = collector.register_input_for_all_reducers mocker.assert_called_once() attr = mocker.call_args_list[0][0][0] - assert isinstance(attr, PTNNCFTensor) - assert attr.tensor == torch.tensor( - 1, - ) + assert isinstance(attr, Tensor) + assert attr.data == torch.tensor(1) diff --git a/tests/torch/ptq/test_statistic_collector.py b/tests/torch/ptq/test_statistic_collector.py index ffc5a828625..50930a289ec 100644 --- a/tests/torch/ptq/test_statistic_collector.py +++ b/tests/torch/ptq/test_statistic_collector.py @@ -9,50 +9,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Type import numpy as np -import pytest import torch -from nncf.common.tensor import NNCFTensor -from nncf.common.tensor_statistics.statistics import MeanTensorStatistic -from nncf.common.tensor_statistics.statistics import MedianMADTensorStatistic -from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic -from nncf.common.tensor_statistics.statistics import PercentileTensorStatistic -from nncf.common.tensor_statistics.statistics import RawTensorStatistic -from nncf.torch.tensor import PTNNCFTensor -from nncf.torch.tensor_statistics.statistics import PTMeanTensorStatistic -from nncf.torch.tensor_statistics.statistics import PTMedianMADTensorStatistic -from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic -from nncf.torch.tensor_statistics.statistics import PTPercentileTensorStatistic +from nncf.tensor import Tensor from tests.common.experimental.test_statistic_collector import TemplateTestStatisticCollector class TestPTStatisticCollector(TemplateTestStatisticCollector): - def get_nncf_tensor(self, value: np.ndarray) -> NNCFTensor: - return PTNNCFTensor(torch.tensor(value)) - - @pytest.fixture - def min_max_statistic_cls(self) -> Type[MinMaxTensorStatistic]: - return PTMinMaxTensorStatistic - - @pytest.fixture - def mean_statistic_cls(self) -> Type[MeanTensorStatistic]: - return PTMeanTensorStatistic - - @pytest.fixture - def median_mad_statistic_cls(self) -> Type[MedianMADTensorStatistic]: - return PTMedianMADTensorStatistic - - @pytest.fixture - def percentile_statistic_cls(self) -> Type[PercentileTensorStatistic]: - return PTPercentileTensorStatistic - - @pytest.fixture - def raw_statistic_cls(self) -> Type[RawTensorStatistic]: - raise NotImplementedError() - - @pytest.mark.skip - def test_raw_max_stat_building(self, raw_statistic_cls: RawTensorStatistic): - pass + def get_nncf_tensor(self, value: np.ndarray) -> Tensor: + return Tensor(torch.tensor(value)) diff --git 
a/tests/torch/ptq/test_tensor_collector_batch_size.py b/tests/torch/ptq/test_tensor_collector_batch_size.py index 5beff90e67a..6c4b6aae6e6 100644 --- a/tests/torch/ptq/test_tensor_collector_batch_size.py +++ b/tests/torch/ptq/test_tensor_collector_batch_size.py @@ -14,26 +14,11 @@ import torch from nncf.experimental.common.tensor_statistics.collectors import AGGREGATORS_MAP -from nncf.torch.tensor import PTNNCFTensor from nncf.torch.tensor_statistics.collectors import PT_REDUCERS_MAP -from nncf.torch.tensor_statistics.collectors import PTNNCFCollectorTensorProcessor -from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic from tests.common.experimental.test_tensor_collector_batch_size import TemplateTestTensorCollectorBatchSize class TestTensorCollectorBatchSize(TemplateTestTensorCollectorBatchSize): - @staticmethod - def get_tensor_statistics_class(): - return PTMinMaxTensorStatistic - - @staticmethod - def get_tensor_processor(): - return PTNNCFCollectorTensorProcessor() - - @staticmethod - def get_nncf_tensor_class(): - return PTNNCFTensor - @pytest.fixture(params=PT_REDUCERS_MAP.values()) def reducers(self, request) -> bool: return request.param diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py index 8cb5e00932f..30c704e5435 100644 --- a/tests/torch/ptq/test_weights_compression.py +++ b/tests/torch/ptq/test_weights_compression.py @@ -17,7 +17,8 @@ from nncf import SensitivityMetric from nncf.quantization import compress_weights from nncf.torch import wrap_model -from nncf.torch.quantization.layers import WeightsDecompressor +from nncf.torch.quantization.layers import AsymmetricWeightsDecompressor +from nncf.torch.quantization.layers import SymmetricWeightsDecompressor DATA_BASED_SENSITIVITY_METRICS = ( SensitivityMetric.HESSIAN_INPUT_ACTIVATION, @@ -28,7 +29,7 @@ ALL_SENSITIVITY_METRICS = DATA_BASED_SENSITIVITY_METRICS + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR,) -SUPPORTED_MODES = 
(CompressWeightsMode.INT8, CompressWeightsMode.INT8_ASYM) +SUPPORTED_MODES = (CompressWeightsMode.INT8, CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM) UNSUPPORTED_MODES = ( CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM, @@ -106,12 +107,14 @@ def forward(self, input_): return x -def test_compress_weights(): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_compress_weights(mode): model = ShortTransformer(5, 10) + dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8 input_ids = torch.randint(0, 10, (5,)) wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True) - compressed_model = compress_weights(wrapped_model) + compressed_model = compress_weights(wrapped_model, mode=mode) n_compressed_weights = 0 n_target_modules = 0 @@ -119,22 +122,26 @@ def test_compress_weights(): for _, module in compressed_model.named_children(): if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): n_target_modules += 1 - if module.weight.dtype in [torch.uint8, torch.int8]: + if module.weight.dtype == dtype: n_compressed_weights += 1 assert n_compressed_weights == n_target_modules -def test_compress_weights_functional_model(): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_compress_weights_functional_model(mode): model = FunctionalModel() + decompressor_type = ( + SymmetricWeightsDecompressor if mode == CompressWeightsMode.INT8_SYM else AsymmetricWeightsDecompressor + ) input_ids = torch.randint(0, 10, [1, 3, 300, 300]) wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True) - compressed_model = compress_weights(wrapped_model) + compressed_model = compress_weights(wrapped_model, mode=mode) n_compressed_weights = 0 for layer in compressed_model.nncf.external_op.values(): - if isinstance(layer, WeightsDecompressor): + if isinstance(layer, decompressor_type): 
n_compressed_weights += 1 assert n_compressed_weights == 4 @@ -158,12 +165,14 @@ def test_compress_weights_conv(): assert n_compressed_weights == n_target_modules -def test_compress_shared_weights(mocker): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_compress_shared_weights(mocker, mode): model = ShortTransformer(5, 10, share_weights=True) + dtype = torch.int8 if mode == CompressWeightsMode.INT8_SYM else torch.uint8 input_ids = torch.randint(0, 10, (5,)) wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True) - compressed_model = compress_weights(wrapped_model) + compressed_model = compress_weights(wrapped_model, mode=mode) n_compressed_weights = 0 n_target_modules = 0 @@ -171,7 +180,7 @@ def test_compress_shared_weights(mocker): for _, module in compressed_model.named_children(): if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): n_target_modules += 1 - if module.weight.dtype in [torch.uint8, torch.int8]: + if module.weight.dtype == dtype: n_compressed_weights += 1 assert n_compressed_weights == n_target_modules @@ -201,8 +210,9 @@ def forward(self, input): {"all_layers": True}, {"all_layers": False}, *({"sensitivity_metric": metric} for metric in ALL_SENSITIVITY_METRICS), - {"dataset": "anything"}, - {"ignored_scope": "anything"}, + {"gptq": True}, + {"awq": True}, + {"scale_estimation": True}, ), ) def test_raise_error_with_unsupported_params_for_int8(mode, params): @@ -214,7 +224,7 @@ def test_raise_error_with_unsupported_params_for_int8(mode, params): @pytest.mark.parametrize("mode", UNSUPPORTED_MODES) -def test_raise_error_with_not_int8_asym(mode): +def test_raise_error_with_not_int8(mode): dummy_torch_model = EmptyModel() dummy_input = torch.Tensor() wrapped_model = wrap_model(dummy_torch_model, example_input=dummy_input, trace_parameters=True) diff --git a/tests/torch_fx/test_sanity.py b/tests/torch_fx/test_sanity.py index 7542f1c7504..197c2f95472 100644 --- 
a/tests/torch_fx/test_sanity.py +++ b/tests/torch_fx/test_sanity.py @@ -26,8 +26,8 @@ from torch._export import capture_pre_autograd_graph import nncf -import nncf.torch from nncf.common.logging.track_progress import track +from nncf.torch.dynamic_graph.patch_pytorch import disable_patching from tests.torch_fx.helpers import TinyImagenetDatasetManager IMAGE_SIZE = 64 @@ -114,25 +114,28 @@ def count_q_dq(model: torch.fx.GraphModule): @pytest.mark.parametrize("test_case", MODELS) def test_sanity(test_case: SanitySampleCase): - device = torch.device("cpu") - model = get_model(test_case.model_id, test_case.checkpoint_url, device) - _, val_dataloader, calibration_dataset = TinyImagenetDatasetManager(IMAGE_SIZE, BATCH_SIZE).create_data_loaders() - - def transform_fn(data_item): - return data_item[0].to(device) - - calibration_dataset = nncf.Dataset(calibration_dataset, transform_fn) - - with torch.no_grad(): - ex_input = next(iter(calibration_dataset.get_inference_data())) - model.eval() - exported_model = capture_pre_autograd_graph(model, args=(ex_input,)) - quantized_model = nncf.quantize(exported_model, calibration_dataset) - quantized_model = torch.compile(quantized_model, backend="openvino") - - top1_int8 = validate(val_dataloader, quantized_model, device) - assert np.isclose(top1_int8, test_case.top1_int8_ref, atol=1e-2) - - num_q, num_dq = count_q_dq(quantized_model) - assert num_q == test_case.ref_num_q - assert num_dq == test_case.ref_num_dq + with disable_patching(): + device = torch.device("cpu") + model = get_model(test_case.model_id, test_case.checkpoint_url, device) + _, val_dataloader, calibration_dataset = TinyImagenetDatasetManager( + IMAGE_SIZE, BATCH_SIZE + ).create_data_loaders() + + def transform_fn(data_item): + return data_item[0].to(device) + + calibration_dataset = nncf.Dataset(calibration_dataset, transform_fn) + + with torch.no_grad(): + ex_input = next(iter(calibration_dataset.get_inference_data())) + model.eval() + exported_model = 
capture_pre_autograd_graph(model, args=(ex_input,)) + quantized_model = nncf.quantize(exported_model, calibration_dataset) + quantized_model = torch.compile(quantized_model, backend="openvino") + + top1_int8 = validate(val_dataloader, quantized_model, device) + assert np.isclose(top1_int8, test_case.top1_int8_ref, atol=1e-2) + + num_q, num_dq = count_q_dq(quantized_model) + assert num_q == test_case.ref_num_q + assert num_dq == test_case.ref_num_dq