Rebase partly

daniil-lyakhov · Jun 25, 2024 · 24e96b1 · 24e96b1
1 parent 37c2f10
commit 24e96b1
Show file tree

Hide file tree

Showing 13 changed files with 114 additions and 155 deletions.
diff --git a/nncf/torch/__init__.py b/nncf/torch/__init__.py
@@ -68,4 +68,4 @@
 
 from nncf.torch.extensions import force_build_cpu_extensions, force_build_cuda_extensions
 
-# patch_torch_operators()
+patch_torch_operators()
diff --git a/nncf/torch/graph/pattern_operations.py b/nncf/torch/graph/pattern_operations.py
@@ -35,11 +35,6 @@
     GraphPattern.LABEL_ATTR: "BATCH_NORMALIZATION",
 }
 
-GETITEM_OPERATIONS = {
-    GraphPattern.METATYPE_ATTR: ["index_select", "__getitem__", "gather", "index_select", "where"],
-    GraphPattern.LABEL_ATTR: "GETITEM",
-}
-
 GROUP_NORMALIZATION_OPERATIONS = {
     GraphPattern.METATYPE_ATTR: ["group_norm"],
     GraphPattern.LABEL_ATTR: "GROUP_NORMALIZATION",

diff --git a/nncf/torch/hardware/fused_patterns.py b/nncf/torch/hardware/fused_patterns.py
@@ -16,7 +16,6 @@
 from nncf.torch.graph.pattern_operations import ARITHMETIC_OPERATIONS
 from nncf.torch.graph.pattern_operations import ATOMIC_ACTIVATIONS_OPERATIONS
 from nncf.torch.graph.pattern_operations import BATCH_NORMALIZATION_OPERATIONS
-from nncf.torch.graph.pattern_operations import GETITEM_OPERATIONS
 from nncf.torch.graph.pattern_operations import GROUP_NORMALIZATION_OPERATIONS
 from nncf.torch.graph.pattern_operations import LINEAR_OPERATIONS
 from nncf.torch.graph.pattern_operations import RELU_OPERATIONS
@@ -200,12 +199,7 @@ def arithmetic_operations() -> GraphPattern:
 def batch_norm_operations() -> GraphPattern:
     pattern = GraphPattern()
     pattern.add_node(**BATCH_NORMALIZATION_OPERATIONS)
-    pattern_alt = GraphPattern()
-    bn = pattern_alt.add_node(**BATCH_NORMALIZATION_OPERATIONS)
-    get_item = pattern_alt.add_node(**GETITEM_OPERATIONS)
-    pattern_alt.add_edge(bn, get_item)
-    pattern.add_pattern_alternative(pattern_alt)
-    return pattern_alt
+    return pattern
 
 
 def activation_operations() -> GraphPattern:

diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py
@@ -24,16 +24,16 @@
 from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
 from nncf.common.quantization.structs import QuantizerConfig
 from nncf.common.quantization.structs import QuantizerGroup
-from nncf.experimental.tensor import Tensor
-from nncf.experimental.tensor import functions as fn
+from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic
 from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
-from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend
+from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend
 from nncf.quantization.fake_quantize import FakeQuantizeParameters
 from nncf.quantization.fake_quantize import calculate_quantizer_parameters
 from nncf.quantization.fake_quantize import get_quantizer_narrow_range
+from nncf.tensor import Tensor
+from nncf.tensor import functions as fns
 from nncf.torch.model_creation import wrap_model
 from nncf.torch.statistics.aggregator import PTStatisticsAggregator
-from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic
 from tests.post_training.test_templates.test_calculate_quantizer_parameters import TemplateTestFQParams
 from tests.torch.helpers import get_all_inputs_for_graph_node
 from tests.torch.helpers import get_nodes_by_type
@@ -118,7 +118,7 @@ def test_quantizer_params_sym(case_to_test: CaseSymParams):
     target_type = (
         TargetType.OPERATION_WITH_WEIGHTS if quant_group == QuantizerGroup.WEIGHTS else TargetType.PRE_LAYER_OPERATION
     )
-    quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type)
+    quantizer = PTMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type)
 
     assert quantizer.levels == fq_params.levels
     scale = quantizer.scale.detach().numpy()
@@ -208,10 +208,10 @@ def test_quantizer_params_asym(case_to_test: CaseSymParams):
     target_type = (
         TargetType.OPERATION_WITH_WEIGHTS if quant_group == QuantizerGroup.WEIGHTS else TargetType.PRE_LAYER_OPERATION
     )
-    quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type)
+    quantizer = PTMinMaxAlgoBackend._create_quantizer(qconfig, scale_shape, fq_params, target_type)
     assert quantizer.levels == fq_params.levels
-    assert fn.allclose(quantizer.input_low.data, case_to_test.ref_inp_low)
-    assert fn.allclose(quantizer.input_range.data, case_to_test.ref_inp_range)
+    assert fns.allclose(quantizer.input_low.data, case_to_test.ref_inp_low)
+    assert fns.allclose(quantizer.input_range.data, case_to_test.ref_inp_range)
 
 
 class LinearTestModel(nn.Module):
@@ -268,7 +268,9 @@ def calculate_statistics(data, mode, qgroup, half_range=False):
     else:
         max_values = np.amax(data, axes)
 
-    statistics = PTMinMaxTensorStatistic(min_values=torch.tensor(min_values), max_values=torch.tensor(max_values))
+    statistics = MinMaxTensorStatistic(
+        min_values=Tensor(torch.tensor(min_values)), max_values=Tensor(torch.tensor(max_values))
+    )
     signedness_to_force = True if qgroup == QuantizerGroup.WEIGHTS else None
     qconfig = QuantizerConfig(num_bits=8, mode=mode, per_channel=per_ch, signedness_to_force=signedness_to_force)
     narrow_range = get_quantizer_narrow_range(qconfig, qgroup)
@@ -340,11 +342,10 @@ def test_quantizer_parameters_export(tmp_path: Path, _seed):
 
     for name, param in fq_params.items():
         assert name in torch_ptq_params
-        assert fn.allclose(param["input_low"], torch_ptq_params[name]["input_low"])
-        assert fn.allclose(param["input_high"], torch_ptq_params[name]["input_high"])
+        assert fns.allclose(param["input_low"], torch_ptq_params[name]["input_low"])
+        assert fns.allclose(param["input_high"], torch_ptq_params[name]["input_high"])
 
 
 class TestFQParams(TemplateTestFQParams):
-    @property
-    def tensor_statistic(self):
-        return PTMinMaxTensorStatistic
+    def to_nncf_tensor(self, t):
+        return Tensor(torch.tensor(t))
diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py
@@ -15,14 +15,15 @@
 import pytest
 import torch
 
+from nncf import Dataset
 from nncf.parameters import TargetDevice
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
 from nncf.torch import wrap_model
 from nncf.torch.layers import NNCF_RNN
 from nncf.torch.layers import LSTMCellNNCF
 from tests.post_training.test_templates.helpers import EmbeddingModel
-from tests.post_training.test_templates.helpers import get_static_dataset
+from tests.post_training.test_templates.helpers import ScaledDotProductAttentionModel
 from tests.torch import test_models
 from tests.torch.quantization.test_algo_quantization import SharedLayersModel
 from tests.torch.test_compressed_graph import ModelDesc
@@ -49,6 +50,14 @@ def get_model_name(description):
 
 TEST_MODELS_DESC = [
     (ModelDesc("embedding_model", EmbeddingModel, [1, 10]), {}),
+    (
+        ModelDesc(
+            "scaled_dot_product_attention_model",
+            ScaledDotProductAttentionModel,
+            {"query": [1, 8, 16], "key": [1, 8, 16], "value": [1, 8, 16]},
+        ),
+        {},
+    ),
     (ModelDesc("shared_model", SharedLayersModel, [1, 1, 5, 6]), {}),
     (ModelDesc("alexnet", test_models.AlexNet, [1, 3, 32, 32]), {}),
     (ModelDesc("lenet", test_models.LeNet, [1, 3, 32, 32]), {}),
@@ -96,18 +105,21 @@ def get_model_name(description):
 def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_parameters, graph_dir, mocker):
     model = desc.model_builder()
 
-    nncf_network = wrap_model(model, torch.ones(desc.input_sample_sizes), trace_parameters=True)
+    if isinstance(desc.input_sample_sizes, dict):
+        example_input = {}
+        for name, size in desc.input_sample_sizes.items():
+            example_input[name] = torch.ones(size)
+    else:
+        example_input = torch.ones(desc.input_sample_sizes)
+
+    nncf_network = wrap_model(model, example_input, trace_parameters=True)
     quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True)
     quantization_parameters["subset_size"] = 1
     quantization_algorithm = PostTrainingQuantization(**quantization_parameters)
 
-    def transform_fn(input_) -> torch.Tensor:
-        return torch.tensor(input_[0])
-
     quantized_model = quantization_algorithm.apply(
         nncf_network,
         nncf_network.nncf.get_graph(),
-        dataset=get_static_dataset(desc.input_sample_sizes, transform_fn, None),
+        dataset=Dataset([example_input]),
     )
-
     check_graph(quantized_model.nncf.get_graph(), desc.dot_filename(), graph_dir)
diff --git a/tests/torch/ptq/test_min_max.py b/tests/torch/ptq/test_min_max.py
@@ -17,7 +17,7 @@
 from nncf.common.graph.layer_attributes import LinearLayerAttributes
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend
-from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend
+from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend
 from nncf.torch.graph.graph import PTNNCFGraph
 from nncf.torch.graph.operator_metatypes import PTConv2dMetatype
 from nncf.torch.graph.operator_metatypes import PTDepthwiseConv2dSubtype
@@ -32,7 +32,7 @@
 class TestTorchMinMaxAlgorithm(TemplateTestMinMaxAlgorithm):
     @property
     def backend(self) -> MinMaxAlgoBackend:
-        return FXMinMaxAlgoBackend
+        return PTMinMaxAlgoBackend
 
     @property
     def conv_metatype(self):

diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py
@@ -20,7 +20,7 @@
 from nncf.common.utils.backend import BackendType
 from nncf.parameters import TargetDevice
 from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
-from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend
+from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend
 from nncf.scopes import IgnoredScope
 from nncf.torch.graph.graph import PTNNCFGraph
 from nncf.torch.graph.graph import PTTargetPoint
@@ -88,13 +88,13 @@ def forward(self, x):
 @pytest.mark.parametrize("target_device", TargetDevice)
 def test_target_device(target_device):
     min_max_algo = MinMaxQuantization(target_device=target_device)
-    min_max_algo._backend_entity = FXMinMaxAlgoBackend()
+    min_max_algo._backend_entity = PTMinMaxAlgoBackend()
     assert min_max_algo._target_device == target_device
 
 
 class TestPTQParams(TemplateTestPTQParams):
     def get_algo_backend(self):
-        return FXMinMaxAlgoBackend()
+        return PTMinMaxAlgoBackend()
 
     def check_quantize_outputs_fq_num(self, quantize_outputs, act_num_q, weight_num_q):
         if quantize_outputs:

diff --git a/tests/torch/ptq/test_quantizer_config.py b/tests/torch/ptq/test_quantizer_config.py
@@ -11,7 +11,7 @@
 
 import pytest
 
-from nncf.quantization.algorithms.min_max.torch_backend import FXMinMaxAlgoBackend
+from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend
 from tests.post_training.test_templates.models import NNCFGraphToTest
 from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv
 from tests.post_training.test_templates.models import NNCFGraphToTestSumAggregation
@@ -23,7 +23,7 @@
 
 class TestQuantizerConfig(TemplateTestQuantizerConfig):
     def get_algo_backend(self):
-        return FXMinMaxAlgoBackend()
+        return PTMinMaxAlgoBackend()
 
     @pytest.fixture
     def single_conv_nncf_graph(self) -> NNCFGraphToTest:

diff --git a/tests/torch/ptq/test_reducers_and_aggregators.py b/tests/torch/ptq/test_reducers_and_aggregators.py
@@ -19,7 +19,8 @@
 import nncf
 from nncf.common.graph.layer_attributes import Dtype
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
-from nncf.torch.tensor import PTNNCFTensor
+from nncf.tensor import Tensor
+from nncf.tensor import functions as fns
 from nncf.torch.tensor_statistics.algo import create_register_input_hook
 from nncf.torch.tensor_statistics.collectors import PTAbsMaxReducer
 from nncf.torch.tensor_statistics.collectors import PTAbsQuantileReducer
@@ -28,15 +29,11 @@
 from nncf.torch.tensor_statistics.collectors import PTMeanPerChanelReducer
 from nncf.torch.tensor_statistics.collectors import PTMeanReducer
 from nncf.torch.tensor_statistics.collectors import PTMinReducer
-from nncf.torch.tensor_statistics.collectors import PTNNCFCollectorTensorProcessor
 from nncf.torch.tensor_statistics.collectors import PTQuantileReducer
-from tests.common.experimental.test_reducers_and_aggregators import TemplateTestReducersAggreagtors
+from tests.common.experimental.test_reducers_and_aggregators import TemplateTestReducersAggregators
 
 
-class BaseTestReducersAggregators(TemplateTestReducersAggreagtors, ABC):
-    @pytest.fixture
-    def tensor_processor(self):
-        return PTNNCFCollectorTensorProcessor
+class BaseTestReducersAggregators(TemplateTestReducersAggregators, ABC):
 
     def _get_torch_tensor(self, x: np.ndarray, dtype: Optional[Dtype] = None):
         torch_tensor = torch.tensor(x)
@@ -80,7 +77,7 @@ def cast_tensor(self, tensor, dtype: Dtype):
 
 class TestCPUReducersAggregators(BaseTestReducersAggregators):
     def get_nncf_tensor(self, x: np.array, dtype: Optional[Dtype] = None):
-        return PTNNCFTensor(self._get_torch_tensor(x, dtype=dtype).cpu())
+        return Tensor(self._get_torch_tensor(x, dtype=dtype).cpu())
 
     def all_close(self, val: torch.Tensor, ref) -> bool:
         assert not val.is_cuda
@@ -91,23 +88,23 @@ def all_close(self, val: torch.Tensor, ref) -> bool:
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is not available in current environment")
 class TestCudaReducersAggregators(BaseTestReducersAggregators):
     def get_nncf_tensor(self, x: np.array, dtype: Optional[Dtype] = None):
-        return PTNNCFTensor(self._get_torch_tensor(x, dtype=dtype).cuda())
+        return Tensor(self._get_torch_tensor(x, dtype=dtype).cuda())
 
     def all_close(self, val: torch.Tensor, ref) -> bool:
         assert val.is_cuda
         return super().all_close(val, ref)
 
 
-@pytest.mark.parametrize("size,ref", [(16_000_000, 1_600_000.8750), (17_000_000, 1_700_000.7500)])
+@pytest.mark.parametrize("size,ref", [(16_000_000, 1_600_000.8), (17_000_000, 1_700_000.8)])
 def test_quantile_percentile_function(use_cuda, size, ref):
     if use_cuda and not torch.cuda.is_available():
         pytest.skip("Cuda is not available in current environment")
     device = "cuda" if use_cuda else "cpu"
-    tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device))
-    res_quantile = PTNNCFCollectorTensorProcessor.quantile(tensor, [0.1], axis=0)
-    res_percentile = PTNNCFCollectorTensorProcessor.percentile(tensor, [10], axis=0)
-    assert len(res_quantile) == len(res_percentile) == 1
-    for tensor in [res_quantile[0].tensor, res_percentile[0].tensor]:
+    tensor = Tensor(torch.arange(1, size, 1).float().to(device))
+    res_quantile = fns.quantile(tensor, [0.1], axis=0)
+    res_percentile = fns.percentile(tensor, [10], axis=0)
+    assert res_quantile.shape[0] == res_percentile.shape[0] == 1  # one value per requested quantile
+    for tensor in [res_quantile[0].data, res_percentile[0].data]:
         assert tensor == ref
         assert tensor.is_cuda == (device == "cuda")
 
@@ -117,10 +114,10 @@ def test_median_function(use_cuda, size, ref):
     if use_cuda and not torch.cuda.is_available():
         pytest.skip("Cuda is not available in current environment")
     device = "cuda" if use_cuda else "cpu"
-    tensor = PTNNCFTensor(torch.arange(1, size, 1).float().to(device))
-    res = PTNNCFCollectorTensorProcessor.median(tensor, axis=0)
-    assert res.tensor == ref
-    assert res.tensor.is_cuda == (device == "cuda")
+    tensor = Tensor(torch.arange(1, size, 1).float().to(device))
+    res = fns.median(tensor, axis=0)
+    assert res.data == ref
+    assert res.data.is_cuda == (device == "cuda")
 
 
 def test_create_register_input_hook_with_return_type(mocker):
@@ -133,7 +130,5 @@ def test_create_register_input_hook_with_return_type(mocker):
     mocker = collector.register_input_for_all_reducers
     mocker.assert_called_once()
     attr = mocker.call_args_list[0][0][0]
-    assert isinstance(attr, PTNNCFTensor)
-    assert attr.tensor == torch.tensor(
-        1,
-    )
+    assert isinstance(attr, Tensor)
+    assert attr.data == torch.tensor(1)
diff --git a/tests/torch/ptq/test_statistic_collector.py b/tests/torch/ptq/test_statistic_collector.py
@@ -9,50 +9,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Type
 
 import numpy as np
-import pytest
 import torch
 
-from nncf.common.tensor import NNCFTensor
-from nncf.common.tensor_statistics.statistics import MeanTensorStatistic
-from nncf.common.tensor_statistics.statistics import MedianMADTensorStatistic
-from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic
-from nncf.common.tensor_statistics.statistics import PercentileTensorStatistic
-from nncf.common.tensor_statistics.statistics import RawTensorStatistic
-from nncf.torch.tensor import PTNNCFTensor
-from nncf.torch.tensor_statistics.statistics import PTMeanTensorStatistic
-from nncf.torch.tensor_statistics.statistics import PTMedianMADTensorStatistic
-from nncf.torch.tensor_statistics.statistics import PTMinMaxTensorStatistic
-from nncf.torch.tensor_statistics.statistics import PTPercentileTensorStatistic
+from nncf.tensor import Tensor
 from tests.common.experimental.test_statistic_collector import TemplateTestStatisticCollector
 
 
 class TestPTStatisticCollector(TemplateTestStatisticCollector):
-    def get_nncf_tensor(self, value: np.ndarray) -> NNCFTensor:
-        return PTNNCFTensor(torch.tensor(value))
-
-    @pytest.fixture
-    def min_max_statistic_cls(self) -> Type[MinMaxTensorStatistic]:
-        return PTMinMaxTensorStatistic
-
-    @pytest.fixture
-    def mean_statistic_cls(self) -> Type[MeanTensorStatistic]:
-        return PTMeanTensorStatistic
-
-    @pytest.fixture
-    def median_mad_statistic_cls(self) -> Type[MedianMADTensorStatistic]:
-        return PTMedianMADTensorStatistic
-
-    @pytest.fixture
-    def percentile_statistic_cls(self) -> Type[PercentileTensorStatistic]:
-        return PTPercentileTensorStatistic
-
-    @pytest.fixture
-    def raw_statistic_cls(self) -> Type[RawTensorStatistic]:
-        raise NotImplementedError()
-
-    @pytest.mark.skip
-    def test_raw_max_stat_building(self, raw_statistic_cls: RawTensorStatistic):
-        pass
+    def get_nncf_tensor(self, value: np.ndarray) -> Tensor:
+        return Tensor(torch.tensor(value))