Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix (proxy): fix for attributes retrieval #880

Merged
merged 2 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions src/brevitas/proxy/parameter_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,22 @@ def requires_quant_input(self):
return False

def scale(self):
    """Return the ``scale`` of the proxy output for the first tracked parameter.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``scale`` attribute of the object returned by calling the proxy on
        ``self.tracked_parameter_list[0]``.
    """
    if not self.is_quant_enabled:
        return None
    scale = self.__call__(self.tracked_parameter_list[0]).scale
    return scale

def zero_point(self):
    """Return the ``zero_point`` of the proxy output for the first tracked parameter.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``zero_point`` attribute of the object returned by calling the proxy
        on ``self.tracked_parameter_list[0]``.
    """
    if not self.is_quant_enabled:
        return None
    zero_point = self.__call__(self.tracked_parameter_list[0]).zero_point
    return zero_point

def bit_width(self):
    """Return the ``bit_width`` of the proxy output for the first tracked parameter.

    The disabled-quant guard must run before the proxy is called, so that a
    disabled proxy reports ``None`` instead of invoking the quantizer.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``bit_width`` attribute of the object returned by calling the proxy
        on ``self.tracked_parameter_list[0]``.
    """
    if not self.is_quant_enabled:
        return None
    bit_width = self.__call__(self.tracked_parameter_list[0]).bit_width
    return bit_width

def forward(self, x: torch.Tensor) -> Union[Tensor, QuantTensor]:
if self.is_quant_enabled:
Expand All @@ -105,11 +111,15 @@ def forward(self, x: torch.Tensor) -> Union[Tensor, QuantTensor]:
class DecoupledWeightQuantProxyFromInjector(WeightQuantProxyFromInjector):

def pre_scale(self):
    """Return the ``pre_scale`` entry produced by ``self.tensor_quant``.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the fifth
        element of the 6-tuple returned by ``self.tensor_quant`` for
        ``self.tracked_parameter_list[0]``.
    """
    if not self.is_quant_enabled:
        return None
    # Unpack all six entries so a differently sized result still fails loudly.
    _, _, _, _, pre_scale, _ = self.tensor_quant(self.tracked_parameter_list[0])
    return pre_scale

def pre_zero_point(self):
    """Return the ``pre_zero_point`` entry produced by ``self.tensor_quant``.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the sixth
        element of the 6-tuple returned by ``self.tensor_quant`` for
        ``self.tracked_parameter_list[0]``.
    """
    if not self.is_quant_enabled:
        return None
    # Unpack all six entries so a differently sized result still fails loudly.
    _, _, _, _, _, pre_zero_point = self.tensor_quant(self.tracked_parameter_list[0])
    return pre_zero_point
Expand Down Expand Up @@ -151,7 +161,7 @@ def forward(self, x: torch.Tensor, input_bit_width: torch.Tensor,
out, scale, zero_point, bit_width, pre_scale, pre_zero_point = impl(x, input_bit_width, input_is_signed)
return QuantTensor(out, scale, zero_point, bit_width, self.is_signed, self.training)
else: # quantization disabled
return QuantTensor(x, training=self.training)
return x


class BiasQuantProxyFromInjector(ParameterQuantProxyFromInjector, BiasQuantProxyProtocol):
Expand All @@ -168,18 +178,22 @@ def requires_input_scale(self) -> bool:
return False

def scale(self):
    """Return the bias ``scale``, or ``None`` when it cannot be reported.

    A single combined guard covers both cases in which no scale is
    available: the proxy requires an input scale, or quantization is
    disabled.

    Returns:
        ``None`` when ``self.requires_input_scale`` is true or
        ``self.is_quant_enabled`` is false; otherwise the ``scale`` attribute
        of the object returned by calling the proxy on
        ``self.tracked_parameter_list[0]`` and ``self._zero_hw_sentinel()``.
    """
    if self.requires_input_scale or not self.is_quant_enabled:
        return None
    # NOTE(review): the sentinel presumably stands in for the input scale — confirm.
    zhs = self._zero_hw_sentinel()
    scale = self.__call__(self.tracked_parameter_list[0], zhs).scale
    return scale

def zero_point(self):
    """Return the bias ``zero_point``, or ``None`` when quantization is disabled.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``zero_point`` attribute of the object returned by calling the proxy
        on ``self.tracked_parameter_list[0]`` and ``self._zero_hw_sentinel()``.
    """
    if not self.is_quant_enabled:
        return None
    zhs = self._zero_hw_sentinel()
    zero_point = self.__call__(self.tracked_parameter_list[0], zhs).zero_point
    return zero_point

def bit_width(self):
    """Return the bias ``bit_width``, or ``None`` when quantization is disabled.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``bit_width`` attribute of the object returned by calling the proxy
        on ``self.tracked_parameter_list[0]`` and ``self._zero_hw_sentinel()``.
    """
    if not self.is_quant_enabled:
        return None
    zhs = self._zero_hw_sentinel()
    bit_width = self.__call__(self.tracked_parameter_list[0], zhs).bit_width
    return bit_width
Expand Down
22 changes: 15 additions & 7 deletions src/brevitas/proxy/runtime_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ def init_tensor_quant(self):
self.fused_activation_quant_proxy = None

def scale(self, force_eval=True):
if not self.is_quant_enabled:
return None
current_status = self.training
if force_eval:
self.eval()
Expand All @@ -126,16 +128,24 @@ def scale(self, force_eval=True):
return scale

def zero_point(self, force_eval=True):
    """Return the activation ``zero_point``, or ``None`` when quantization is disabled.

    Args:
        force_eval: When true, ``self.eval()`` is called before the proxy is
            invoked on ``self._zero_hw_sentinel()``.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``zero_point`` attribute of the proxy output.
    """
    if not self.is_quant_enabled:
        return None
    current_status = self.training
    if force_eval:
        self.eval()
    try:
        zero_point = self.__call__(self._zero_hw_sentinel()).zero_point
    finally:
        # Restore the caller's mode even if the proxy call raises, so a failed
        # query cannot leave the module stuck in eval mode.
        self.train(current_status)
    return zero_point

def bit_width(self, force_eval=True):
    """Return the activation ``bit_width``, or ``None`` when quantization is disabled.

    Args:
        force_eval: When true, ``self.eval()`` is called before the proxy is
            invoked on ``self._zero_hw_sentinel()``. Defaults to true, so
            callers that pass no argument keep working.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``bit_width`` attribute of the proxy output.
    """
    if not self.is_quant_enabled:
        return None
    current_status = self.training
    if force_eval:
        self.eval()
    try:
        bit_width = self.__call__(self._zero_hw_sentinel()).bit_width
    finally:
        # Restore the caller's mode even if the proxy call raises, so a failed
        # query cannot leave the module stuck in eval mode.
        self.train(current_status)
    return bit_width

def forward(self, x: Union[Tensor, QuantTensor]) -> Union[Tensor, QuantTensor]:
if self.fused_activation_quant_proxy is not None:
Expand Down Expand Up @@ -179,10 +189,6 @@ def scale(self, force_eval=True):
def zero_point(self, force_eval=True):
    """Always raise: this proxy does not report a zero point.

    Args:
        force_eval: Accepted but not used by this implementation.

    Raises:
        RuntimeError: On every call.
    """
    raise RuntimeError("Zero point for Dynamic Act Quant is input-dependant")

def bit_width(self):
    """Return the ``bit_width`` of the proxy output, or ``None`` when disabled.

    Returns:
        ``None`` when ``self.is_quant_enabled`` is false; otherwise the
        ``bit_width`` attribute of the object returned by calling the proxy
        on ``self._zero_hw_sentinel()``.
    """
    # Guard first so a disabled proxy reports None instead of invoking the quantizer.
    if not self.is_quant_enabled:
        return None
    bit_width = self.__call__(self._zero_hw_sentinel()).bit_width
    return bit_width


class ClampQuantProxyFromInjector(QuantProxyFromInjector, AccQuantProxyProtocol):

Expand All @@ -198,6 +204,8 @@ def forward(self, x: QuantTensor) -> Union[Tensor, QuantTensor]:
class TruncQuantProxyFromInjector(QuantProxyFromInjector, AccQuantProxyProtocol):

def bit_width(self):
if not self.is_quant_enabled:
return None
zhs = self._zero_hw_sentinel()
# Signed might or might not be defined. We just care about retrieving the bitwidth
empty_imp = QuantTensor(zhs, zhs, zhs, zhs, signed=True, training=self.training)
Expand Down
82 changes: 82 additions & 0 deletions tests/brevitas/proxy/test_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import pytest

from brevitas.nn import QuantLinear
from brevitas.nn.quant_activation import QuantReLU
from brevitas.quant.scaled_int import Int8AccumulatorAwareWeightQuant
from brevitas.quant.scaled_int import Int8BiasPerTensorFloatInternalScaling
from brevitas.quant.scaled_int import Int8WeightPerChannelFloatDecoupled
from brevitas.quant.scaled_int import Int8WeightPerTensorFloat
from brevitas_examples.common.generative.quantizers import Int8DynamicActPerTensorFloat
from tests.marker import jit_disabled_for_dynamic_quant_act


class TestProxy:
    """Checks on the quantization attributes exposed by the quant proxies.

    Most tests read the attributes with quantization enabled, then set
    ``disable_quant = True`` on the proxy and expect ``None`` back.
    """

    def test_bias_proxy(self):
        """Bias proxy exposes scale/zero_point/bit_width, and ``None`` once disabled."""
        model = QuantLinear(10, 5, bias_quant=Int8BiasPerTensorFloatInternalScaling)
        assert model.bias_quant.scale() is not None
        assert model.bias_quant.zero_point() is not None
        assert model.bias_quant.bit_width() is not None

        model.bias_quant.disable_quant = True
        assert model.bias_quant.scale() is None
        assert model.bias_quant.zero_point() is None
        assert model.bias_quant.bit_width() is None

    def test_weight_proxy(self):
        """Weight proxy exposes scale/zero_point/bit_width, and ``None`` once disabled."""
        model = QuantLinear(10, 5, weight_quant=Int8WeightPerTensorFloat)
        assert model.weight_quant.scale() is not None
        assert model.weight_quant.zero_point() is not None
        assert model.weight_quant.bit_width() is not None

        model.weight_quant.disable_quant = True
        assert model.weight_quant.scale() is None
        assert model.weight_quant.zero_point() is None
        assert model.weight_quant.bit_width() is None

    def test_weight_decoupled_proxy(self):
        """Decoupled weight proxy exposes pre_scale/pre_zero_point, and ``None`` once disabled."""
        model = QuantLinear(10, 5, weight_quant=Int8WeightPerChannelFloatDecoupled)
        assert model.weight_quant.pre_scale() is not None
        assert model.weight_quant.pre_zero_point() is not None

        model.weight_quant.disable_quant = True
        assert model.weight_quant.pre_scale() is None
        assert model.weight_quant.pre_zero_point() is None

    def test_weight_decoupled_with_input_proxy(self):
        """With ``Int8AccumulatorAwareWeightQuant`` every attribute getter raises."""
        model = QuantLinear(10, 5, weight_quant=Int8AccumulatorAwareWeightQuant)
        with pytest.raises(NotImplementedError):
            model.weight_quant.scale()
        with pytest.raises(NotImplementedError):
            model.weight_quant.zero_point()

        with pytest.raises(NotImplementedError):
            model.weight_quant.pre_scale()
        with pytest.raises(NotImplementedError):
            model.weight_quant.pre_zero_point()

    def test_act_proxy(self):
        """Activation proxy exposes scale/zero_point/bit_width, and ``None`` once disabled."""
        model = QuantReLU()
        assert model.act_quant.scale() is not None
        assert model.act_quant.zero_point() is not None
        assert model.act_quant.bit_width() is not None

        model.act_quant.disable_quant = True
        assert model.act_quant.scale() is None
        assert model.act_quant.zero_point() is None
        assert model.act_quant.bit_width() is None

    @jit_disabled_for_dynamic_quant_act()
    def test_dynamic_act_proxy(self):
        """Dynamic act proxy raises for scale/zero_point but still reports bit_width."""
        model = QuantReLU(Int8DynamicActPerTensorFloat)

        with pytest.raises(RuntimeError, match="Scale for Dynamic Act Quant is input-dependant"):
            model.act_quant.scale()
        with pytest.raises(RuntimeError,
                           match="Zero point for Dynamic Act Quant is input-dependant"):
            model.act_quant.zero_point()

        assert model.act_quant.bit_width() is not None

        model.act_quant.disable_quant = True
        assert model.act_quant.bit_width() is None
1 change: 0 additions & 1 deletion tests/brevitas/proxy/test_weight_scaling.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause

import pytest
from torch import nn

from brevitas import config
Expand Down
9 changes: 9 additions & 0 deletions tests/marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,14 @@ def skip_wrapper(f):
return skip_wrapper


def jit_disabled_for_dynamic_quant_act():
    """Build a decorator that skips a test when ``config.JIT_ENABLED`` is truthy.

    ``config.JIT_ENABLED`` is read once, when this factory is called, not
    when the decorated test runs.

    Returns:
        A function that applies ``pytest.mark.skipif`` to the test it is given.
    """
    skip = config.JIT_ENABLED

    def skip_wrapper(f):
        # Plain string: the reason has no placeholders, so no f-string is needed.
        return pytest.mark.skipif(skip, reason='Dynamic Act Quant requires JIT to be disabled')(f)

    return skip_wrapper


# Skip marker applied when platform.system() reports "Darwin".
skip_on_macos_nox = pytest.mark.skipif(
platform.system() == "Darwin", reason="Known issue with Nox and MacOS.")
Loading