Update QPSG logic #2066

Merged
27 changes: 24 additions & 3 deletions nncf/common/quantization/quantizer_propagation/graph.py
@@ -1383,9 +1383,9 @@ def _handle_output_quantizers_for_weights_as_outputs_ops(
all_qp_ids_in_unified_scale_group = {qp_id_for_current_pq}
for act_qp_id in all_qp_ids_in_unified_scale_group:
curr_act_qconfigs = setup.quantization_points[act_qp_id].possible_qconfigs
- curr_intersection_of_qconfigs = [
-     qconf for qconf in curr_intersection_of_qconfigs if qconf in curr_act_qconfigs
- ]
+ curr_intersection_of_qconfigs = self._get_weight_and_activation_qconfig_list_intersection(
+     curr_intersection_of_qconfigs, curr_act_qconfigs
+ )

# Do further filtering for per-tensor quantizations only.
# TODO: relax the requirement to allow the scale shape of the weight-as-output quantizer
@@ -1422,6 +1422,27 @@ def _handle_output_quantizers_for_weights_as_outputs_ops(
setup.discard(qp_id_for_current_pq, keep_shared_input_qps=True)
return setup

@staticmethod
def _get_weight_and_activation_qconfig_list_intersection(
weight_qconfig_options: List[QuantizerConfig], activation_qconfig_options: List[QuantizerConfig]
) -> List[QuantizerConfig]:
"""
Returns special intersection between weight and activation quantization configurations.

:param weight_qconfig_options: List of QuantizerConfig associated with weights.
:param activation_qconfig_options: List of QuantizerConfig associated with activations.
:return: Special intersection between configurations.
"""
act_qconfig_extend_list = []
for act_qconfig in activation_qconfig_options:
if act_qconfig.signedness_to_force is None:
for signedness_to_force_position in [True, False]:
act_qconfig_updated = deepcopy(act_qconfig)
act_qconfig_updated.signedness_to_force = signedness_to_force_position
act_qconfig_extend_list.append(act_qconfig_updated)
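# Keep the original activation configs as well, so a weight config with
# signedness_to_force=None can still match (see the review discussion below).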
act_qconfig_extend_list += activation_qconfig_options
return [qconf for qconf in weight_qconfig_options if qconf in act_qconfig_extend_list]
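
As an illustration of the helper's behaviour, here is a minimal sketch. QuantizerConfig, QuantizationMode, and the QPSG alias are the names used in the tests below; the import paths are assumed:

from nncf.common.quantization.structs import QuantizationMode, QuantizerConfig
from nncf.common.quantization.quantizer_propagation.graph import QuantizerPropagationStateGraph as QPSG

# A signed symmetric weight config...
weight_opts = [
    QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False)
]
# ...and an activation config that does not force signedness (signedness_to_force=None).
act_opts = [QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False)]

# The None activation config is expanded into signed and unsigned variants,
# so the signed weight config survives the intersection.
assert QPSG._get_weight_and_activation_qconfig_list_intersection(weight_opts, act_opts) == weight_opts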
Contributor, commenting on lines +1436 to +1444:
Make sure that this logic covers the following case:

let weight_qconfig_options = [(*partial_configuration*, signedness_to_force=None)]
let activation_qconfig_options = [(*same_partial_configuration*, signedness_to_force=None)]

Then the intersection should be:

intersection == [(*same_partial_configuration*, signedness_to_force=None)]

but it looks like your logic will currently produce the following instead:

intersection == [(*same_partial_configuration*, signedness_to_force=True), (*same_partial_configuration*, signedness_to_force=False)]

While we are unlikely to encounter this case in practice, the current behaviour would be confusing.

@KodiaqQ (Collaborator, Author) replied on Aug 18, 2023:
Currently, for:

  • weight_qconfig_options = [(*partial_configuration*, signedness_to_force=None)] and
  • activation_qconfig_options = [(*same_partial_configuration*, signedness_to_force=None)],

the intersection is [(*same_partial_configuration*, signedness_to_force=None)]. This is because we keep the original activation configurations and only append the expanded variants alongside them; see line 1443.

Added a test case for that.
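
A minimal sketch of the case under discussion (same assumed imports and QPSG alias as in the sketch above):

# Both sides leave signedness unforced (signedness_to_force=None).
opts = [QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False)]

# The original None config is appended back after the expansion (line 1443),
# so the intersection preserves it rather than splitting it into True/False variants.
assert QPSG._get_weight_and_activation_qconfig_list_intersection(opts, opts) == opts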


def run_consistency_check(self) -> bool:
all_pqs = self.collect_all_propagating_quantizers()

134 changes: 134 additions & 0 deletions tests/common/quantization/test_quantizer_propagation_graph.py
@@ -1706,3 +1706,137 @@ def test_create_quantizer_setup_with_output_quant_as_weights_ops(
)
ref_quantizer_setup = output_quant_as_weights_test_struct.ref_quantizer_setup()
assert test_quantizer_setup.equivalent_to(ref_quantizer_setup)


@pytest.mark.parametrize(
"weight_configs, activation_configs, reference_configs",
[
(
# Weights #1
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=True
),
],
# Activations #1
[
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False),
],
# Reference #1
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False
),
],
),
(
# Weights #2
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=True
),
],
# Activations #2
[
QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC, per_channel=False),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False
),
],
# Reference #2
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=False
),
],
),
(
# Weights #3
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=True
),
],
# Activations #3
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=False
),
],
# Reference #3
[],
),
(
# Weights #4
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=True, per_channel=True
),
],
# Activations #4
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=False, per_channel=False
),
],
# Reference #4
[],
),
(
# Weights #5
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=False, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=True
),
],
# Activations #5
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=None, per_channel=False
),
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False
),
],
# Reference #5
[
QuantizerConfig(
num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=False, per_channel=False
),
],
),
(
# Weights #6
[
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False),
],
# Activations #6
[
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False),
],
# Reference #6
[
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False),
],
),
],
)
def test_get_weight_and_activation_qconfig_list_intersection(weight_configs, activation_configs, reference_configs):
# pylint: disable=protected-access
resulted_configs = QPSG._get_weight_and_activation_qconfig_list_intersection(weight_configs, activation_configs)
assert resulted_configs == reference_configs
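
For reference, the new parametrized cases can be selected with pytest's -k filter, e.g. pytest tests/common/quantization/test_quantizer_propagation_graph.py -k get_weight_and_activation_qconfig_list_intersection.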
@@ -3,23 +3,19 @@ strict digraph {
"1 QuantizeLinear_Identity_Y_1" [id=1, type=QuantizeLinear];
"2 DequantizeLinear_Identity_Y_1" [id=2, type=DequantizeLinear];
"3 Embedding" [id=3, type=Gather];
"4 QuantizeLinear_Embedding_Y_1" [id=4, type=QuantizeLinear];
"5 DequantizeLinear_Embedding_Y_1" [id=5, type=DequantizeLinear];
"6 Gather" [id=6, type=Gather];
"7 QuantizeLinear_W_1" [id=7, type=QuantizeLinear];
"8 DequantizeLinear_W_1" [id=8, type=DequantizeLinear];
"9 MatMul" [id=9, type=MatMul];
"10 nncf_model_input_0" [id=10, type=nncf_model_input];
"11 nncf_model_output_0" [id=11, type=nncf_model_output];
"4 Gather" [id=4, type=Gather];
"5 QuantizeLinear_W_1" [id=5, type=QuantizeLinear];
"6 DequantizeLinear_W_1" [id=6, type=DequantizeLinear];
"7 MatMul" [id=7, type=MatMul];
"8 nncf_model_input_0" [id=8, type=nncf_model_input];
"9 nncf_model_output_0" [id=9, type=nncf_model_output];
"0 Identity" -> "1 QuantizeLinear_Identity_Y_1" [label="[10, 20]", style=solid];
"1 QuantizeLinear_Identity_Y_1" -> "2 DequantizeLinear_Identity_Y_1" [label="[10, 20]", style=dashed];
"2 DequantizeLinear_Identity_Y_1" -> "3 Embedding" [label="[10, 20]", style=solid];
"3 Embedding" -> "4 QuantizeLinear_Embedding_Y_1" [label="[1, 10, 20]", style=solid];
"4 QuantizeLinear_Embedding_Y_1" -> "5 DequantizeLinear_Embedding_Y_1" [label="[1, 10, 20]", style=dashed];
"5 DequantizeLinear_Embedding_Y_1" -> "6 Gather" [label="[1, 10, 20]", style=solid];
"6 Gather" -> "9 MatMul" [label="[10, 20]", style=solid];
"7 QuantizeLinear_W_1" -> "8 DequantizeLinear_W_1" [label="[10, 5]", style=dashed];
"8 DequantizeLinear_W_1" -> "9 MatMul" [label="[10, 5]", style=solid];
"9 MatMul" -> "11 nncf_model_output_0" [label="[1, 10]", style=solid];
"10 nncf_model_input_0" -> "3 Embedding" [label="[1, 10]", style=dashed];
"3 Embedding" -> "4 Gather" [label="[1, 10, 20]", style=solid];
"4 Gather" -> "7 MatMul" [label="[10, 20]", style=solid];
"5 QuantizeLinear_W_1" -> "6 DequantizeLinear_W_1" [label="[20, 10]", style=dashed];
"6 DequantizeLinear_W_1" -> "7 MatMul" [label="[20, 10]", style=solid];
"7 MatMul" -> "9 nncf_model_output_0" [label="[10, 10]", style=solid];
"8 nncf_model_input_0" -> "3 Embedding" [label="[1, 10]", style=dashed];
}
@@ -1,32 +1,6 @@
{
"QuantizeLinear_Identity_Y_1": {
"scale": [
0.0073627750389277935,
0.007852046750485897,
0.0070100342854857445,
0.007835405878722668,
0.007725945208221674,
0.007330845110118389,
0.007606788072735071,
0.007431507110595703,
0.007833994925022125,
0.007731832563877106
],
"zero_point": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
]
},
"QuantizeLinear_Embedding_Y_1": {
"scale": 0.003666950622573495,
"scale": 0.007852046750485897,
"zero_point": 0
},
"QuantizeLinear_W_1": {