-
Notifications
You must be signed in to change notification settings - Fork 233
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Shift+Scale and Input+Shift+Scale pattern for PT (#1989)
### Changes

Introduced a fused Shift+Scale pattern and an Input+Shift+Scale pattern so that Fake Quantize (FQ) operations are inserted optimally for CPU.

### Reason for changes

A customer has a model that is not quantized optimally: ![image](https://github.com/openvinotoolkit/nncf/assets/4014476/59a228fd-1336-4e91-80c4-b67f76febcb8)

The FakeQuantize between subtraction and division is redundant and introduces additional cost at runtime. The FakeQuantize between the input and pre-processing is not needed when there is a single edge from the input, because the pre-processing can be fused into the FQ that follows it. ![image](https://github.com/openvinotoolkit/nncf/assets/4014476/2037597e-cf0d-45f3-b36b-83c1fa7f0de2)

When there are multiple edges from the input and one of them goes to pre-processing, it is optimal to have a common fake quantize for all edges. ![image](https://github.com/openvinotoolkit/nncf/assets/4014476/a150a4b2-1e34-461a-9683-46955abb6ffc)

If pre-processing is represented via the normalize op from torchvision (e.g. as in https://github.com/PeterL1n/RobustVideoMatting/blob/master/model/mobilenetv3.py#L37), NNCF doesn't insert an FQ between subtraction and division or between the input and pre-processing. This happens because the pre-processing is implemented via in-place operations, and since FQ is not in-place it can't be applied (see more details here: #1565). ![image](https://github.com/openvinotoolkit/nncf/assets/4014476/17ecd154-a7d8-468c-95d0-31d99ca3185f)

### Related tickets

112934

### Tests

Synthetic tests for pre-processing.
- Loading branch information
1 parent
778751d
commit 529523a
Showing
11 changed files
with
215 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
.../torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__multi_input_branch.dot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
strict digraph { | ||
"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; | ||
"1 SymmetricQuantizer/symmetric_quantize_0" [id=1, type=symmetric_quantize]; | ||
"2 ShiftScaleParametrized/__sub___0" [id=2, type=__sub__]; | ||
"3 ShiftScaleParametrized/__truediv___0" [id=3, type=__truediv__]; | ||
"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; | ||
"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=6, type=conv2d]; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=7, type=symmetric_quantize]; | ||
"8 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=8, type=conv2d]; | ||
"9 /nncf_model_output_0" [id=9, type=nncf_model_output]; | ||
"10 /nncf_model_output_1" [id=10, type=nncf_model_output]; | ||
"0 /nncf_model_input_0" -> "1 SymmetricQuantizer/symmetric_quantize_0"; | ||
"1 SymmetricQuantizer/symmetric_quantize_0" -> "2 ShiftScaleParametrized/__sub___0"; | ||
"1 SymmetricQuantizer/symmetric_quantize_0" -> "8 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; | ||
"2 ShiftScaleParametrized/__sub___0" -> "3 ShiftScaleParametrized/__truediv___0"; | ||
"3 ShiftScaleParametrized/__truediv___0" -> "4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0"; | ||
"4 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"5 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "9 /nncf_model_output_0"; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "8 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; | ||
"8 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "10 /nncf_model_output_1"; | ||
} |
27 changes: 27 additions & 0 deletions
27
.../reference_graphs/quantized/synthetic_model/ShiftScale__normalize__multi_input_branch.dot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
strict digraph { | ||
"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; | ||
"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; | ||
"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; | ||
"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; | ||
"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; | ||
"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; | ||
"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" [id=8, type=symmetric_quantize]; | ||
"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" [id=9, type=symmetric_quantize]; | ||
"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" [id=10, type=conv2d]; | ||
"11 /nncf_model_output_0" [id=11, type=nncf_model_output]; | ||
"12 /nncf_model_output_1" [id=12, type=nncf_model_output]; | ||
"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; | ||
"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; | ||
"0 /nncf_model_input_0" -> "9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0"; | ||
"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; | ||
"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; | ||
"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; | ||
"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "11 /nncf_model_output_0"; | ||
"8 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_1" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; | ||
"9 ShiftScaleParametrized/NNCFConv2d[conv]/SymmetricQuantizer/symmetric_quantize_0" -> "10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1"; | ||
"10 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_1" -> "12 /nncf_model_output_1"; | ||
} |
19 changes: 19 additions & 0 deletions
19
...reference_graphs/quantized/synthetic_model/ShiftScale__normalize__single_input_branch.dot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
strict digraph { | ||
"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; | ||
"1 ShiftScaleParametrized/is_floating_point_0" [id=1, type=is_floating_point]; | ||
"2 ShiftScaleParametrized/clone_0" [id=2, type=clone]; | ||
"3 ShiftScaleParametrized/sub__0" [id=3, type=sub_]; | ||
"4 ShiftScaleParametrized/div__0" [id=4, type=div_]; | ||
"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" [id=5, type=symmetric_quantize]; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=6, type=symmetric_quantize]; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=7, type=conv2d]; | ||
"8 /nncf_model_output_0" [id=8, type=nncf_model_output]; | ||
"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/is_floating_point_0"; | ||
"0 /nncf_model_input_0" -> "2 ShiftScaleParametrized/clone_0"; | ||
"2 ShiftScaleParametrized/clone_0" -> "3 ShiftScaleParametrized/sub__0"; | ||
"3 ShiftScaleParametrized/sub__0" -> "4 ShiftScaleParametrized/div__0"; | ||
"4 ShiftScaleParametrized/div__0" -> "5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0"; | ||
"5 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/div__0|OUTPUT]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"6 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"7 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "8 /nncf_model_output_0"; | ||
} |
15 changes: 15 additions & 0 deletions
15
...torch/data/reference_graphs/quantized/synthetic_model/ShiftScale__single_input_branch.dot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
strict digraph { | ||
"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; | ||
"1 ShiftScaleParametrized/__sub___0" [id=1, type=__sub__]; | ||
"2 ShiftScaleParametrized/__truediv___0" [id=2, type=__truediv__]; | ||
"3 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0" [id=3, type=symmetric_quantize]; | ||
"4 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" [id=4, type=symmetric_quantize]; | ||
"5 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" [id=5, type=conv2d]; | ||
"6 /nncf_model_output_0" [id=6, type=nncf_model_output]; | ||
"0 /nncf_model_input_0" -> "1 ShiftScaleParametrized/__sub___0"; | ||
"1 ShiftScaleParametrized/__sub___0" -> "2 ShiftScaleParametrized/__truediv___0"; | ||
"2 ShiftScaleParametrized/__truediv___0" -> "3 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0"; | ||
"3 ShiftScaleParametrized/NNCFNetworkInterface[_nncf]/ModuleDict[external_quantizers]/SymmetricQuantizer[ShiftScaleParametrized/__truediv___0|OUTPUT]/symmetric_quantize_0" -> "5 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"4 ShiftScaleParametrized/NNCFConv2d[conv]/ModuleDict[pre_ops]/UpdateWeight[0]/SymmetricQuantizer[op]/symmetric_quantize_0" -> "5 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0"; | ||
"5 ShiftScaleParametrized/NNCFConv2d[conv]/conv2d_0" -> "6 /nncf_model_output_0"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters