Skip to content

Commit

Permalink
Add docstring for quantizer module
Browse files Browse the repository at this point in the history
  • Loading branch information
Only-bottle committed Nov 13, 2024
1 parent 382944f commit 4f5ca1d
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 35 deletions.
4 changes: 2 additions & 2 deletions examples/quantizer/auto_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
quantizer = netspresso.quantizer()

# 2. Set variables for quantize
input_model = "./examples/sample_models/yolo-fastest.onnx"
input_model = "./examples/sample_models/test.onnx"
OUTPUT_DIR = "./outputs/quantized/onnx2onnx_2"
CALIBRATION_DATASET_PATH = "./examples/sample_datasets/pickle_calibration_dataset_128x128.npy"
BITWIDTH = QuantizationPrecision.INT8
Expand All @@ -26,5 +26,5 @@
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
metric=SimilarityMetric.SNR,
threshod=0,
threshold=0,
)
5 changes: 2 additions & 3 deletions examples/quantizer/custom_quantization_by_operator_type.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from pathlib import Path

from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision, SimilarityMetric, OnnxOperator
from netspresso.quantizer.quantizer import PrecisionByLayer, PrecisionByOperator
from netspresso.utils.file import FileHandler
from netspresso.enums import QuantizationPrecision, OnnxOperator
from netspresso.quantizer.quantizer import PrecisionByOperator


EMAIL = "YOUR_EMAIL"
Expand Down
4 changes: 2 additions & 2 deletions examples/quantizer/plain_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@
output_dir=f"{OUTPUT_DIR}/{Path(input_model).stem}_{BITWIDTH}",
dataset_path=CALIBRATION_DATASET_PATH,
metric=SimilarityMetric.SNR,
weight_precision=QuantizationPrecision.INT16,
activation_precision=QuantizationPrecision.INT16,
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
)
97 changes: 69 additions & 28 deletions netspresso/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,6 @@ def _quantize_model(
output_dir (str): The local folder path to save the quantized model.
dataset_path (str): Path to the dataset. Useful for certain quantizations.
quantization_mode (QuantizationMode): Quantization mode
metric (SimilarityMetric): Quantization quality metrics.
threshold (Union[float, int]): Quantization quality threshold
weight_precision (QuantizationPrecision): Weight quantization bitwidth
activation_precision (QuantizationPrecision): Activation quantization bitwidth
input_layers (List[InputShape], optional): Target input shape for quantization (e.g., dynamic batch to static batch).
wait_until_done (bool): If True, wait for the quantization result before returning the function.
If False, request the quantization and return the function immediately.
Expand Down Expand Up @@ -295,7 +291,7 @@ def plain_quantization(
wait_until_done: bool = True,
sleep_interval: int = 30,
):
"""Quantize a model to the specified framework.
"""Apply full quantization to a model, specifying precision for weight & activation.
Args:
input_model_path (str): The file path where the model is located.
Expand Down Expand Up @@ -345,16 +341,17 @@ def auto_quantization(
input_layers: List[Dict[str, int]] = None,
wait_until_done: bool = True,
sleep_interval: int = 30,
):
"""Quantize a model to the specified framework.
) -> QuantizerMetadata:
"""Apply auto quantization to a model, specifying precision for weight & activation.
Args:
input_model_path (str): The file path where the model is located.
output_dir (str): The local folder path to save the quantized model.
dataset_path (str): Path to the dataset. Useful for certain quantizations.
metric (SimilarityMetric): Quantization quality metrics.
weight_precision (QuantizationPrecision): Weight precision
activation_precision (QuantizationPrecision): Activation precision
metric (SimilarityMetric): Quantization quality metrics.
threshold (Union[float, int]): Quantization quality threshold
input_layers (List[InputShape], optional): Target input shape for quantization (e.g., dynamic batch to static batch).
wait_until_done (bool): If True, wait for the quantization result before returning the function.
If False, request the quantization and return the function immediately.
Expand Down Expand Up @@ -385,7 +382,7 @@ def auto_quantization(

return metadata

def custom_quantization(
def _custom_quantization(
self,
input_model_path: str,
output_dir: str,
Expand All @@ -397,7 +394,7 @@ def custom_quantization(
input_layers: List[Dict[str, int]] = None,
wait_until_done: bool = True,
sleep_interval: int = 30,
):
) -> QuantizerMetadata:
quantization_options = CustomQuantizeOption(
metric=metric,
custom_precision=custom_quantization_dictionary,
Expand All @@ -424,28 +421,53 @@ def custom_quantization_by_layer_name(
output_dir: str,
dataset_path: Optional[str],
precision_by_layer_name: List[PrecisionByLayer],
default_weight_precision: QuantizationPrecision = QuantizationPrecision.INT8,
default_activation_precision: QuantizationPrecision = QuantizationPrecision.INT8,
metric: SimilarityMetric = SimilarityMetric.SNR,
weight_precision: QuantizationPrecision = QuantizationPrecision.INT8,
activation_precision: QuantizationPrecision = QuantizationPrecision.INT8,
input_layers: List[Dict[str, int]] = None,
wait_until_done: bool = True,
sleep_interval: int = 30,
):
) -> QuantizerMetadata:
"""Apply custom quantization to a model, specifying precision for each layer name.
Args:
input_model_path (str): The file path where the model is located.
output_dir (str): The local folder path to save the quantized model.
dataset_path (str): Path to the dataset. Useful for certain quantizations.
precision_by_layer_name (List[PrecisionByLayer]):

Check failure on line 438 in netspresso/quantizer/quantizer.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W291)

netspresso/quantizer/quantizer.py:438:65: W291 Trailing whitespace
List of `PrecisionByLayer` objects that specify the desired precision for each layer name in the model.

Check failure on line 439 in netspresso/quantizer/quantizer.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W291)

netspresso/quantizer/quantizer.py:439:120: W291 Trailing whitespace
Each entry includes:
- `name` (str): The layer name (e.g., /backbone/conv_first/block/act/Mul_output_0).
- `precision` (QuantizationPrecision): The quantization precision level for the layer.
default_weight_precision (QuantizationPrecision): Weight precision
default_activation_precision (QuantizationPrecision): Activation precision
metric (SimilarityMetric): Quantization quality metrics.
input_layers (List[InputShape], optional): Target input shape for quantization (e.g., dynamic batch to static batch).
wait_until_done (bool): If True, wait for the quantization result before returning the function.
If False, request the quantization and return the function immediately.
Raises:
e: If an error occurs during the model quantization.
Returns:
QuantizerMetadata: Quantization metadata.
"""
layers = {
layer.name: layer.precision
for layer in precision_by_layer_name
}

custom_quantization_dictionary = {"layers": layers, "operators": {}}

metadata = self.custom_quantization(
metadata = self._custom_quantization(
input_model_path=input_model_path,
output_dir=output_dir,
dataset_path=dataset_path,
custom_quantization_dictionary=custom_quantization_dictionary,
metric=metric,
weight_precision=weight_precision,
activation_precision=activation_precision,
weight_precision=default_weight_precision,
activation_precision=default_activation_precision,
input_layers=input_layers,
wait_until_done=wait_until_done,
sleep_interval=sleep_interval,
Expand All @@ -459,33 +481,52 @@ def custom_quantization_by_operator_type(
output_dir: str,
dataset_path: Optional[str],
precision_by_operator_type: List[PrecisionByOperator],
metric: SimilarityMetric = SimilarityMetric.SNR,
default_weight_precision: QuantizationPrecision = QuantizationPrecision.INT8,
default_activation_precision: QuantizationPrecision = QuantizationPrecision.INT8,
metric: SimilarityMetric = SimilarityMetric.SNR,
input_layers: List[Dict[str, int]] = None,
wait_until_done: bool = True,
sleep_interval: int = 30,
):
) -> QuantizerMetadata:
"""Apply custom quantization to a model, specifying precision for each operator type.
Args:
input_model_path (str): The file path where the model is located.
output_dir (str): The local folder path to save the quantized model.
dataset_path (str): Path to the dataset. Useful for certain quantizations.
precision_by_operator_type (List[PrecisionByOperator]):

Check failure on line 497 in netspresso/quantizer/quantizer.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W291)

netspresso/quantizer/quantizer.py:497:68: W291 Trailing whitespace
List of `PrecisionByOperator` objects that specify the desired precision for each operator type in the model.

Check failure on line 498 in netspresso/quantizer/quantizer.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W291)

netspresso/quantizer/quantizer.py:498:126: W291 Trailing whitespace
Each entry includes:
- `type` (str): The operator type (e.g., Conv, MatMul).
- `precision` (QuantizationPrecision): The quantization precision level for the operator.
default_weight_precision (QuantizationPrecision): Weight precision
default_activation_precision (QuantizationPrecision): Activation precision
metric (SimilarityMetric): Quantization quality metrics.
input_layers (List[InputShape], optional): Target input shape for quantization (e.g., dynamic batch to static batch).
wait_until_done (bool): If True, wait for the quantization result before returning the function.
If False, request the quantization and return the function immediately.
Raises:
e: If an error occurs during the model quantization.
Returns:
QuantizerMetadata: Quantization metadata.
"""
operators = {
layer.type: layer.precision
for layer in precision_by_operator_type
}

custom_quantization_dictionary = {"layers": {}, "operators": operators}

quantization_options = CustomQuantizeOption(
metric=metric,
custom_precision=custom_quantization_dictionary,
weight_precision=default_weight_precision,
activation_precision=default_activation_precision,
)

metadata = self._quantize_model(
metadata = self._custom_quantization(
input_model_path=input_model_path,
output_dir=output_dir,
dataset_path=dataset_path,
quantization_mode=QuantizationMode.CUSTOM_QUANTIZATION,
quantization_options=quantization_options,
custom_quantization_dictionary=custom_quantization_dictionary,
metric=metric,
weight_precision=default_weight_precision,
activation_precision=default_activation_precision,
input_layers=input_layers,
wait_until_done=wait_until_done,
sleep_interval=sleep_interval,
Expand Down

0 comments on commit 4f5ca1d

Please sign in to comment.