Tweak amused quantization (#1959)

Changes: (1) decrease the calibration dataset size from 2400 to 300; (2) ignore 1% of outliers during activation statistics collection; (3) enable quantization by default.
openvinotoolkit · Apr 25, 2024 · f43125b · f43125b
1 parent e3958eb
commit f43125b
Showing 1 changed file with 17 additions and 7 deletions.
diff --git a/notebooks/amused-lightweight-text-to-image/amused-lightweight-text-to-image.ipynb b/notebooks/amused-lightweight-text-to-image/amused-lightweight-text-to-image.ipynb
@@ -726,17 +726,14 @@
     "ExecuteTime": {
      "end_time": "2024-04-11T15:54:24.345655Z",
      "start_time": "2024-04-11T15:54:24.315345Z"
-    },
-    "test_replace": {
-     "value=False": "value=True"
     }
    },
    "outputs": [],
    "source": [
     "QUANTIZED_TRANSFORMER_OV_PATH = Path(str(TRANSFORMER_OV_PATH).replace(\".xml\", \"_quantized.xml\"))\n",
     "\n",
     "to_quantize = widgets.Checkbox(\n",
-    "    value=False,\n",
+    "    value=True,\n",
     "    description=\"Quantization\",\n",
     "    disabled=False,\n",
     ")\n",
@@ -872,17 +869,19 @@
      "start_time": "2024-04-11T15:53:48.042694Z"
     },
     "test_replace": {
-     "CALIBRATION_DATASET_SIZE = 12 * 200": "CALIBRATION_DATASET_SIZE = 12"
+     "CALIBRATION_DATASET_SIZE = 12 * 25": "CALIBRATION_DATASET_SIZE = 12"
     }
    },
    "outputs": [],
    "source": [
     "%%skip not $to_quantize.value\n",
     "\n",
     "from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters\n",
+    "from nncf.quantization.range_estimator import RangeEstimatorParameters, StatisticsCollectorParameters, StatisticsType, \\\n",
+    "    AggregatorType\n",
     "import nncf\n",
     "\n",
-    "CALIBRATION_DATASET_SIZE = 12 * 200\n",
+    "CALIBRATION_DATASET_SIZE = 12 * 25\n",
     "\n",
     "if not QUANTIZED_TRANSFORMER_OV_PATH.exists():\n",
     "    calibration_data = collect_calibration_data(ov_transformer, CALIBRATION_DATASET_SIZE)\n",
@@ -894,7 +893,18 @@
     "        # We ignore convolutions to improve quality of generations without significant drop in inference speed\n",
     "        ignored_scope=nncf.IgnoredScope(types=[\"Convolution\"]),\n",
     "        # Value of 0.85 was obtained using grid search based on Inception Score computed below\n",
-    "        advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alphas=AdvancedSmoothQuantParameters(matmul=0.85))\n",
+    "        advanced_parameters=nncf.AdvancedQuantizationParameters(\n",
+    "            smooth_quant_alphas=AdvancedSmoothQuantParameters(matmul=0.85),\n",
+    "            # During activation statistics collection we ignore 1% of outliers which improves quantization quality\n",
+    "            activations_range_estimator_params=RangeEstimatorParameters(\n",
+    "                min=StatisticsCollectorParameters(statistics_type=StatisticsType.MIN,\n",
+    "                                                  aggregator_type=AggregatorType.MEAN_NO_OUTLIERS,\n",
+    "                                                  quantile_outlier_prob=0.01),\n",
+    "                max=StatisticsCollectorParameters(statistics_type=StatisticsType.MAX,\n",
+    "                                                  aggregator_type=AggregatorType.MEAN_NO_OUTLIERS,\n",
+    "                                                  quantile_outlier_prob=0.01)\n",
+    "            )\n",
+    "        )\n",
     "    )\n",
     "    ov.save_model(quantized_model, QUANTIZED_TRANSFORMER_OV_PATH)"
    ]