diff --git a/docs/.buildinfo b/docs/.buildinfo index 028bec665..ae0de1084 100644 --- a/docs/.buildinfo +++ b/docs/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: d9a08019dec6882195fa0ef4f685a5cd +config: 029b3c75722303c51813ef033f4d07df tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_modules/brevitas/core/bit_width/const.html b/docs/_modules/brevitas/core/bit_width/const.html index 63cecaf10..476d7ce41 100644 --- a/docs/_modules/brevitas/core/bit_width/const.html +++ b/docs/_modules/brevitas/core/bit_width/const.html @@ -8,7 +8,7 @@ - brevitas.core.bit_width.const — Brevitas 0.10.2 documentation + brevitas.core.bit_width.const — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + diff --git a/docs/_modules/brevitas/core/bit_width/parameter.html b/docs/_modules/brevitas/core/bit_width/parameter.html index 7b8942cba..d0e990a63 100644 --- a/docs/_modules/brevitas/core/bit_width/parameter.html +++ b/docs/_modules/brevitas/core/bit_width/parameter.html @@ -8,7 +8,7 @@ - brevitas.core.bit_width.parameter — Brevitas 0.10.2 documentation + brevitas.core.bit_width.parameter — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + diff --git a/docs/_modules/brevitas/core/function_wrapper/clamp.html b/docs/_modules/brevitas/core/function_wrapper/clamp.html index 76a755ef8..6b6b1c372 100644 --- a/docs/_modules/brevitas/core/function_wrapper/clamp.html +++ b/docs/_modules/brevitas/core/function_wrapper/clamp.html @@ -8,7 +8,7 @@ - brevitas.core.function_wrapper.clamp — Brevitas 0.10.2 documentation + brevitas.core.function_wrapper.clamp — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + @@ -414,12 +414,16 @@

Source code for brevitas.core.function_wrapper.clamp

""" ScriptModule wrappers for various variants of clamping. """ +from typing import Optional, Tuple import torch from torch import Tensor +from torch.nn import Module import brevitas +from brevitas.core.utils import StatelessBuffer from brevitas.function import tensor_clamp +from brevitas.function.ops import max_float
[docs]class TensorClamp(brevitas.jit.ScriptModule): @@ -483,6 +487,90 @@

Source code for brevitas.core.function_wrapper.clamp

[docs] @brevitas.jit.script_method def forward(self, x: Tensor): return x.clamp_min(self.min_val)
+ + +
[docs]class FloatClamp(brevitas.jit.ScriptModule): + """ + ScriptModule for clamping minifloat formats to their inf/NaN implementations. + + Currently, inf/NaN codes have to be encoded through the mantissa. + For example, encoding inf as 1101.111 in E4M3 is not a valid code. + """ + + __constants__ = ['saturating', 'inf_values', 'nan_values', 'signed'] + + def __init__( + self, + tensor_clamp_impl: Module, + signed: bool, + inf_values: Optional[Tuple[str]] = None, + nan_values: Optional[Tuple[str]] = None, + max_available_float: Optional[Tensor] = None, + saturating: bool = True, + device: Optional[str] = None, + dtype: Optional[torch.dtype] = None) -> None: + super(FloatClamp, self).__init__() + + self.tensor_clamp_impl = tensor_clamp_impl + self.saturating = saturating + self.inf_values = inf_values + self.nan_values = nan_values + self.signed = signed + + if max_available_float: + max_available_float = torch.tensor(max_available_float, device=device, dtype=dtype) + self.max_available_float = StatelessBuffer(max_available_float) + else: + self.max_available_float = None +
[docs] def inf_nan_clamp(self, x, inf_mask, p_max_val_mask, n_max_val_mask): + + # if non-saturating, we need to map values greater than max_val to nan or inf + if self.inf_values is not None: + # we have inf values, so we set abs values > max_value to +- inf, and leave inf at inf + x[p_max_val_mask] = torch.tensor(float('inf')) + x[n_max_val_mask] = torch.tensor(float('-inf')) + elif self.nan_values is not None: + # no inf values, so we need to map them to NaN + full_max_val_mask = torch.logical_or(p_max_val_mask, n_max_val_mask) + x[full_max_val_mask] = torch.tensor(float('nan')) + + # we also map the inf values to NaN in this case + x[inf_mask] = torch.tensor(float('nan')) + else: + raise RuntimeError( + "Clamping is not saturating, but neither `inf_values` nor `nan_values` is specified" + ) + return x
+ +
[docs] def saturating_clamp(self, x, max_value, min_value): + return self.tensor_clamp_impl(x, min_val=min_value, max_val=max_value)
+ +
[docs] @brevitas.jit.script_method + def forward( + self, + x: Tensor, + exponent_bit_width: Tensor, + mantissa_bit_width: Tensor, + exponent_bias: Tensor): + + max_value = max_float(exponent_bit_width, mantissa_bit_width, exponent_bias) + max_value = max_value if self.max_available_float is None else torch.min( + max_value, self.max_available_float()) + min_value = torch.tensor(0.) if not self.signed else -max_value + + # Compute masks + inf_mask = x.isinf() + p_max_val_mask = x > max_value + n_max_val_mask = -x > max_value + + # first clamp everything to +- max_value, basically the saturating case + x = self.saturating_clamp(x, max_value, min_value) + + if not self.saturating: + x = self.inf_nan_clamp(x, inf_mask, p_max_val_mask, n_max_val_mask) + + return x, self.saturating, self.inf_values, self.nan_values
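For reference, the clamping semantics above reduce to the following standalone sketch, assuming the OCP E4M3 maximum of 448 (the helper name and hard-coded constant are illustrative, not Brevitas API):

    import torch

    def e4m3_clamp(x: torch.Tensor, saturating: bool = True, max_value: float = 448.) -> torch.Tensor:
        # E4M3 encodes NaN but no inf, so non-saturating overflow maps to NaN
        inf_mask = x.isinf()
        p_mask = x > max_value
        n_mask = -x > max_value
        x = x.clamp(-max_value, max_value)  # the saturating case
        if not saturating:
            x[p_mask | n_mask | inf_mask] = float('nan')
        return x

    x = torch.tensor([500., -500., float('inf'), 3.25])
    print(e4m3_clamp(x))                    # -> [448., -448., 448., 3.25]
    print(e4m3_clamp(x, saturating=False))  # -> [nan, nan, nan, 3.25]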
diff --git a/docs/_modules/brevitas/core/function_wrapper/misc.html b/docs/_modules/brevitas/core/function_wrapper/misc.html index 081a0b671..cdc8f4546 100644 --- a/docs/_modules/brevitas/core/function_wrapper/misc.html +++ b/docs/_modules/brevitas/core/function_wrapper/misc.html @@ -8,7 +8,7 @@ - brevitas.core.function_wrapper.misc — Brevitas 0.10.2 documentation + brevitas.core.function_wrapper.misc — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + diff --git a/docs/_modules/brevitas/core/function_wrapper/ops_ste.html b/docs/_modules/brevitas/core/function_wrapper/ops_ste.html index cd18f66aa..7a8cab9da 100644 --- a/docs/_modules/brevitas/core/function_wrapper/ops_ste.html +++ b/docs/_modules/brevitas/core/function_wrapper/ops_ste.html @@ -8,7 +8,7 @@ - brevitas.core.function_wrapper.ops_ste — Brevitas 0.10.2 documentation + brevitas.core.function_wrapper.ops_ste — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + diff --git a/docs/_modules/brevitas/core/function_wrapper/shape.html b/docs/_modules/brevitas/core/function_wrapper/shape.html index d4ca6da1c..37f7c4f83 100644 --- a/docs/_modules/brevitas/core/function_wrapper/shape.html +++ b/docs/_modules/brevitas/core/function_wrapper/shape.html @@ -8,7 +8,7 @@ - brevitas.core.function_wrapper.shape — Brevitas 0.10.2 documentation + brevitas.core.function_wrapper.shape — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + @@ -426,6 +426,7 @@

Source code for brevitas.core.function_wrapper.shape

from brevitas.function.shape import over_output_channels from brevitas.function.shape import over_output_features from brevitas.function.shape import over_tensor +from brevitas.utils.torch_utils import padding
[docs]class PermuteDims(brevitas.jit.ScriptModule): @@ -563,6 +564,54 @@

Source code for brevitas.core.function_wrapper.shape

return y.reshape(shape)
+
[docs]class OverSubChannelBlockView(brevitas.jit.ScriptModule): + __constants__ = ['expanded_groupwise_shape', 'group_size', 'group_dim'] + + def __init__(self, expanded_groupwise_shape, group_size, group_dim) -> None: + super(OverSubChannelBlockView, self).__init__() + self.expanded_groupwise_shape = expanded_groupwise_shape + self.group_dim = group_dim + self.group_size = group_size + +
[docs] @brevitas.jit.script_method + def forward(self, x: torch.Tensor): + # Two cases can reach this point with an input that is already expanded: + # - quantizing the zero point, whose shape already matches the scale (unpadded, but no padding is needed) + # - groupwise HQO quantization, where the weight has already been padded and expanded + if len(x.shape) == len(self.expanded_groupwise_shape): + return x + y = torch.nn.functional.pad( + x, padding(x, self.group_size, self.group_dim), mode='constant', value=0.) + y = y.view(self.expanded_groupwise_shape) + return y
+ + +
[docs]class DynamicOverSubChannelBlockView(brevitas.jit.ScriptModule): + __constants__ = ['group_size', 'group_dim'] + + def __init__(self, group_size, group_dim) -> None: + super(DynamicOverSubChannelBlockView, self).__init__() + self.group_size = group_size + self.group_dim = group_dim + +
[docs] @brevitas.jit.script_method + def forward(self, x): + + pad = padding(x, self.group_size, self.group_dim) + x = torch.nn.functional.pad(x, pad, mode='constant', value=0.) + + tensor_shape_list = list(x.shape) + tensor_shape_list[self.group_dim] = tensor_shape_list[self.group_dim] // self.group_size + block_dim = self.group_dim + 1 if self.group_dim != -1 else -1 + tensor_shape_list.insert(block_dim, self.group_size) + x = x.view(tensor_shape_list) + return x
+ +
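Both views reduce to the same pad-then-reshape idiom; a self-contained sketch (assuming a non-negative group_dim; the inline padding computation is illustrative, mirroring brevitas.utils.torch_utils.padding):

    import torch

    def group_view(x: torch.Tensor, group_size: int, group_dim: int) -> torch.Tensor:
        # Pad the grouped dim up to a multiple of group_size, then split it
        # into (num_groups, group_size).
        pad_len = -x.shape[group_dim] % group_size
        # F.pad takes (left, right) pairs starting from the last dimension
        pad = [0, 0] * (x.dim() - 1 - group_dim) + [0, pad_len]
        x = torch.nn.functional.pad(x, pad, mode='constant', value=0.)
        shape = list(x.shape)
        shape[group_dim] //= group_size
        shape.insert(group_dim + 1, group_size)
        return x.view(shape)

    w = torch.randn(8, 10)
    print(group_view(w, group_size=4, group_dim=1).shape)  # torch.Size([8, 3, 4])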
[docs]class StatsInputViewShapeImpl(object): """ Enum-like object to collect pointers to variants of ScriptModules that perform a view on a tensor. @@ -572,7 +621,9 @@

Source code for brevitas.core.function_wrapper.shape

OVER_OUTPUT_CHANNELS = OverOutputChannelView OVER_BATCH_OVER_TENSOR = OverBatchOverTensorView OVER_BATCH_OVER_OUTPUT_CHANNELS = OverBatchOverOutputChannelView - OVER_OUTPUT_FEATURES = OverOutputFeaturesView
+ OVER_OUTPUT_FEATURES = OverOutputFeaturesView + OVER_SUBCHANNEL_BLOCK = OverSubChannelBlockView + DYNAMIC_OVER_SUBCHANNEL_BLOCK = DynamicOverSubChannelBlockView
diff --git a/docs/_modules/brevitas/core/quant/binary.html b/docs/_modules/brevitas/core/quant/binary.html index 0c1584966..5ba1e04b4 100644 --- a/docs/_modules/brevitas/core/quant/binary.html +++ b/docs/_modules/brevitas/core/quant/binary.html @@ -8,7 +8,7 @@ - brevitas.core.quant.binary — Brevitas 0.10.2 documentation + brevitas.core.quant.binary — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + @@ -458,8 +458,9 @@

Source code for brevitas.core.quant.binary

         Set env variable BREVITAS_JIT=1 to enable TorchScript compilation of this module.
     """
 
-    def __init__(self, scaling_impl: Module, quant_delay_steps: int = 0):
+    def __init__(self, scaling_impl: Module, signed: bool = True, quant_delay_steps: int = 0):
         super(BinaryQuant, self).__init__()
+        assert signed, "Unsigned binary quant not supported"
         self.scaling_impl = scaling_impl
         self.bit_width = BitWidthConst(1)
         self.zero_point = StatelessBuffer(torch.tensor(0.0))
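The resulting quantizer maps every element to {-scale, +scale}; a one-line sketch of the signed case (plain torch.sign here, where Brevitas routes through a straight-through binary sign):

    import torch

    scale = torch.tensor(0.1)
    x = torch.tensor([-0.3, 0.02, 0.7])
    y = torch.sign(x) * scale  # -> [-0.1, 0.1, 0.1]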
diff --git a/docs/_modules/brevitas/core/quant/delay.html b/docs/_modules/brevitas/core/quant/delay.html
index 347941eea..a401098b0 100644
--- a/docs/_modules/brevitas/core/quant/delay.html
+++ b/docs/_modules/brevitas/core/quant/delay.html
@@ -8,7 +8,7 @@
   
     
     
-    brevitas.core.quant.delay — Brevitas 0.10.2 documentation
+    brevitas.core.quant.delay — Brevitas 0.11.0 documentation
   
   
   
@@ -123,8 +123,8 @@
       
     
     
-    Brevitas 0.10.2 documentation - Home
-    
+    Brevitas 0.11.0 documentation - Home
+    
   
   
 
diff --git a/docs/_modules/brevitas/core/quant/int.html b/docs/_modules/brevitas/core/quant/int.html index 1eb00d929..193997759 100644 --- a/docs/_modules/brevitas/core/quant/int.html +++ b/docs/_modules/brevitas/core/quant/int.html @@ -8,7 +8,7 @@ - brevitas.core.quant.int — Brevitas 0.10.2 documentation + brevitas.core.quant.int — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + @@ -555,15 +555,18 @@

Source code for brevitas.core.quant.int

         self.int_scaling_impl = int_scaling_impl
         self.zero_point_impl = zero_point_impl
         self.msb_clamp_bit_width_impl = bit_width_impl
+        self.observer_only = brevitas.jit.Attribute(False, bool)
 
 
[docs] @brevitas.jit.script_method def forward(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]: bit_width = self.msb_clamp_bit_width_impl() - threshold = self.scaling_impl(x) int_threshold = self.int_scaling_impl(bit_width) - scale = threshold / int_threshold + scale = self.scaling_impl(x, int_threshold) zero_point = self.zero_point_impl(x, scale, bit_width) - y = self.int_quant(scale, zero_point, bit_width, x) + if self.observer_only: + y = x + else: + y = self.int_quant(scale, zero_point, bit_width, x) return y, scale, zero_point, bit_width
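As a rough sketch of the semantics above (stand-in names, signed symmetric quantization for brevity): the scaling implementation now receives the integer threshold and combines it with the statistics internally, while observer_only returns the input unquantized but still produces scale and zero-point:

    import torch

    def rescaling_int_quant(x, abs_max, bit_width=8, observer_only=False):
        int_threshold = 2. ** (bit_width - 1) - 1  # 127 for signed 8-bit
        scale = abs_max / int_threshold            # threshold combined inside the scaling impl
        zero_point = torch.tensor(0.)
        if observer_only:
            y = x                                  # statistics observed, tensor passed through
        else:
            y = torch.round(x / scale).clamp(-int_threshold - 1, int_threshold) * scale
        return y, scale, zero_point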
@@ -586,6 +589,7 @@

Source code for brevitas.core.quant.int

         self.pre_zero_point_impl = pre_zero_point_impl
         self.zero_point_impl = zero_point_impl
         self.msb_clamp_bit_width_impl = bit_width_impl
+        self.observer_only = brevitas.jit.Attribute(False, bool)
 
 
[docs] @brevitas.jit.script_method def forward(self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: @@ -594,10 +598,12 @@

Source code for brevitas.core.quant.int

         pre_threshold = self.pre_scaling_impl(x)
         pre_scale = pre_threshold / int_threshold
         pre_zero_point = self.pre_zero_point_impl(x, pre_scale, bit_width)
-        threshold = self.scaling_impl(x)
-        scale = threshold / int_threshold
+        scale = self.scaling_impl(x, int_threshold)
         zero_point = self.zero_point_impl(x, scale, bit_width)
-        y = self.decoupled_int_quant(pre_scale, pre_zero_point, scale, zero_point, bit_width, x)
+        if self.observer_only:
+            y = x
+        else:
+            y = self.decoupled_int_quant(pre_scale, pre_zero_point, scale, zero_point, bit_width, x)
         return y, scale, zero_point, bit_width, pre_scale, pre_zero_point
@@ -660,10 +666,12 @@

Source code for brevitas.core.quant.int

         pre_threshold = self.pre_scaling_impl(x, input_bit_width, input_is_signed)
         pre_scale = pre_threshold / int_threshold
         pre_zero_point = self.pre_zero_point_impl(x, pre_scale, bit_width)
-        threshold = self.scaling_impl(x)
-        scale = threshold / int_threshold
+        scale = self.scaling_impl(x, int_threshold)
         zero_point = self.zero_point_impl(x, scale, bit_width)
-        y = self.decoupled_int_quant(pre_scale, pre_zero_point, scale, zero_point, bit_width, x)
+        if self.observer_only:
+            y = x
+        else:
+            y = self.decoupled_int_quant(pre_scale, pre_zero_point, scale, zero_point, bit_width, x)
         return y, scale, zero_point, bit_width, pre_scale, pre_zero_point
diff --git a/docs/_modules/brevitas/core/quant/int_base.html b/docs/_modules/brevitas/core/quant/int_base.html index d4223cf8b..8c84f3583 100644 --- a/docs/_modules/brevitas/core/quant/int_base.html +++ b/docs/_modules/brevitas/core/quant/int_base.html @@ -8,7 +8,7 @@ - brevitas.core.quant.int_base — Brevitas 0.10.2 documentation + brevitas.core.quant.int_base — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home + @@ -461,6 +461,7 @@

Source code for brevitas.core.quant.int_base

self,
             narrow_range: bool,
             signed: bool,
+            input_view_impl: Module,
             float_to_int_impl: Module = RoundSte(),
             tensor_clamp_impl: Module = TensorClamp(),
             quant_delay_steps: int = 0):
@@ -470,9 +471,11 @@ 

Source code for brevitas.core.quant.int_base

self.signed = signed
         self.narrow_range = narrow_range
         self.delay_wrapper = DelayWrapper(quant_delay_steps)
+        self.input_view_impl = input_view_impl
 
 
[docs] @brevitas.jit.script_method def to_int(self, scale: Tensor, zero_point: Tensor, bit_width: Tensor, x: Tensor) -> Tensor: + x = self.input_view_impl(x) y = x / scale y = y + zero_point min_int_val = self.min_int(bit_width) @@ -534,6 +537,7 @@

Source code for brevitas.core.quant.int_base

self,
             narrow_range: bool,
             signed: bool,
+            input_view_impl: Module,
             float_to_int_impl: Module = RoundSte(),
             tensor_clamp_impl: Module = TensorClamp(),
             quant_delay_steps: int = 0):
@@ -543,11 +547,13 @@ 

Source code for brevitas.core.quant.int_base

self.signed = signed
         self.narrow_range = narrow_range
         self.delay_wrapper = DelayWrapper(quant_delay_steps)
+        self.input_view_impl = input_view_impl
 
 
[docs] @brevitas.jit.script_method def to_int( self, pre_scale: Tensor, pre_zero_point: Tensor, bit_width: Tensor, x: Tensor) -> Tensor: + x = self.input_view_impl(x) y = x / pre_scale y = y + pre_zero_point min_int_val = self.min_int(bit_width) diff --git a/docs/_modules/brevitas/core/quant/ternary.html b/docs/_modules/brevitas/core/quant/ternary.html index 9ac3f7859..b77615c2c 100644 --- a/docs/_modules/brevitas/core/quant/ternary.html +++ b/docs/_modules/brevitas/core/quant/ternary.html @@ -8,7 +8,7 @@ - brevitas.core.quant.ternary — Brevitas 0.10.2 documentation + brevitas.core.quant.ternary — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
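Ignoring the new input view and the STE wrappers, to_int is the usual affine conversion; a small worked example of the arithmetic:

    import torch

    x = torch.tensor([0.26, -1.3, 3.0])
    scale, zero_point = torch.tensor(0.1), torch.tensor(0.)
    min_int, max_int = -128., 127.  # signed 8-bit, narrow_range=False
    y = torch.clamp(torch.round(x / scale + zero_point), min_int, max_int)
    # -> [3., -13., 30.]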
diff --git a/docs/_modules/brevitas/core/restrict_val.html b/docs/_modules/brevitas/core/restrict_val.html index cdf92c705..724e23eaf 100644 --- a/docs/_modules/brevitas/core/restrict_val.html +++ b/docs/_modules/brevitas/core/restrict_val.html @@ -8,7 +8,7 @@ - brevitas.core.restrict_val — Brevitas 0.10.2 documentation + brevitas.core.restrict_val — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -446,7 +446,7 @@

Source code for brevitas.core.restrict_val

             self.restrict_value_impl = Identity()
 
     @brevitas.jit.script_method
-    def forward(self, x: torch.Tensor):
+    def forward(self, x: Tensor):
         x = self.restrict_value_impl(x)
         x = self.clamp_min_ste(x)
         return x
@@ -462,7 +462,7 @@ 

Source code for brevitas.core.restrict_val

             self.restrict_value_impl = Identity()
 
     @brevitas.jit.script_method
-    def forward(self, x: torch.Tensor):
+    def forward(self, x: Tensor):
         x = self.restrict_value_impl(x)
         return x
 
@@ -478,7 +478,7 @@ 

Source code for brevitas.core.restrict_val

         self.min_val = scaling_min_val
 
     @brevitas.jit.script_method
-    def forward(self, x: torch.Tensor):
+    def forward(self, x: Tensor):
         x = self.clamp_min_ste(x)
         return x
 
@@ -500,8 +500,11 @@ 

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_inplace_module(self): return Identity()
+
[docs] def combine_scale_threshold(self, x: Tensor, threshold: Tensor) -> Tensor: + return x / threshold
+
[docs] @brevitas.jit.script_method - def forward(self, x: torch.Tensor) -> Tensor: + def forward(self, x: Tensor) -> Tensor: return x
@@ -514,7 +517,7 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_float(self, x: float): return math.log2(x)
-
[docs] def restrict_init_tensor(self, x: torch.Tensor): +
[docs] def restrict_init_tensor(self, x: Tensor): return torch.log2(x)
[docs] def restrict_init_module(self): @@ -523,8 +526,11 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_inplace_module(self): return InplaceLogTwo()
+
[docs] def combine_scale_threshold(self, x: Tensor, threshold: Tensor) -> Tensor: + return x - threshold
+
[docs] @brevitas.jit.script_method - def forward(self, x: torch.Tensor): + def forward(self, x: Tensor): x = self.power_of_two(x) return x
@@ -538,7 +544,7 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_float(self, x: float): return x
-
[docs] def restrict_init_tensor(self, x: torch.Tensor): +
[docs] def restrict_init_tensor(self, x: Tensor): return x
[docs] def restrict_init_module(self): @@ -547,8 +553,11 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_inplace_module(self): return Identity()
+
[docs] def combine_scale_threshold(self, x: Tensor, threshold: Tensor) -> Tensor: + return x / threshold
+
[docs] @brevitas.jit.script_method - def forward(self, x: torch.Tensor): + def forward(self, x: Tensor): x = self.float_to_int_impl(x) return x
@@ -563,7 +572,7 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_float(self, x: float): return math.log2(x)
-
[docs] def restrict_init_tensor(self, x: torch.Tensor): +
[docs] def restrict_init_tensor(self, x: Tensor): return torch.log2(x)
[docs] def restrict_init_module(self): @@ -572,8 +581,11 @@

Source code for brevitas.core.restrict_val

 
[docs] def restrict_init_inplace_module(self): return InplaceLogTwo()
+
[docs] def combine_scale_threshold(self, x: Tensor, threshold: Tensor) -> Tensor: + return x - threshold
+
[docs] @brevitas.jit.script_method - def forward(self, x: torch.Tensor): + def forward(self, x: Tensor): x = self.float_to_int_impl(x) x = self.power_of_two(x) return x
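The two combine_scale_threshold variants are the same operation in different domains: dividing by a threshold in the linear domain equals subtracting its log2 in the power-of-two domain. A quick numerical check:

    import torch

    scale, threshold = torch.tensor(4.0), torch.tensor(3.0)
    linear = scale / threshold
    log_domain = torch.exp2(torch.log2(scale) - torch.log2(threshold))
    assert torch.allclose(linear, log_domain)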
diff --git a/docs/_modules/brevitas/core/scaling/int_scaling.html b/docs/_modules/brevitas/core/scaling/int_scaling.html index ab0663819..3dfd7c93d 100644 --- a/docs/_modules/brevitas/core/scaling/int_scaling.html +++ b/docs/_modules/brevitas/core/scaling/int_scaling.html @@ -8,7 +8,7 @@ - brevitas.core.scaling.int_scaling — Brevitas 0.10.2 documentation + brevitas.core.scaling.int_scaling — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/_modules/brevitas/core/scaling/runtime.html b/docs/_modules/brevitas/core/scaling/runtime.html index 566a80cc3..31e87e7bc 100644 --- a/docs/_modules/brevitas/core/scaling/runtime.html +++ b/docs/_modules/brevitas/core/scaling/runtime.html @@ -8,7 +8,7 @@ - brevitas.core.scaling.runtime — Brevitas 0.10.2 documentation + brevitas.core.scaling.runtime — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -421,6 +421,7 @@

Source code for brevitas.core.scaling.runtime

import brevitas.config as config from brevitas.core.function_wrapper import Identity from brevitas.core.restrict_val import _RestrictClampValue +from brevitas.core.restrict_val import FloatRestrictValue from brevitas.core.stats import _ParameterListStats from brevitas.core.stats import _RuntimeStats from brevitas.core.stats import DEFAULT_MOMENTUM @@ -437,8 +438,8 @@

Source code for brevitas.core.scaling.runtime

scaling_stats_input_view_shape_impl: Module, scaling_stats_input_concat_dim: int, tracked_parameter_list: List[torch.nn.Parameter], - restrict_scaling_impl: Module, scaling_shape: Tuple[int, ...], + restrict_scaling_impl: Module = FloatRestrictValue(), affine_rescaling: bool = False, affine_shift_scale: bool = False, scaling_min_val: Optional[float] = None, @@ -461,9 +462,12 @@

Source code for brevitas.core.scaling.runtime

device)
[docs] @brevitas.jit.script_method - def forward(self, ignored: torch.Tensor) -> torch.Tensor: + def forward( + self, ignored: torch.Tensor, threshold: Optional[torch.Tensor] = None) -> torch.Tensor: stats = self.parameter_list_stats() - return self.stats_scaling_impl(stats)
+ if threshold is None: + threshold = torch.ones(1).type_as(stats) + return self.stats_scaling_impl(stats, threshold)
class _StatsScaling(brevitas.jit.ScriptModule): @@ -488,10 +492,16 @@

Source code for brevitas.core.scaling.runtime

self.affine_rescaling = Identity() self.restrict_clamp_scaling = _RestrictClampValue(scaling_min_val, restrict_scaling_impl) self.restrict_scaling_pre = restrict_scaling_impl.restrict_init_module() + self.restrict_scaling_impl = restrict_scaling_impl @brevitas.jit.script_method - def forward(self, stats: torch.Tensor) -> torch.Tensor: + def forward( + self, stats: torch.Tensor, threshold: Optional[torch.Tensor] = None) -> torch.Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(stats) + threshold = self.restrict_scaling_pre(threshold) stats = self.restrict_scaling_pre(stats) + stats = self.restrict_scaling_impl.combine_scale_threshold(stats, threshold) stats = self.affine_rescaling(stats) stats = self.restrict_clamp_scaling(stats) return stats @@ -503,10 +513,10 @@

Source code for brevitas.core.scaling.runtime

self, scaling_stats_impl: Module, scaling_stats_input_view_shape_impl: Module, - restrict_scaling_impl: Module, scaling_shape: Tuple[int, ...], affine_rescaling: bool = False, affine_shift_scale: bool = False, + restrict_scaling_impl: Module = FloatRestrictValue(), scaling_stats_momentum: float = DEFAULT_MOMENTUM, scaling_min_val: Optional[float] = None, dtype: Optional[torch.dtype] = None, @@ -530,9 +540,9 @@

Source code for brevitas.core.scaling.runtime

device)
[docs] @brevitas.jit.script_method - def forward(self, x: torch.Tensor): + def forward(self, x: torch.Tensor, threshold: Optional[torch.Tensor] = None) -> torch.Tensor: stats = self.runtime_stats(x) - return self.stats_scaling_impl(stats)
+ return self.stats_scaling_impl(stats, threshold)
class _AffineRescaling(brevitas.jit.ScriptModule): @@ -568,6 +578,38 @@

Source code for brevitas.core.scaling.runtime

missing_keys.remove(affine_weight_key) if config.IGNORE_MISSING_KEYS and affine_bias_key in missing_keys: missing_keys.remove(affine_bias_key) + + +
[docs]class RuntimeDynamicGroupStatsScaling(brevitas.jit.ScriptModule): + + def __init__( + self, + group_size: int, + group_dim: int, + input_view_impl: Module, + scaling_stats_impl: Module, + scaling_min_val: Optional[float], + restrict_scaling_impl: Module = FloatRestrictValue()) -> None: + super(RuntimeDynamicGroupStatsScaling, self).__init__() + self.group_size = group_size + self.group_dim = group_dim + self.scaling_stats_impl = scaling_stats_impl + self.scaling_min_val = scaling_min_val + self.input_view_impl = input_view_impl + self.restrict_clamp_scaling = _RestrictClampValue(scaling_min_val, restrict_scaling_impl) + +
[docs] @brevitas.jit.script_method + def forward( + self, + stats_input: torch.Tensor, + threshold: Optional[torch.Tensor] = None) -> torch.Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(stats_input) + stats_input_reshaped = self.input_view_impl(stats_input) + out = self.scaling_stats_impl(stats_input_reshaped) / threshold + # Clamp the scale to scaling_min_val, if set + out = self.restrict_clamp_scaling(out) + return out
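Stripped of the Brevitas wrappers, the dynamic groupwise scale amounts to a per-group reduction over a padded-and-reshaped view; a sketch with an abs-max statistic standing in for scaling_stats_impl:

    import torch

    x = torch.randn(8, 16)
    groups = x.view(8, 4, 4)                         # group_size=4 along dim 1, no padding needed
    scale = groups.abs().amax(dim=-1, keepdim=True)  # one scale per group
    print(scale.shape)                               # torch.Size([8, 4, 1])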
diff --git a/docs/_modules/brevitas/core/scaling/standalone.html b/docs/_modules/brevitas/core/scaling/standalone.html index 54ec5390e..2948d3552 100644 --- a/docs/_modules/brevitas/core/scaling/standalone.html +++ b/docs/_modules/brevitas/core/scaling/standalone.html @@ -8,7 +8,7 @@ - brevitas.core.scaling.standalone — Brevitas 0.10.2 documentation + brevitas.core.scaling.standalone — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -425,6 +425,7 @@

Source code for brevitas.core.scaling.standalone

from brevitas.core.restrict_val import _ClampValue from brevitas.core.restrict_val import _RestrictClampValue from brevitas.core.restrict_val import _RestrictValue +from brevitas.core.restrict_val import FloatRestrictValue from brevitas.core.scaling.runtime import _StatsScaling from brevitas.core.stats import _ParameterListStats from brevitas.core.stats import _Stats @@ -470,7 +471,7 @@

Source code for brevitas.core.scaling.standalone

def __init__( self, scaling_init: Union[float, Tensor], - restrict_scaling_impl: Optional[Module] = None, + restrict_scaling_impl: Module = FloatRestrictValue(), scaling_min_val: Optional[float] = None, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> None: @@ -478,18 +479,23 @@

Source code for brevitas.core.scaling.standalone

self.restrict_clamp_scaling = _RestrictClampValue(scaling_min_val, restrict_scaling_impl) if isinstance(scaling_init, Tensor): scaling_init = scaling_init.to(device=device, dtype=dtype) - if restrict_scaling_impl is not None: - scaling_init = restrict_scaling_impl.restrict_init_tensor(scaling_init) + scaling_init = restrict_scaling_impl.restrict_init_tensor(scaling_init) + self.restrict_init_module = restrict_scaling_impl.restrict_init_module() self.value = StatelessBuffer(scaling_init.detach()) else: - if restrict_scaling_impl is not None: - scaling_init = restrict_scaling_impl.restrict_init_float(scaling_init) + scaling_init = restrict_scaling_impl.restrict_init_float(scaling_init) + self.restrict_init_module = restrict_scaling_impl.restrict_init_module() self.value = StatelessBuffer(torch.tensor(scaling_init, dtype=dtype, device=device))
[docs] @brevitas.jit.script_method - def forward(self, placeholder: Tensor) -> Tensor: - value = self.value() - restricted_value = self.restrict_clamp_scaling(value) + def forward(self, placeholder: Tensor, threshold: Optional[Tensor] = None) -> Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(placeholder) + # We first apply any restriction to the scaling value. + # For IntQuant this is a no-op, keeping the behaviour backward compatible. + threshold = self.restrict_clamp_scaling(self.restrict_init_module(threshold)) + restricted_value = self.restrict_clamp_scaling(self.value()) + restricted_value = restricted_value / threshold return restricted_value
@@ -536,7 +542,7 @@

Source code for brevitas.core.scaling.standalone

self, scaling_init: Union[float, Tensor], scaling_shape: Optional[Tuple[int, ...]] = None, - restrict_scaling_impl: Optional[Module] = None, + restrict_scaling_impl: Module = FloatRestrictValue(), scaling_min_val: Optional[float] = None, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> None: @@ -551,17 +557,24 @@

Source code for brevitas.core.scaling.standalone

scaling_init = scaling_init.detach() else: scaling_init = torch.tensor(scaling_init, dtype=dtype, device=device) - if restrict_scaling_impl is not None: - scaling_init = restrict_scaling_impl.restrict_init_tensor(scaling_init) + + scaling_init = restrict_scaling_impl.restrict_init_tensor(scaling_init) + self.restrict_init_module = restrict_scaling_impl.restrict_init_module() + if scaling_init.shape == SCALAR_SHAPE and scaling_shape is not None: scaling_init = torch.full(scaling_shape, scaling_init, dtype=dtype, device=device) self.value = Parameter(scaling_init) self.restrict_clamp_scaling = _RestrictClampValue(scaling_min_val, restrict_scaling_impl)
[docs] @brevitas.jit.script_method - def forward(self, placeholder: Tensor) -> Tensor: + def forward(self, placeholder: Tensor, threshold: Optional[Tensor] = None) -> Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(placeholder) + # We first apply any restriction to the scaling value. + # For IntQuant this is a no-op, keeping the behaviour backward compatible. + threshold = self.restrict_clamp_scaling(self.restrict_init_module(threshold)) + value = abs_binary_sign_grad(self.restrict_clamp_scaling(self.value)) - return value
+ return value / threshold
def _load_from_state_dict( self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, @@ -588,8 +601,8 @@

Source code for brevitas.core.scaling.standalone

scaling_stats_input_view_shape_impl: Module, scaling_stats_input_concat_dim: int, tracked_parameter_list: List[torch.nn.Parameter], - restrict_scaling_impl: Module, scaling_shape: Tuple[int, ...], + restrict_scaling_impl: Module = FloatRestrictValue(), scaling_min_val: Optional[float] = None, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> None: @@ -600,30 +613,38 @@

Source code for brevitas.core.scaling.standalone

scaling_stats_input_view_shape_impl, scaling_stats_input_concat_dim, tracked_parameter_list) + self.restrict_scaling_impl = restrict_scaling_impl self.stats_scaling_impl = _StatsScaling( restrict_scaling_impl, scaling_shape, scaling_min_val, False, False, dtype, device) self.init_done: bool = brevitas.jit.Attribute(False, bool) self.local_loss_mode: bool = brevitas.jit.Attribute(False, bool) - if restrict_scaling_impl is not None: - self.restrict_inplace_preprocess = restrict_scaling_impl.restrict_init_inplace_module() - else: - self.restrict_inplace_preprocess = Identity() + self.restrict_inplace_preprocess = restrict_scaling_impl.restrict_init_inplace_module() + self.restrict_preprocess = restrict_scaling_impl.restrict_init_module() self.value = Parameter(torch.full(scaling_shape, 1.0, dtype=dtype, device=device))
[docs] @brevitas.jit.script_method - def forward(self, ignored: torch.Tensor) -> torch.Tensor: + def forward(self, ignored: Tensor, threshold: Optional[Tensor] = None) -> Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(ignored) + # Threshold division must happen after we update self.value, but before we apply restrict_preprocess + # This is because we don't want to store a parameter dependent on a runtime value (threshold) + # And because restrict needs to happen after we divide by threshold if self.init_done: - value = abs_binary_sign_grad(self.stats_scaling_impl.restrict_clamp_scaling(self.value)) + threshold = self.restrict_inplace_preprocess(threshold) + value = self.restrict_scaling_impl.combine_scale_threshold(self.value, threshold) + value = abs_binary_sign_grad(self.stats_scaling_impl.restrict_clamp_scaling(value)) return value else: stats = self.parameter_list_stats() # workaround to avoid find_unused_parameters=True in DDP stats = stats + 0. * self.value if self.local_loss_mode: - return self.stats_scaling_impl(stats) + return self.stats_scaling_impl(stats, threshold) stats = self.restrict_inplace_preprocess(stats) + threshold = self.restrict_inplace_preprocess(threshold) inplace_tensor_mul(self.value.detach(), stats) - value = abs_binary_sign_grad(self.stats_scaling_impl.restrict_clamp_scaling(self.value)) + value = self.restrict_scaling_impl.combine_scale_threshold(self.value, threshold) + value = abs_binary_sign_grad(self.stats_scaling_impl.restrict_clamp_scaling(value)) self.init_done = True return value
@@ -631,7 +652,7 @@

Source code for brevitas.core.scaling.standalone

output_dict = super(ParameterFromStatsFromParameterScaling, self).state_dict( destination=destination, prefix=prefix, keep_vars=keep_vars) # Avoid saving the init value - if not self.init_done: + if not self.init_done and not config._FULL_STATE_DICT: del output_dict[prefix + 'value'] return output_dict
@@ -700,7 +721,7 @@

Source code for brevitas.core.scaling.standalone

scaling_stats_impl: Module, scaling_stats_input_view_shape_impl: Module = OverBatchOverTensorView(), scaling_shape: Tuple[int, ...] = SCALAR_SHAPE, - restrict_scaling_impl: Optional[Module] = None, + restrict_scaling_impl: Module = FloatRestrictValue(), scaling_stats_momentum: Optional[float] = DEFAULT_MOMENTUM, scaling_min_val: Optional[float] = None, dtype: Optional[torch.dtype] = None, @@ -715,19 +736,19 @@

Source code for brevitas.core.scaling.standalone

scaling_stats_momentum, Optional[float]) self.register_buffer('buffer', torch.full(scaling_shape, 1.0, dtype=dtype, device=device)) self.value = Parameter(torch.full(scaling_shape, 1.0, dtype=dtype, device=device)) + self.restrict_scaling_impl = restrict_scaling_impl self.restrict_scaling = _RestrictValue(restrict_scaling_impl) self.clamp_scaling = _ClampValue(scaling_min_val) self.local_loss_mode: bool = brevitas.jit.Attribute( False, bool) # required to support MSE eval or variants - if restrict_scaling_impl is not None: - self.restrict_inplace_preprocess = restrict_scaling_impl.restrict_init_inplace_module() - self.restrict_preprocess = restrict_scaling_impl.restrict_init_module() - else: - self.restrict_inplace_preprocess = Identity() - self.restrict_preprocess = Identity() + self.restrict_inplace_preprocess = restrict_scaling_impl.restrict_init_inplace_module() + self.restrict_preprocess = restrict_scaling_impl.restrict_init_module()
[docs] @brevitas.jit.script_method - def training_forward(self, stats_input: Tensor) -> Tensor: + def training_forward(self, stats_input: Tensor, threshold: Tensor) -> Tensor: + # Threshold division must happen after we update self.value, but before we apply restrict_preprocess + # This is because we don't want to store a parameter dependent on a runtime value (threshold) + # And because restrict needs to happen after we divide by threshold if self.counter < self.collect_stats_steps: stats_input = self.stats_input_view_shape_impl(stats_input) stats = self.stats(stats_input) @@ -737,32 +758,41 @@

Source code for brevitas.core.scaling.standalone

new_counter = self.counter + 1 # Whenever we are in local loss mode, we don't update the counter nor the buffer if self.local_loss_mode: - return abs_binary_sign_grad(clamped_stats) + # In local loss mode we exit early and divide by the threshold + return abs_binary_sign_grad(clamped_stats / threshold) if self.counter == 0: inplace_tensor_mul(self.buffer, clamped_stats.detach()) else: inplace_momentum_update( self.buffer, clamped_stats.detach(), self.momentum, self.counter, new_counter) self.counter = new_counter - return abs_binary_sign_grad(clamped_stats) + return abs_binary_sign_grad(clamped_stats / threshold) elif self.counter == self.collect_stats_steps: self.restrict_inplace_preprocess(self.buffer) inplace_tensor_mul(self.value.detach(), self.buffer) + threshold = self.restrict_preprocess(threshold) + value = self.restrict_scaling_impl.combine_scale_threshold(self.value, threshold) self.counter = self.counter + 1 - return abs_binary_sign_grad(self.clamp_scaling(self.restrict_scaling(self.value))) + return abs_binary_sign_grad(self.clamp_scaling(self.restrict_scaling(value))) else: - return abs_binary_sign_grad(self.clamp_scaling(self.restrict_scaling(self.value)))
+ threshold = self.restrict_preprocess(threshold) + value = self.restrict_scaling_impl.combine_scale_threshold(self.value, threshold) + return abs_binary_sign_grad(self.clamp_scaling(self.restrict_scaling(value)))
[docs] @brevitas.jit.script_method - def forward(self, stats_input: Tensor) -> Tensor: + def forward(self, stats_input: Tensor, threshold: Optional[Tensor] = None) -> Tensor: + if threshold is None: + threshold = torch.ones(1).type_as(stats_input) if self.training: - return self.training_forward(stats_input) + # Threshold division handled inside the training_forward + return self.training_forward(stats_input, threshold) else: if self.counter <= self.collect_stats_steps: - out = self.buffer + out = self.buffer / threshold out = self.restrict_preprocess(out) else: - out = self.value + threshold = self.restrict_preprocess(threshold) + out = self.restrict_scaling_impl.combine_scale_threshold(self.value, threshold) out = abs_binary_sign_grad(self.clamp_scaling(self.restrict_scaling(out))) return out
@@ -772,7 +802,7 @@

Source code for brevitas.core.scaling.standalone

# Avoid saving the buffer del output_dict[prefix + 'buffer'] # Avoid saving the init value - if self.counter == 0: + if self.counter == 0 and not config._FULL_STATE_DICT: del output_dict[prefix + 'value'] # Save buffer into value for any non-zero number of collection steps elif self.counter <= self.collect_stats_steps: diff --git a/docs/_modules/brevitas/core/stats/stats_op.html b/docs/_modules/brevitas/core/stats/stats_op.html index 5d95da826..18dffb1fc 100644 --- a/docs/_modules/brevitas/core/stats/stats_op.html +++ b/docs/_modules/brevitas/core/stats/stats_op.html @@ -8,7 +8,7 @@ - brevitas.core.stats.stats_op — Brevitas 0.10.2 documentation + brevitas.core.stats.stats_op — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -420,8 +420,11 @@

Source code for brevitas.core.stats.stats_op

import brevitas
 from brevitas import config
+from brevitas.core.function_wrapper.misc import Identity
+from brevitas.core.function_wrapper.ops_ste import ScalarClampMinSte
 from brevitas.core.utils import StatelessBuffer
 from brevitas.function.ops import max_int
+from brevitas.quant_tensor import _unpack_quant_tensor
 # Use custom implementation of kthvalue as work around to (b)float16 kernel limitations
 from brevitas.utils.torch_utils import kthvalue
 
@@ -849,6 +852,19 @@ 

Source code for brevitas.core.stats.stats_op

m.local_loss_mode = enabled
 
 
+def _set_observer_mode(module, enabled, previous_observer_mode):
+    for m in module.modules():
+        if hasattr(m, 'observer_only'):
+            previous_observer_mode[m] = m.observer_only
+            m.observer_only = enabled
+
+
+def _restore_observer_mode(module, previous_observer_mode):
+    for m in module.modules():
+        if hasattr(m, 'observer_only'):
+            m.observer_only = previous_observer_mode[m]
+
+
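A minimal round trip with these helpers, using a stand-in module (the real proxies set observer_only on their quantization implementations):

    import torch

    class DummyQuant(torch.nn.Module):

        def __init__(self):
            super().__init__()
            self.observer_only = False

        def forward(self, x):
            return x if self.observer_only else torch.round(x)

    model = torch.nn.Sequential(DummyQuant())
    previous = {}
    _set_observer_mode(model, True, previous)  # record current modes, enable pass-through
    _ = model(torch.randn(4))                  # statistics can be observed; tensor is unquantized
    _restore_observer_mode(model, previous)    # every module back to its previous mode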
 
[docs]class MSE(torch.nn.Module): # References: # https://github.com/cornell-zhang/dnn-quant-ocs/blob/master/distiller/quantization/clip.py @@ -866,7 +882,12 @@

Source code for brevitas.core.stats.stats_op

self.mse_init_op = mse_init_op
         self.input_view_shape_impl = inner_stats_input_view_shape_impl
         self.proxy_forward = proxy_module.forward
+        self.previous_observer_mode = dict()
         self.set_local_loss_mode = lambda enabled: _set_local_loss_mode(proxy_module, enabled)
+        self.set_observer_mode = lambda enabled: _set_observer_mode(
+            proxy_module, enabled, self.previous_observer_mode)
+        self.restore_observer_mode = lambda: _restore_observer_mode(
+            proxy_module, self.previous_observer_mode)
         self.internal_candidate = None
         self.num = mse_iters
         self.search_method = mse_search_method
@@ -887,11 +908,12 @@ 

Source code for brevitas.core.stats.stats_op

self.internal_candidate = candidate
         # Set to local_loss_mode before calling the proxy
         self.set_local_loss_mode(True)
+        self.set_observer_mode(False)
         quant_value = self.proxy_forward(x)
-        if isinstance(quant_value, tuple):
-            quant_value = quant_value[0]
+        quant_value = _unpack_quant_tensor(quant_value)
         loss = self.mse_loss_fn(x, quant_value)
         self.set_local_loss_mode(False)
+        self.restore_observer_mode()
         return loss
+ + +
[docs]class HalfQuadraticOptimizerScale(torch.nn.Module): + # References: + # https://mobiusml.github.io/hqq_blog/ + # https://github.com/mobiusml/hqq?tab=readme-ov-file + + def __init__( + self, + proxy_module, + hqo_init_op_scale, + keepdim: bool, + inner_stats_input_view_shape_impl: torch.nn.Module, + scaling_min_val: Optional[float] = None, + stats_reduce_dim: Optional[int] = None, + int_scaling_impl=None, + bit_width_impl=None, + hqo_beta_scale: float = 1e5, + hqo_kappa_scale: float = 1.01, + hqo_lp_norm_scale: float = .7, + hqo_iters_scale: int = 1000): + super(HalfQuadraticOptimizerScale, self).__init__() + self.hqo_init_op = hqo_init_op_scale + self.input_view_shape_impl = inner_stats_input_view_shape_impl + self.proxy_forward = proxy_module.forward + self.previous_observer_mode = dict() + self.set_local_loss_mode = lambda enabled: _set_local_loss_mode(proxy_module, enabled) + self.set_observer_mode = lambda enabled: _set_observer_mode( + proxy_module, enabled, self.previous_observer_mode) + self.restore_observer_mode = lambda: _restore_observer_mode( + proxy_module, self.previous_observer_mode) + self.internal_candidate = None + self.hqo_iters = hqo_iters_scale + self.stats_reduce_dim = stats_reduce_dim + self.local_loss_mode: bool = False + + self.beta = hqo_beta_scale + self.kappa = hqo_kappa_scale + self.lp_norm = hqo_lp_norm_scale + + self.int_scaling_impl = int_scaling_impl + self.msb_clamp_bit_width_impl = bit_width_impl + if scaling_min_val is not None and scaling_min_val != 0: + self.clamp_min_ste = ScalarClampMinSte(scaling_min_val) + else: + self.clamp_min_ste = Identity() + self.keepdim = keepdim + + + +
[docs] def optimize(self, x): + x_view = self.input_view_shape_impl(x) + + init = self.hqo_init_op(x_view).detach() + best_candidate = self.parameter_search(init, x_view) + + # Save for evaluation by other modules (e.g. zp) invoking local loss mode + self.internal_candidate = best_candidate.detach() + torch.cuda.empty_cache() + return best_candidate
+ +
[docs] def forward(self, x): + if not self.local_loss_mode: + with torch.no_grad(): + return self.optimize(x) + else: + # This is invoked for the zero-point whenever scale is being optimized first + if self.internal_candidate is None: + x = self.input_view_shape_impl(x) + self.internal_candidate = self.hqo_init_op(x).detach() + return self.internal_candidate
+ + +
[docs]class HalfQuadraticOptimizerZeroPoint(torch.nn.Module): + # References: + # https://mobiusml.github.io/hqq_blog/ + # https://github.com/mobiusml/hqq?tab=readme-ov-file + + def __init__( + self, + proxy_module, + keepdim: bool, + hqo_init_op_zp: torch.nn.Module, + inner_stats_input_view_shape_impl: torch.nn.Module, + stats_reduce_dim: Optional[int] = None, + hqo_beta_zp: float = 1e0, + hqo_kappa_zp: float = 1.01, + hqo_lp_norm_zp: float = .5, + hqo_iters_zp: int = 1000): + super(HalfQuadraticOptimizerZeroPoint, self).__init__() + self.hqo_init_op_zp = hqo_init_op_zp + self.input_view_shape_impl = inner_stats_input_view_shape_impl + self.proxy_forward = proxy_module.forward + self.previous_observer_mode = dict() + self.set_local_loss_mode = lambda enabled: _set_local_loss_mode(proxy_module, enabled) + self.set_observer_mode = lambda enabled: _set_observer_mode( + proxy_module, enabled, self.previous_observer_mode) + self.restore_observer_mode = lambda: _restore_observer_mode( + proxy_module, self.previous_observer_mode) + self.internal_candidate = None + self.stats_reduce_dim = stats_reduce_dim + self.local_loss_mode: bool = False + self.beta = hqo_beta_zp + self.kappa = hqo_kappa_zp + self.lp_norm = hqo_lp_norm_zp + self.hqo_iters = hqo_iters_zp + self.keepdim = keepdim + + + +
[docs] def optimize(self, x): + x_view = self.input_view_shape_impl(x) + + init = self.hqo_init_op_zp(x_view).detach() + + best_candidate = self.parameter_search(init, x) + + # Save for evaluation by other modules (e.g. zp) invoking local loss mode + self.internal_candidate = best_candidate.detach() + torch.cuda.empty_cache() + return best_candidate
+ +
[docs] def forward(self, x): + if not self.local_loss_mode: + with torch.no_grad(): + return self.optimize(x) + else: + # This is invoked for the zero-point whenever scale is being optimized first + if self.internal_candidate is None: + x = self.input_view_shape_impl(x) + self.internal_candidate = self.hqo_init_op_zp(x).detach() + return self.internal_candidate
+ + +
[docs]def masked_median(x, mask, dim=None, keepdim=False): + """Compute the median of tensor x along dim, ignoring values where mask is False. + x and mask need to be broadcastable. + + Args: + x (Tensor): Tensor to compute median of. + mask (BoolTensor): Same shape as x, with True where x is valid and False + where x should be masked. The mask should not be all False along any + column of dimension dim, to avoid NaNs from zero division. + dim (int, optional): Dimension to take the median of. Defaults to None, + which reduces over all elements. + + Returns: + Tensor: Same shape as x, except dimension dim is reduced. + """ + # uncomment this assert for safety, at a potential performance cost + # assert ( + # mask.sum(dim=dim).ne(0).all() + # ), "mask should not be all False in any column, causes zero division" + x_nan = x.float().masked_fill(~mask, float("nan")) + if dim is None: + x_median = x_nan.nanmedian() + else: + x_median, _ = x_nan.nanmedian(dim=dim, keepdim=keepdim) + return x_median
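For example, masking out an outlier before the reduction (note torch.nanmedian returns the lower of the two middle values for an even count):

    import torch

    x = torch.tensor([[1., 2., 100.], [4., 5., 6.]])
    mask = torch.tensor([[True, True, False], [True, True, True]])
    print(masked_median(x, mask, dim=1))  # tensor([1., 5.])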
+ + +# Shrinking operator +
[docs]def shrink_lp_op(x: Tensor, beta: float, lp_norm: float) -> Tensor: + if lp_norm == 1: + return torch.sign(x) * torch.nn.functional.relu(torch.abs(x) - 1.0 / beta) + else: + return torch.sign(x) * torch.nn.functional.relu( + torch.abs(x) - (1.0 / beta) * torch.pow(torch.abs(x), lp_norm - 1))
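For lp_norm=1 this is the classic soft-thresholding (shrinkage) operator used inside the half-quadratic optimizers above: magnitudes shrink by 1/beta, and values within 1/beta of zero collapse to it:

    import torch

    x = torch.tensor([-2.0, -0.05, 0.3, 1.5])
    y = shrink_lp_op(x, beta=10., lp_norm=1)
    # magnitudes shrink by 1 / beta = 0.1 -> approximately [-1.9, 0.0, 0.2, 1.4]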
diff --git a/docs/_modules/brevitas/core/utils.html b/docs/_modules/brevitas/core/utils.html index 6c90c9602..a7378d4d7 100644 --- a/docs/_modules/brevitas/core/utils.html +++ b/docs/_modules/brevitas/core/utils.html @@ -8,7 +8,7 @@ - brevitas.core.utils — Brevitas 0.10.2 documentation + brevitas.core.utils — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/_modules/brevitas/core/zero_point.html b/docs/_modules/brevitas/core/zero_point.html index 53919717c..986cdc5a3 100644 --- a/docs/_modules/brevitas/core/zero_point.html +++ b/docs/_modules/brevitas/core/zero_point.html @@ -8,7 +8,7 @@ - brevitas.core.zero_point — Brevitas 0.10.2 documentation + brevitas.core.zero_point — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -691,8 +691,9 @@

Source code for brevitas.core.zero_point

         output_dict = super(ParameterFromStatsFromParameterZeroPoint, self).state_dict(
             destination=destination, prefix=prefix, keep_vars=keep_vars)
         # Avoid saving the init value
-        if not self.init_done:
-            del output_dict[prefix + 'value']
+ if not self.init_done and not config._FULL_STATE_DICT: + del output_dict[prefix + 'value'] + return output_dict
def _load_from_state_dict( self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, diff --git a/docs/_modules/brevitas/function/ops.html b/docs/_modules/brevitas/function/ops.html index c325b40bd..53806a3b6 100644 --- a/docs/_modules/brevitas/function/ops.html +++ b/docs/_modules/brevitas/function/ops.html @@ -8,7 +8,7 @@ - brevitas.function.ops — Brevitas 0.10.2 documentation + brevitas.function.ops — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -599,7 +599,7 @@

Source code for brevitas.function.ops

     return value
-
[docs]@brevitas.jit.script +
[docs]@brevitas.jit.ignore def max_float(exponent_bit_width: Tensor, mantissa_bit_width: Tensor, exponent_bias: Tensor): max_exponent = (2. ** exponent_bit_width) - 1. - exponent_bias max_mantissa = torch.sum(( diff --git a/docs/_modules/brevitas/function/ops_ste.html b/docs/_modules/brevitas/function/ops_ste.html index 14b00539e..182924071 100644 --- a/docs/_modules/brevitas/function/ops_ste.html +++ b/docs/_modules/brevitas/function/ops_ste.html @@ -8,7 +8,7 @@ - brevitas.function.ops_ste — Brevitas 0.10.2 documentation + brevitas.function.ops_ste — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
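With the standard minifloat definition, max_float evaluates to (2 - 2**-mantissa_bit_width) * 2**max_exponent; worked by hand for E4M3 with bias 7 (the OCP spec then reserves the all-ones code for NaN, which is what max_available_float caps at 448 in FloatClamp):

    # Largest normal value of an E4M3-style minifloat, computed by hand:
    e_bits, m_bits, bias = 4, 3, 7
    max_exponent = 2. ** e_bits - 1. - bias   # 8.0
    max_mantissa = 2. - 2. ** -m_bits         # 1.875
    print(max_mantissa * 2. ** max_exponent)  # 480.0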
diff --git a/docs/_modules/brevitas/function/shape.html b/docs/_modules/brevitas/function/shape.html index 9b6e94515..cb64c5178 100644 --- a/docs/_modules/brevitas/function/shape.html +++ b/docs/_modules/brevitas/function/shape.html @@ -8,7 +8,7 @@ - brevitas.function.shape — Brevitas 0.10.2 documentation + brevitas.function.shape — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/_modules/brevitas/ops/autograd_ste_ops.html b/docs/_modules/brevitas/ops/autograd_ste_ops.html index 21b701fe8..dbb265da7 100644 --- a/docs/_modules/brevitas/ops/autograd_ste_ops.html +++ b/docs/_modules/brevitas/ops/autograd_ste_ops.html @@ -8,7 +8,7 @@ - brevitas.ops.autograd_ste_ops — Brevitas 0.10.2 documentation + brevitas.ops.autograd_ste_ops — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/_modules/index.html b/docs/_modules/index.html index 5380f2cf8..c1ddf62d6 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -8,7 +8,7 @@ - Overview: module code — Brevitas 0.10.2 documentation + Overview: module code — Brevitas 0.11.0 documentation @@ -123,8 +123,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index db6d22fe9..9dc22d647 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '0.10.2', + VERSION: '0.11.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/_static/pygments.css b/docs/_static/pygments.css index 997797f27..012e6a00a 100644 --- a/docs/_static/pygments.css +++ b/docs/_static/pygments.css @@ -3,77 +3,77 @@ html[data-theme="light"] .highlight td.linenos .normal { color: inherit; backgro html[data-theme="light"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } html[data-theme="light"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } html[data-theme="light"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -html[data-theme="light"] .highlight .hll { background-color: #7971292e } -html[data-theme="light"] .highlight { background: #fefefe; color: #545454 } -html[data-theme="light"] .highlight .c { color: #797129 } /* Comment */ -html[data-theme="light"] .highlight .err { color: #d91e18 } /* Error */ -html[data-theme="light"] .highlight .k { color: #7928a1 } /* Keyword */ -html[data-theme="light"] .highlight .l { color: #797129 } /* Literal */ -html[data-theme="light"] .highlight .n { color: #545454 } /* Name */ -html[data-theme="light"] .highlight .o { color: #008000 } /* Operator */ -html[data-theme="light"] .highlight .p { color: #545454 } /* Punctuation */ -html[data-theme="light"] .highlight .ch { color: #797129 } /* Comment.Hashbang */ -html[data-theme="light"] .highlight .cm { color: #797129 } /* Comment.Multiline */ -html[data-theme="light"] .highlight .cp { color: #797129 } /* Comment.Preproc */ -html[data-theme="light"] .highlight .cpf { color: #797129 } /* Comment.PreprocFile */ -html[data-theme="light"] .highlight .c1 { color: #797129 } /* Comment.Single */ -html[data-theme="light"] .highlight .cs { color: #797129 } /* Comment.Special */ -html[data-theme="light"] .highlight .gd { color: #007faa } /* Generic.Deleted */ +html[data-theme="light"] .highlight .hll { background-color: #fae4c2 } +html[data-theme="light"] .highlight { background: #fefefe; color: #080808 } +html[data-theme="light"] .highlight .c { color: #515151 } /* Comment */ +html[data-theme="light"] .highlight .err { color: #a12236 } /* Error */ +html[data-theme="light"] .highlight .k { color: #6730c5 } /* Keyword */ +html[data-theme="light"] .highlight .l { color: #7f4707 } /* Literal */ +html[data-theme="light"] .highlight .n { color: #080808 } /* Name */ +html[data-theme="light"] .highlight .o { color: #00622f } /* Operator */ +html[data-theme="light"] .highlight .p { color: #080808 } /* Punctuation */ +html[data-theme="light"] .highlight .ch { color: #515151 } /* Comment.Hashbang */ +html[data-theme="light"] .highlight .cm { color: #515151 } /* Comment.Multiline */ +html[data-theme="light"] .highlight .cp { color: #515151 } /* Comment.Preproc */ +html[data-theme="light"] .highlight .cpf { color: #515151 } /* Comment.PreprocFile */ +html[data-theme="light"] .highlight .c1 { color: #515151 } /* Comment.Single */ +html[data-theme="light"] .highlight .cs { color: #515151 } /* Comment.Special */ 
+html[data-theme="light"] .highlight .gd { color: #005b82 } /* Generic.Deleted */ html[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ -html[data-theme="light"] .highlight .gh { color: #007faa } /* Generic.Heading */ +html[data-theme="light"] .highlight .gh { color: #005b82 } /* Generic.Heading */ html[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ -html[data-theme="light"] .highlight .gu { color: #007faa } /* Generic.Subheading */ -html[data-theme="light"] .highlight .kc { color: #7928a1 } /* Keyword.Constant */ -html[data-theme="light"] .highlight .kd { color: #7928a1 } /* Keyword.Declaration */ -html[data-theme="light"] .highlight .kn { color: #7928a1 } /* Keyword.Namespace */ -html[data-theme="light"] .highlight .kp { color: #7928a1 } /* Keyword.Pseudo */ -html[data-theme="light"] .highlight .kr { color: #7928a1 } /* Keyword.Reserved */ -html[data-theme="light"] .highlight .kt { color: #797129 } /* Keyword.Type */ -html[data-theme="light"] .highlight .ld { color: #797129 } /* Literal.Date */ -html[data-theme="light"] .highlight .m { color: #797129 } /* Literal.Number */ -html[data-theme="light"] .highlight .s { color: #008000 } /* Literal.String */ -html[data-theme="light"] .highlight .na { color: #797129 } /* Name.Attribute */ -html[data-theme="light"] .highlight .nb { color: #797129 } /* Name.Builtin */ -html[data-theme="light"] .highlight .nc { color: #007faa } /* Name.Class */ -html[data-theme="light"] .highlight .no { color: #007faa } /* Name.Constant */ -html[data-theme="light"] .highlight .nd { color: #797129 } /* Name.Decorator */ -html[data-theme="light"] .highlight .ni { color: #008000 } /* Name.Entity */ -html[data-theme="light"] .highlight .ne { color: #7928a1 } /* Name.Exception */ -html[data-theme="light"] .highlight .nf { color: #007faa } /* Name.Function */ -html[data-theme="light"] .highlight .nl { color: #797129 } /* Name.Label */ -html[data-theme="light"] .highlight .nn { color: #545454 } /* Name.Namespace */ -html[data-theme="light"] .highlight .nx { color: #545454 } /* Name.Other */ -html[data-theme="light"] .highlight .py { color: #007faa } /* Name.Property */ -html[data-theme="light"] .highlight .nt { color: #007faa } /* Name.Tag */ -html[data-theme="light"] .highlight .nv { color: #d91e18 } /* Name.Variable */ -html[data-theme="light"] .highlight .ow { color: #7928a1 } /* Operator.Word */ -html[data-theme="light"] .highlight .pm { color: #545454 } /* Punctuation.Marker */ -html[data-theme="light"] .highlight .w { color: #545454 } /* Text.Whitespace */ -html[data-theme="light"] .highlight .mb { color: #797129 } /* Literal.Number.Bin */ -html[data-theme="light"] .highlight .mf { color: #797129 } /* Literal.Number.Float */ -html[data-theme="light"] .highlight .mh { color: #797129 } /* Literal.Number.Hex */ -html[data-theme="light"] .highlight .mi { color: #797129 } /* Literal.Number.Integer */ -html[data-theme="light"] .highlight .mo { color: #797129 } /* Literal.Number.Oct */ -html[data-theme="light"] .highlight .sa { color: #008000 } /* Literal.String.Affix */ -html[data-theme="light"] .highlight .sb { color: #008000 } /* Literal.String.Backtick */ -html[data-theme="light"] .highlight .sc { color: #008000 } /* Literal.String.Char */ -html[data-theme="light"] .highlight .dl { color: #008000 } /* Literal.String.Delimiter */ -html[data-theme="light"] .highlight .sd { color: #008000 } /* Literal.String.Doc */ -html[data-theme="light"] .highlight .s2 { color: #008000 } /* Literal.String.Double */ 
-html[data-theme="light"] .highlight .se { color: #008000 } /* Literal.String.Escape */ -html[data-theme="light"] .highlight .sh { color: #008000 } /* Literal.String.Heredoc */ -html[data-theme="light"] .highlight .si { color: #008000 } /* Literal.String.Interpol */ -html[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ -html[data-theme="light"] .highlight .sr { color: #d91e18 } /* Literal.String.Regex */ -html[data-theme="light"] .highlight .s1 { color: #008000 } /* Literal.String.Single */ -html[data-theme="light"] .highlight .ss { color: #007faa } /* Literal.String.Symbol */ -html[data-theme="light"] .highlight .bp { color: #797129 } /* Name.Builtin.Pseudo */ -html[data-theme="light"] .highlight .fm { color: #007faa } /* Name.Function.Magic */ -html[data-theme="light"] .highlight .vc { color: #d91e18 } /* Name.Variable.Class */ -html[data-theme="light"] .highlight .vg { color: #d91e18 } /* Name.Variable.Global */ -html[data-theme="light"] .highlight .vi { color: #d91e18 } /* Name.Variable.Instance */ -html[data-theme="light"] .highlight .vm { color: #797129 } /* Name.Variable.Magic */ -html[data-theme="light"] .highlight .il { color: #797129 } /* Literal.Number.Integer.Long */ +html[data-theme="light"] .highlight .gu { color: #005b82 } /* Generic.Subheading */ +html[data-theme="light"] .highlight .kc { color: #6730c5 } /* Keyword.Constant */ +html[data-theme="light"] .highlight .kd { color: #6730c5 } /* Keyword.Declaration */ +html[data-theme="light"] .highlight .kn { color: #6730c5 } /* Keyword.Namespace */ +html[data-theme="light"] .highlight .kp { color: #6730c5 } /* Keyword.Pseudo */ +html[data-theme="light"] .highlight .kr { color: #6730c5 } /* Keyword.Reserved */ +html[data-theme="light"] .highlight .kt { color: #7f4707 } /* Keyword.Type */ +html[data-theme="light"] .highlight .ld { color: #7f4707 } /* Literal.Date */ +html[data-theme="light"] .highlight .m { color: #7f4707 } /* Literal.Number */ +html[data-theme="light"] .highlight .s { color: #00622f } /* Literal.String */ +html[data-theme="light"] .highlight .na { color: #912583 } /* Name.Attribute */ +html[data-theme="light"] .highlight .nb { color: #7f4707 } /* Name.Builtin */ +html[data-theme="light"] .highlight .nc { color: #005b82 } /* Name.Class */ +html[data-theme="light"] .highlight .no { color: #005b82 } /* Name.Constant */ +html[data-theme="light"] .highlight .nd { color: #7f4707 } /* Name.Decorator */ +html[data-theme="light"] .highlight .ni { color: #00622f } /* Name.Entity */ +html[data-theme="light"] .highlight .ne { color: #6730c5 } /* Name.Exception */ +html[data-theme="light"] .highlight .nf { color: #005b82 } /* Name.Function */ +html[data-theme="light"] .highlight .nl { color: #7f4707 } /* Name.Label */ +html[data-theme="light"] .highlight .nn { color: #080808 } /* Name.Namespace */ +html[data-theme="light"] .highlight .nx { color: #080808 } /* Name.Other */ +html[data-theme="light"] .highlight .py { color: #005b82 } /* Name.Property */ +html[data-theme="light"] .highlight .nt { color: #005b82 } /* Name.Tag */ +html[data-theme="light"] .highlight .nv { color: #a12236 } /* Name.Variable */ +html[data-theme="light"] .highlight .ow { color: #6730c5 } /* Operator.Word */ +html[data-theme="light"] .highlight .pm { color: #080808 } /* Punctuation.Marker */ +html[data-theme="light"] .highlight .w { color: #080808 } /* Text.Whitespace */ +html[data-theme="light"] .highlight .mb { color: #7f4707 } /* Literal.Number.Bin */ +html[data-theme="light"] .highlight .mf { color: #7f4707 } /* 
Literal.Number.Float */ +html[data-theme="light"] .highlight .mh { color: #7f4707 } /* Literal.Number.Hex */ +html[data-theme="light"] .highlight .mi { color: #7f4707 } /* Literal.Number.Integer */ +html[data-theme="light"] .highlight .mo { color: #7f4707 } /* Literal.Number.Oct */ +html[data-theme="light"] .highlight .sa { color: #00622f } /* Literal.String.Affix */ +html[data-theme="light"] .highlight .sb { color: #00622f } /* Literal.String.Backtick */ +html[data-theme="light"] .highlight .sc { color: #00622f } /* Literal.String.Char */ +html[data-theme="light"] .highlight .dl { color: #00622f } /* Literal.String.Delimiter */ +html[data-theme="light"] .highlight .sd { color: #00622f } /* Literal.String.Doc */ +html[data-theme="light"] .highlight .s2 { color: #00622f } /* Literal.String.Double */ +html[data-theme="light"] .highlight .se { color: #00622f } /* Literal.String.Escape */ +html[data-theme="light"] .highlight .sh { color: #00622f } /* Literal.String.Heredoc */ +html[data-theme="light"] .highlight .si { color: #00622f } /* Literal.String.Interpol */ +html[data-theme="light"] .highlight .sx { color: #00622f } /* Literal.String.Other */ +html[data-theme="light"] .highlight .sr { color: #a12236 } /* Literal.String.Regex */ +html[data-theme="light"] .highlight .s1 { color: #00622f } /* Literal.String.Single */ +html[data-theme="light"] .highlight .ss { color: #005b82 } /* Literal.String.Symbol */ +html[data-theme="light"] .highlight .bp { color: #7f4707 } /* Name.Builtin.Pseudo */ +html[data-theme="light"] .highlight .fm { color: #005b82 } /* Name.Function.Magic */ +html[data-theme="light"] .highlight .vc { color: #a12236 } /* Name.Variable.Class */ +html[data-theme="light"] .highlight .vg { color: #a12236 } /* Name.Variable.Global */ +html[data-theme="light"] .highlight .vi { color: #a12236 } /* Name.Variable.Instance */ +html[data-theme="light"] .highlight .vm { color: #7f4707 } /* Name.Variable.Magic */ +html[data-theme="light"] .highlight .il { color: #7f4707 } /* Literal.Number.Integer.Long */ html[data-theme="dark"] .highlight pre { line-height: 125%; } html[data-theme="dark"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } html[data-theme="dark"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } diff --git a/docs/about.html b/docs/about.html index 8c96f9c07..f62066e2e 100644 --- a/docs/about.html +++ b/docs/about.html @@ -9,7 +9,7 @@ - About — Brevitas 0.10.2 documentation + About — Brevitas 0.11.0 documentation @@ -125,8 +125,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
diff --git a/docs/api_reference/brevitas.core.bit_width.html b/docs/api_reference/brevitas.core.bit_width.html index c2a0a5090..226dccfa2 100644 --- a/docs/api_reference/brevitas.core.bit_width.html +++ b/docs/api_reference/brevitas.core.bit_width.html @@ -9,7 +9,7 @@ - brevitas.core.bit_width package — Brevitas 0.10.2 documentation + brevitas.core.bit_width package — Brevitas 0.11.0 documentation @@ -126,8 +126,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +
@@ -447,11 +447,11 @@

Submodules
class brevitas.core.bit_width.const.BitWidthConst(bit_width, dtype=None, device=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule that returns a constant bit-width wrapped in a float torch.tensor.

Parameters:
-

bit_width (int) – bit-width value.

+

bit_width (int) – bit-width value.

Examples

@@ -472,9 +472,9 @@

Submodules
forward()[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -489,12 +489,12 @@

Submodules
class brevitas.core.bit_width.const.BitWidthStatefulConst(bit_width, dtype=None, device=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule that returns a constant bit-width wrapped in a float torch.tensor but retains the bit-width as part of the module state.

Parameters:
-

bit_width (int) – bit-width value.

+

bit_width (int) – bit-width value.

Examples

@@ -517,9 +517,9 @@

Submodules
forward()[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -534,13 +534,13 @@
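To make the two constant bit-width modules above concrete, here is a minimal sketch of what they amount to (an illustrative reimplementation, not the Brevitas source): both return the bit-width as a float tensor, and the stateful variant additionally keeps it in a buffer so that it travels with the module's state_dict.

import torch
from torch import nn

class ConstBitWidthSketch(nn.Module):
    # Stateless: the bit-width is returned as a fresh float tensor on every call.
    def __init__(self, bit_width: int):
        super().__init__()
        self.bit_width = float(bit_width)

    def forward(self) -> torch.Tensor:
        return torch.tensor(self.bit_width)

class StatefulConstBitWidthSketch(nn.Module):
    # Stateful: the bit-width lives in a registered buffer, so it is saved and
    # loaded with state_dict, mirroring what BitWidthStatefulConst describes.
    def __init__(self, bit_width: int):
        super().__init__()
        self.register_buffer('bit_width', torch.tensor(float(bit_width)))

    def forward(self) -> torch.Tensor:
        return self.bit_width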

Submodules
class brevitas.core.bit_width.const.MsbClampBitWidth(bit_width_to_remove_impl, min_overall_bit_width, max_overall_bit_width)[source]#
-

Bases: Module

+

Bases: Module

forward(input_bit_width)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -558,15 +558,15 @@

Submodules
class brevitas.core.bit_width.parameter.BitWidthParameter(bit_width, min_bit_width=2, restrict_bit_width_impl=IntRestrictValue(   (float_to_int_impl): RoundSte() ), override_pretrained_bit_width=False, dtype=None, device=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule that returns a learnable bit-width wrapped in a float torch.Tensor.

Parameters:
    -
  • bit_width (int) – value to initialize the output learned bit-width.

  • -
  • min_bit_width (int) – lower bound for the output learned bit-width. Default: 2.

  • -
  • restrict_bit_width_impl (Module) – restrict the learned bit-width to a subset of values. Default: IntRestrictValue(RoundSte()).

  • -
  • override_pretrained_bit_width (bool) – ignore pretrained bit-width loaded from a state dict. Default: False.

  • +
  • bit_width (int) – value to initialize the output learned bit-width.

  • +
  • min_bit_width (int) – lower bound for the output learned bit-width. Default: 2.

  • +
  • restrict_bit_width_impl (Module) – restrict the learned bit-width to a subset of values. Default: IntRestrictValue(RoundSte()).

  • +
  • override_pretrained_bit_width (bool) – ignore pretrained bit-width loaded from a state dict. Default: False.

Returns:
@@ -576,7 +576,7 @@

Submodules

Tensor

Raises:
-

RuntimeError – if bit_width < min_bit_width.

+

RuntimeError – if bit_width < min_bit_width.

Examples

@@ -597,9 +597,9 @@

Submodules
forward()[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -614,13 +614,13 @@
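A learnable bit-width as described above can be sketched as follows (illustrative only; the class name and the exact parameterization are mine, and the real module delegates rounding to restrict_bit_width_impl): the bit-width is stored as a trainable offset above min_bit_width and rounded to an integer with a straight-through estimator, matching the documented RuntimeError when bit_width < min_bit_width.

import torch
from torch import nn

class LearnedBitWidthSketch(nn.Module):
    def __init__(self, bit_width: float, min_bit_width: float = 2.):
        super().__init__()
        if bit_width < min_bit_width:
            raise RuntimeError('bit_width cannot be lower than min_bit_width')
        self.min_bit_width = min_bit_width
        self.offset = nn.Parameter(torch.tensor(bit_width - min_bit_width))

    def forward(self) -> torch.Tensor:
        bit_width = self.min_bit_width + self.offset.abs()
        # Round in the forward pass, let gradients pass straight through.
        return bit_width + (bit_width.round() - bit_width).detach()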

Submodules
class brevitas.core.bit_width.parameter.RemoveBitwidthParameter(bit_width_to_remove, override_pretrained_bit_width=False, non_zero_epsilon=1e-06, remove_zero_bit_width=0.1, dtype=None, device=None)[source]#
-

Bases: Module

+

Bases: Module

forward()[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within diff --git a/docs/api_reference/brevitas.core.function_wrapper.html b/docs/api_reference/brevitas.core.function_wrapper.html index 458e945a3..26217f590 100644 --- a/docs/api_reference/brevitas.core.function_wrapper.html +++ b/docs/api_reference/brevitas.core.function_wrapper.html @@ -9,7 +9,7 @@ - brevitas.core.function_wrapper package — Brevitas 0.10.2 documentation + brevitas.core.function_wrapper package — Brevitas 0.11.0 documentation @@ -126,8 +126,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +

@@ -448,7 +448,7 @@

Submodules
class brevitas.core.function_wrapper.clamp.ClampMin(min_val)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for clamp_min().

Examples

>>> clamp_min = ClampMin(min_val=-2.0)
@@ -459,7 +459,7 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -472,11 +472,45 @@

Submodules +
+class brevitas.core.function_wrapper.clamp.FloatClamp(tensor_clamp_impl, signed, inf_values=None, nan_values=None, max_available_float=None, saturating=True, device=None, dtype=None)[source]#
+

Bases: Module

+

+ScriptModule for clamping minifloat formats to their inf/NaN implementations.

+

Currently, inf/NaN codes have to be encoded through the mantissa. +I.e. setting inf to 1101.111 (E4M3) is not a valid code.

+
+
+forward(x, exponent_bit_width, mantissa_bit_width, exponent_bias)[source]#
+

Define the computation performed at every call.

+

Should be overridden by all subclasses.

+
+

Note

+

Although the recipe for forward pass needs to be defined within +this function, one should call the Module instance afterwards +instead of this since the former takes care of running the +registered hooks while the latter silently ignores them.

+
+
+ +
+
+inf_nan_clamp(x, inf_mask, p_max_val_mask, n_max_val_mask)[source]#
+
+ +
+
+saturating_clamp(x, max_value, min_value)[source]#
+
+ +

+
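To make the inf/NaN remark above concrete, the sketch below computes the largest representable E4M3 magnitude and clamps a tensor to it. The helper is hypothetical (not Brevitas API) and assumes an IEEE-like layout where NaN is encoded through the mantissa, so the all-ones exponent with all-ones mantissa is reserved and the largest finite code drops the lowest mantissa bit.

import torch

def minifloat_max_sketch(exp_bits=4, man_bits=3, bias=7):
    # Largest finite E4M3 code: all-ones exponent with mantissa 110,
    # i.e. 1.75 * 2**(15 - 7) = 448.
    max_exponent = (2 ** exp_bits - 1) - bias
    mantissa = 1.0 + sum(2.0 ** -i for i in range(1, man_bits + 1)) - 2.0 ** -man_bits
    return mantissa * 2.0 ** max_exponent

max_val = minifloat_max_sketch()       # 448.0
x = torch.tensor([500.0, -1000.0, 3.0])
saturated = x.clamp(-max_val, max_val)  # saturating=True behaviour
# With saturating=False, out-of-range values would instead map to NaN
# (or to inf, when the format defines inf codes).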
class brevitas.core.function_wrapper.clamp.ScalarClamp(min_val, max_val)[source]#
-

Bases: Module

-

ScriptModule wrapper for clamp().

+

Bases: Module

+

ScriptModule wrapper for clamp().

Examples

>>> scalar_clamp = ScalarClamp(min_val=-2.0, max_val=2.0)
 >>> scalar_clamp(torch.tensor([-3.0, 3.0]))
@@ -486,7 +520,7 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -502,7 +536,7 @@

Submodules
class brevitas.core.function_wrapper.clamp.TensorClamp[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for tensor_clamp().

Examples

>>> tensor_clamp = TensorClamp()
@@ -515,7 +549,7 @@ 

Submodules
forward(x, min_val, max_val)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -535,7 +569,7 @@

Submodules
class brevitas.core.function_wrapper.misc.Identity[source]#
-

Bases: Module

+

Bases: Module

Identity ScriptModule.

Examples

>>> identity = Identity()
@@ -548,9 +582,9 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -565,7 +599,7 @@

Submodules
class brevitas.core.function_wrapper.misc.InplaceLogTwo[source]#
-

Bases: Module

+

Bases: Module

Module wrapper for log2_().

Examples

>>> inplace_log_two = InplaceLogTwo()
@@ -580,9 +614,9 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -597,8 +631,8 @@

Submodules
class brevitas.core.function_wrapper.misc.LogTwo[source]#
-

Bases: Module

-

ScriptModule wrapper for log2().

+

Bases: Module

+

ScriptModule wrapper for log2().

Examples

>>> log_two = LogTwo()
 >>> x = torch.tensor(8.0)
@@ -609,9 +643,9 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -626,7 +660,7 @@

Submodules
class brevitas.core.function_wrapper.misc.PowerOfTwo[source]#
-

Bases: Module

+

Bases: Module

ScriptModule implementation of 2.0 ** x.

Examples

>>> power_of_two = PowerOfTwo()
@@ -638,9 +672,9 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses. -:rtype: Tensor

+:rtype: Tensor

Note

Although the recipe for forward pass needs to be defined within @@ -659,12 +693,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.CeilSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for ceil_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -680,12 +714,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.DPURoundSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for dpu_round_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -701,12 +735,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.FloorSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for floor_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -722,12 +756,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.InplaceTensorClampSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for tensor_clamp_ste_().

forward(x, min_val, max_val)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -743,12 +777,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.RoundSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for round_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -764,12 +798,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.RoundToZeroSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for round_to_zero_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -785,12 +819,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.ScalarClampMinSte(min_val)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for scalar_clamp_min_ste().

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -806,12 +840,12 @@

Submodules
class brevitas.core.function_wrapper.ops_ste.TensorClampSte[source]#
-

Bases: Module

+

Bases: Module

ScriptModule wrapper for tensor_clamp_ste().

forward(x, min_val, max_val)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -828,10 +862,30 @@

Submodules

brevitas.core.function_wrapper.shape module#

ScriptModule classes to compute the view of a tensor according to various different criteria.

+
+
+class brevitas.core.function_wrapper.shape.DynamicOverSubChannelBlockView(group_size, group_dim)[source]#
+

Bases: Module

+
+
+forward(x)[source]#
+

Define the computation performed at every call.

+

Should be overridden by all subclasses.

+
+

Note

+

Although the recipe for forward pass needs to be defined within +this function, one should call the Module instance afterwards +instead of this since the former takes care of running the +registered hooks while the latter silently ignores them.

+
+
+ +
+
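DynamicOverSubChannelBlockView is new in this release and undocumented here; judging from its group_size and group_dim arguments, a plausible reading is a groupwise view that splits one dimension into (num_groups, group_size) blocks so statistics can be reduced per group. The sketch below is my own guess under that assumption, not the Brevitas implementation.

import torch

def group_view_sketch(x: torch.Tensor, group_size: int, group_dim: int) -> torch.Tensor:
    # Split `group_dim` into (num_groups, group_size).
    shape = list(x.shape)
    assert shape[group_dim] % group_size == 0
    shape[group_dim] = shape[group_dim] // group_size
    shape.insert(group_dim + 1, group_size)
    return x.view(shape)

x = torch.randn(8, 64)
grouped = group_view_sketch(x, group_size=16, group_dim=1)  # shape (8, 4, 16)
scale = grouped.abs().amax(dim=-1, keepdim=True)            # one statistic per group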
class brevitas.core.function_wrapper.shape.OverBatchOverOutputChannelView(permute_dims=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule to compute the over_batch_over_output_channels() view of an input tensor.

Examples

@@ -844,7 +898,7 @@

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -860,7 +914,7 @@

Submodules
class brevitas.core.function_wrapper.shape.OverBatchOverTensorView(permute_dims=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule to compute the over_batch_over_tensor() view of an input tensor.

Examples

@@ -873,7 +927,7 @@

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -889,7 +943,7 @@

Submodules
class brevitas.core.function_wrapper.shape.OverOutputChannelView(permute_dims=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule to compute the over_output_channels() view of an input tensor.

Examples

@@ -902,7 +956,7 @@

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -918,7 +972,7 @@

Submodules
class brevitas.core.function_wrapper.shape.OverOutputFeaturesView(permute_dims=None)[source]#
-

Bases: Module

+

Bases: Module

ScriptModule to compute the over_output_features() view of an input tensor.

Examples

@@ -931,7 +985,27 @@

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

+

Should be overridden by all subclasses.

+
+

Note

+

Although the recipe for forward pass needs to be defined within +this function, one should call the Module instance afterwards +instead of this since the former takes care of running the +registered hooks while the latter silently ignores them.

+
+

+ +

+ +
+
+class brevitas.core.function_wrapper.shape.OverSubChannelBlockView(expanded_groupwise_shape, group_size, group_dim)[source]#
+

Bases: Module

+
+
+forward(x)[source]#
+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -947,7 +1021,7 @@

Submodules
class brevitas.core.function_wrapper.shape.OverTensorView[source]#
-

Bases: Module

+

Bases: Module

ScriptModule to compute the over_tensor() view of an input tensor.

Examples

>>> view_module = OverTensorView()
@@ -959,7 +1033,7 @@ 

Submodules
forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -975,11 +1049,11 @@

Submodules
class brevitas.core.function_wrapper.shape.PermuteDims(permute_dims)[source]#
-

Bases: Module

+

Bases: Module

forward(x)[source]#
-

Defines the computation performed at every call.

+

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

@@ -995,9 +1069,15 @@

Submodules
class brevitas.core.function_wrapper.shape.StatsInputViewShapeImpl[source]#
-

Bases: object

+

Bases: object

Enum-like object to collect pointers to variants of ScriptModules that perform a view on a tensor. All adhere to the same interface.

+
+
+DYNAMIC_OVER_SUBCHANNEL_BLOCK#
+

alias of DynamicOverSubChannelBlockView

+
+
OVER_BATCH_OVER_OUTPUT_CHANNELS#
@@ -1022,6 +1102,12 @@

SubmodulesOverOutputFeaturesView

+
+
+OVER_SUBCHANNEL_BLOCK#
+

alias of OverSubChannelBlockView

+
+
OVER_TENSOR#
@@ -1085,6 +1171,12 @@

SubmodulesClampMin.forward() +
  • FloatClamp +
  • ScalarClamp @@ -1150,6 +1242,10 @@

    Submodulesbrevitas.core.function_wrapper.shape module

  • +
  • OverSubChannelBlockView +
  • OverTensorView @@ -1175,10 +1275,12 @@

    SubmodulesStatsInputViewShapeImpl

  • diff --git a/docs/api_reference/brevitas.core.html b/docs/api_reference/brevitas.core.html index a95aa4cd4..7302ead4b 100644 --- a/docs/api_reference/brevitas.core.html +++ b/docs/api_reference/brevitas.core.html @@ -9,7 +9,7 @@ - brevitas.core package — Brevitas 0.10.2 documentation + brevitas.core package — Brevitas 0.11.0 documentation @@ -126,8 +126,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +

    @@ -478,6 +478,12 @@

    SubpackagesClampMin.forward() +
  • FloatClamp +
  • ScalarClamp @@ -543,6 +549,10 @@

    Subpackagesbrevitas.core.function_wrapper.shape module

  • +
  • OverSubChannelBlockView +
  • OverTensorView @@ -568,10 +582,12 @@

    SubpackagesStatsInputViewShapeImpl

  • @@ -668,6 +684,10 @@

    Subpackagesbrevitas.core.scaling.runtime module +
  • HalfQuadraticOptimizerScale +
  • +
  • HalfQuadraticOptimizerZeroPoint +
  • KLMinimizerThreshold
  • +
  • masked_median()
  • +
  • shrink_lp_op()
  • brevitas.core.stats.stats_wrapper module
  • @@ -790,13 +824,23 @@

    Submodules
    class brevitas.core.restrict_val.FloatRestrictValue[source]#
    -

    Bases: Module

    +

    Bases: Module

    +
    +
    +combine_scale_threshold(x, threshold)[source]#
    +
    +
    Return type:
    +

    Tensor

    +
    +
    +
    +
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -811,7 +855,7 @@

    Submodulesrestrict_init_float(x)[source]#
    Return type:
    -

    float

    +

    float

    @@ -831,7 +875,7 @@

    Submodulesrestrict_init_tensor(x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -841,11 +885,21 @@
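combine_scale_threshold() is new on all the restrict_val classes in this release. Its behavior is not documented here; a plausible reading, sketched under assumption, is that the combination of a scale with a threshold depends on the domain the restrictor works in: a division in the linear domain becomes a subtraction in the log2 domain.

import torch

def combine_linear(scale: torch.Tensor, threshold: torch.Tensor) -> torch.Tensor:
    # Linear-domain restrictors (e.g. FloatRestrictValue): plain division.
    return scale / threshold

def combine_log2(log2_scale: torch.Tensor, log2_threshold: torch.Tensor) -> torch.Tensor:
    # Log-domain restrictors (e.g. PowerOfTwoRestrictValue) carry values as
    # exponents, so the same combination is a subtraction.
    return log2_scale - log2_threshold

s, t = torch.tensor(0.5), torch.tensor(2.0)
assert torch.isclose(combine_linear(s, t),
                     2.0 ** combine_log2(torch.log2(s), torch.log2(t)))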

    Submodules
    class brevitas.core.restrict_val.IntRestrictValue(restrict_value_float_to_int_impl=RoundSte())[source]#
    -

    Bases: Module

    +

    Bases: Module

    +
    +
    +combine_scale_threshold(x, threshold)[source]#
    +
    +
    Return type:
    +

    Tensor

    +
    +
    +
    +
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -881,11 +935,21 @@

    Submodules
    class brevitas.core.restrict_val.LogFloatRestrictValue[source]#
    -

    Bases: Module

    +

    Bases: Module

    +
    +
    +combine_scale_threshold(x, threshold)[source]#
    +
    +
    Return type:
    +

    Tensor

    +
    +
    +
    +
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -921,11 +985,21 @@

    Submodules
    class brevitas.core.restrict_val.PowerOfTwoRestrictValue(restrict_value_float_to_int_impl=RoundSte())[source]#
    -

    Bases: Module

    +

    Bases: Module

    +
    +
    +combine_scale_threshold(x, threshold)[source]#
    +
    +
    Return type:
    +

    Tensor

    +
    +
    +
    +
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -964,11 +1038,11 @@

    Submodules
    class brevitas.core.utils.ParameterWrapper(value)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward()[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -984,11 +1058,11 @@

    Submodules
    class brevitas.core.utils.SingleArgStatelessBuffer(value)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(placeholder)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -1004,13 +1078,13 @@

    Submodules
    class brevitas.core.utils.SliceTensor[source]#
    -

    Bases: Module

    +

    Bases: Module

    eager_forward(x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1018,9 +1092,9 @@

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1035,11 +1109,11 @@

    Submodules
    class brevitas.core.utils.StatelessBuffer(value)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward()[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -1053,7 +1127,7 @@

    Submodules
    state_dict(destination=None, prefix='', keep_vars=False)[source]#
    -

    Returns a dictionary containing references to the whole state of the module.

    +

    Return a dictionary containing references to the whole state of the module.

    Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. Parameters and buffers set to None are not included.

    @@ -1077,13 +1151,13 @@

    Submodules
    Parameters:
      -
    • destination (dict, optional) – If provided, the state of module will +

    • destination (dict, optional) – If provided, the state of module will be updated into the dict and the same object is returned. Otherwise, an OrderedDict will be created and returned. Default: None.

    • -
    • prefix (str, optional) – a prefix added to parameter and buffer +

    • prefix (str, optional) – a prefix added to parameter and buffer names to compose the keys in state_dict. Default: ''.

    • -
    • keep_vars (bool, optional) – by default the Tensor s +

    • keep_vars (bool, optional) – by default the Tensor s returned in the state dict are detached from autograd. If it’s set to True, detaching will not be performed. Default: False.

    • @@ -1093,7 +1167,7 @@

      Submodules

      a dictionary containing a whole state of the module

    Return type:
    -

    dict

    +

    dict

    Example:

    @@ -1111,7 +1185,7 @@

    Submodulesbrevitas.core.utils.inplace_momentum_update(tensor, update, momentum, counter, new_counter)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1121,7 +1195,7 @@

    Submodulesbrevitas.core.utils.inplace_tensor_add(tensor, value)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1131,7 +1205,7 @@

    Submodulesbrevitas.core.utils.inplace_tensor_mul(tensor, value)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1142,13 +1216,13 @@

    Submodules
    class brevitas.core.zero_point.ParameterFromRuntimeZeroPoint(collect_stats_steps, int_quant, quantize_zero_point, zero_point_stats_impl, zero_point_shape, zero_point_stats_input_view_shape_impl, zero_point_stats_momentum=0.1, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1161,7 +1235,7 @@

    Submodules
    state_dict(destination=None, prefix='', keep_vars=False)[source]#
    -

    Returns a dictionary containing references to the whole state of the module.

    +

    Return a dictionary containing references to the whole state of the module.

    Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. Parameters and buffers set to None are not included.

    @@ -1185,13 +1259,13 @@

    Submodules
    Parameters:
      -
    • destination (dict, optional) – If provided, the state of module will +

    • destination (dict, optional) – If provided, the state of module will be updated into the dict and the same object is returned. Otherwise, an OrderedDict will be created and returned. Default: None.

    • -
    • prefix (str, optional) – a prefix added to parameter and buffer +

    • prefix (str, optional) – a prefix added to parameter and buffer names to compose the keys in state_dict. Default: ''.

    • -
    • keep_vars (bool, optional) – by default the Tensor s +

    • keep_vars (bool, optional) – by default the Tensor s returned in the state dict are detached from autograd. If it’s set to True, detaching will not be performed. Default: False.

    • @@ -1201,7 +1275,7 @@

      Submodules

      a dictionary containing a whole state of the module

    Return type:
    -

    dict

    +

    dict

    Example:

    @@ -1217,7 +1291,7 @@

    Submodulestraining_forward(x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1227,15 +1301,15 @@

    Submodules
    class brevitas.core.zero_point.ParameterFromStatsFromParameterZeroPoint(int_quant, quantize_zero_point, zero_point_stats_input_view_shape_impl, zero_point_stats_input_concat_dim, zero_point_stats_impl, zero_point_shape, tracked_parameter_list, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule implementation of a learned scale factor initialized from statistics of a parameter, e.g. weights MSE or AbsMax.

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1248,7 +1322,7 @@

    Submodules
    state_dict(destination=None, prefix='', keep_vars=False)[source]#
    -

    Returns a dictionary containing references to the whole state of the module.

    +

    Return a dictionary containing references to the whole state of the module.

    Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. Parameters and buffers set to None are not included.

    @@ -1272,13 +1346,13 @@

    Submodules
    Parameters:
      -
    • destination (dict, optional) – If provided, the state of module will +

    • destination (dict, optional) – If provided, the state of module will be updated into the dict and the same object is returned. Otherwise, an OrderedDict will be created and returned. Default: None.

    • -
    • prefix (str, optional) – a prefix added to parameter and buffer +

    • prefix (str, optional) – a prefix added to parameter and buffer names to compose the keys in state_dict. Default: ''.

    • -
    • keep_vars (bool, optional) – by default the Tensor s +

    • keep_vars (bool, optional) – by default the Tensor s returned in the state dict are detached from autograd. If it’s set to True, detaching will not be performed. Default: False.

    • @@ -1288,7 +1362,7 @@

      Submodules

      a dictionary containing a whole state of the module

    Return type:
    -

    dict

    +

    dict

    Example:

    @@ -1304,13 +1378,13 @@

    Submodules
    class brevitas.core.zero_point.ParameterZeroPoint(zero_point_init, int_quant, quantize_zero_point, zero_point_shape=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1325,15 +1399,15 @@

    Submodules
    class brevitas.core.zero_point.PreZeroCenterZeroPoint(stats_reduce_dim, pre_zero_point_stats_input_view_shape_impl, pre_zero_point_shape=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    Experimental ScriptModule implementation of a pre-scaling zero-point that zero-centers the incoming tensors. This is intended to be used with DecoupledIntQuant.

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1348,7 +1422,7 @@

    Submodulesget_zero_center(x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -1358,13 +1432,13 @@
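One way to read "zero-centers the incoming tensors" above is sketched below (my interpretation, not the Brevitas source): the pre-scaling zero-point is the negated per-channel mean, so adding it shifts each channel to zero mean before quantization.

import torch

x = torch.randn(4, 16)
# Zero-centering: a zero-point equal to the negated per-channel mean.
zero_point = -x.mean(dim=1, keepdim=True)
centered = x + zero_point
assert torch.allclose(centered.mean(dim=1), torch.zeros(4), atol=1e-6)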

    Submodules
    class brevitas.core.zero_point.StatsFromParameterZeroPoint(int_quant, quantize_zero_point, zero_point_stats_input_view_shape_impl, zero_point_stats_input_concat_dim, zero_point_stats_impl, zero_point_shape, tracked_parameter_list)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1379,13 +1453,13 @@

    Submodules
    class brevitas.core.zero_point.ZeroZeroPoint(dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale, bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -1450,6 +1524,7 @@

    SubmodulesSubmodules
  • brevitas.core.restrict_val module
  • @@ -446,15 +446,15 @@

    Submodules

    brevitas.core.quant.binary module#

    -class brevitas.core.quant.binary.BinaryQuant(scaling_impl, quant_delay_steps=0)[source]#
    -

    Bases: Module

    +class brevitas.core.quant.binary.BinaryQuant(scaling_impl, signed=True, quant_delay_steps=0)[source]# +

    Bases: Module

    ScriptModule that implements scaled uniform binary quantization of an input tensor. Quantization is performed with binary_sign_ste().
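In plain terms, the forward computation is y = sign(x) * scale (an illustrative sketch; the real module uses binary_sign_ste(), which behaves like sign() on nonzero inputs but passes gradients straight through, and returns scale, zero-point and bit-width alongside the output):

import torch

def binary_quant_sketch(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    return torch.sign(x) * scale

x = torch.tensor([-1.5, 0.2, 3.0])
binary_quant_sketch(x, torch.tensor(0.1))  # tensor([-0.1000, 0.1000, 0.1000])

ClampedBinaryQuant, documented next, additionally clamps the input to (-scale, scale) first, which zeroes gradients for inputs outside that range on the backward pass.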

    Parameters:
    • scaling_impl (Module) – Module that returns a scale factor.

    • -
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    • +
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    Returns:
    @@ -490,9 +490,9 @@

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -507,7 +507,7 @@

    Submodules
    class brevitas.core.quant.binary.ClampedBinaryQuant(scaling_impl, tensor_clamp_impl=TensorClamp(), quant_delay_steps=0)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule that implements scaled uniform binary quantization of an input tensor. Before going through quantization, the input tensor is clamped between (- scale, scale), which on the backward pass zeroes gradients corresponding to inputs outside that range. @@ -517,7 +517,7 @@

    Submodules

    Returns:
    @@ -559,9 +559,9 @@

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -579,13 +579,13 @@

    Submodules
    class brevitas.core.quant.delay.DelayWrapper(quant_delay_steps)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, y)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -603,13 +603,13 @@

    Submodules
    class brevitas.core.quant.int.DecoupledRescalingIntQuant(decoupled_int_quant, pre_scaling_impl, scaling_impl, int_scaling_impl, pre_zero_point_impl, zero_point_impl, bit_width_impl)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -628,9 +628,9 @@

    Submodules
    forward(x, input_bit_width, input_is_signed)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -645,13 +645,13 @@

    Submodules
    class brevitas.core.quant.int.PrescaledRestrictIntQuant(int_quant, bit_width_impl)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -666,7 +666,7 @@

    Submodules
    class brevitas.core.quant.int.PrescaledRestrictIntQuantWithInputBitWidth(int_quant, bit_width_impl)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule that wraps around an integer quantization implementation like IntQuant. Zero-point is set to zero, scale is taken as input, bit-width is computed from an input bit-width.

    @@ -715,9 +715,9 @@

    Submodules
    forward(x, scale, input_bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -732,7 +732,7 @@

    Submodules
    class brevitas.core.quant.int.RescalingIntQuant(int_quant, scaling_impl, int_scaling_impl, zero_point_impl, bit_width_impl)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule that wraps around an integer quantization implementation like IntQuant. Scale, zero-point and bit-width are returned from their respective implementations and passed on to the integer quantization implementation.

    @@ -794,9 +794,9 @@

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -811,13 +811,13 @@

    Submodules
    class brevitas.core.quant.int.TruncIntQuant(float_to_int_impl, bit_width_impl, quant_delay_steps=0)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x, scale, zero_point, input_bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within @@ -834,19 +834,19 @@

    Submodules

    brevitas.core.quant.int_base module#

    -class brevitas.core.quant.int_base.DecoupledIntQuant(narrow_range, signed, float_to_int_impl=RoundSte(), tensor_clamp_impl=TensorClamp(), quant_delay_steps=0)[source]#
    -

    Bases: Module

    +class brevitas.core.quant.int_base.DecoupledIntQuant(narrow_range, signed, input_view_impl, float_to_int_impl=RoundSte(), tensor_clamp_impl=TensorClamp(), quant_delay_steps=0)[source]# +

    Bases: Module

ScriptModule that implements scaled, shifted, uniform integer quantization of an input tensor, according to an input pre-scale, scale, pre-zero-point, zero-point and bit-width.

    Parameters:
      -
    • narrow_range (bool) – Flag that determines whether restrict quantization to a narrow range or not.

    • -
    • signed (bool) – Flag that determines whether to quantize to a signed range or not.

    • +
    • narrow_range (bool) – Flag that determines whether restrict quantization to a narrow range or not.

    • +
    • signed (bool) – Flag that determines whether to quantize to a signed range or not.

    • float_to_int_impl (Module) – Module that performs the conversion from floating point to integer representation. Default: RoundSte()

    • tensor_clamp_impl (Module) – Module that performs clamping. Default: TensorClamp()

    • -
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    • +
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    Returns:
    @@ -874,9 +874,9 @@

    Submodules
    forward(pre_scale, pre_zero_point, scale, zero_point, bit_width, x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -901,7 +901,7 @@

    Submodulesto_int(pre_scale, pre_zero_point, bit_width, x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -910,19 +910,19 @@

    Submodules
    -class brevitas.core.quant.int_base.IntQuant(narrow_range, signed, float_to_int_impl=RoundSte(), tensor_clamp_impl=TensorClamp(), quant_delay_steps=0)[source]#
    -

    Bases: Module

    +class brevitas.core.quant.int_base.IntQuant(narrow_range, signed, input_view_impl, float_to_int_impl=RoundSte(), tensor_clamp_impl=TensorClamp(), quant_delay_steps=0)[source]# +

    Bases: Module

ScriptModule that implements scaled, shifted, uniform integer quantization of an input tensor, according to an input scale, zero-point and bit-width.
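The affine scheme described above can be sketched as follows (illustrative; in the real module rounding and clamping are pluggable via float_to_int_impl and tensor_clamp_impl, and rounding uses a straight-through estimator):

import torch

def int_quant_sketch(x, scale, zero_point, bit_width, signed=True, narrow_range=False):
    if signed:
        max_int = 2. ** (bit_width - 1) - 1
        min_int = -2. ** (bit_width - 1) + (1 if narrow_range else 0)
    else:
        max_int = 2. ** bit_width - (2 if narrow_range else 1)
        min_int = 0.
    # Scale to integers, shift by the zero-point, round, clamp, map back.
    int_val = torch.clamp(torch.round(x / scale + zero_point), min_int, max_int)
    return (int_val - zero_point) * scale  # dequantized output

x = torch.tensor([-1.0, 0.3, 2.0])
int_quant_sketch(x, scale=torch.tensor(0.5), zero_point=torch.tensor(0.), bit_width=4)
# tensor([-1.0000, 0.5000, 2.0000])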

    Parameters:
      -
    • narrow_range (bool) – Flag that determines whether restrict quantization to a narrow range or not.

    • -
    • signed (bool) – Flag that determines whether to quantize to a signed range or not.

    • +
    • narrow_range (bool) – Flag that determines whether restrict quantization to a narrow range or not.

    • +
    • signed (bool) – Flag that determines whether to quantize to a signed range or not.

    • float_to_int_impl (Module) – Module that performs the conversion from floating point to integer representation. Default: RoundSte()

    • tensor_clamp_impl (Module) – Module that performs clamping. Default: TensorClamp()

    • -
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    • +
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    Returns:
    @@ -953,9 +953,9 @@

    Submodules
    forward(scale, zero_point, bit_width, x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -980,7 +980,7 @@

    Submodulesto_int(scale, zero_point, bit_width, x)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -993,15 +993,15 @@


    Submodules
    class brevitas.core.quant.ternary.TernaryQuant(scaling_impl, threshold, quant_delay_steps=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule that implements scaled uniform ternary quantization of an input tensor. Quantization is performed with ternary_sign_ste().
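Concretely, values whose magnitude falls below threshold * scale collapse to zero and the rest map to +/- scale (an illustrative sketch; the real module uses ternary_sign_ste() for the backward pass and also returns scale, zero-point and bit-width):

import torch

def ternary_quant_sketch(x, scale, threshold):
    out = torch.sign(x) * (x.abs() > threshold * scale)
    return out * scale

x = torch.tensor([-2.0, 0.1, 0.9])
ternary_quant_sketch(x, scale=torch.tensor(1.0), threshold=0.5)
# tensor([-1., 0., 1.])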

    Parameters:
    • scaling_impl (Module) – Module that returns a scale factor.

    • -
    • threshold (float) – Ternarization threshold w.r.t. to the scale factor.

    • -
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    • +
    • threshold (float) – Ternarization threshold w.r.t. to the scale factor.

    • +
    • quant_delay_steps (int) – Number of training steps to delay quantization for. Default: 0

    Returns:
    @@ -1041,9 +1041,9 @@

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    +:rtype: Tuple[Tensor, Tensor, Tensor, Tensor]

    Note

    Although the recipe for forward pass needs to be defined within diff --git a/docs/api_reference/brevitas.core.scaling.html b/docs/api_reference/brevitas.core.scaling.html index 642ea6328..cf7a68d21 100644 --- a/docs/api_reference/brevitas.core.scaling.html +++ b/docs/api_reference/brevitas.core.scaling.html @@ -9,7 +9,7 @@ - brevitas.core.scaling package — Brevitas 0.10.2 documentation + brevitas.core.scaling package — Brevitas 0.11.0 documentation @@ -126,8 +126,8 @@ - Brevitas 0.10.2 documentation - Home - + Brevitas 0.11.0 documentation - Home +

    @@ -447,13 +447,13 @@

    Submodules
    class brevitas.core.scaling.int_scaling.IntScaling(signed, narrow_range)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -468,13 +468,13 @@

    Submodules
    class brevitas.core.scaling.int_scaling.PowerOfTwoIntScaling(signed)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(bit_width)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -489,15 +489,37 @@
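These int-scaling modules map a bit-width to the size of the integer range that a float threshold is divided by to obtain a scale factor. A plausible reading, sketched under assumption (names mine; narrow_range would trim one code from the range):

import torch

def int_scale_sketch(bit_width, signed=True, power_of_two=False):
    # e.g. 127 for signed 8-bit, 255 for unsigned 8-bit; the power-of-two
    # variant keeps the range itself a power of two.
    if power_of_two:
        return 2. ** (bit_width - 1) if signed else 2. ** bit_width
    return 2. ** (bit_width - 1) - 1 if signed else 2. ** bit_width - 1

threshold = torch.tensor(1.0)
scale = threshold / int_scale_sketch(torch.tensor(8.))  # ~1/127 for signed 8-bit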

    Submodules

    brevitas.core.scaling.runtime module#

    +
    +
    +class brevitas.core.scaling.runtime.RuntimeDynamicGroupStatsScaling(group_size, group_dim, input_view_impl, scaling_stats_impl, scaling_min_val, restrict_scaling_impl=FloatRestrictValue())[source]#
    +

    Bases: Module

    +
    +
    +forward(stats_input, threshold=None)[source]#
    +

    Define the computation performed at every call.

    +

    Should be overridden by all subclasses. +:rtype: Tensor

    +
    +

    Note

    +

    Although the recipe for forward pass needs to be defined within +this function, one should call the Module instance afterwards +instead of this since the former takes care of running the +registered hooks while the latter silently ignores them.

    +
    +
    + +
    +
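RuntimeDynamicGroupStatsScaling is new in this release; as the name suggests, the scale is recomputed from the current input at every call ("dynamic") rather than learned or averaged. A plausible sketch under that assumption, reusing the groupwise view idea shown earlier for the shape modules:

import torch

def dynamic_group_scale_sketch(x, group_size, group_dim, min_val=1e-8):
    # View the tensor in groups, take a per-group absmax at runtime, floor it.
    shape = list(x.shape)
    shape[group_dim] = shape[group_dim] // group_size
    shape.insert(group_dim + 1, group_size)
    grouped = x.view(shape)
    return grouped.abs().amax(dim=group_dim + 1, keepdim=True).clamp_min(min_val)

scales = dynamic_group_scale_sketch(torch.randn(8, 64), group_size=16, group_dim=1)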
    -class brevitas.core.scaling.runtime.RuntimeStatsScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, restrict_scaling_impl, scaling_shape, affine_rescaling=False, affine_shift_scale=False, scaling_stats_momentum=0.1, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.runtime.RuntimeStatsScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, scaling_shape, affine_rescaling=False, affine_shift_scale=False, restrict_scaling_impl=FloatRestrictValue(), scaling_stats_momentum=0.1, scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    -forward(x)[source]#
    -

    Defines the computation performed at every call.

    -

    Should be overridden by all subclasses.

    +forward(x, threshold=None)[source]# +

    Define the computation performed at every call.

    +

    Should be overridden by all subclasses. +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -511,14 +533,14 @@

    Submodules
    -class brevitas.core.scaling.runtime.StatsFromParameterScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, scaling_stats_input_concat_dim, tracked_parameter_list, restrict_scaling_impl, scaling_shape, affine_rescaling=False, affine_shift_scale=False, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.runtime.StatsFromParameterScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, scaling_stats_input_concat_dim, tracked_parameter_list, scaling_shape, restrict_scaling_impl=FloatRestrictValue(), affine_rescaling=False, affine_shift_scale=False, scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    -forward(ignored)[source]#
    -

    Defines the computation performed at every call.

    +forward(ignored, threshold=None)[source]# +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -535,15 +557,15 @@

    Submodules

    brevitas.core.scaling.standalone module#

    -class brevitas.core.scaling.standalone.ConstScaling(scaling_init, restrict_scaling_impl=None, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.standalone.ConstScaling(scaling_init, restrict_scaling_impl=FloatRestrictValue(), scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    ScriptModule implementation of a constant scale factor.

    Parameters:
      -
    • scaling_init (Union[float, Tensor]) – value to use as constant scale factor.

    • +
    • scaling_init (Union[float, Tensor]) – value to use as constant scale factor.

    • restrict_scaling_impl (Module) – restrict scaling_init according to some criteria. Default: None

    • -
    • scaling_min_val (float) – force a lower-bound on scaling_init. Default: None

    • +
    • scaling_min_val (float) – force a lower-bound on scaling_init. Default: None

    Returns:
    @@ -576,10 +598,10 @@

    Submodules
    -forward(placeholder)[source]#
    -

    Defines the computation performed at every call.

    +forward(placeholder, threshold=None)[source]# +

    Define the computation performed at every call.

    Should be overridden by all subclasses. -:rtype: Tensor

    +:rtype: Tensor

    Note

    Although the recipe for forward pass needs to be defined within @@ -593,8 +615,8 @@

    Submodules
    -class brevitas.core.scaling.standalone.ParameterFromRuntimeStatsScaling(collect_stats_steps, scaling_stats_impl, scaling_stats_input_view_shape_impl=OverBatchOverTensorView(   (permute_impl): Identity() ), scaling_shape=(), restrict_scaling_impl=None, scaling_stats_momentum=0.1, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.standalone.ParameterFromRuntimeStatsScaling(collect_stats_steps, scaling_stats_impl, scaling_stats_input_view_shape_impl=OverBatchOverTensorView(   (permute_impl): Identity() ), scaling_shape=(), restrict_scaling_impl=FloatRestrictValue(), scaling_stats_momentum=0.1, scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    ScriptModule implementation of a learned scale factor initialized from runtime statistics. The implementation works in two phases. During the first phase, statistics are collected in the same fashion as batchnorm, meaning that while the module is in training mode a set of per-batch @@ -605,15 +627,15 @@

    Submodules
    Parameters:
      -
    • collect_stats_steps (int) – Number of calls to the forward method in training mode to collect statistics for.

    • +
    • collect_stats_steps (int) – Number of calls to the forward method in training mode to collect statistics for.

    • scaling_stats_impl (Module) – Implementation of the statistics computed during the collection phase.

    • scaling_stats_input_view_shape_impl (Module) – Implementation of the view applied to the runtime input during the statistics collection phase. Default: OverBatchOverTensorView().

    • -
    • scaling_shape (Tuple[int, ...]) – shape of the torch.nn.Parameter used in the second phase. Default: SCALAR_SHAPE.

    • +
    • scaling_shape (Tuple[int, ...]) – shape of the torch.nn.Parameter used in the second phase. Default: SCALAR_SHAPE.

• restrict_scaling_impl (Module) – restrict the learned scale factor according to some criteria. Default: FloatRestrictValue()

    • -
• scaling_stats_momentum (Optional[float]) – Momentum for the statistics moving average. Default: DEFAULT_MOMENTUM.

    • -
    • scaling_min_val (float) – force a lower-bound on the learned scale factor. Default: None.

    • +
• scaling_stats_momentum (Optional[float]) – Momentum for the statistics moving average. Default: DEFAULT_MOMENTUM.

    • +
    • scaling_min_val (float) – force a lower-bound on the learned scale factor. Default: None.

    Returns:
    @@ -623,7 +645,7 @@

    Submodules

    Tensor

    Raises:
    -

    RuntimeError – if scaling_shape != SCALAR_SHAPE and scaling_stats_permute_dims is None

    +

    RuntimeError – if scaling_shape != SCALAR_SHAPE and scaling_stats_permute_dims is None

    Examples
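The rendered doctest is elided by this hunk; below is a hedged stand-in illustrating the two-phase behaviour described above (import paths follow the documented module; outputs omitted since they depend on the random input):

>>> import torch
>>> from brevitas.core.scaling.standalone import ParameterFromRuntimeStatsScaling
>>> from brevitas.core.stats.stats_op import AbsMax
>>> scaling_impl = ParameterFromRuntimeStatsScaling(collect_stats_steps=1, scaling_stats_impl=AbsMax())
>>> _ = scaling_impl.train()                 # phase 1: collect per-batch statistics
>>> _ = scaling_impl(torch.randn(8, 16))
>>> _ = scaling_impl.eval()                  # phase 2: stats now back a learned torch.nn.Parameter
>>> scale = scaling_impl(torch.randn(8, 16))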

    @@ -649,10 +671,10 @@

    Submodules
    -forward(stats_input)[source]#
    -

    Defines the computation performed at every call.

    +forward(stats_input, threshold=None)[source]# +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -665,7 +687,7 @@

    Submodules
    state_dict(destination=None, prefix='', keep_vars=False)[source]#
    -

    Returns a dictionary containing references to the whole state of the module.

    +

    Return a dictionary containing references to the whole state of the module.

    Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. Parameters and buffers set to None are not included.

    @@ -689,13 +711,13 @@

    Submodules
    Parameters:
      -
    • destination (dict, optional) – If provided, the state of module will +

    • destination (dict, optional) – If provided, the state of module will be updated into the dict and the same object is returned. Otherwise, an OrderedDict will be created and returned. Default: None.

    • -
    • prefix (str, optional) – a prefix added to parameter and buffer +

    • prefix (str, optional) – a prefix added to parameter and buffer names to compose the keys in state_dict. Default: ''.

    • -
    • keep_vars (bool, optional) – by default the Tensor s +

    • keep_vars (bool, optional) – by default the Tensor s returned in the state dict are detached from autograd. If it’s set to True, detaching will not be performed. Default: False.

    • @@ -705,7 +727,7 @@

      Submodules

      a dictionary containing a whole state of the module

    Return type:
    -

    dict

    +

    dict

    Example:

    @@ -718,10 +740,10 @@

    Submodules
    -training_forward(stats_input)[source]#
    +training_forward(stats_input, threshold)[source]#
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -730,16 +752,16 @@

    Submodules
    -class brevitas.core.scaling.standalone.ParameterFromStatsFromParameterScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, scaling_stats_input_concat_dim, tracked_parameter_list, restrict_scaling_impl, scaling_shape, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.standalone.ParameterFromStatsFromParameterScaling(scaling_stats_impl, scaling_stats_input_view_shape_impl, scaling_stats_input_concat_dim, tracked_parameter_list, scaling_shape, restrict_scaling_impl=FloatRestrictValue(), scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    ScriptModule implementation of a learned scale factor initialized from statistics of a parameter, e.g. weights MSE or AbsMax.
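An illustrative wiring of this module (hedged sketch: the stats pipeline shown is one plausible configuration, not one prescribed by the docs):

>>> import torch
>>> from brevitas.core.scaling.standalone import ParameterFromStatsFromParameterScaling
>>> from brevitas.core.stats.stats_op import AbsMax
>>> from brevitas.core.function_wrapper.shape import OverTensorView
>>> weight = torch.nn.Parameter(torch.randn(16, 8))
>>> scaling_impl = ParameterFromStatsFromParameterScaling(
...     scaling_stats_impl=AbsMax(),
...     scaling_stats_input_view_shape_impl=OverTensorView(),
...     scaling_stats_input_concat_dim=0,
...     tracked_parameter_list=[weight],
...     scaling_shape=())
>>> scale = scaling_impl(torch.empty(0))  # the input is ignored; stats come from the tracked parameter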

    -forward(ignored)[source]#
    -

    Defines the computation performed at every call.

    +forward(ignored, threshold=None)[source]# +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -752,7 +774,7 @@

    Submodules
    state_dict(destination=None, prefix='', keep_vars=False)[source]#
    -

    Returns a dictionary containing references to the whole state of the module.

    +

    Return a dictionary containing references to the whole state of the module.

    Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names. Parameters and buffers set to None are not included.

    @@ -776,13 +798,13 @@

    Submodules
    Parameters:
      -
    • destination (dict, optional) – If provided, the state of module will +

    • destination (dict, optional) – If provided, the state of module will be updated into the dict and the same object is returned. Otherwise, an OrderedDict will be created and returned. Default: None.

    • -
    • prefix (str, optional) – a prefix added to parameter and buffer +

    • prefix (str, optional) – a prefix added to parameter and buffer names to compose the keys in state_dict. Default: ''.

    • -
    • keep_vars (bool, optional) – by default the Tensor s +

    • keep_vars (bool, optional) – by default the Tensor s returned in the state dict are detached from autograd. If it’s set to True, detaching will not be performed. Default: False.

    • @@ -792,7 +814,7 @@

      Submodules

      a dictionary containing a whole state of the module

    Return type:
    -

    dict

    +

    dict

    Example:

    @@ -807,16 +829,16 @@

    Submodules
    -class brevitas.core.scaling.standalone.ParameterScaling(scaling_init, scaling_shape=None, restrict_scaling_impl=None, scaling_min_val=None, dtype=None, device=None)[source]#
    -

    Bases: Module

    +class brevitas.core.scaling.standalone.ParameterScaling(scaling_init, scaling_shape=None, restrict_scaling_impl=FloatRestrictValue(), scaling_min_val=None, dtype=None, device=None)[source]# +

    Bases: Module

    ScriptModule implementation of a learned scale factor.

    Parameters:
      -
    • scaling_init (Union[float, Tensor]) – value to initialize the learned scale factor.

    • -
    • scaling_shape (Tuple[int, ...]) – shape to extend a scalar float or tensor scaling_init. Default: None

    • +
    • scaling_init (Union[float, Tensor]) – value to initialize the learned scale factor.

    • +
    • scaling_shape (Tuple[int, ...]) – shape to extend a scalar float or tensor scaling_init. Default: None

• restrict_scaling_impl (Module) – restrict the learned scale factor according to some criteria. Default: FloatRestrictValue()

    • -
    • scaling_min_val (float) – force a lower-bound on the learned scale factor. Default: None

    • +
    • scaling_min_val (float) – force a lower-bound on the learned scale factor. Default: None

    Returns:
    @@ -826,7 +848,7 @@

    Submodules

    Tensor

    Raises:
    -

    RuntimeError – if scaling_init is a non-scalar tensor and scaling_shape is != scaling_init.shape.

    +

    RuntimeError – if scaling_init is a non-scalar tensor and scaling_shape is != scaling_init.shape.

    Examples
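The rendered doctest is elided by this hunk; a hedged stand-in (treat the printed output, including the grad_fn name, as illustrative):

>>> import torch
>>> from brevitas.core.scaling.standalone import ParameterScaling
>>> scaling_impl = ParameterScaling(6.0)
>>> scaling_impl(torch.empty(1))
tensor(6., grad_fn=<AbsBinarySignGradFnBackward>)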

    @@ -857,10 +879,10 @@

    Submodules
    -forward(placeholder)[source]#
    -

    Defines the computation performed at every call.

    +forward(placeholder, threshold=None)[source]# +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -934,6 +956,10 @@

Submodules

brevitas.core.scaling.runtime module

    @@ -447,11 +447,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsAve(stats_reduce_dim=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -467,11 +467,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsMax(stats_reduce_dim=None, keepdim=False)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -487,11 +487,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsMaxAve(stats_reduce_dim)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -507,11 +507,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsMaxL2(stats_reduce_dim)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -527,11 +527,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsMinMax(stats_reduce_dim=None, keepdim=False, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -547,11 +547,11 @@

    Submodules
    class brevitas.core.stats.stats_op.AbsPercentile(high_percentile_q, stats_reduce_dim, percentile_q=None, keepdim=False)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -564,16 +564,76 @@

    Submodules +
    +class brevitas.core.stats.stats_op.HalfQuadraticOptimizerScale(proxy_module, hqo_init_op_scale, keepdim, inner_stats_input_view_shape_impl, scaling_min_val=None, stats_reduce_dim=None, int_scaling_impl=None, bit_width_impl=None, hqo_beta_scale=100000.0, hqo_kappa_scale=1.01, hqo_lp_norm_scale=0.7, hqo_iters_scale=1000)[source]#
    +

    Bases: Module

    +
    +
    +forward(x)[source]#
    +

    Define the computation performed at every call.

    +

    Should be overridden by all subclasses.

    +
    +

    Note

    +

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

    +
    +
    + +
    +
    +optimize(x)[source]#
    +
    + +
    + +
    + +

    + +
    +
    +class brevitas.core.stats.stats_op.HalfQuadraticOptimizerZeroPoint(proxy_module, keepdim, hqo_init_op_zp, inner_stats_input_view_shape_impl, stats_reduce_dim=None, hqo_beta_zp=1.0, hqo_kappa_zp=1.01, hqo_lp_norm_zp=0.5, hqo_iters_zp=1000)[source]#
    +

    Bases: Module

    +
    +
    +forward(x)[source]#
    +

    Define the computation performed at every call.

    +

    Should be overridden by all subclasses.

    +
    +

    Note

    +

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

    +
    +
    + +
    +
    +optimize(x)[source]#
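Both half-quadratic optimizer modules follow the HQQ-style recipe: alternate a closed-form shrinkage step (shrink_lp_op, documented below) with a re-fit of the quantization parameter, annealing beta by kappa each iteration. A rough, hedged sketch of the scale variant (the helper below is hypothetical and simplified, not the Brevitas internals):

import torch

from brevitas.core.stats.stats_op import shrink_lp_op

def hqo_scale_sketch(x, scale, qmin, qmax, beta=1e5, kappa=1.01, lp_norm=0.7, iters=1000):
    # Alternating minimization of an Lp reconstruction error, HQQ-style.
    for _ in range(iters):
        q = torch.clamp(torch.round(x / scale), qmin, qmax)  # quantize at the current scale
        w_e = shrink_lp_op(x - scale * q, beta, lp_norm)     # proximal shrinkage of the error
        # Closed-form least-squares re-fit of the scale against the compensated target.
        scale = torch.sum(q * (x - w_e)) / (torch.sum(q * q) + 1e-12)
        beta = beta * kappa                                  # anneal the penalty weight
    return scale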
    +
    + +
    + +
    + +
    +
    class brevitas.core.stats.stats_op.KLMinimizerThreshold(signed, bit_width_impl, num_bins=1001, smoothing_eps=0.0001)[source]#
    -

    Bases: Module

    +

    Bases: Module

    Based on: apache/incubator-mxnet

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -594,13 +654,13 @@

    Submodules
    class brevitas.core.stats.stats_op.L1Norm(stats_reduce_dim=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule implementation to collect per-channel L1 normalization stats for weight normalization-based quantization.

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -616,13 +676,13 @@

    Submodules
    class brevitas.core.stats.stats_op.L2Norm(stats_reduce_dim=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    ScriptModule implementation to collect per-channel L2 normalization stats for weight normalization-based quantization.

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -638,7 +698,7 @@

    Submodules
    class brevitas.core.stats.stats_op.MSE(proxy_module, mse_init_op, inner_stats_input_view_shape_impl, stats_reduce_dim=None, mse_search_method='fibonacci', mse_iters=20)[source]#
    -

    Bases: Module

    +

    Bases: Module

    evaluate_loss(x, candidate)[source]#
    @@ -647,7 +707,7 @@
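The MSE module searches for the quantization parameter candidate with the lowest mean squared reconstruction error; evaluate_loss computes the error of a single candidate, and the search loop (fibonacci by default) drives it. A hedged sketch with a plain grid search substituted for the fibonacci method (helper names hypothetical):

import torch

def mse_scale_search_sketch(x, quant_fn, base_scale, iters=20):
    # Try candidate scales around base_scale; keep the one minimizing the MSE.
    best_scale, best_loss = base_scale, float("inf")
    for k in range(iters):
        candidate = base_scale * (0.5 + 0.5 * k / max(iters - 1, 1))
        x_q = quant_fn(x, candidate)       # quantize-dequantize at this candidate
        loss = torch.mean((x - x_q) ** 2)  # what evaluate_loss measures per candidate
        if loss < best_loss:
            best_scale, best_loss = candidate, loss
    return best_scale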

    Submodules
    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -683,11 +743,11 @@

    Submodules
    class brevitas.core.stats.stats_op.MeanLearnedSigmaStd(sigma, stats_output_shape, stats_reduce_dim=None, std_dev_epsilon=1e-08, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -703,11 +763,11 @@

    Submodules
    class brevitas.core.stats.stats_op.MeanSigmaStd(sigma, stats_reduce_dim=None, std_dev_epsilon=1e-08, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

    Should be overridden by all subclasses.

    Note

    @@ -723,13 +783,13 @@

    Submodules
    class brevitas.core.stats.stats_op.NegativeMinOrZero(stats_reduce_dim=None, dtype=None, device=None, keepdim=False)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -744,13 +804,13 @@

    Submodules
    class brevitas.core.stats.stats_op.NegativePercentileOrZero(low_percentile_q, stats_reduce_dim=None, dtype=None, device=None, keepdim=False)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -765,13 +825,13 @@

    Submodules
    class brevitas.core.stats.stats_op.PercentileInterval(low_percentile_q, high_percentile_q, stats_reduce_dim=None, keepdim=False, dtype=None, device=None)[source]#
    -

    Bases: Module

    +

    Bases: Module

    forward(x)[source]#
    -

    Defines the computation performed at every call.

    +

    Define the computation performed at every call.

Should be overridden by all subclasses.
-:rtype: Tensor

+:rtype: Tensor

    Note

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

@@ -783,6 +843,40 @@

    Submodules +
    +brevitas.core.stats.stats_op.masked_median(x, mask, dim=None, keepdim=False)[source]#
    +

Compute the median of tensor x along dim, ignoring values where mask is False. x and mask need to be broadcastable.

    +
    +
    Parameters:
    +
      +
    • x (Tensor) – Tensor to compute median of.

    • +
• mask (BoolTensor) – Same shape as x with True where x is valid and False where x should be masked. Mask should not be all False in any column of dimension dim to avoid NaNs from zero division.

    • +
• dim (int, optional) – Dimension to take the median of. Default: None.

    • +
    +
    +
    Returns:
    +

    Same shape as x, except dimension dim reduced.

    +
    +
    Return type:
    +

    Tensor
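A small usage sketch (illustrative):

>>> import torch
>>> from brevitas.core.stats.stats_op import masked_median
>>> x = torch.tensor([3.0, 1.0, 100.0, 2.0])
>>> mask = torch.tensor([True, True, False, True])
>>> masked_median(x, mask)  # 100.0 is masked out; median of [3., 1., 2.]
tensor(2.)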

    +
    +
    +

    + +
    +
    +brevitas.core.stats.stats_op.shrink_lp_op(x, beta, lp_norm)[source]#
    +
    +
    Return type:
    +

    Tensor
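shrink_lp_op generalizes soft-thresholding to Lp norms and supplies the proximal step used by the half-quadratic optimizers above. A hedged sketch of the computation, following the HQQ formulation this helper derives from (an assumption, not a verbatim copy of the Brevitas source):

import torch
import torch.nn.functional as F

def shrink_lp_sketch(x, beta, lp_norm):
    # lp_norm == 1 recovers classic soft-thresholding; lp_norm < 1 shrinks more aggressively.
    if lp_norm == 1:
        return torch.sign(x) * F.relu(torch.abs(x) - 1.0 / beta)
    return torch.sign(x) * F.relu(torch.abs(x) - (1.0 / beta) * torch.pow(torch.abs(x), lp_norm - 1))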

    +
    +
    +
    +

    brevitas.core.stats.stats_wrapper module#

    @@ -864,6 +958,18 @@

Submodules

• AbsPercentile.forward()
• + HalfQuadraticOptimizerScale
• + HalfQuadraticOptimizerZeroPoint
• KLMinimizerThreshold
• + masked_median()
• + shrink_lp_op()
• brevitas.core.stats.stats_wrapper module

diff --git a/docs/api_reference/brevitas.function.html b/docs/api_reference/brevitas.function.html
index 0a7b9e7ec..143a5e932 100644
--- a/docs/api_reference/brevitas.function.html
+++ b/docs/api_reference/brevitas.function.html
@@ -9,7 +9,7 @@
-    brevitas.function package — Brevitas 0.10.2 documentation
+    brevitas.function package — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

    @@ -496,7 +496,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -535,8 +535,8 @@

    Submodules
    Parameters:
      -
    • signed (bool) – Indicates whether the represented integer is signed or not.

    • -
    • narrow_range (bool) – Indicates whether to narrow the maximum unsigned value represented by 1.

    • +
    • signed (bool) – Indicates whether the represented integer is signed or not.

    • +
    • narrow_range (bool) – Indicates whether to narrow the maximum unsigned value represented by 1.

    • bit_width (Tensor) – Number of bits available for the representation.

    @@ -567,8 +567,8 @@

    Submodules
    Parameters:
      -
    • signed (bool) – Indicates whether the represented integer is signed or not.

    • -
    • narrow_range (bool) – Indicates whether to narrow the minimum value represented by 1.

    • +
    • signed (bool) – Indicates whether the represented integer is signed or not.

    • +
    • narrow_range (bool) – Indicates whether to narrow the minimum value represented by 1.

    • bit_width (Tensor) – Number of bits available for the representation.
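For intuition, these bounds follow the usual two's-complement conventions; worked values for bit_width=8 (stated as expected behaviour rather than captured output):

>>> import torch
>>> from brevitas.function.ops import max_int, min_int
>>> bw = torch.tensor(8.)
>>> max_int(signed=True, narrow_range=False, bit_width=bw)   # 2**(8-1) - 1
tensor(127.)
>>> min_int(signed=True, narrow_range=False, bit_width=bw)   # -2**(8-1)
tensor(-128.)
>>> min_int(signed=True, narrow_range=True, bit_width=bw)    # narrow range drops -128
tensor(-127.)
>>> max_int(signed=False, narrow_range=False, bit_width=bw)  # 2**8 - 1
tensor(255.)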

    @@ -621,9 +621,9 @@

    Submodules
    Parameters:
      -
    • x (Tensor) – Input on which to apply the clamp operation

    • -
    • min_val (Tensor) – Minimum values for the clamp operation.

    • -
    • max_val (Tensor) – Maximum values for the clamp operation.

    • +
    • x (Tensor) – Input on which to apply the clamp operation

    • +
    • min_val (Tensor) – Minimum values for the clamp operation.

    • +
    • max_val (Tensor) – Maximum values for the clamp operation.

    @@ -633,7 +633,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Returns:

    Input x clamped between the provided minimum and maximum tensors.
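Unlike torch.clamp with scalar bounds, tensor_clamp takes elementwise bounds. A quick illustrative sketch:

>>> import torch
>>> from brevitas.function import tensor_clamp
>>> x = torch.tensor([-2.0, 0.5, 3.0])
>>> min_val = torch.tensor([-1.0, 0.0, 0.0])
>>> max_val = torch.tensor([1.0, 1.0, 2.0])
>>> tensor_clamp(x, min_val=min_val, max_val=max_val)
tensor([-1.0000,  0.5000,  2.0000])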

    @@ -653,7 +653,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    @@ -670,11 +670,11 @@

    Submodules
    brevitas.function.ops_ste.abs_binary_sign_grad(x)[source]#
    -

Function that implements torch.abs() with a binary-sign backward, in order to have subgradient 1 in 0. Compare with torch.abs()’ subgradient of 0 in 0.

    +

Function that implements torch.abs() with a binary-sign backward, in order to have subgradient 1 in 0. Compare with torch.abs()’ subgradient of 0 in 0.

    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -701,7 +701,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -724,10 +724,10 @@

    Submodules
    brevitas.function.ops_ste.ceil_ste(x)[source]#
    -

    Function that implements torch.ceil() with a straight-through gradient estimator.

    +

    Function that implements torch.ceil() with a straight-through gradient estimator.

    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -753,7 +753,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -776,10 +776,10 @@

    Submodules
    brevitas.function.ops_ste.floor_ste(x)[source]#
    -

    Function that implements torch.floor() with a straight-through gradient estimator.

    +

    Function that implements torch.floor() with a straight-through gradient estimator.

    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -801,10 +801,10 @@

    Submodules
    brevitas.function.ops_ste.round_ste(x)[source]#
    -

    Function that implements torch.round() with a straight-through gradient estimator.

    +

    Function that implements torch.round() with a straight-through gradient estimator.

    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -830,7 +830,7 @@
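The straight-through pattern shared by these ops: a non-differentiable forward (here, rounding) paired with an identity backward, so gradients reach the pre-rounding input. A hedged demo (values follow from the shown input):

>>> import torch
>>> from brevitas.function.ops_ste import round_ste
>>> x = torch.tensor([1.2, -0.7], requires_grad=True)
>>> y = round_ste(x)
>>> y.sum().backward()
>>> x.grad  # identity gradient, as if no rounding had happened
tensor([1., 1.])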

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -859,8 +859,8 @@

    Submodules
    Parameters:
      -
    • x (Tensor) – input tensor to clamp.

    • -
    • min_val (float) – scalar value to use as lower bound for the input tensor.

    • +
    • x (Tensor) – input tensor to clamp.

    • +
    • min_val (float) – scalar value to use as lower bound for the input tensor.

    Returns:
    @@ -890,15 +890,15 @@

    Submodules
    brevitas.function.ops_ste.scalar_clamp_ste(x, min_val, max_val)[source]#
    -

    Function that implements torch.clamp() with a straight-through gradient estimator +

    Function that implements torch.clamp() with a straight-through gradient estimator for the gradient of the output w.r.t. to x, while the gradient of y w.r.t. to min_val and max_val is always None.

    Parameters:
      -
    • x (Tensor) – input tensor to clamp.

    • -
    • min_val (float) – scalar value to use as lower bound for the input tensor.

    • -
    • max_val (float) – scalar value to use as upper bound for the input tensor.

    • +
    • x (Tensor) – input tensor to clamp.

    • +
    • min_val (float) – scalar value to use as lower bound for the input tensor.

    • +
    • max_val (float) – scalar value to use as upper bound for the input tensor.

    Returns:
    @@ -933,7 +933,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -961,7 +961,7 @@

    Submodules
    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -985,10 +985,10 @@

    Submodules
    brevitas.function.ops_ste.ternary_sign_ste(x)[source]#
    -

    Function that implements torch.sign() with a straight-through gradient estimator.

    +

    Function that implements torch.sign() with a straight-through gradient estimator.

    Return type:
    -

    Tensor

    +

    Tensor

    Notes

    @@ -1043,7 +1043,7 @@

    Submodules

    x (Tensor) – Input tensor with batches at dimension 0.

    Return type:
    -

    Tuple[int, int]

    +

    Tuple[int, int]

    Returns:

    A tuple containing the 2-dim shape.
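The signature itself is elided by this hunk, but the entry matches the over-batch family of shape helpers, which reduce an input to a 2-dim view shape of (batch, -1). Hedged sketch (assuming the over_batch_over_tensor helper from this module):

>>> import torch
>>> from brevitas.function.shape import over_batch_over_tensor
>>> x = torch.randn(4, 3, 8, 8)
>>> over_batch_over_tensor(x)  # keep the batch dim, flatten everything else
(4, -1)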

    @@ -1065,7 +1065,7 @@

    Submodules
    Return type:
    -

    Tuple[int, int]

    +

    Tuple[int, int]

    Returns:

    A tuple containing the 2-dim shape.

    @@ -1107,7 +1107,7 @@

    Submodules

    x (Tensor) – Input tensor.

    Return type:
    -

    int

    +

    int

    Returns:

    The number -1 corresponding to a flat shape.

diff --git a/docs/api_reference/brevitas.ops.html b/docs/api_reference/brevitas.ops.html
index e7dbd7ca2..33ceb49f3 100644
--- a/docs/api_reference/brevitas.ops.html
+++ b/docs/api_reference/brevitas.ops.html
@@ -9,7 +9,7 @@
-    brevitas.ops package — Brevitas 0.10.2 documentation
+    brevitas.ops package — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

    @@ -445,9 +445,9 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.AbsBinarySignGradFn(*args, **kwargs)[source]#
    -

    Bases: Function

    -

Autograd function that implements torch.abs() with a binary-sign backward, in order to have subgradient 1 in 0. Compare with torch.abs()’ subgradient of 0 in 0.

    +

    Bases: Function

    +

Autograd function that implements torch.abs() with a binary-sign backward, in order to have subgradient 1 in 0. Compare with torch.abs()’ subgradient of 0 in 0.

    AbsBinarySignGradFn.apply(*args) is first aliased to abs_binary_sign_grad(*args) and then wrapped by abs_binary_sign_grad() when env BREVITAS_JIT=0. See abs_binary_sign_grad() for details on the interface and @@ -457,7 +457,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.BinarySignSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements binary_sign() with a straight-through gradient estimator.

    BinarySignSteFn.apply(*args) is first aliased to @@ -470,8 +470,8 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.CeilSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    -

    Autograd function that implements torch.ceil() with a straight-through gradient estimator.

    +

    Bases: Function

    +

    Autograd function that implements torch.ceil() with a straight-through gradient estimator.

    CeilSteFn.apply(*args) is first aliased to ceil_ste_impl(*args) and then wrapped by ceil_ste() when env BREVITAS_JIT=0. See ceil_ste() for details on the interface and @@ -481,7 +481,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.DPURoundSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements dpu_round() with a straight-through gradient estimator.

    DPURoundSteFn.apply(*args) is first aliased to dpu_round_ste_impl(*args) and then wrapped by @@ -493,8 +493,8 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.FloorSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    -

    Autograd function that implements torch.floor() with a straight-through gradient estimator.

    +

    Bases: Function

    +

    Autograd function that implements torch.floor() with a straight-through gradient estimator.

    FloorSteFn.apply(*args) is first aliased to floor_ste_impl(*args) and then wrapped by floor_ste() when env BREVITAS_JIT=0. See floor_ste() for details on the interface and @@ -504,7 +504,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.InplaceTensorClampSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements tensor_clamp_() with a straight-through gradient estimator for the gradient of y w.r.t. to x, while the gradient of y w.r.t. to min_val and max_val is always None.

    @@ -518,8 +518,8 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.RoundSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    -

    Autograd function that implements torch.round() with a straight-through gradient +

    Bases: Function

    +

    Autograd function that implements torch.round() with a straight-through gradient estimator.

    RoundSteFn.apply(*args) is first aliased to round_ste_impl(*args) and then wrapped by round_ste() when env BREVITAS_JIT=0. @@ -529,7 +529,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.RoundToZeroSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements round_to_zero() with a straight-through gradient estimator.

    RoundToZeroSteFn.apply(*args) is first aliased to round_to_zero_ste_impl(*args) and then wrapped by @@ -541,7 +541,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.ScalarClampMinSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements torch.clamp_min with a straight-through gradient estimator for the gradient of y w.r.t. to x, while the gradient of y w.r.t. to min_val is always None.

    @@ -554,7 +554,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.ScalarClampSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements torch.clamp with a straight-through gradient estimator for the gradient of y w.r.t. to x, while the gradient of y w.r.t. to min_val and min_val are always None.

    @@ -567,7 +567,7 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.TensorClampSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    +

    Bases: Function

    Autograd function that implements tensor_clamp() with a straight-through gradient estimator for the gradient of y w.r.t. to x, while the gradient of y w.r.t. to min_val and max_val is always None.

    @@ -580,8 +580,8 @@

    Submodules
    class brevitas.ops.autograd_ste_ops.TernarySignSteFn(*args, **kwargs)[source]#
    -

    Bases: Function

    -

    Autograd function that implements torch.sign() with a straight-through gradient estimator.

    +

    Bases: Function

    +

    Autograd function that implements torch.sign() with a straight-through gradient estimator.

    TernarySignSteFn.apply(*args) is first aliased to ternary_sign_ste_impl(*args) and then wrapped by ternary_sign_ste() when env BREVITAS_JIT=0. See ternary_sign_ste() for details on the interface and @@ -590,67 +590,67 @@

    Submodules
    -brevitas.ops.autograd_ste_ops.abs_binary_sign_grad_impl()#
    +brevitas.ops.autograd_ste_ops.abs_binary_sign_grad_impl(*args, **kwargs)#

    Alias for AbsBinarySignGradFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.binary_sign_ste_impl()#
    +brevitas.ops.autograd_ste_ops.binary_sign_ste_impl(*args, **kwargs)#

    Alias for BinarySignSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.ceil_ste_impl()#
    +brevitas.ops.autograd_ste_ops.ceil_ste_impl(*args, **kwargs)#

    Alias for CeilSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.dpu_round_ste_impl()#
    +brevitas.ops.autograd_ste_ops.dpu_round_ste_impl(*args, **kwargs)#

    Alias for DPURoundSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.floor_ste_impl()#
    +brevitas.ops.autograd_ste_ops.floor_ste_impl(*args, **kwargs)#

    Alias for FloorSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.round_ste_impl()#
    +brevitas.ops.autograd_ste_ops.round_ste_impl(*args, **kwargs)#

    Alias for RoundSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.round_to_zero_ste_impl()#
    +brevitas.ops.autograd_ste_ops.round_to_zero_ste_impl(*args, **kwargs)#

    Alias for RoundToZeroSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.scalar_clamp_min_ste_impl()#
    +brevitas.ops.autograd_ste_ops.scalar_clamp_min_ste_impl(*args, **kwargs)#

    Alias for ScalarClampMinSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.scalar_clamp_ste_impl()#
    +brevitas.ops.autograd_ste_ops.scalar_clamp_ste_impl(*args, **kwargs)#

    Alias for ScalarClampSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.tensor_clamp_ste_impl()#
    +brevitas.ops.autograd_ste_ops.tensor_clamp_ste_impl(*args, **kwargs)#

    Alias for TensorClampSteFn.apply(*args)

    -brevitas.ops.autograd_ste_ops.ternary_sign_ste_impl()#
    +brevitas.ops.autograd_ste_ops.ternary_sign_ste_impl(*args, **kwargs)#

    Alias for TernarySignSteFn.apply(*args)
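All of these *_impl symbols follow one idiom: define the torch.autograd.Function once, alias its .apply, then wrap the alias when env BREVITAS_JIT=0. A hedged, simplified sketch of that idiom (not the verbatim Brevitas source):

import torch

class RoundSteFnSketch(torch.autograd.Function):
    # Round on the forward pass, identity on the backward pass (straight-through).

    @staticmethod
    def forward(ctx, x):
        return torch.round(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output  # pass the upstream gradient through unchanged

round_ste_impl_sketch = RoundSteFnSketch.apply  # alias, mirroring e.g. round_ste_impl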

diff --git a/docs/api_reference/index.html b/docs/api_reference/index.html
index d7a42f0f1..d4b7ecd58 100644
--- a/docs/api_reference/index.html
+++ b/docs/api_reference/index.html
@@ -9,7 +9,7 @@
-    API reference — Brevitas 0.10.2 documentation
+    API reference — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

diff --git a/docs/architecture.html b/docs/architecture.html
index 452d6b035..ab4853118 100644
--- a/docs/architecture.html
+++ b/docs/architecture.html
@@ -9,7 +9,7 @@
-    Architecture — Brevitas 0.10.2 documentation
+    Architecture — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

diff --git a/docs/faq.html b/docs/faq.html
index afd6d724c..0f2006b35 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -9,7 +9,7 @@
-    F.A.Q. — Brevitas 0.10.2 documentation
+    F.A.Q. — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

diff --git a/docs/genindex.html b/docs/genindex.html
index ac066b5e1..9e835720a 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -9,7 +9,7 @@
-    Index — Brevitas 0.10.2 documentation
+    Index — Brevitas 0.11.0 documentation
@@ -126,8 +126,8 @@
-    Brevitas 0.10.2 documentation - Home
+    Brevitas 0.11.0 documentation - Home

    @@ -389,6 +389,7 @@

    Index

| E | F | G + | H | I | K | L

@@ -671,14 +672,24 @@

    C

  • CeilSte (class in brevitas.core.function_wrapper.ops_ste)

    D

  • DelayWrapper (class in brevitas.core.quant.delay)

    E

    F

    +

    H

    + + + +
    +

    I