Fix (example/llm): disable embedded lookup quantization
nickfraser committed Aug 19, 2024
1 parent: b46a001 · commit: 8b18edc
Showing 1 changed file with 1 addition and 3 deletions.
src/brevitas_examples/llm/main.py (4 changes: 1 addition & 3 deletions)
@@ -145,8 +145,6 @@
     help='Group size for per_group input quantization. Default: 64.')
 parser.add_argument(
     '--quantize-input-zero-point', action='store_true', help='Quantize input zero-point.')
-parser.add_argument(
-    '--quantize-embedding', action='store_true', help='Quantize first nn.Embedding layer.')
 parser.add_argument(
     '--quantize-last-layer', action='store_true', help='Quantize last nn.Linear layer.')
 parser.add_argument('--gptq', action='store_true', help='Apply GPTQ.')
@@ -403,7 +401,7 @@ def main():
             dtype=dtype,
             device=device,
             input_quant_format=args.input_quant_format,
-            quantize_embedding=args.quantize_embedding)
+            quantize_embedding=False)
         if not args.quantize_last_layer:
             name_blacklist += ["lm_head"]
         model = layerwise_quantize(
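Net effect of the change: the example script no longer exposes embedding quantization at all. The --quantize-embedding flag is gone and quantize_embedding is hard-wired to False, so the first nn.Embedding stays in full precision while nn.Linear layers are quantized as before. The sketch below is not Brevitas code; it is a stand-alone toy fake-quantization pass in plain PyTorch, with invented helper names (fake_quantize_weights, toy_layerwise_quantize), shown only to illustrate what skipping the embedding lookup means in a layerwise scheme.

# Toy illustration only (plain PyTorch, invented names). Brevitas applies its
# own quantizers; this just mirrors the quantize_embedding=False behaviour.
import torch
import torch.nn as nn


def fake_quantize_weights(module: nn.Module, num_bits: int = 8) -> None:
    """Symmetric per-tensor fake quantization of a module's weights, in place."""
    w = module.weight.data
    qmax = 2 ** (num_bits - 1) - 1
    scale = w.abs().max() / qmax
    module.weight.data = (w / scale).round().clamp(-qmax - 1, qmax) * scale


def toy_layerwise_quantize(model: nn.Module, quantize_embedding: bool = False) -> nn.Module:
    for module in model.modules():
        if isinstance(module, nn.Linear):
            fake_quantize_weights(module)
        elif isinstance(module, nn.Embedding) and quantize_embedding:
            # After this commit the example never takes this branch.
            fake_quantize_weights(module)
    return model


# Tiny stand-in for an LLM: embedding lookup followed by a projection.
toy = nn.Sequential(nn.Embedding(100, 16), nn.Linear(16, 100))
toy_layerwise_quantize(toy, quantize_embedding=False)  # embedding left untouched

Note that anyone who previously passed --quantize-embedding to the example will now get an argparse error, since the flag was removed along with the behaviour.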
