From ac48ecabe08b6ee3c517c01e127211cb5515b2c2 Mon Sep 17 00:00:00 2001
From: meiravgri
Date: Tue, 30 Jul 2024 05:14:45 +0000
Subject: [PATCH] handle out of range values in scalar quantization by clipping

---
 sentence_transformers/quantization.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sentence_transformers/quantization.py b/sentence_transformers/quantization.py
index 37402cae7..7decc7652 100644
--- a/sentence_transformers/quantization.py
+++ b/sentence_transformers/quantization.py
@@ -423,9 +423,12 @@ def quantize_embeddings(
         steps = (ranges[1, :] - ranges[0, :]) / 255
 
         if precision == "uint8":
-            return ((embeddings - starts) / steps).astype(np.uint8)
+            q_vals = np.floor((embeddings - starts) / steps)
+            return np.clip(q_vals, 0, 255).astype(np.uint8)
         elif precision == "int8":
-            return ((embeddings - starts) / steps - 128).astype(np.int8)
+            q_vals = np.floor((embeddings - starts) / steps)
+            q_vals = np.clip(q_vals, 0, 255)
+            return (q_vals - 128).astype(np.int8)
 
     if precision == "binary":
         return (np.packbits(embeddings > 0).reshape(embeddings.shape[0], -1) - 128).astype(np.int8)