From 8a8c05c1dec8e46b7cb5464234b2c0a59cf5862e Mon Sep 17 00:00:00 2001
From: UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
Date: Fri, 30 Aug 2024 11:35:35 -0400
Subject: [PATCH 1/2] removed audio normalization that isn't required

---
 pywhispercpp/model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pywhispercpp/model.py b/pywhispercpp/model.py
index be73b94..089dd81 100644
--- a/pywhispercpp/model.py
+++ b/pywhispercpp/model.py
@@ -273,7 +273,6 @@ def _load_audio(media_file_path: str) -> np.array:
         sound = AudioSegment.from_file(media_file_path)
         sound = sound.set_frame_rate(constants.WHISPER_SAMPLE_RATE).set_channels(1)
         arr = np.array(sound.get_array_of_samples()).T.astype(np.float32)
-        arr /= np.iinfo(samples[0].typecode).max
         return arr
 
     def __del__(self):

From ee19adc4a29ebb48ae87c103a5e9f4603f2a69c4 Mon Sep 17 00:00:00 2001
From: UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
Date: Fri, 30 Aug 2024 20:49:00 -0400
Subject: [PATCH 2/2] Re-add normalization to prevent UTF-8 errors

---
 pywhispercpp/model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pywhispercpp/model.py b/pywhispercpp/model.py
index 089dd81..8742bba 100644
--- a/pywhispercpp/model.py
+++ b/pywhispercpp/model.py
@@ -273,6 +273,7 @@ def _load_audio(media_file_path: str) -> np.array:
         sound = AudioSegment.from_file(media_file_path)
         sound = sound.set_frame_rate(constants.WHISPER_SAMPLE_RATE).set_channels(1)
         arr = np.array(sound.get_array_of_samples()).T.astype(np.float32)
+        arr /= np.iinfo(np.int16).max
         return arr
 
     def __del__(self):