From 8a6986084a024b281c0ccc33cd999258b2eeb58f Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Sun, 23 Apr 2023 16:37:44 -0600
Subject: [PATCH 1/6] splitting logic into methods for cleaner code

---
 README.md                |   0
 demo.gif                 | Bin
 package_requirements.txt |   2 +
 requirements.txt         |   0
 transcribe_demo.py       | 147 +++++++++++++++++++++++++--------------
 5 files changed, 96 insertions(+), 53 deletions(-)
 mode change 100644 => 100755 README.md
 mode change 100644 => 100755 demo.gif
 create mode 100644 package_requirements.txt
 mode change 100644 => 100755 requirements.txt
 mode change 100644 => 100755 transcribe_demo.py

diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/demo.gif b/demo.gif
old mode 100644
new mode 100755
diff --git a/package_requirements.txt b/package_requirements.txt
new file mode 100644
index 0000000..4450128
--- /dev/null
+++ b/package_requirements.txt
@@ -0,0 +1,2 @@
+sudo apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0
+sudo apt install ffmpeg
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
diff --git a/transcribe_demo.py b/transcribe_demo.py
old mode 100644
new mode 100755
index 6dd8972..ce0ef34
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -14,8 +14,7 @@
 from sys import platform
 
 
-def main():
-    parser = argparse.ArgumentParser()
+def parser_validation(parser):
     parser.add_argument("--model", default="medium", help="Model to use",
                         choices=["tiny", "base", "small", "medium", "large"])
     parser.add_argument("--non_english", action='store_true',
@@ -32,6 +31,73 @@ def main():
                             help="Default microphone name for SpeechRecognition. "
                                  "Run this with 'list' to view available Microphones.", type=str)
     args = parser.parse_args()
+    return args
+
+def get_microphone_device_index(mic_name):
+    # If this is not a Linux system, return None
+    if 'linux' not in platform:
+        return None
+    # If the list was requested, print it and exit the program
+    if not mic_name or mic_name == 'list':
+        print("Available microphone devices are: ")
+        for index, name in enumerate(sr.Microphone.list_microphone_names()):
+            print(f"Microphone with name \"{name}\" found")
+        exit()
+    # Otherwise return the index of the first matching microphone, or None
+    device_index = None
+    for index, name in enumerate(sr.Microphone.list_microphone_names()):
+        if mic_name in name:
+            device_index = index
+            break
+    return device_index
+
+def load_model(args):
+    ONLY_ENGLISH = False
+    model = args.model
+    if args.model != "large" and not args.non_english and ONLY_ENGLISH:
+        model = model + ".en"
+    return whisper.load_model(model)
+
+def result_transcription_handler(result, transcription, has_silence_timeout):
+    text = result['text'].strip()
+    # If we detected a pause between recordings, add a new item to our transcription.
+    # Otherwise edit the existing one.
+    if has_silence_timeout:
+        transcription.append(text)
+    else:
+        transcription[-1] = text
+    return transcription
+
+def show_transcription(transcription):
+    # Clear the console to reprint the updated transcription.
+    os.system('cls' if os.name=='nt' else 'clear')
+    for line in transcription:
+        print(line)
+    # Flush stdout.
+    print('', end='', flush=True)
+
+def write_temp_audio_file(temp_file, wav_data):
+    # Write wav data to the temporary file as bytes.
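+    # 'w+b' truncates on open, so the file always holds exactly the current phrase's audio.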
+    with open(temp_file, 'w+b') as f:
+        f.write(wav_data.read())
+
+def concat_data_to_current_audio(last_sample, data_queue):
+    while not data_queue.empty():
+        data = data_queue.get()
+        last_sample += data
+    return last_sample
+
+def silence_time_is_up(silence_timeout, phrase_time):
+    now = datetime.utcnow()
+    has_silence_timeout = False
+    if phrase_time is None: return has_silence_timeout
+    elapsed_time_delta = now - phrase_time
+    if phrase_time and elapsed_time_delta > timedelta(seconds=silence_timeout):
+        has_silence_timeout = True
+    return has_silence_timeout
+
+def main():
+    args = parser_validation(argparse.ArgumentParser())
 
     # The last time a recording was retrieved from the queue.
     phrase_time = None
@@ -47,33 +113,17 @@ def main():
 
     # Important for linux users.
     # Prevents permanent application hang and crash by using the wrong Microphone
-    if 'linux' in platform:
-        mic_name = args.default_microphone
-        if not mic_name or mic_name == 'list':
-            print("Available microphone devices are: ")
-            for index, name in enumerate(sr.Microphone.list_microphone_names()):
-                print(f"Microphone with name \"{name}\" found")
-            return
-        else:
-            for index, name in enumerate(sr.Microphone.list_microphone_names()):
-                if mic_name in name:
-                    source = sr.Microphone(sample_rate=16000, device_index=index)
-                    break
-    else:
-        source = sr.Microphone(sample_rate=16000)
+    device_index = get_microphone_device_index(args.default_microphone)
 
     # Load / Download model
-    model = args.model
-    if args.model != "large" and not args.non_english:
-        model = model + ".en"
-    audio_model = whisper.load_model(model)
+    audio_model = load_model(args)
 
     record_timeout = args.record_timeout
-    phrase_timeout = args.phrase_timeout
+    silence_timeout = args.phrase_timeout
 
     temp_file = NamedTemporaryFile().name
-    transcription = ['']
 
+    source = sr.Microphone(sample_rate=16000, device_index=device_index)
     with source:
         recorder.adjust_for_ambient_noise(source)
@@ -92,56 +142,47 @@ def record_callback(_, audio:sr.AudioData) -> None:
     # Cue the user that we're ready to go.
     print("Model loaded.\n")
-
+    # Start with an empty transcription
+    transcription = ['']
+    is_speaking = False
     while True:
         try:
-            now = datetime.utcnow()
             # Pull raw recorded audio from the queue.
             if not data_queue.empty():
-                phrase_complete = False
                 # If enough time has passed between recordings, consider the phrase complete.
                 # Clear the current working audio buffer to start over with the new data.
-                if phrase_time and now - phrase_time > timedelta(seconds=phrase_timeout):
-                    last_sample = bytes()
-                    phrase_complete = True
+                has_silence_timeout = silence_time_is_up(silence_timeout, phrase_time)
+                if has_silence_timeout: last_sample = bytes()
+
                 # This is the last time we received new audio data from the queue.
-                phrase_time = now
+                is_speaking = True
+                phrase_time = datetime.utcnow()
 
                 # Concatenate our current audio data with the latest audio data.
-                while not data_queue.empty():
-                    data = data_queue.get()
-                    last_sample += data
+                last_sample = concat_data_to_current_audio(last_sample, data_queue)
 
                 # Use AudioData to convert the raw data to wav data.
                 audio_data = sr.AudioData(last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH)
                 wav_data = io.BytesIO(audio_data.get_wav_data())
 
                 # Write wav data to the temporary file as bytes.
-                with open(temp_file, 'w+b') as f:
-                    f.write(wav_data.read())
+                write_temp_audio_file(temp_file, wav_data)
 
                 # Read the transcription.
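+                # Each pass re-transcribes the whole phrase so far; fp16 inference is requested only when CUDA is available.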
                 result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available())
-                text = result['text'].strip()
-
-                # If we detected a pause between recordings, add a new item to our transcription.
-                # Otherwise edit the existing one.
-                if phrase_complete:
-                    transcription.append(text)
-                else:
-                    transcription[-1] = text
-
-                # Clear the console to reprint the updated transcription.
-                os.system('cls' if os.name=='nt' else 'clear')
-                for line in transcription:
-                    print(line)
-                # Flush stdout.
-                print('', end='', flush=True)
-
-                # Infinite loops are bad for processors, must sleep.
-                sleep(0.25)
+                transcription = result_transcription_handler(result, transcription, has_silence_timeout)
+                show_transcription(transcription)
+
+            else:
+                if is_speaking and silence_time_is_up(silence_timeout, phrase_time):
+                    transcription[-1] = f"[Final]: {transcription[-1]}"
+                    show_transcription(transcription)
+                    is_speaking = False
+
         except KeyboardInterrupt:
             break
+        # Infinite loops are bad for processors, must sleep.
+        sleep(0.25)
 
     print("\n\nTranscription:")
     for line in transcription:
@@ -149,4 +190,4 @@ def record_callback(_, audio:sr.AudioData) -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From 71a9e7711fc883a00b9b8b5385e715751e4b21ab Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Sun, 23 Apr 2023 17:19:57 -0600
Subject: [PATCH 2/6] creating more files to split the logic

---
 audio_util.py           |  14 ++
 system_configuration.py |  74 ++++++++++
 transcribe_demo.py      | 313 +++++++++++++++++-----------------------
 3 files changed, 223 insertions(+), 178 deletions(-)
 create mode 100644 audio_util.py
 create mode 100644 system_configuration.py

diff --git a/audio_util.py b/audio_util.py
new file mode 100644
index 0000000..03fcb81
--- /dev/null
+++ b/audio_util.py
@@ -0,0 +1,14 @@
+class AudioUtil:
+
+    @staticmethod
+    def write_temp_audio_file(temp_file, wav_data):
+        # Write wav data to the temporary file as bytes.
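+        # wav_data is an in-memory BytesIO buffer; read() drains it into the file in one call.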
+        with open(temp_file, 'w+b') as f:
+            f.write(wav_data.read())
+
+    @staticmethod
+    def concat_data_to_current_audio(last_sample, data_queue):
+        while not data_queue.empty():
+            data = data_queue.get()
+            last_sample += data
+        return last_sample
\ No newline at end of file
diff --git a/system_configuration.py b/system_configuration.py
new file mode 100644
index 0000000..6ed6cb9
--- /dev/null
+++ b/system_configuration.py
@@ -0,0 +1,74 @@
+import argparse
+import speech_recognition as sr
+
+from sys import platform
+
+class ParserValues:
+    model: str
+    non_english: bool
+    energy_threshold: int
+    record_timeout: float
+    silence_timeout: float
+    default_microphone: str
+
+    def __init__(self, model, non_english, energy_threshold, record_timeout, silence_timeout, default_microphone):
+        self.model = model
+        self.non_english = non_english
+        self.energy_threshold = energy_threshold
+        self.record_timeout = record_timeout
+        self.silence_timeout = silence_timeout
+        self.default_microphone = default_microphone
+
+    @classmethod
+    def parser_validation(cls, parser):
+        parser.add_argument("--model", default="medium", help="Model to use",
+                            choices=["tiny", "base", "small", "medium", "large"])
+        parser.add_argument("--non_english", action='store_true',
+                            help="Don't use the English model.")
+        parser.add_argument("--energy_threshold", default=1000,
+                            help="Energy level for mic to detect.", type=int)
+        parser.add_argument("--record_timeout", default=2,
+                            help="How real-time the recording is, in seconds.", type=float)
+        parser.add_argument("--silence_timeout", default=3,
+                            help="How much empty space between recordings before we "
+                                 "consider it a new line in the transcription.", type=float)
+        if 'linux' in platform:
+            parser.add_argument("--default_microphone", default='pulse',
+                                help="Default microphone name for SpeechRecognition. "
+                                     "Run this with 'list' to view available Microphones.", type=str)
+        args = parser.parse_args()
+        return args
+
+    @classmethod
+    def fromSystemArguments(cls):
+        parser = argparse.ArgumentParser()
+        args = cls.parser_validation(parser)
+        return cls(
+            model=args.model,
+            non_english=args.non_english,
+            energy_threshold=args.energy_threshold,
+            record_timeout=args.record_timeout,
+            silence_timeout=args.silence_timeout,
+            default_microphone=getattr(args, 'default_microphone', None)  # the flag is only registered on Linux
+        )
+
+class AudioDeviceConfiguration:
+
+    @staticmethod
+    def get_microphone_device_index(mic_name):
+        # If this is not a Linux system, return None
+        if 'linux' not in platform:
+            return None
+        # If the list was requested, print it and exit the program
+        if not mic_name or mic_name == 'list':
+            print("Available microphone devices are: ")
+            for index, name in enumerate(sr.Microphone.list_microphone_names()):
+                print(f"Microphone with name \"{name}\" found")
+            exit()
+        # Otherwise return the index of the first matching microphone, or None
+        device_index = None
+        for index, name in enumerate(sr.Microphone.list_microphone_names()):
+            if mic_name in name:
+                device_index = index
+                break
+        return device_index
\ No newline at end of file
diff --git a/transcribe_demo.py b/transcribe_demo.py
index ce0ef34..e2f4b7e 100755
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -1,6 +1,5 @@
 #! python3.7
 
-import argparse
 import io
 import os
 import speech_recognition as sr
@@ -11,183 +10,141 @@
 from queue import Queue
 from tempfile import NamedTemporaryFile
 from time import sleep
-from sys import platform
-
-
-def parser_validation(parser):
-    parser.add_argument("--model", default="medium", help="Model to use",
-                        choices=["tiny", "base", "small", "medium", "large"])
-    parser.add_argument("--non_english", action='store_true',
-                        help="Don't use the english model.")
-    parser.add_argument("--energy_threshold", default=1000,
-                        help="Energy level for mic to detect.", type=int)
-    parser.add_argument("--record_timeout", default=2,
-                        help="How real time the recording is in seconds.", type=float)
-    parser.add_argument("--phrase_timeout", default=3,
-                        help="How much empty space between recordings before we "
-                             "consider it a new line in the transcription.", type=float)
-    if 'linux' in platform:
-        parser.add_argument("--default_microphone", default='pulse',
-                            help="Default microphone name for SpeechRecognition. "
-                                 "Run this with 'list' to view available Microphones.", type=str)
-    args = parser.parse_args()
-    return args
-
-def get_microphone_device_index(mic_name):
-    # If this is not a Linux system, return None
-    if 'linux' not in platform:
-        return None
-    # If the list was requested, print it and exit the program
-    if not mic_name or mic_name == 'list':
-        print("Available microphone devices are: ")
-        for index, name in enumerate(sr.Microphone.list_microphone_names()):
-            print(f"Microphone with name \"{name}\" found")
-        exit()
-    # Otherwise return the index of the first matching microphone, or None
-    device_index = None
-    for index, name in enumerate(sr.Microphone.list_microphone_names()):
-        if mic_name in name:
-            device_index = index
-            break
-    return device_index
-
-def load_model(args):
-    ONLY_ENGLISH = False
-    model = args.model
-    if args.model != "large" and not args.non_english and ONLY_ENGLISH:
-        model = model + ".en"
-    return whisper.load_model(model)
-
-def result_transcription_handler(result, transcription, has_silence_timeout):
-    text = result['text'].strip()
-    # If we detected a pause between recordings, add a new item to our transcription.
-    # Otherwise edit the existing one.
-    if has_silence_timeout:
-        transcription.append(text)
-    else:
-        transcription[-1] = text
-    return transcription
-
-def show_transcription(transcription):
-    # Clear the console to reprint the updated transcription.
-    os.system('cls' if os.name=='nt' else 'clear')
-    for line in transcription:
-        print(line)
-    # Flush stdout.
-    print('', end='', flush=True)
-
-def write_temp_audio_file(temp_file, wav_data):
-    # Write wav data to the temporary file as bytes.
-    # 'w+b' truncates on open, so the file always holds exactly the current phrase's audio.
-    with open(temp_file, 'w+b') as f:
-        f.write(wav_data.read())
-
-def concat_data_to_current_audio(last_sample, data_queue):
-    while not data_queue.empty():
-        data = data_queue.get()
-        last_sample += data
-    return last_sample
-
-def silence_time_is_up(silence_timeout, phrase_time):
-    now = datetime.utcnow()
-    has_silence_timeout = False
-    if phrase_time is None: return has_silence_timeout
-    elapsed_time_delta = now - phrase_time
-    if phrase_time and elapsed_time_delta > timedelta(seconds=silence_timeout):
-        has_silence_timeout = True
-    return has_silence_timeout
-
-def main():
-    args = parser_validation(argparse.ArgumentParser())
-
-    # The last time a recording was retrieved from the queue.
-    phrase_time = None
-    # Current raw audio bytes.
-    last_sample = bytes()
-    # Thread safe Queue for passing data from the threaded recording callback.
-    data_queue = Queue()
-    # We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
-    recorder = sr.Recognizer()
-    recorder.energy_threshold = args.energy_threshold
-    # Definitely do this, dynamic energy compensation lowers the energy threshold dramatically to a point where the SpeechRecognizer never stops recording.
-    recorder.dynamic_energy_threshold = False
-
-    # Important for linux users.
-    # Prevents permanent application hang and crash by using the wrong Microphone
-    device_index = get_microphone_device_index(args.default_microphone)
-
-    # Load / Download model
-    audio_model = load_model(args)
-
-    record_timeout = args.record_timeout
-    silence_timeout = args.phrase_timeout
-
-    temp_file = NamedTemporaryFile().name
-
-    source = sr.Microphone(sample_rate=16000, device_index=device_index)
-    with source:
-        recorder.adjust_for_ambient_noise(source)
-
-    def record_callback(_, audio:sr.AudioData) -> None:
-        """
-        Threaded callback function to receive audio data when recordings finish.
-        audio: An AudioData containing the recorded bytes.
-        """
-        # Grab the raw bytes and push it into the thread safe queue.
-        data = audio.get_raw_data()
-        data_queue.put(data)
-
-    # Create a background thread that will pass us raw audio bytes.
-    # We could do this manually but SpeechRecognizer provides a nice helper.
-    recorder.listen_in_background(source, record_callback, phrase_time_limit=record_timeout)
-
-    # Cue the user that we're ready to go.
-    print("Model loaded.\n")
-    # Start with an empty transcription
-    transcription = ['']
-    is_speaking = False
-    while True:
-        try:
-            # Pull raw recorded audio from the queue.
-            if not data_queue.empty():
-                # If enough time has passed between recordings, consider the phrase complete.
-                # Clear the current working audio buffer to start over with the new data.
-                has_silence_timeout = silence_time_is_up(silence_timeout, phrase_time)
-                if has_silence_timeout: last_sample = bytes()
-
-                # This is the last time we received new audio data from the queue.
-                is_speaking = True
-                phrase_time = datetime.utcnow()
-
-                # Concatenate our current audio data with the latest audio data.
-                last_sample = concat_data_to_current_audio(last_sample, data_queue)
-
-                # Use AudioData to convert the raw data to wav data.
-                audio_data = sr.AudioData(last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH)
-                wav_data = io.BytesIO(audio_data.get_wav_data())
-
-                # Write wav data to the temporary file as bytes.
-                write_temp_audio_file(temp_file, wav_data)
-
-                # Read the transcription.
-                # Each pass re-transcribes the whole phrase so far; fp16 inference is requested only when CUDA is available.
-                result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available())
-                transcription = result_transcription_handler(result, transcription, has_silence_timeout)
-                show_transcription(transcription)
-
-            else:
-                if is_speaking and silence_time_is_up(silence_timeout, phrase_time):
-                    transcription[-1] = f"[Final]: {transcription[-1]}"
-                    show_transcription(transcription)
-                    is_speaking = False
-
-        except KeyboardInterrupt:
-            break
-        # Infinite loops are bad for processors, must sleep.
-        sleep(0.25)
-
-    print("\n\nTranscription:")
-    for line in transcription:
-        print(line)
+from system_configuration import ParserValues, AudioDeviceConfiguration
+from audio_util import AudioUtil
+
+class SpeechHandler:
+    def __init__(self):
+        self.args = ParserValues.fromSystemArguments()
+        # The last time a recording was retrieved from the queue.
+        self.phrase_time = None
+        # Current raw audio bytes.
+        self.last_sample = bytes()
+        # Thread safe Queue for passing data from the threaded recording callback.
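+        # The background listener thread is the producer; execute() drains this queue on the main thread.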
+        self.data_queue = Queue()
+        # We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
+        self.recorder = sr.Recognizer()
+        self.recorder.energy_threshold = self.args.energy_threshold
+        # Definitely do this, dynamic energy compensation lowers the energy threshold dramatically to a point where the SpeechRecognizer never stops recording.
+        self.recorder.dynamic_energy_threshold = False
+
+        # Important for linux users.
+        # Prevents permanent application hang and crash by using the wrong Microphone
+        self.device_index = AudioDeviceConfiguration.get_microphone_device_index(self.args.default_microphone)
+
+        # Load / Download model
+        self.audio_model = self.load_model()
+
+        self.record_timeout = self.args.record_timeout
+        self.silence_timeout = self.args.silence_timeout
+
+        self.temp_file = NamedTemporaryFile().name
+
+        self.generate_audio_source()
+
+    def load_model(self):
+        args = self.args
+        ONLY_ENGLISH = False
+        model = args.model
+        if args.model != "large" and not args.non_english and ONLY_ENGLISH:
+            model = model + ".en"
+        return whisper.load_model(model)
+
+    def generate_audio_source(self):
+        self.source = sr.Microphone(sample_rate=16000, device_index=self.device_index)
+        with self.source:
+            self.recorder.adjust_for_ambient_noise(self.source)
+
+        def record_callback(_, audio:sr.AudioData) -> None:
+            """
+            Threaded callback function to receive audio data when recordings finish.
+            audio: An AudioData containing the recorded bytes.
+            """
+            # Grab the raw bytes and push it into the thread safe queue.
+            data = audio.get_raw_data()
+            self.data_queue.put(data)
+
+        # Create a background thread that will pass us raw audio bytes.
+        # We could do this manually but SpeechRecognizer provides a nice helper.
+        self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout)
+
+    def execute(self):
+        # Cue the user that we're ready to go.
+        print("Model loaded.\n")
+        # Start with an empty transcription
+        self.transcription = ['']
+        is_speaking = False
+        while True:
+            try:
+                # Pull raw recorded audio from the queue.
+                if not self.data_queue.empty():
+                    # If enough time has passed between recordings, consider the phrase complete.
+                    # Clear the current working audio buffer to start over with the new data.
+                    has_silence_timeout = self.silence_time_is_up()
+                    if has_silence_timeout: self.last_sample = bytes()
+
+                    # This is the last time we received new audio data from the queue.
+                    is_speaking = True
+                    self.phrase_time = datetime.utcnow()
+
+                    # Concatenate our current audio data with the latest audio data.
+                    self.last_sample = AudioUtil.concat_data_to_current_audio(self.last_sample, self.data_queue)
+
+                    # Use AudioData to convert the raw data to wav data.
+                    audio_data = sr.AudioData(self.last_sample, self.source.SAMPLE_RATE, self.source.SAMPLE_WIDTH)
+                    wav_data = io.BytesIO(audio_data.get_wav_data())
+
+                    # Write wav data to the temporary file as bytes.
+                    AudioUtil.write_temp_audio_file(self.temp_file, wav_data)
+
+                    # Read the transcription.
+                    result = self.audio_model.transcribe(self.temp_file, fp16=torch.cuda.is_available())
+                    self.transcription = self.result_transcription_handler(result, has_silence_timeout)
+                    self.show_transcription()
+
+                else:
+                    if is_speaking and self.silence_time_is_up():
+                        self.transcription[-1] = f"[Final]: {self.transcription[-1]}"
+                        self.show_transcription()
+                        is_speaking = False
+
+            except KeyboardInterrupt:
+                break
+            # Infinite loops are bad for processors, must sleep.
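+            # 0.25 s keeps the polling cadence of the original demo.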
+            sleep(0.25)
+
+        print("\n\nTranscription:")
+        for line in self.transcription:
+            print(line)
+
+    def silence_time_is_up(self):
+        silence_timeout = self.silence_timeout
+        phrase_time = self.phrase_time
+        if phrase_time is None: return False
+        now = datetime.utcnow()
+        elapsed_time_delta = now - phrase_time
+        has_silence_timeout = phrase_time and elapsed_time_delta > timedelta(seconds=silence_timeout)
+        return has_silence_timeout
+
+    def result_transcription_handler(self, result, has_silence_timeout):
+        text = result['text'].strip()
+        # If we detected a pause between recordings, add a new item to our transcription.
+        # Otherwise edit the existing one.
+        if has_silence_timeout:
+            self.transcription.append(text)
+        else:
+            self.transcription[-1] = text
+        return self.transcription
+
+    def show_transcription(self):
+        # Clear the console to reprint the updated transcription.
+        os.system('cls' if os.name=='nt' else 'clear')
+        for line in self.transcription:
+            print(line)
+        # Flush stdout.
+        print('', end='', flush=True)
 
 
 if __name__ == "__main__":
-    main()
+    speechHandler = SpeechHandler()
+    speechHandler.execute()

From 9118ecc548cfda63f4851ed03e73cfdce067b723 Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Sun, 23 Apr 2023 20:33:43 -0600
Subject: [PATCH 3/6] formatting init

---
 transcribe_demo.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/transcribe_demo.py b/transcribe_demo.py
index e2f4b7e..d7ca43b 100755
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -18,13 +18,16 @@ def __init__(self):
         self.args = ParserValues.fromSystemArguments()
         # The last time a recording was retrieved from the queue.
         self.phrase_time = None
+
         # Current raw audio bytes.
         self.last_sample = bytes()
+
         # Thread safe Queue for passing data from the threaded recording callback.
         # The background listener thread is the producer; execute() drains this queue on the main thread.
         self.data_queue = Queue()
+
         # We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
         self.recorder = sr.Recognizer()
-        self.recorder.energy_threshold = self.args.energy_threshold
+
         # Definitely do this, dynamic energy compensation lowers the energy threshold dramatically to a point where the SpeechRecognizer never stops recording.
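+        # With the dynamic threshold off, --energy_threshold is the only knob that decides when speech is detected.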
         self.recorder.dynamic_energy_threshold = False
 
@@ -32,18 +35,20 @@ def __init__(self):
         # Important for linux users.
         # Prevents permanent application hang and crash by using the wrong Microphone
         self.device_index = AudioDeviceConfiguration.get_microphone_device_index(self.args.default_microphone)
 
+        # Get a name for the temporary file
+        self.temp_file = NamedTemporaryFile().name
+
         # Load / Download model
-        self.audio_model = self.load_model()
+        self.audio_model = self.load_model(self.args)
 
+        # Set values according to the args
+        self.recorder.energy_threshold = self.args.energy_threshold
         self.record_timeout = self.args.record_timeout
         self.silence_timeout = self.args.silence_timeout
 
-        self.temp_file = NamedTemporaryFile().name
-
         self.generate_audio_source()
 
-    def load_model(self):
-        args = self.args
+    def load_model(self, args):
         ONLY_ENGLISH = False
         model = args.model
         if args.model != "large" and not args.non_english and ONLY_ENGLISH:

From 2e65458fba1214fbb361793f46387718dc764d02 Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Sun, 23 Apr 2023 20:36:36 -0600
Subject: [PATCH 4/6] avoiding empty strings

---
 transcribe_demo.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/transcribe_demo.py b/transcribe_demo.py
index d7ca43b..d567592 100755
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -45,9 +45,13 @@ def __init__(self):
         self.recorder.energy_threshold = self.args.energy_threshold
         self.record_timeout = self.args.record_timeout
         self.silence_timeout = self.args.silence_timeout
+        self.transcription = ['']
 
         self.generate_audio_source()
 
+        # Cue the user that we're ready to go.
+        print("Model loaded.\n")
+
     def load_model(self, args):
         ONLY_ENGLISH = False
         model = args.model
@@ -74,10 +78,6 @@ def record_callback(_, audio:sr.AudioData) -> None:
         self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout)
 
     def execute(self):
-        # Cue the user that we're ready to go.
-        print("Model loaded.\n")
-        # Start with an empty transcription
-        self.transcription = ['']
         is_speaking = False
         while True:
             try:
@@ -133,6 +133,7 @@ def silence_time_is_up(self):
 
     def result_transcription_handler(self, result, has_silence_timeout):
         text = result['text'].strip()
+        if text is None or text is "": return self.transcription
         # If we detected a pause between recordings, add a new item to our transcription.
        # Otherwise edit the existing one.
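+        # The early return above keeps an empty result from blanking out the previous line.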
         if has_silence_timeout:

From 258acf5c555354c5e66a69b32d99766656ffb92e Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Sun, 23 Apr 2023 20:38:22 -0600
Subject: [PATCH 5/6] adding portaudio19 for a clean install

---
 README.md                |  1 +
 package_requirements.txt |  2 --
 2 files changed, 1 insertion(+), 2 deletions(-)
 delete mode 100644 package_requirements.txt

diff --git a/README.md b/README.md
index e8f6acc..ab97942 100755
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Whisper also requires the command-line tool [`ffmpeg`](https://ffmpeg.org/) to b
 ```
 # on Ubuntu or Debian
 sudo apt update && sudo apt install ffmpeg
+sudo apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0
 
 # on Arch Linux
 sudo pacman -S ffmpeg
diff --git a/package_requirements.txt b/package_requirements.txt
deleted file mode 100644
index 4450128..0000000
--- a/package_requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sudo apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0
-sudo apt install ffmpeg
\ No newline at end of file

From ab9964ba037aab2964adb2f17180b14d937c434b Mon Sep 17 00:00:00 2001
From: Andres Duran
Date: Fri, 9 Jun 2023 20:07:39 -0600
Subject: [PATCH 6/6] adding more intuitive feedback

---
 transcribe_demo.py | 22 ++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/transcribe_demo.py b/transcribe_demo.py
index d567592..11a60f3 100755
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -77,12 +77,19 @@ def record_callback(_, audio:sr.AudioData) -> None:
         # We could do this manually but SpeechRecognizer provides a nice helper.
         self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=self.record_timeout)
 
+    def read_complete_audio(self):
+        # Read the transcription.
+        result = self.audio_model.transcribe(self.temp_file, fp16=torch.cuda.is_available())
+        self.transcription = self.result_transcription_handler(result, True)
+        self.show_transcription()
+
     def execute(self):
         is_speaking = False
         while True:
             try:
                 # Pull raw recorded audio from the queue.
                 if not self.data_queue.empty():
+                    self.show_hearing()
                     # If enough time has passed between recordings, consider the phrase complete.
                     # Clear the current working audio buffer to start over with the new data.
                     has_silence_timeout = self.silence_time_is_up()
@@ -102,15 +109,9 @@ def execute(self):
                     # Write wav data to the temporary file as bytes.
                     AudioUtil.write_temp_audio_file(self.temp_file, wav_data)
 
-                    # Read the transcription.
-                    result = self.audio_model.transcribe(self.temp_file, fp16=torch.cuda.is_available())
-                    self.transcription = self.result_transcription_handler(result, has_silence_timeout)
-                    self.show_transcription()
-
                 else:
                     if is_speaking and self.silence_time_is_up():
-                        self.transcription[-1] = f"[Final]: {self.transcription[-1]}"
-                        self.show_transcription()
+                        self.read_complete_audio()
                         is_speaking = False
 
             except KeyboardInterrupt:
@@ -133,7 +134,7 @@ def silence_time_is_up(self):
 
     def result_transcription_handler(self, result, has_silence_timeout):
         text = result['text'].strip()
-        if text is None or text is "": return self.transcription
+        if text is None or text == "": return self.transcription
         # If we detected a pause between recordings, add a new item to our transcription.
         # Otherwise edit the existing one.
         # The early return above keeps an empty result from blanking out the previous line.
         if has_silence_timeout:
@@ -150,6 +151,11 @@ def show_transcription(self):
         # Flush stdout.
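+        # The explicit flush pushes the redrawn transcript out immediately on buffered terminals.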
         print('', end='', flush=True)
 
+    def show_hearing(self):
+        os.system('cls' if os.name=='nt' else 'clear')
+        print("Listening...")
+        print('', end='', flush=True)
+
 
 if __name__ == "__main__":
     speechHandler = SpeechHandler()
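
A quick way to exercise the end state of this series (a sketch: the flags follow the parser defined in system_configuration.py, and --default_microphone is only registered on Linux):

```
# List capture devices first (Linux only), then run with a small model.
python3 transcribe_demo.py --default_microphone list
python3 transcribe_demo.py --model tiny --record_timeout 2 --silence_timeout 3
```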