Long chinese voice to text with python #2402

alicia2739 · 2024-10-23T08:28:50Z

alicia2739
Oct 23, 2024

When I use whisper to deal long time audio with python, it just return part of the whole audio.
Shorter audio's text is even longer than longer audio's text
Here is my code:

import os
import pandas as pd
import argparse
import whisper
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)
model = whisper.load_model("base")

获取音频文件夹

def get_voice_folder(source_folder,folder):
folder = os.path.join(source_folder, folder)
voice_folder = os.listdir(folder)
voice_folder = [folder for folder in voice_folder if 'voice' in folder]
voice_folder = voice_folder[0]
voice_folder = os.path.join(folder, voice_folder)

return voice_folder

def convert_voice_to_text(source_folder,folder):
voice_folder = get_voice_folder(source_folder,folder)
voice_filenames = os.listdir(voice_folder)

voice_filenames = [os.path.join(voice_folder, voice_filename) for voice_filename in voice_filenames]
result=[]
for voice_filename in voice_filenames:
    audio = whisper.load_audio(voice_filename)  
    audio = whisper.pad_or_trim(audio)
    options = whisper.DecodingOptions(beam_size=5,prompt="生于忧患，死于欢乐。不亦快哉！")
     
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    test = whisper.decode(model, mel, options)       
    text = test.text
    text = pd.DataFrame({"voice_text":[text]})
    text['file_path'] = voice_filename
    result.append(text) 
    
result = pd.concat(result)    
return result

def main():

source_folder = '/home.....'

folders = os.listdir(source_folder)

folders = folders[0:1]
    
output_data=[]
for folder in folders:     
    result = convert_voice_to_text(source_folder,folder)
    output_data.append(result)

output_data = pd.concat(output_data)
output_data_name = f'/home2/wangmenghan/PBC_video/PBC_voice_result/voice_text/voice_text_base.csv'
output_data.to_csv(output_data_name,encoding='utf-8-sig',index=False)

if name == "main":
main()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Long chinese voice to text with python #2402

{{title}}

Replies: 0 comments

Select a reply

Long chinese voice to text with python #2402

alicia2739 Oct 23, 2024

获取音频文件夹

Replies: 0 comments

alicia2739
Oct 23, 2024