From ffc2c2acf3dc794ab6152def001bbd3ef0300bf9 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Mon, 8 Apr 2024 00:46:43 +0100 Subject: [PATCH 1/9] Integrate Fish-Audio's audio preprocess into GPT-SoVits, adding loudness normalization and maximum audio length control. --- requirements.txt | 3 +- tools/loudness_norm.py | 132 +++++++++++ tools/slice_audio.py | 526 +++++++++++++++++++++++++++++++++++++---- tools/slicer2.py | 261 -------------------- webui.py | 110 ++++++--- 5 files changed, 688 insertions(+), 344 deletions(-) create mode 100644 tools/loudness_norm.py delete mode 100644 tools/slicer2.py diff --git a/requirements.txt b/requirements.txt index 73912d017..2d9e9bd11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,5 @@ jieba_fast jieba LangSegment>=0.2.0 Faster_Whisper -wordsegment \ No newline at end of file +wordsegment +pyloudnorm \ No newline at end of file diff --git a/tools/loudness_norm.py b/tools/loudness_norm.py new file mode 100644 index 000000000..e702e736d --- /dev/null +++ b/tools/loudness_norm.py @@ -0,0 +1,132 @@ +# modifiled from https://github.com/fishaudio/audio-preprocess/blob/main/fish_audio_preprocess/cli/loudness_norm.py + +from pathlib import Path +from typing import Union +import numpy as np +import pyloudnorm as pyln +import soundfile as sf +from tqdm import tqdm +from concurrent.futures import ProcessPoolExecutor, as_completed +import argparse +import os + + +def loudness_norm_( + input_dir: str, + peak: float, + loudness: float, + block_size: float, + num_workers: int, +): + """Perform loudness normalization (ITU-R BS.1770-4) on audio files.""" + + if isinstance(input_dir, str): + path = Path(input_dir) + input_dir, output_dir = Path(input_dir), Path(input_dir) + + if not path.exists(): + raise FileNotFoundError(f"Directory {path} does not exist.") + files = ( + [f for f in path.glob("*") if f.is_file() and f.suffix == ".wav"] + ) + + + print(f"Found {len(files)} files, normalizing loudness") + + + with 
ProcessPoolExecutor(max_workers=num_workers) as executor: + tasks = [] + + for file in tqdm(files, desc="Preparing tasks"): + # Get relative path to input_dir + relative_path = file.relative_to(input_dir) + new_file = output_dir / relative_path + + if new_file.parent.exists() is False: + new_file.parent.mkdir(parents=True) + + tasks.append( + executor.submit( + loudness_norm_file, file, new_file, peak, loudness, block_size + ) + ) + + for i in tqdm(as_completed(tasks), total=len(tasks), desc="Processing"): + assert i.exception() is None, i.exception() + + print("Done!") + + + +def loudness_norm_file( + input_file: Union[str, Path], + output_file: Union[str, Path], + peak=-1.0, + loudness=-23.0, + block_size=0.400, +) -> None: + """ + Perform loudness normalization (ITU-R BS.1770-4) on audio files. + + Args: + input_file: input audio file + output_file: output audio file + peak: peak normalize audio to N dB. Defaults to -1.0. + loudness: loudness normalize audio to N dB LUFS. Defaults to -23.0. + block_size: block size for loudness measurement. Defaults to 0.400. (400 ms) + """ + + # Thanks to .against's feedback + # https://github.com/librosa/librosa/issues/1236 + + input_file, output_file = str(input_file), str(output_file) + + audio, rate = sf.read(input_file) + audio = loudness_norm(audio, rate, peak, loudness, block_size) + sf.write(output_file, audio, rate) + + + + +def loudness_norm( + audio: np.ndarray, rate: int, peak=-1.0, loudness=-23.0, block_size=0.400 +) -> np.ndarray: + """ + Perform loudness normalization (ITU-R BS.1770-4) on audio files. + + Args: + audio: audio data + rate: sample rate + peak: peak normalize audio to N dB. Defaults to -1.0. + loudness: loudness normalize audio to N dB LUFS. Defaults to -23.0. + block_size: block size for loudness measurement. Defaults to 0.400. 
(400 ms) + + Returns: + loudness normalized audio + """ + + # peak normalize audio to [peak] dB + audio = pyln.normalize.peak(audio, peak) + + # measure the loudness first + meter = pyln.Meter(rate, block_size=block_size) # create BS.1770 meter + _loudness = meter.integrated_loudness(audio) + + return pyln.normalize.loudness(audio, _loudness, loudness) + + + + +parser = argparse.ArgumentParser() +parser.add_argument("-i","--input_dir",help="匹配响度输入文件夹") +parser.add_argument("-l","--loudness",help="响度") +parser.add_argument("-p","--peak",help="响度峰值") +parser.add_argument("-n","--num_worker") +args = parser.parse_args() +input_dir = args.input_dir +loudness = float(args.loudness) +peak = float(args.peak) +num_worker = int(args.num_worker) + +if __name__ == "__main__": + loudness_norm_(input_dir=input_dir,peak=peak,loudness=loudness,block_size=0.4,num_workers=num_worker) \ No newline at end of file diff --git a/tools/slice_audio.py b/tools/slice_audio.py index 46ee408aa..fb6ef413b 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -1,48 +1,478 @@ -import os,sys,numpy as np -import traceback -from scipy.io import wavfile -# parent_directory = os.path.dirname(os.path.abspath(__file__)) -# sys.path.append(parent_directory) -from my_utils import load_audio -from slicer2 import Slicer - -def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part): - os.makedirs(opt_root,exist_ok=True) - if os.path.isfile(inp): - input=[inp] - elif os.path.isdir(inp): - input=[os.path.join(inp, name) for name in sorted(list(os.listdir(inp)))] - else: - return "输入路径存在但既不是文件也不是文件夹" - slicer = Slicer( - sr=32000, # 长音频采样率 - threshold= int(threshold), # 音量小于这个值视作静音的备选切割点 - min_length= int(min_length), # 每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值 - min_interval= int(min_interval), # 最短切割间隔 - hop_size= int(hop_size), # 怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好) - max_sil_kept= int(max_sil_kept), # 切完后静音最多留多长 - ) - _max=float(_max) - alpha=float(alpha) - for 
inp_path in input[int(i_part)::int(all_part)]: - # print(inp_path) - try: - name = os.path.basename(inp_path) - audio = load_audio(inp_path, 32000) - # print(audio.shape) - for chunk, start, end in slicer.slice(audio): # start和end是帧数 - tmp_max = np.abs(chunk).max() - if(tmp_max>1):chunk/=tmp_max - chunk = (chunk / tmp_max * (_max * alpha)) + (1 - alpha) * chunk - wavfile.write( - "%s/%s_%010d_%010d.wav" % (opt_root, name, start, end), - 32000, - # chunk.astype(np.float32), - (chunk * 32767).astype(np.int16), - ) - except: - print(inp_path,"->fail->",traceback.format_exc()) - return "执行完毕,请检查输出文件" - -print(slice(*sys.argv[1:])) - +# modified from https://github.com/fishaudio/audio-preprocess/blob/main/fish_audio_preprocess/cli/slice_audio.py + +from pathlib import Path +from typing import Union, Iterable +from concurrent.futures import ProcessPoolExecutor, as_completed +from tqdm import tqdm +import librosa +import numpy as np +import soundfile as sf +import math +from my_utils import load_audio +import argparse +import os + + +AUDIO_EXTENSIONS = { + ".mp3", + ".wav", + ".flac", + ".ogg", + ".m4a", + ".wma", + ".aac", + ".aiff", + ".aif", + ".aifc", +} + + +def list_files( + path: Union[Path, str], + extensions: set[str] = None, + sort: bool = True, +) -> list[Path]: + """List files in a directory. + + Args: + path (Path): Path to the directory. + extensions (set, optional): Extensions to filter. Defaults to None. + sort (bool, optional): Whether to sort the files. Defaults to True. + + Returns: + list: List of files. + """ + + if isinstance(path, str): + path = Path(path) + + if not path.exists(): + raise FileNotFoundError(f"Directory {path} does not exist.") + + files = [f for f in path.glob("*") if f.is_file()] + + if extensions is not None: + files = [f for f in files if f.suffix in extensions] + + if sort: + files = sorted(files) + + return files + + +def make_dirs(path: Union[Path, str]): + """Make directories. 
+ + Args: + path (Union[Path, str]): Path to the directory. + """ + if isinstance(path, str): + path = Path(path) + + if path.exists(): + print(f"Output directory already exists: {path}") + + path.mkdir(parents=True, exist_ok=True) + + +def slice_audio_v2_( + input_path: str, + output_dir: str, + num_workers: int, + min_duration: float, + max_duration: float, + min_silence_duration: float, + top_db: int, + hop_length: int, + max_silence_kept: float, + merge_short:bool +): + """(OpenVPI version) Slice audio files into smaller chunks by silence.""" + + input_path_, output_dir_ = Path(input_path), Path(output_dir) + if not input_path_.exists(): + raise RuntimeError("You input a wrong audio path that does not exists, please fix it!") + make_dirs(output_dir_) + if input_path_.is_dir(): + files = list_files(input_path_, extensions=AUDIO_EXTENSIONS) + elif input_path_.is_file() and input_path_.suffix in AUDIO_EXTENSIONS: + files = [input_path_] + input_path_ = input_path_.parent + else: + raise RuntimeError("The input path is not file or dir, please fixes it") + print(f"Found {len(files)} files, processing...") + + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + tasks = [] + + for file in tqdm(files, desc="Preparing tasks"): + # Get relative path to input_dir + relative_path = file.relative_to(input_path_) + save_path = output_dir_ / relative_path.parent / relative_path.stem + + tasks.append( + executor.submit( + slice_audio_file_v2, + input_file=str(file), + output_dir=save_path, + min_duration=min_duration, + max_duration=max_duration, + min_silence_duration=min_silence_duration, + top_db=top_db, + hop_length=hop_length, + max_silence_kept=max_silence_kept, + merge_short=merge_short + ) + ) + + for i in tqdm(as_completed(tasks), total=len(tasks), desc="Processing"): + assert i.exception() is None, i.exception() + + print("Done!") + print(f"Total: {len(files)}") + print(f"Output directory: {output_dir}") + + +def slice_audio_file_v2( + input_file: 
Union[str, Path], + output_dir: Union[str, Path], + min_duration: float = 4.0, + max_duration: float = 12.0, + min_silence_duration: float = 0.3, + top_db: int = -40, + hop_length: int = 10, + max_silence_kept: float = 0.4, + merge_short: bool = False +) -> None: + """ + Slice audio by silence and save to output folder + + Args: + input_file: input audio file + output_dir: output folder + min_duration: minimum duration of each slice + max_duration: maximum duration of each slice + min_silence_duration: minimum duration of silence + top_db: threshold to detect silence + hop_length: hop length to detect silence + max_silence_kept: maximum duration of silence to be kept + """ + + output_dir = Path(output_dir) + + audio = load_audio(str(input_file),32000) + rate = 32000 + for idx, sliced in enumerate( + slice_audio_v2( + audio, + rate, + min_duration=min_duration, + max_duration=max_duration, + min_silence_duration=min_silence_duration, + top_db=top_db, + hop_length=hop_length, + max_silence_kept=max_silence_kept, + merge_short=merge_short + ) + ): + if len(sliced) <= 3*rate: continue + max_audio=np.abs(sliced).max()#防止爆音,懒得搞混合了,后面有响度匹配 + if max_audio>1: + sliced/=max_audio + sf.write(str(output_dir) + f"_{idx:04d}.wav", sliced, rate) + + +def slice_audio_v2( + audio: np.ndarray, + rate: int, + min_duration: float = 4.0, + max_duration: float = 12.0, + min_silence_duration: float = 0.3, + top_db: int = -40, + hop_length: int = 10, + max_silence_kept: float = 0.5, + merge_short: bool = False +) -> Iterable[np.ndarray]: + """Slice audio by silence + + Args: + audio: audio data, in shape (samples, channels) + rate: sample rate + min_duration: minimum duration of each slice + max_duration: maximum duration of each slice + min_silence_duration: minimum duration of silence + top_db: threshold to detect silence + hop_length: hop length to detect silence + max_silence_kept: maximum duration of silence to be kept + merge_short: merge short slices automatically + + Returns: + 
Iterable of sliced audio + """ + + if len(audio) / rate < min_duration: + sliced_by_max_duration_chunk = slice_by_max_duration(audio, max_duration, rate) + yield from merge_short_chunks( + sliced_by_max_duration_chunk, max_duration, rate + ) if merge_short else sliced_by_max_duration_chunk + return + + slicer = Slicer( + sr=rate, + threshold=top_db, + min_length=min_duration * 1000, + min_interval=min_silence_duration * 1000, + hop_size=hop_length, + max_sil_kept=max_silence_kept * 1000, + ) + + sliced_audio = slicer.slice(audio) + if merge_short: + sliced_audio = merge_short_chunks(sliced_audio, max_duration, rate) + + for chunk in sliced_audio: + sliced_by_max_duration_chunk = slice_by_max_duration(chunk, max_duration, rate) + yield from sliced_by_max_duration_chunk + + +def slice_by_max_duration( + gen: np.ndarray, slice_max_duration: float, rate: int +) -> Iterable[np.ndarray]: + """Slice audio by max duration + + Args: + gen: audio data, in shape (samples, channels) + slice_max_duration: maximum duration of each slice + rate: sample rate + + Returns: + generator of sliced audio data + """ + + if len(gen) > slice_max_duration * rate: + # Evenly split _gen into multiple slices + n_chunks = math.ceil(len(gen) / (slice_max_duration * rate)) + chunk_size = math.ceil(len(gen) / n_chunks) + + for i in range(0, len(gen), chunk_size): + yield gen[i : i + chunk_size] + else: + yield gen + + +class Slicer: + def __init__( + self, + sr: int, + threshold: float = -40.0, + min_length: int = 4000, + min_interval: int = 300, + hop_size: int = 10, + max_sil_kept: int = 5000, + ): + if not min_length >= min_interval >= hop_size: + raise ValueError( + "The following condition must be satisfied: min_length >= min_interval >= hop_size" + ) + + if not max_sil_kept >= hop_size: + raise ValueError( + "The following condition must be satisfied: max_sil_kept >= hop_size" + ) + + min_interval = sr * min_interval / 1000 + self.threshold = 10 ** (threshold / 20.0) + self.hop_size = 
round(sr * hop_size / 1000) + self.win_size = min(round(min_interval), 4 * self.hop_size) + self.min_length = round(sr * min_length / 1000 / self.hop_size) + self.min_interval = round(min_interval / self.hop_size) + self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size) + + def _apply_slice(self, waveform, begin, end): + if len(waveform.shape) > 1: + return waveform[ + :, begin * self.hop_size : min(waveform.shape[1], end * self.hop_size) + ] + else: + return waveform[ + begin * self.hop_size : min(waveform.shape[0], end * self.hop_size) + ] + + def slice(self, waveform): + if len(waveform.shape) > 1: + samples = waveform.mean(axis=0) + else: + samples = waveform + + if samples.shape[0] <= self.min_length: + return [waveform] + + rms_list = librosa.feature.rms( + y=samples, frame_length=self.win_size, hop_length=self.hop_size + ).squeeze(0) + sil_tags = [] + silence_start = None + clip_start = 0 + + for i, rms in enumerate(rms_list): + # Keep looping while frame is silent. + if rms < self.threshold: + # Record start of silent frames. + if silence_start is None: + silence_start = i + continue + + # Keep looping while frame is not silent and silence start has not been recorded. + if silence_start is None: + continue + + # Clear recorded silence start if interval is not enough or clip is too short + is_leading_silence = silence_start == 0 and i > self.max_sil_kept + need_slice_middle = ( + i - silence_start >= self.min_interval + and i - clip_start >= self.min_length + ) + + if not is_leading_silence and not need_slice_middle: + silence_start = None + continue + + # Need slicing. Record the range of silent frames to be removed. 
+ if i - silence_start <= self.max_sil_kept: + pos = rms_list[silence_start : i + 1].argmin() + silence_start + + if silence_start == 0: + sil_tags.append((0, pos)) + else: + sil_tags.append((pos, pos)) + + clip_start = pos + elif i - silence_start <= self.max_sil_kept * 2: + pos = rms_list[ + i - self.max_sil_kept : silence_start + self.max_sil_kept + 1 + ].argmin() + pos += i - self.max_sil_kept + pos_l = ( + rms_list[ + silence_start : silence_start + self.max_sil_kept + 1 + ].argmin() + + silence_start + ) + pos_r = ( + rms_list[i - self.max_sil_kept : i + 1].argmin() + + i + - self.max_sil_kept + ) + + if silence_start == 0: + sil_tags.append((0, pos_r)) + clip_start = pos_r + else: + sil_tags.append((min(pos_l, pos), max(pos_r, pos))) + clip_start = max(pos_r, pos) + else: + pos_l = ( + rms_list[ + silence_start : silence_start + self.max_sil_kept + 1 + ].argmin() + + silence_start + ) + pos_r = ( + rms_list[i - self.max_sil_kept : i + 1].argmin() + + i + - self.max_sil_kept + ) + + if silence_start == 0: + sil_tags.append((0, pos_r)) + else: + sil_tags.append((pos_l, pos_r)) + + clip_start = pos_r + silence_start = None + + # Deal with trailing silence. + total_frames = rms_list.shape[0] + if ( + silence_start is not None + and total_frames - silence_start >= self.min_interval + ): + silence_end = min(total_frames, silence_start + self.max_sil_kept) + pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start + sil_tags.append((pos, total_frames + 1)) + + # Apply and return slices. 
+ if len(sil_tags) == 0: + return [waveform] + else: + chunks = [] + + if sil_tags[0][0] > 0: + chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0])) + + for i in range(len(sil_tags) - 1): + chunks.append( + self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0]) + ) + + if sil_tags[-1][1] < total_frames: + chunks.append( + self._apply_slice(waveform, sil_tags[-1][1], total_frames) + ) + + return chunks + + +def merge_short_chunks(chunks, max_duration, rate): + merged_chunks = [] + buffer, length = [], 0 + + for chunk in chunks: + if length + len(chunk) > max_duration * rate and len(buffer) > 0: + merged_chunks.append(np.concatenate(buffer)) + buffer, length = [], 0 + else: + buffer.append(chunk) + length += len(chunk) + + if len(buffer) > 0: + merged_chunks.append(np.concatenate(buffer)) + + return merged_chunks + + + + + + + + + +parser = argparse.ArgumentParser() +parser.add_argument("-i","--input_dir",help="切割输入文件夹") +parser.add_argument("-o","--output_dir",help="切割输入文件夹") +parser.add_argument("--threshold",default=-40,help="音量小于这个值视作静音的备选切割点") +parser.add_argument("--min_duration",default=4,help="每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值") +parser.add_argument("--max_duration",default=12,help="每段最长多长") +parser.add_argument("--min_interval",default=0.3,help="最短切割间隔") +parser.add_argument("--hop_size",default=10,help="怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)") +parser.add_argument("--max_sil_kept",default=0.4,help="切完后静音最多留多长") +parser.add_argument("--num_worker",default=os.cpu_count(),help="切使用的进程数") +parser.add_argument("--merge_short",default="False",help="响割使用的进程数") + + +args = parser.parse_args() +input_path = args.input_dir +output_dir = args.output_dir +threshold = float(args.threshold) +min_duration = float(args.min_duration) +max_duration = float(args.max_duration) +min_interval = float(args.min_interval) +hop_size = float(args.hop_size) +max_sil_kept = float(args.max_sil_kept) +num_worker = int(args.num_worker) +merge_short = bool(args.merge_short) + +if 
__name__ == "__main__": + slice_audio_v2_(input_path, output_dir, num_worker, min_duration, max_duration, min_interval, threshold, hop_size, max_sil_kept,merge_short) diff --git a/tools/slicer2.py b/tools/slicer2.py deleted file mode 100644 index ba6794b63..000000000 --- a/tools/slicer2.py +++ /dev/null @@ -1,261 +0,0 @@ -import numpy as np - - -# This function is obtained from librosa. -def get_rms( - y, - frame_length=2048, - hop_length=512, - pad_mode="constant", -): - padding = (int(frame_length // 2), int(frame_length // 2)) - y = np.pad(y, padding, mode=pad_mode) - - axis = -1 - # put our new within-frame axis at the end for now - out_strides = y.strides + tuple([y.strides[axis]]) - # Reduce the shape on the framing axis - x_shape_trimmed = list(y.shape) - x_shape_trimmed[axis] -= frame_length - 1 - out_shape = tuple(x_shape_trimmed) + tuple([frame_length]) - xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides) - if axis < 0: - target_axis = axis - 1 - else: - target_axis = axis + 1 - xw = np.moveaxis(xw, -1, target_axis) - # Downsample along the target axis - slices = [slice(None)] * xw.ndim - slices[axis] = slice(0, None, hop_length) - x = xw[tuple(slices)] - - # Calculate power - power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True) - - return np.sqrt(power) - - -class Slicer: - def __init__( - self, - sr: int, - threshold: float = -40.0, - min_length: int = 5000, - min_interval: int = 300, - hop_size: int = 20, - max_sil_kept: int = 5000, - ): - if not min_length >= min_interval >= hop_size: - raise ValueError( - "The following condition must be satisfied: min_length >= min_interval >= hop_size" - ) - if not max_sil_kept >= hop_size: - raise ValueError( - "The following condition must be satisfied: max_sil_kept >= hop_size" - ) - min_interval = sr * min_interval / 1000 - self.threshold = 10 ** (threshold / 20.0) - self.hop_size = round(sr * hop_size / 1000) - self.win_size = min(round(min_interval), 4 * self.hop_size) - 
self.min_length = round(sr * min_length / 1000 / self.hop_size) - self.min_interval = round(min_interval / self.hop_size) - self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size) - - def _apply_slice(self, waveform, begin, end): - if len(waveform.shape) > 1: - return waveform[ - :, begin * self.hop_size : min(waveform.shape[1], end * self.hop_size) - ] - else: - return waveform[ - begin * self.hop_size : min(waveform.shape[0], end * self.hop_size) - ] - - # @timeit - def slice(self, waveform): - if len(waveform.shape) > 1: - samples = waveform.mean(axis=0) - else: - samples = waveform - if samples.shape[0] <= self.min_length: - return [waveform] - rms_list = get_rms( - y=samples, frame_length=self.win_size, hop_length=self.hop_size - ).squeeze(0) - sil_tags = [] - silence_start = None - clip_start = 0 - for i, rms in enumerate(rms_list): - # Keep looping while frame is silent. - if rms < self.threshold: - # Record start of silent frames. - if silence_start is None: - silence_start = i - continue - # Keep looping while frame is not silent and silence start has not been recorded. - if silence_start is None: - continue - # Clear recorded silence start if interval is not enough or clip is too short - is_leading_silence = silence_start == 0 and i > self.max_sil_kept - need_slice_middle = ( - i - silence_start >= self.min_interval - and i - clip_start >= self.min_length - ) - if not is_leading_silence and not need_slice_middle: - silence_start = None - continue - # Need slicing. Record the range of silent frames to be removed. 
- if i - silence_start <= self.max_sil_kept: - pos = rms_list[silence_start : i + 1].argmin() + silence_start - if silence_start == 0: - sil_tags.append((0, pos)) - else: - sil_tags.append((pos, pos)) - clip_start = pos - elif i - silence_start <= self.max_sil_kept * 2: - pos = rms_list[ - i - self.max_sil_kept : silence_start + self.max_sil_kept + 1 - ].argmin() - pos += i - self.max_sil_kept - pos_l = ( - rms_list[ - silence_start : silence_start + self.max_sil_kept + 1 - ].argmin() - + silence_start - ) - pos_r = ( - rms_list[i - self.max_sil_kept : i + 1].argmin() - + i - - self.max_sil_kept - ) - if silence_start == 0: - sil_tags.append((0, pos_r)) - clip_start = pos_r - else: - sil_tags.append((min(pos_l, pos), max(pos_r, pos))) - clip_start = max(pos_r, pos) - else: - pos_l = ( - rms_list[ - silence_start : silence_start + self.max_sil_kept + 1 - ].argmin() - + silence_start - ) - pos_r = ( - rms_list[i - self.max_sil_kept : i + 1].argmin() - + i - - self.max_sil_kept - ) - if silence_start == 0: - sil_tags.append((0, pos_r)) - else: - sil_tags.append((pos_l, pos_r)) - clip_start = pos_r - silence_start = None - # Deal with trailing silence. - total_frames = rms_list.shape[0] - if ( - silence_start is not None - and total_frames - silence_start >= self.min_interval - ): - silence_end = min(total_frames, silence_start + self.max_sil_kept) - pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start - sil_tags.append((pos, total_frames + 1)) - # Apply and return slices. 
- ####音频+起始时间+终止时间 - if len(sil_tags) == 0: - return [[waveform,0,int(total_frames*self.hop_size)]] - else: - chunks = [] - if sil_tags[0][0] > 0: - chunks.append([self._apply_slice(waveform, 0, sil_tags[0][0]),0,int(sil_tags[0][0]*self.hop_size)]) - for i in range(len(sil_tags) - 1): - chunks.append( - [self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0]),int(sil_tags[i][1]*self.hop_size),int(sil_tags[i + 1][0]*self.hop_size)] - ) - if sil_tags[-1][1] < total_frames: - chunks.append( - [self._apply_slice(waveform, sil_tags[-1][1], total_frames),int(sil_tags[-1][1]*self.hop_size),int(total_frames*self.hop_size)] - ) - return chunks - - -def main(): - import os.path - from argparse import ArgumentParser - - import librosa - import soundfile - - parser = ArgumentParser() - parser.add_argument("audio", type=str, help="The audio to be sliced") - parser.add_argument( - "--out", type=str, help="Output directory of the sliced audio clips" - ) - parser.add_argument( - "--db_thresh", - type=float, - required=False, - default=-40, - help="The dB threshold for silence detection", - ) - parser.add_argument( - "--min_length", - type=int, - required=False, - default=5000, - help="The minimum milliseconds required for each sliced audio clip", - ) - parser.add_argument( - "--min_interval", - type=int, - required=False, - default=300, - help="The minimum milliseconds for a silence part to be sliced", - ) - parser.add_argument( - "--hop_size", - type=int, - required=False, - default=10, - help="Frame length in milliseconds", - ) - parser.add_argument( - "--max_sil_kept", - type=int, - required=False, - default=500, - help="The maximum silence length kept around the sliced clip, presented in milliseconds", - ) - args = parser.parse_args() - out = args.out - if out is None: - out = os.path.dirname(os.path.abspath(args.audio)) - audio, sr = librosa.load(args.audio, sr=None, mono=False) - slicer = Slicer( - sr=sr, - threshold=args.db_thresh, - min_length=args.min_length, - 
min_interval=args.min_interval, - hop_size=args.hop_size, - max_sil_kept=args.max_sil_kept, - ) - chunks = slicer.slice(audio) - if not os.path.exists(out): - os.makedirs(out) - for i, chunk in enumerate(chunks): - if len(chunk.shape) > 1: - chunk = chunk.T - soundfile.write( - os.path.join( - out, - f"%s_%d.wav" - % (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i), - ), - chunk, - sr, - ) - - -if __name__ == "__main__": - main() diff --git a/webui.py b/webui.py index e1c36e1ed..40cbcf4f8 100644 --- a/webui.py +++ b/webui.py @@ -119,6 +119,7 @@ def change_choices(): p_uvr5=None p_asr=None p_denoise=None +p_loudness_norm=None p_tts_inference=None def kill_proc_tree(pid, including_parent=True): @@ -246,6 +247,31 @@ def close_denoise(): p_denoise=None return "已终止语音降噪进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} +def open_loudness_norm(loudness_norm_inp_dir,loudness,peak,num_worker): + global p_loudness_norm + if(p_loudness_norm == None): + loudness_norm_inp_dir=my_utils.clean_path(loudness_norm_inp_dir) + cmd = '"%s" tools/loudness_norm.py -i "%s" -l "%s" -p "%s" -n "%s"'%(python_exec,loudness_norm_inp_dir,loudness,peak,num_worker) + + yield "响度匹配任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + print(cmd) + p_loudness_morm = Popen(cmd, shell=True) + p_loudness_morm.wait() + p_loudness_morm=None + yield f"响度匹配任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的响度匹配任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} + + +def close_loudness_norm(): + global p_loudness_norm + if(p_loudness_norm!=None): + try: + kill_process(p_loudness_norm.pid) + except: + traceback.print_exc() + return "已终止响度匹配进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + p_train_SoVITS=None def 
open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D): global p_train_SoVITS @@ -337,42 +363,56 @@ def close1Bb(): p_train_GPT=None return "已终止GPT训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} -ps_slice=[] -def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_parts): - global ps_slice - inp = my_utils.clean_path(inp) - opt_root = my_utils.clean_path(opt_root) - if(os.path.exists(inp)==False): +p_slice=None +def open_slice(input_path, output_dir, num_worker, min_duration, max_duration, min_interval, threshold, hop_size, max_sil_kept, merge_short, loudness_norm,loudness, peak): + global p_slice + input_path = my_utils.clean_path(input_path) + output_dir = my_utils.clean_path(output_dir) + if(os.path.exists(input_path)==False): yield "输入路径不存在",{"__type__":"update","visible":True},{"__type__":"update","visible":False} return - if os.path.isfile(inp):n_parts=1 - elif os.path.isdir(inp):pass + if os.path.isfile(input_path):num_worker=1 + elif os.path.isdir(input_path):pass else: yield "输入路径存在但既不是文件也不是文件夹",{"__type__":"update","visible":True},{"__type__":"update","visible":False} return - if (ps_slice == []): - for i_part in range(n_parts): - cmd = '"%s" tools/slice_audio.py "%s" "%s" %s %s %s %s %s %s %s %s %s''' % (python_exec,inp, opt_root, threshold, min_length, min_interval, hop_size, max_sil_kept, _max, alpha, i_part, n_parts) - print(cmd) - p = Popen(cmd, shell=True) - ps_slice.append(p) + if (p_slice == None): + cmd = f'"{python_exec}" tools/slice_audio.py -i "{input_path}" -o "{output_dir}" --threshold {threshold} --min_duration {min_duration} --max_duration {max_duration} --min_interval {min_interval} --hop_size {hop_size} --max_sil_kept {max_sil_kept} --num_worker {num_worker} --merge_short {merge_short}''' + print(cmd) + p_slice = Popen(cmd, shell=True) yield "切割执行中", {"__type__": 
"update", "visible": False}, {"__type__": "update", "visible": True} - for p in ps_slice: - p.wait() - ps_slice=[] + p_slice.wait() + p_slice=None + if loudness_norm: + loudness_norm_inp_dir = output_dir + global p_loudness_norm + if(p_loudness_norm == None): + loudness_norm_inp_dir=my_utils.clean_path(loudness_norm_inp_dir) + cmd = '"%s" tools/loudness_norm.py -i "%s" -l "%s" -p "%s" -n "%s"'%(python_exec,loudness_norm_inp_dir,loudness,peak,num_worker) + print("响度匹配任务开启") + print(cmd) + p_loudness_morm = Popen(cmd, shell=True) + p_loudness_morm.wait() + p_loudness_morm=None + print("响度匹配任务完成, 查看终端进行下一步") + yield "切割结束",{"__type__":"update","visible":True},{"__type__":"update","visible":False} else: yield "已有正在进行的切割任务,需先终止才能开启下一次任务", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} def close_slice(): - global ps_slice - if (ps_slice != []): - for p_slice in ps_slice: - try: - kill_process(p_slice.pid) - except: - traceback.print_exc() - ps_slice=[] + global p_slice + if (p_slice != None): + try: + kill_process(p_slice.pid) + except: + traceback.print_exc() + p_slice=None + if (p_loudness_norm != None): + global p_denoise + if(p_denoise!=None): + kill_process(p_denoise.pid) + p_denoise=None return "已终止所有切割进程", {"__type__": "update", "visible": True}, {"__type__": "update", "visible": False} ps1a=[] @@ -692,16 +732,19 @@ def close1abc(): slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="") slice_opt_root=gr.Textbox(label=i18n("切分后的子音频的输出根目录"),value="output/slicer_opt") threshold=gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"),value="-34") - min_length=gr.Textbox(label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4000") - min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="300") + min_duration=gr.Textbox(label=i18n("min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4.0") + max_duration=gr.Textbox(label=i18n("max_duration:每段最长多长"),value="10.0") + 
min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="0.3") + max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="0.5") hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10") - max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500") + if_merge_short = gr.Checkbox(label=i18n("对于过短音频的处理方法,勾选则合并,不勾选则抛弃"),show_label=True) with gr.Row(): + loudness=gr.Textbox(label=i18n("目标响度"),value="-23") + peak=gr.Textbox(label=i18n("峰值响度"),value="-1") + if_loudness_norm = gr.Checkbox(label=i18n("是否匹配响度"),show_label=True,value=True) + num_worker=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True) close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False) - _max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True) - alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True) - n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息")) gr.Markdown(value=i18n("0bb-语音降噪工具")) with gr.Row(): @@ -770,8 +813,7 @@ def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info]) open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) - open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) - close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) + open_slicer_button.click(open_slice, [slice_inp_path, slice_opt_root, 
num_worker, min_duration, max_duration, min_interval, threshold, hop_size, max_sil_kept,if_merge_short, if_loudness_norm, loudness, peak], [slicer_info,open_slicer_button,close_slicer_button]) open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button]) @@ -785,7 +827,7 @@ def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 with gr.TabItem(i18n("1A-训练集格式化工具")): gr.Markdown(value=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹")) with gr.Row(): - inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True) + inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"output/asr_opt/slicer_opt.list",interactive=True) inp_wav_dir = gr.Textbox( label=i18n("*训练集音频文件目录"), # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", From 3f88bcc0234ca18d5a00b80368846db56366d461 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Mon, 8 Apr 2024 00:52:16 +0100 Subject: [PATCH 2/9] Upgrade the requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2d9e9bd11..56edbf710 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,4 +26,4 @@ jieba LangSegment>=0.2.0 Faster_Whisper wordsegment -pyloudnorm \ No newline at end of file +pyloudnorm>=0.1.1 \ No newline at end of file From 1b6b4892494ad01f1c043c6f3137c4eec6b49307 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Mon, 8 Apr 2024 02:06:23 +0100 Subject: [PATCH 3/9] Update Readme and i18n --- README.md | 1 + docs/cn/README.md | 2 +- docs/ja/README.md | 1 + docs/ko/README.md | 1 + i18n/locale/en_US.json | 7 ++++++- i18n/locale/es_ES.json | 7 ++++++- i18n/locale/fr_FR.json | 7 ++++++- i18n/locale/it_IT.json | 7 ++++++- i18n/locale/ja_JP.json | 7 ++++++- i18n/locale/ko_KR.json | 7 ++++++- i18n/locale/pt_BR.json | 7 ++++++- i18n/locale/zh_CN.json | 7 ++++++- 12 
files changed, 52 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 11225162c..7c45163c5 100644 --- a/README.md +++ b/README.md @@ -255,6 +255,7 @@ Special thanks to the following projects and contributors: - [gradio](https://github.com/gradio-app/gradio) - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) - [FunASR](https://github.com/alibaba-damo-academy/FunASR) +- [audio-preprocess](https://github.com/fishaudio/audio-preprocess) ## Thanks to all contributors for their efforts diff --git a/docs/cn/README.md b/docs/cn/README.md index 2c48cbc7e..970e71e62 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -244,7 +244,7 @@ python ./tools/asr/fasterwhisper_asr.py -i -o -l - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) - +- [audio-preprocess](https://github.com/fishaudio/audio-preprocess) ## 感谢所有贡献者的努力 diff --git a/docs/ja/README.md b/docs/ja/README.md index 02d1b8366..6b480bcc0 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -222,6 +222,7 @@ python ./tools/asr/fasterwhisper_asr.py -i -o -l - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) +- [audio-preprocess](https://github.com/fishaudio/audio-preprocess) ## すべてのコントリビューターに感謝します diff --git a/docs/ko/README.md b/docs/ko/README.md index 57696f1df..bdbea7929 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -226,6 +226,7 @@ python ./tools/asr/fasterwhisper_asr.py -i -o -l - [SubFix](https://github.com/cronrpc/SubFix) - [FFmpeg](https://github.com/FFmpeg/FFmpeg) - [gradio](https://github.com/gradio-app/gradio) +- [audio-preprocess](https://github.com/fishaudio/audio-preprocess) ## 모든 기여자들에게 감사드립니다 ;) diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index 292a915c1..004c54120 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -31,10 +31,15 @@ "凑四句一切": "Slice 
once every 4 sentences",
     "按英文句号.切": "Slice by English punct",
     "threshold:音量小于这个值视作静音的备选切割点": "Noise gate threshold (loudness below this value will be treated as noise",
-    "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "Minimum length",
+    "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: The minimum length for each segment. If the first segment is too short, it will be combined with subsequent segments until it exceeds this value.",
+    "max_duration:每段最长多长": "max_duration: Maximum duration per segment",
     "min_interval:最短切割间隔": "Minumum interval for audio cutting",
     "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size: FO hop size, the smaller the value, the higher the accuracy)",
     "max_sil_kept:切完后静音最多留多长": "Maximum length for silence to be kept",
+    "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "How to handle too-short audio: check to merge with neighbours, uncheck to discard",
+    "目标响度": "Target Loudness",
+    "峰值响度": "Peak Loudness",
+    "是否匹配响度": "Check to enable loudness normalization",
     "开启语音切割": "Start audio slicer",
     "终止语音切割": "Stop audio cutting",
     "max:归一化后最大值多少": "Loudness multiplier after normalized",
diff --git a/i18n/locale/es_ES.json b/i18n/locale/es_ES.json
index 3bcd2a38a..678553d8f 100644
--- a/i18n/locale/es_ES.json
+++ b/i18n/locale/es_ES.json
@@ -19,10 +19,15 @@
     "凑四句一切": "Completa cuatro oraciones para rellenar todo",
     "按英文句号.切": "Cortar por puntos en inglés.",
     "threshold:音量小于这个值视作静音的备选切割点": "umbral: puntos de corte alternativos considerados como silencio si el volumen es menor que este valor",
-    "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: duración mínima de cada segmento, si el primer segmento es demasiado corto, se conecta continuamente con los siguientes hasta que supera este valor",
+    "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: Duración mínima
Si el primer segmento es demasiado corto, se combinará con los siguientes segmentos hasta superar este valor.", + "max_duration:每段最长多长": "max_duration: Duración máxima", "min_interval:最短切割间隔": "min_interval: intervalo mínimo de corte", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size: cómo calcular la curva de volumen, cuanto más pequeño, mayor precisión pero mayor carga computacional (mayor precisión no significa mejor rendimiento)", "max_sil_kept:切完后静音最多留多长": "max_sil_kept: duración máxima del silencio después del corte", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Marcar para activar la fusión de clips de audio cortos", + "目标响度": "Loudness objetivo", + "峰值响度": "Loudness de pico", + "是否匹配响度": "Marcar para activar la correspondencia de loudness", "开启语音切割": "Habilitar la división de voz", "终止语音切割": "Terminar la división de voz", "max:归一化后最大值多少": "max: valor máximo después de la normalización", diff --git a/i18n/locale/fr_FR.json b/i18n/locale/fr_FR.json index 99d83aca2..de4e145fc 100644 --- a/i18n/locale/fr_FR.json +++ b/i18n/locale/fr_FR.json @@ -19,10 +19,15 @@ "凑四句一切": "Composez quatre phrases pour tout remplir", "按英文句号.切": "Découpez par des points en anglais", "threshold:音量小于这个值视作静音的备选切割点": "seuil: le volume inférieur à cette valeur est considéré comme un point de coupe silencieux alternatif", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: longueur minimale de chaque segment, si le premier segment est trop court, il est continué avec le segment suivant jusqu'à dépasser cette valeur", + "min_furation:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration : Durée minimale de chaque segment. 
Si le premier segment est trop court, il sera combiné avec les segments suivants jusqu'à dépasser cette valeur.", + "max_duration:每段最长多长": "Maximum duration: Durée maximale", "min_interval:最短切割间隔": "min_interval: intervalle de coupe minimum", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size: comment calculer la courbe de volume, plus petit pour une précision plus élevée mais une charge de calcul plus élevée (ce n'est pas une meilleure précision)", "max_sil_kept:切完后静音最多留多长": "max_sil_kept: durée maximale de silence après la coupe", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Cocher pour activer la fusion des clips audio courts", + "目标响度": "Loudness cible", + "峰值响度": "Loudness de crête", + "是否匹配响度": "Cocher pour activer l'ajustement de loudness", "开启语音切割": "Activer le découpage vocal", "终止语音切割": "Arrêter le découpage vocal", "max:归一化后最大值多少": "max: valeur maximale après normalisation", diff --git a/i18n/locale/it_IT.json b/i18n/locale/it_IT.json index f34395af4..77d118ec8 100644 --- a/i18n/locale/it_IT.json +++ b/i18n/locale/it_IT.json @@ -11,10 +11,15 @@ "音频自动切分输入路径,可文件可文件夹": "Percorso di input per la segmentazione automatica dell'audio, può essere un file o una cartella", "切分后的子音频的输出根目录": "Directory radice di output per gli audio segmentati", "threshold:音量小于这个值视作静音的备选切割点": "threshold: Punto di taglio alternativo considerato silenzioso se il volume è inferiore a questo valore", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: Lunghezza minima di ogni segmento. 
Se il primo segmento è troppo corto, verrà unito agli segmenti successivi fino a superare questo valore", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: Quanto breve può essere ciascun segmento, se il primo segmento è troppo breve, continuerà ad essere unito ai segmenti successivi fino a superare questo valore.", + "max_duration:每段最长多长": "Maximum duration: Durata massima", "min_interval:最短切割间隔": "min_interval: Intervallo minimo di taglio", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size: Come calcolare la curva del volume. Più piccolo è, maggiore è la precisione ma aumenta la complessità computazionale (non significa che una maggiore precisione dà risultati migliori)", "max_sil_kept:切完后静音最多留多长": "max_sil_kept: Massima durata del silenzio dopo il taglio", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Seleziona per abilitare l'unione di clip audio brevi", + "目标响度": "Loudness obiettivo", + "峰值响度": "Loudness di picco", + "是否匹配响度": "Seleziona per abilitare il matching di loudness", "开启语音切割": "Attivare la segmentazione vocale", "终止语音切割": "Terminare la segmentazione vocale", "max:归一化后最大值多少": "max: Massimo valore dopo la normalizzazione", diff --git a/i18n/locale/ja_JP.json b/i18n/locale/ja_JP.json index 6b3be12b3..015c198ad 100644 --- a/i18n/locale/ja_JP.json +++ b/i18n/locale/ja_JP.json @@ -19,10 +19,15 @@ "凑四句一切": "4つの文で埋める", "按英文句号.切": "英文のピリオドで切ってください", "threshold:音量小于这个值视作静音的备选切割点": "閾値:この値未満の音量は静音と見なされ、代替のカットポイントとして扱われます", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length:各セグメントの最小長さ。最初のセグメントが短すぎる場合、連続して後続のセグメントに接続され、この値を超えるまで続きます。", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: 各セグメントの最短長さ。最初のセグメントが短すぎる場合は、この値を超えるまで次のセグメントと連続して結合されます。", + "max_duration:每段最长多长": "maximum duration: 最大持続時間", "min_interval:最短切割间隔": "min_interval:最短カット間隔", "hop_size:怎么算音量曲线,越小精度越大計算量越高(不是精度越大效果越好)": "hop_size:音量曲線を計算する方法。値が小さいほど精度が高くなり、計算量が増加します(精度が高いほど効果が良いわけではありません)。", "max_sil_kept:切完后静音最多留多长": "max_sil_kept:切り終えた後、最大でどれだけ静かにするか", + 
"对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "对短いオーディオクリップのマージを有効にするためにチェック", + "目标响度": "目標ラウドネス", + "峰值响度": "ピークラウドネス", + "是否匹配响度": "ラウドネスマッチングを有効にするためにチェック", "开启语音切割": "音声の分割を開始", "终止语音切割": "音声の分割を停止", "max:归一化后最大值多少": "max:正規化後の最大値", diff --git a/i18n/locale/ko_KR.json b/i18n/locale/ko_KR.json index 1898c9b9f..4ead0fd1d 100644 --- a/i18n/locale/ko_KR.json +++ b/i18n/locale/ko_KR.json @@ -19,10 +19,15 @@ "凑四句一切": "네 문장의 세트를 완성하세요.", "按英文句号.切": "영어 문장으로 분리하기", "threshold:音量小于这个值视作静音的备选切割点": "임계 값: 이 값보다 작은 볼륨은 대체 분리 지점으로 간주됩니다.", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "최소 길이: 각 세그먼트의 최소 길이. 첫 번째 세그먼트가 너무 짧으면 계속해서 뒷부분과 연결하여 이 값 이상이 될 때까지", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: 각 세그먼트의 최소 길이가 얼마인지, 첫 번째 세그먼트가 너무 짧다면 이 값을 초과할 때까지 계속해서 다음 세그먼트와 결합됩니다.", + "max_duration:每段最长多长": "Maximum duration: 최대 지속 시간", "min_interval:最短切割间隔": "최소 분리 간격", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop 크기: 볼륨 곡선을 계산하는 방법. 작을수록 정확도가 높아지지만 계산량이 높아집니다 (정확도가 높다고 효과가 좋아지지 않음)", "max_sil_kept:切完后静音最多留多长": "최대 유지되는 정적 길이 (분리 후)", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "짧은 오디오 클립 합치기 활성화 체크", + "目标响度": "목표 라우드니스", + "峰值响度": "피크 라우드니스", + "是否匹配响度": "라우드니스 매칭 활성화 체크", "开启语音切割": "음성 분리 활성화", "终止语音切割": "음성 분리 종료", "max:归一化后最大值多少": "최대 값 (정규화 후)", diff --git a/i18n/locale/pt_BR.json b/i18n/locale/pt_BR.json index 9a7cc935c..5b520bcf2 100644 --- a/i18n/locale/pt_BR.json +++ b/i18n/locale/pt_BR.json @@ -11,10 +11,15 @@ "音频自动切分输入路径,可文件可文件夹": "Caminho de entrada automático de corte de áudio, pode ser um arquivo ou uma pasta", "切分后的子音频的输出根目录": "Diretório raiz de saída do sub-áudio após o corte", "threshold:音量小于这个值视作静音的备选切割点": "Limiar: O volume menor que este valor é considerado como um ponto de corte mudo alternativo", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: O comprimento mínimo de cada parágrafo, se o primeiro for muito curto, conecte-o continuamente aos próximos até ultrapassar este valor", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: Duração mínima 
de cada segmento. Se o primeiro segmento for muito curto, ele será combinado com os segmentos seguintes até ultrapassar esse valor.", + "max_duration:每段最长多长": "Maximum duration: Duração máxima", "min_interval:最短切割间隔": "min_interval: O intervalo de corte mínimo", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "HOP_SIZE: Como calcular a curva de volume, quanto menor a precisão, maior a quantidade de cálculos (não significa que quanto maior a precisão, melhor o efeito)", "max_sil_kept:切完后静音最多留多长": "max_sil_kept: Depois de cortar, por quanto tempo no máximo o silêncio é mantido", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Marcar para ativar a união de clipes de áudio curtos", + "目标响度": "Loudness alvo", + "峰值响度": "Loudness de pico", + "是否匹配响度": "Marcar para ativar o ajuste de loudness", "开启语音切割": "Ativar corte de voz", "终止语音切割": "Encerrar corte de voz", "max:归一化后最大值多少": "MAX: Qual é o valor máximo após a normalização?", diff --git a/i18n/locale/zh_CN.json b/i18n/locale/zh_CN.json index e6639c51e..aa9a6f349 100644 --- a/i18n/locale/zh_CN.json +++ b/i18n/locale/zh_CN.json @@ -31,10 +31,15 @@ "凑四句一切": "凑四句一切", "按英文句号.切": "按英文句号.切", "threshold:音量小于这个值视作静音的备选切割点": "threshold:音量小于这个值视作静音的备选切割点", - "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值", + "max_duration:每段最长多长": "max_duration:每段最长多长", "min_interval:最短切割间隔": "min_interval:最短切割间隔", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)", "max_sil_kept:切完后静音最多留多长": "max_sil_kept:切完后静音最多留多长", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "对于过短音频的处理方法,勾选则合并,不勾选则抛弃", + "目标响度": "目标响度", + "峰值响度": "峰值响度", + "是否匹配响度": "是否匹配响度", "开启语音切割": "开启语音切割", "终止语音切割": "终止语音切割", "max:归一化后最大值多少": "max:归一化后最大值多少", From a01557eb3fd5f1c540bcb9173e74b4a22538ce66 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Mon, 8 Apr 2024 16:00:09 +0100 Subject: [PATCH 4/9] fixed --- webui.py | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/webui.py b/webui.py index 40cbcf4f8..a44099883 100644 --- a/webui.py +++ b/webui.py @@ -377,7 +377,7 @@ def open_slice(input_path, output_dir, num_worker, min_duration, max_duration, m yield "输入路径存在但既不是文件也不是文件夹",{"__type__":"update","visible":True},{"__type__":"update","visible":False} return if (p_slice == None): - cmd = f'"{python_exec}" tools/slice_audio.py -i "{input_path}" -o "{output_dir}" --threshold {threshold} --min_duration {min_duration} --max_duration {max_duration} --min_interval {min_interval} --hop_size {hop_size} --max_sil_kept {max_sil_kept} --num_worker {num_worker} --merge_short {merge_short}''' + cmd = '"%s" tools/slice_audio.py -i "%s" -o "%s" --threshold %s --min_duration %s --max_duration %s --min_interval %s --hop_size %s --max_sil_kept %s --num_worker %s --merge_short %s'%(python_exec, input_path, output_dir, threshold, min_duration, max_duration, min_interval, hop_size, max_sil_kept, num_worker, merge_short) print(cmd) p_slice = Popen(cmd, shell=True) yield "切割执行中", {"__type__": "update", "visible": False}, {"__type__": "update", "visible": True} @@ -814,6 +814,7 @@ def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) open_slicer_button.click(open_slice, [slice_inp_path, slice_opt_root, num_worker, min_duration, max_duration, min_interval, threshold, hop_size, max_sil_kept,if_merge_short, if_loudness_norm, loudness, peak], [slicer_info,open_slicer_button,close_slicer_button]) + close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) close_denoise_button.click(close_denoise, [], 
[denoise_info,open_denoise_button,close_denoise_button]) From 988a381084cc13aa00ba78ba221acfdb42cb7de0 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Mon, 8 Apr 2024 19:34:45 +0100 Subject: [PATCH 5/9] Update i18n json files --- i18n/locale/ru_RU.json | 187 +++++++++++++++++++++++++++++++++++++++-- i18n/locale/tr_TR.json | 185 ++++++++++++++++++++++++++++++++++++++-- i18n/locale/zh_HK.json | 185 ++++++++++++++++++++++++++++++++++++++-- i18n/locale/zh_SG.json | 185 ++++++++++++++++++++++++++++++++++++++-- i18n/locale/zh_TW.json | 185 ++++++++++++++++++++++++++++++++++++++-- webui.py | 2 +- 6 files changed, 887 insertions(+), 42 deletions(-) diff --git a/i18n/locale/ru_RU.json b/i18n/locale/ru_RU.json index f01bc8f09..6c816be28 100644 --- a/i18n/locale/ru_RU.json +++ b/i18n/locale/ru_RU.json @@ -1,4 +1,181 @@ -{ +{ + "很遗憾您这没有能用的显卡来支持您训练": "К сожалению, у вас нет подходящей видеокарты для поддержки вашей тренировки", + "UVR5已开启": "UVR5 включен", + "UVR5已关闭": "UVR5 выключен", + "输入文件夹路径": "Введите путь к папке", + "输出文件夹路径": "Путь к папке для вывода", + "ASR 模型": "Модель ASR", + "ASR 模型尺寸": "Размер модели ASR", + "ASR 语言设置": "Настройки языка ASR", + "模型切换": "Переключение модели", + "是否开启dpo训练选项(实验性)": "Включить опцию тренировки dpo (экспериментально)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "Включить режим без референтного текста. Не заполняя референтный текст, вы также включаете этот режим.", + "使用无参考文本模式时建议使用微调的GPT": "При использовании режима без референтного текста рекомендуется использовать тонко настроенный GPT", + "后续将支持转音素、手工修改音素、语音合成分步执行。": "В будущем будет поддерживаться преобразование в фонемы, ручная коррекция фонем, пошаговая синтезация речи.", + "gpt采样参数(无参考文本时不要太低):": "Параметры выборки GPT (не слишком низкие, когда нет референтного текста):", + "按标点符号切": "Разрезать по пунктуационным знакам", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Это программное обеспечение открыто по лицензии MIT, автор не имеет никакого контроля над программным обеспечением, пользователи программного обеспечения и те, кто распространяет звуки, экспортированные программным обеспечением, несут полную ответственность.
Если вы не согласны с этими условиями, вы не можете использовать или ссылаться на любой код и файлы в пакете программного обеспечения. Смотрите LICENSE в корневом каталоге.", + "0-前置数据集获取工具": "0-Инструмент для получения предварительного набора данных", + "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-Инструмент для разделения вокала и аккомпанемента UVR5 & устранения реверберации и задержек", + "是否开启UVR5-WebUI": "Включить UVR5-WebUI", + "UVR5进程输出信息": "Вывод информации процесса UVR5", + "0b-语音切分工具": "0b-Инструмент для разделения речи", + ".list标注文件的路径": "Путь к файлу аннотации .list", + "GPT模型列表": "Список моделей GPT", + "SoVITS模型列表": "Список моделей SoVITS", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "Заполните каталог, где находятся аудиофайлы после нарезки! Полный путь к читаемому аудиофайлу = этот каталог + имя файла волновой формы в файле .list (не полный путь).", + "音频自动切分输入路径,可文件可文件夹": "Путь ввода для автоматического разделения аудио, может быть файлом или папкой", + "切分后的子音频的输出根目录": "Корневой каталог вывода для подаудио после разделения", + "怎么切": "Как разрезать", + "不切": "Не разрезать", + "凑四句一切": "Собрать четыре предложения и разрезать", + "按英文句号.切": "Разрезать по английской точке.", + "threshold:音量小于这个值视作静音的备选切割点": "threshold:Значение громкости ниже этого считается тишиной для альтернативной точки разреза", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:Минимальная длительность каждого сегмента, если первый сегмент слишком короткий, он объединяется со следующими сегментами до превышения этого значения", + "max_duration:每段最长多长": "max_duration:Максимальная длительность каждого сегмента", + "min_interval:最短切割间隔": "min_interval:Минимальный интервал разреза", + "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:Как рассчитывается кривая громкости, чем меньше, тем выше точность и больше вычислительная нагрузка (большая точность не всегда означает лучший результат)", + "max_sil_kept:切完后静音最多留多长": "max_sil_kept:Максимальная длительность тишины 
после разреза", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Метод обработки слишком коротких аудио, галочка означает объединение, без галочки — отбрасывание", + "目标响度": "Целевая громкость", + "峰值响度": "Пиковая громкость", + "是否匹配响度": "Соответствие громкости", + "开启语音切割": "Включить разрезание речи", + "终止语音切割": "Прекратить разрезание речи", + "max:归一化后最大值多少": "max:Максимальное значение после нормализации", + "alpha_mix:混多少比例归一化后音频进来": "alpha_mix:Какая доля нормализованного аудио смешивается", + "切割使用的进程数": "Количество процессов, используемых для разрезания", + "语音切割进程输出信息": "Информация о процессе разрезания речи", + "0c-中文批量离线ASR工具": "0c-Инструмент для пакетной офлайн ASR на китайском", + "开启离线批量ASR": "Включить пакетную офлайн ASR", + "终止ASR进程": "Прекратить процесс ASR", + "批量ASR(中文only)输入文件夹路径": "Путь к папке ввода для пакетной ASR (только китайский)", + "ASR进程输出信息": "Информация о процессе ASR", + "0d-语音文本校对标注工具": "0d-Инструмент для коррекции и аннотации текста речи", + "是否开启打标WebUI": "Включить интерфейс веб-аннотации", + "打标数据标注文件路径": "Путь к файлу аннотации данных", + "打标工具进程输出信息": "Информация о процессе аннотации", + "1-GPT-SoVITS-TTS": "1-GPT-SoVITS-TTS", + "*实验/模型名": "*Название эксперимента/модели", + "显卡信息": "Информация о видеокарте", + "预训练的SoVITS-G模型路径": "Путь к предварительно обученной модели SoVITS-G", + "预训练的SoVITS-D模型路径": "Путь к предварительно обученной модели SoVITS-D", + "预训练的GPT模型路径": "Путь к предварительно обученной модели GPT", + "1A-训练集格式化工具": "1A-Инструмент для форматирования обучающего набора", + "输出logs/实验名目录下应有23456开头的文件和文件夹": "В директории logs/имя_эксперимента должны быть файлы и папки, начинающиеся с 23456", + "*文本标注文件": "*Файл текстовой аннотации", + "*训练集音频文件目录": "*Директория аудиофайлов обучающего набора", + "训练集音频文件目录 拼接 list文件里波形对应的文件名。": "Директория аудиофайлов обучающего набора соединяется с именами файлов волновой формы в файле list.", + "1Aa-文本内容": "1Aa-Содержание текста", + "GPU卡号以-分割,每个卡号一个进程": "Номера GPU разделяются дефисом, на каждый номер 
отдельный процесс", + "预训练的中文BERT模型路径": "Путь к предварительно обученной китайской модели BERT", + "开启文本获取": "Включить получение текста", + "终止文本获取进程": "Прекратить процесс получения текста", + "文本进程输出信息": "Информация о процессе обработки текста", + "1Ab-SSL自监督特征提取": "1Ab-Самоконтролируемое извлечение признаков SSL", + "预训练的SSL模型路径": "Путь к предварительно обученной модели SSL", + "开启SSL提取": "Включить извлечение SSL", + "终止SSL提取进程": "Прекратить процесс извлечения SSL", + "SSL进程输出信息": "Информация о процессе SSL", + "1Ac-语义token提取": "1Ac-Извлечение семантических токенов", + "开启语义token提取": "Включить извлечение семантических токенов", + "终止语义token提取进程": "Прекратить процесс извлечения семантических токенов", + "语义token提取进程输出信息": "Информация о процессе извлечения семантических токенов", + "1Aabc-训练集格式化一键三连": "1Aabc-Форматирование обучающего набора одним нажатием", + "开启一键三连": "Включить одно нажатие", + "终止一键三连": "Прекратить одно нажатие", + "一键三连进程输出信息": "Информация о процессе одного нажатия", + "1B-微调训练": "1B-Дообучение", + "1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。": "1Ba-Обучение SoVITS. Файлы моделей для распространения находятся в SoVITS_weights.", + "每张显卡的batch_size": "Размер пакета для каждой видеокарты", + "总训练轮数total_epoch,不建议太高": "Общее количество эпох обучения total_epoch, не рекомендуется слишком высокое", + "文本模块学习率权重": "Веса скорости обучения текстового модуля", + "保存频率save_every_epoch": "Частота сохранения save_every_epoch", + "是否仅保存最新的ckpt файлы для экономии места на диске": "Сохранять только последние файлы ckpt для экономии дискового пространства", + "是否在 каждом этапе сохранения сохранять конечную модель в папку weights": "Сохранять ли конечную модель в папку weights при каждом этапе сохранения", + "开启SoVITS训练": "Включить обучение SoVITS", + "终止SoVITS训练": "Прекратить обучение SoVITS", + "SoVITS训练进程输出信息": "Информация о процессе обучения SoVITS", + "1Bb-GPT训练。用于分享的 модель файлы выводятся в GPT_weights.": "1Bb-Обучение GPT. 
Файлы моделей для распространения находятся в GPT_weights.", + "总训练轮数total_epoch": "Общее количество эпох обучения total_epoch", + "开启GPT训练": "Включить обучение GPT", + "终止GPT训练": "Прекратить обучение GPT", + "GPT训练进程输出信息": "Информация о процессе обучения GPT", + "1C-推理": "1C-Инференс", + "选择训练完存放在SoVITS_weights и GPT_weights под модели. Умолчанию одна является базовой моделью, используется для тестирования Zero Shot TTS в течение 5 секунд.": "Выберите модели, размещенные в SoVITS_weights и GPT_weights после обучения. По умолчанию одна из них является базовой, используется для тестирования Zero Shot TTS в течение 5 секунд.", + "*GPT模型列表": "*Список моделей GPT", + "*SoVITS模型列表": "*Список моделей SoVITS", + "GPU卡号, только одно целое число": "Номер GPU, только одно целое число", + "刷新 модель пути": "Обновить путь модели", + "是否开启TTS推理WebUI": "Включить TTS инференс WebUI", + "TTS推理WebUI进程输出信息": "Информация о процессе TTS инференса WebUI", + "2-GPT-SoVITS-变声": "2-GPT-SoVITS-переозвучивание", + "施工中,请静候佳音": "В разработке, ожидайте хороших новостей", + "参考音频在3~10秒范围外,请更换!": "Референтное аудио вне диапазона 3~10 секунд, пожалуйста, замените!", + "请上传3~10秒内参考音频,超过会报错!": "Пожалуйста, загрузите референтное аудио длительностью от 3 до 10 секунд, иначе будет ошибка!", + "TTS推理进程已开启": "Процесс TTS-инференции запущен", + "TTS推理进程已关闭": "Процесс TTS-инференции остановлен", + "打标工具WebUI已开启": "WebUI инструмента маркировки запущен", + "打标工具WebUI已关闭": "WebUI инструмента маркировки остановлен", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Это программное обеспечение распространяется под лицензией MIT, автор не имеет никакого контроля над программным обеспечением, пользователи программного обеспечения и те, кто распространяет звук, экспортированный программным обеспечением, несут полную ответственность. 
Если вы не согласны с этими условиями, вы не можете использовать или ссылаться на любой код или файл в пакете программного обеспечения. См. LICENSE в корневом каталоге.", + "*请上传并填写参考信息": "*Пожалуйста, загрузите и заполните референтные данные", + "*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。": "*Пожалуйста, введите текст цели для синтеза. Для смешанных китайско-английских текстов выберите китайский, для смешанных японско-английских текстов выберите японский, смешанные китайско-японские тексты пока не поддерживаются, тексты на нецелевых языках будут автоматически отброшены.", + "ASR任务开启:%s": "Задача ASR запущена: %s", + "GPT训练完成": "Тренировка GPT завершена", + "GPT训练开始:%s": "Тренировка GPT начата: %s", + "SSL提取进程执行中": "Процесс извлечения SSL выполняется", + "SSL提取进程结束": "Процесс извлечения SSL завершен", + "SoVITS训练完成": "Тренировка SoVITS завершена", + "SoVITS训练开始:%s": "Тренировка SoVITS начата: %s", + "一键三连中途报错": "Ошибка в процессе одного клика", + "一键三连进程结束": "Процесс одного клика завершен", + "中文": "Китайский", + "凑50字一切": "Соберите все в 50 символов", + "凑五句一切": "Соберите все в пять предложений", + "切分后文本": "Текст после разделения", + "切割执行中": "Выполняется разрезание", + "切割结束": "Разрезание завершено", + "参考音频的文本": "Текст референтного аудио", + "参考音频的语种": "Язык референтного аудио", + "合成语音": "Синтезированный голос", + "后续将支持混合语种编码文本输入。": "Впоследствии будет поддерживаться ввод текста с кодировкой смешанных языков.", + "已有正在进行的ASR任务,需先终止才能开启下一次任务": "Существует выполняемая задача ASR, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的GPT训练任务,需先终止才能开启下一次任务": "Существует выполняемая задача тренировки GPT, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的SSL提取任务,需先终止才能开启下一次任务": "Существует выполняемая задача извлечения SSL, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务": "Существует выполняемая задача тренировки SoVITS, необходимо сначала 
завершить ее, прежде чем начать следующую задачу", + "已有正在进行的一键三连任务,需先终止才能开启下一次任务": "Существует выполняемая задача одного клика, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的切割任务,需先终止才能开启下一次任务": "Существует выполняемая задача разрезания, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的文本任务,需先终止才能开启下一次任务": "Существует выполняемая текстовая задача, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已有正在进行的语义token提取任务,需先终止才能开启下一次任务": "Существует выполняемая задача извлечения семантических токенов, необходимо сначала завершить ее, прежде чем начать следующую задачу", + "已终止ASR进程": "Процесс ASR прерван", + "已终止GPT训练": "Тренировка GPT прервана", + "已终止SoVITS训练": "Тренировка SoVITS прервана", + "已终止所有1a进程": "Все процессы 1a прерваны", + "已终止所有1b进程": "Все процессы 1b прерваны", + "已终止所有一键三连进程": "Все процессы одного клика прерваны", + "已终止所有切割进程": "Все процессы разрезания прерваны", + "已终止所有语义token进程": "Все процессы извлечения семантических токенов прерваны", + "按中文句号。切": "Разделение по китайским точкам.", + "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。": "Инструмент для разделения текста. Слишком длинные тексты могут не давать хороших результатов синтеза, поэтому рекомендуется сначала их разделить. 
Синтез будет выполняться отдельно для каждого абзаца, а затем результаты будут соединены вместе.", + "文本进程执行中": "Текстовый процесс выполняется", + "文本进程结束": "Текстовый процесс завершен", + "日文": "Японский", + "英文": "Английский", + "语义token提取进程执行中": "Процесс извлечения семантических токенов выполняется", + "语义token提取进程结束": "Процесс извлечения семантических токенов завершен", + "请上传参考音频": "Пожалуйста, загрузите референтное аудио", + "输入路径不存在": "Введенный путь не существует", + "输入路径存在但既不是文件也不是文件夹": "Указанный путь существует, но не является ни файлом, ни папкой", + "输出的语音": "Выводимый звук", + "进度:1a-done": "Прогресс: 1a-завершено", + "进度:1a-done, 1b-ing": "Прогресс: 1a-завершено, 1b-выполняется", + "进度:1a-ing": "Прогресс: 1a-выполняется", + "进度:1a1b-done": "Прогресс: 1a1b-завершено", + "进度:1a1b-done, 1cing": "Прогресс: 1a1b-завершено, 1c-выполняется", + "进度:all-done": "Прогресс: все завершено", + "需要合成的切分前文本": "Текст для синтеза до разделения", + "需要合成的文本": "Текст для синтеза", + "需要合成的语种": "Язык для синтеза", ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音": "Если значение больше 3: применить медианную фильтрацию к вытащенным тональностям. Значение контролирует радиус фильтра и может уменьшить излишнее дыхание.", "A模型权重": "Весы (w) модели А:", "A模型路径": "Путь к модели А:", @@ -31,7 +208,6 @@ "保存名": "Имя файла для сохранения:", "保存的文件名, 默认空为和源文件同名": "Название сохранённого файла (по умолчанию: такое же, как и у входного):", "保存的模型名不带后缀": "Имя файла модели для сохранения (без расширения):", - "保存频率save_every_epoch": "Частота сохранения (save_every_epoch):", "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果": "Защитить глухие согласные и звуки дыхания для предотвращения артефактов, например, разрывания в электронной музыке. Поставьте на 0.5, чтобы выключить. 
Уменьшите значение для повышения защиты, но учтите, что при этом может ухудшиться точность индексирования:", "修改": "Изменить", "修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Изменить информацию о модели (работает только с маленькими моделями, взятыми из папки 'weights')", @@ -55,9 +231,7 @@ "常见问题解答": "ЧаВо (часто задаваемые вопросы)", "常规设置": "Основные настройки", "开始音频转换": "Начать конвертацию аудио", - "很遗憾您这没有能用的显卡来支持您训练": "К сожалению, у вас нету графического процессора, который поддерживает обучение моделей.", "性能设置": "Настройки быстроты", - "总训练轮数total_epoch": "Полное количество эпох (total_epoch):", "批量推理": "批量推理", "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "Массовое преобразование. Введите путь к папке, в которой находятся файлы для преобразования голоса или выгрузите несколько аудиофайлов. Сконвертированные файлы будут сохранены в указанной папке (по умолчанию: 'opt').", "指定输出主人声文件夹": "Путь к папке для сохранения вокала:", @@ -68,11 +242,7 @@ "提取": "Создать модель", "提取音高和处理数据使用的CPU进程数": "Число процессов ЦП, используемое для оценки высоты голоса и обработки данных:", "是": "Да", - "是否仅保存最新的ckpt文件以节省硬盘空间": "Сохранять только последний файл '.ckpt', чтобы сохранить место на диске:", - "是否在每次保存时间点将最终小模型保存至weights文件夹": "Сохранять маленькую финальную модель в папку 'weights' на каждой точке сохранения:", "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "Кэшировать все тренировочные сеты в видеопамять. Кэширование маленький датасетов (меньше 10 минут) может ускорить тренировку, но кэширование больших, наоборот, займёт много видеопамяти и не сильно ускорит тренировку:", - "显卡信息": "Информация о графических процессорах (GPUs):", - "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Это программное обеспечение с открытым исходным кодом распространяется по лицензии MIT. Автор никак не контролирует это программное обеспечение. Пользователи, которые используют эту программу и распространяют аудиозаписи, полученные с помощью этой программы, несут полную ответственность за это. Если вы не согласны с этим, вы не можете использовать какие-либо коды и файлы в рамках этой программы или ссылаться на них. Подробнее в файле Agreement-LICENSE.txt в корневом каталоге программы.", "查看": "Просмотреть информацию", "查看模型信息(仅支持weights文件夹下提取的小模型文件)": "Просмотреть информацию о модели (работает только с маленькими моделями, взятыми из папки 'weights')", "检索特征占比": "Соотношение поиска черт:", @@ -85,7 +255,6 @@ "模型版本型号": "Версия архитектуры модели:", "模型融合, 可用于测试音色融合": "Слияние моделей, может быть использовано для проверки слияния тембра", "模型路径": "Путь к папке:", - "每张显卡的batch_size": "Размер пачки для GPU:", "淡入淡出长度": "Длина затухания", "版本": "Версия архитектуры модели:", "特征提取": "Извлечь черты", diff --git a/i18n/locale/tr_TR.json b/i18n/locale/tr_TR.json index bd1c17b34..0a1b72753 100644 --- a/i18n/locale/tr_TR.json +++ b/i18n/locale/tr_TR.json @@ -1,4 +1,181 @@ { + "很遗憾您这没有能用的显卡来支持您训练": "Maalesef eğitim için kullanabileceğiniz bir ekran kartınız yok", + "UVR5已开启": "UVR5 açıldı", + "UVR5已关闭": "UVR5 kapandı", + "输入文件夹路径": "Dosya klasörü yolu girin", + "输出文件夹路径": "Çıktı klasörü yolu", + "ASR 模型": "ASR modeli", + "ASR 模型尺寸": "ASR model boyutu", + "ASR 语言设置": "ASR dil ayarları", + "模型切换": "Model değiştirme", + "是否开启dpo训练选项(实验性)": "dpo eğitim seçeneği açılsın mı? (deneysel)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "Referans metni olmayan mod açık. 
Referans metni doldurulmazsa bu mod otomatik olarak açılır.", + "使用无参考文本模式时建议使用微调的GPT": "Referans metni olmayan modda, ince ayarlı GPT kullanılması önerilir", + "后续将支持转音素、手工修改音素、语音合成分步执行。": "İlerleyen zamanlarda fonem dönüştürme, manuel fonem düzenleme ve adım adım ses sentezi desteklenecek.", + "gpt采样参数(无参考文本时不要太低):": "gpt örnekleme parametresi (referans metin olmadığında çok düşük olmamalı):", + "按标点符号切": "Noktalama işaretlerine göre kes", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Bu yazılım MIT lisansı ile açık kaynaktır, yazar yazılım üzerinde herhangi bir kontrol gücüne sahip değildir, yazılımı kullanıcılar ve yazılım tarafından üretilen sesleri yayınlayanlar tüm sorumluluğu üstlenir.
Eğer bu şartları kabul etmiyorsanız, yazılım paketindeki hiçbir kodu veya dosyayı kullanamaz veya atıfta bulunamazsınız. Ayrıntılar için ana dizindeki LICENSE'ı görün.", + "0-前置数据集获取工具": "0-Ön veri seti alma aracı", + "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5 vokal eşlik ayırma & yankıyı giderme gecikme aracı", + "是否开启UVR5-WebUI": "UVR5-WebUI açılsın mı", + "UVR5进程输出信息": "UVR5 işlem çıktı bilgisi", + "0b-语音切分工具": "0b-Ses bölme aracı", + ".list标注文件的路径": ".list etiketleme dosyasının yolu", + "GPT模型列表": "GPT model listesi", + "SoVITS模型列表": "SoVITS model listesi", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "Kesildikten sonra ses dosyalarının bulunduğu klasörü doldurun! Okunan ses dosyasının tam yolu=bu klasör-list dosyasındaki dalgaların dosya adları ile birleştirilmiş hali (tam yol değil).", + "音频自动切分输入路径,可文件可文件夹": "Ses otomatik bölme giriş yolu, dosya veya klasör olabilir", + "切分后的子音频的输出根目录": "Bölündükten sonra alt ses dosyalarının çıktı kök dizini", + "怎么切": "Nasıl kesilir", + "不切": "Kesme", + "凑四句一切": "Dört cümleyi bir araya getirip kes", + "按英文句号.切": "İngilizce nokta işaretine göre kes", + "threshold:音量小于这个值视作静音的备选切割点": "threshold:Ses bu değerden düşükse sessiz olarak kabul edilen alternatif kesim noktası", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:Her parçanın minimum uzunluğu ne kadar, eğer ilk parça çok kısa ise sonraki parçalarla birleştirilir ta ki bu değeri aşana kadar", + "max_duration:每段最长多长": "max_duration:Her parçanın maksimum uzunluğu ne kadar", + "min_interval:最短切割间隔": "min_interval:Minimum kesim aralığı", + "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:Ses seviyesi eğrisi nasıl hesaplanır, ne kadar küçükse hassasiyet o kadar yüksek ve hesaplama yükü o kadar artar (hassasiyet arttıkça etki mutlaka daha iyi olmaz)", + "max_sil_kept:切完后静音最多留多长": "max_sil_kept:Kesimden sonra en fazla ne kadar sessizlik bırakılır", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "Çok kısa ses dosyalarının işlenme yöntemi, seçilirse birleştirilir, 
seçilmezse atılır", + "目标响度": "Hedef ses seviyesi", + "峰值响度": "Zirve ses seviyesi", + "是否匹配响度": "Ses seviyesi eşleştirilsin mi", + "开启语音切割": "Ses kesimi başlat", + "终止语音切割": "Ses kesimini durdur", + "max:归一化后最大值多少": "max:Normalizasyondan sonra maksimum değer ne kadar", + "alpha_mix:混多少比例归一化后音频进来": "alpha_mix:Normalizasyondan sonraki sesin ne kadarlık bir oranı karıştırılsın", + "切割使用的进程数": "Kesim için kullanılan işlem sayısı", + "语音切割进程输出信息": "Ses kesim işlemi çıktı bilgisi", + "0c-中文批量离线ASR工具": "0c-Çince toplu offline ASR aracı", + "开启离线批量ASR": "Offline toplu ASR başlat", + "终止ASR进程": "ASR işlemini durdur", + "批量ASR(中文only)输入文件夹路径": "Toplu ASR (sadece Çince) giriş dosya klasörü yolu", + "ASR进程输出信息": "ASR işlemi çıktı bilgisi", + "0d-语音文本校对标注工具": "0d-Ses ve metin düzeltme etiketleme aracı", + "是否开启打标WebUI": "Etiketleme WebUI'si başlatılsın mı", + "打标数据标注文件路径": "Etiketleme veri etiketleme dosya yolu", + "打标工具进程输出信息": "Etiketleme aracı işlemi çıktı bilgisi", + "1-GPT-SoVITS-TTS": "1-GPT-SoVITS-TTS", + "*实验/模型名": "*Deney/model adı", + "显卡信息": "Ekran kartı bilgisi", + "预训练的SoVITS-G模型路径": "Ön eğitilmiş SoVITS-G model yolu", + "预训练的SoVITS-D模型路径": "Ön eğitilmiş SoVITS-D model yolu", + "预训练的GPT模型路径": "Ön eğitilmiş GPT model yolu", + "1A-训练集格式化工具": "1A-Eğitim seti formatlama aracı", + "输出logs/实验名目录下应有23456开头的文件和文件夹": "Çıktı logs/deney adı dizininde 23456 ile başlayan dosya ve klasörler olmalı", + "*文本标注文件": "*Metin etiketleme dosyası", + "*训练集音频文件目录": "*Eğitim seti ses dosyası dizini", + "训练集音频文件目录 拼接 list文件里波形对应的文件名。": "Eğitim seti ses dosyası dizini list dosyasındaki dalgaların dosya adları ile birleştirilir.", + "1Aa-文本内容": "1Aa-Metin içeriği", + "GPU卡号以-分割,每个卡号一个进程": "GPU kart numaraları - ile ayrılır, her kart numarası için bir işlem", + "预训练的中文BERT模型路径": "Ön eğitilmiş Çince BERT model yolu", + "开启文本获取": "Metin alma başlat", + "终止文本获取进程": "Metin alma işlemini durdur", + "文本进程输出信息": "Metin işlemi çıktı bilgisi", + "1Ab-SSL自监督特征提取": "1Ab-SSL kendi kendine denetimli 
özellik çıkarma", + "预训练的SSL模型路径": "Ön eğitilmiş SSL model yolu", + "开启SSL提取": "SSL çıkarmayı başlat", + "终止SSL提取进程": "SSL çıkarma işlemini durdur", + "SSL进程输出信息": "SSL işlemi çıktı bilgisi", + "1Ac-语义token提取": "1Ac-Anlamsal token çıkarma", + "开启语义token提取": "Anlamsal token çıkarmayı başlat", + "终止语义token提取进程": "Anlamsal token çıkarma işlemini durdur", + "语义token提取进程输出信息": "Anlamsal token çıkarma işlemi çıktı bilgisi", + "1Aabc-训练集格式化一键三连": "1Aabc-Eğitim seti formatlama tek tuşla üçleme", + "开启一键三连": "Tek tuşla üçlemeyi başlat", + "终止一键三连": "Tek tuşla üçlemeyi durdur", + "一键三连进程输出信息": "Tek tuşla üçleme işlemi çıktı bilgisi", + "1B-微调训练": "1B-Fine-tuning eğitimi", + "1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。": "1Ba-SoVITS eğitimi. Paylaşım için model dosyaları SoVITS_weights altında çıkarılır.", + "每张显卡的batch_size": "Her bir ekran kartı için batch_size", + "总训练轮数total_epoch,不建议太高": "Toplam eğitim turu sayısı total_epoch, çok yüksek önerilmez", + "文本模块学习率权重": "Metin modülü öğrenme oranı ağırlığı", + "保存频率save_every_epoch": "Kayıt sıklığı save_every_epoch", + "是否仅保存最新的ckpt文件以节省硬盘空间": "Sadece en yeni ckpt dosyasını kaydederek disk alanından tasarruf edilsin mi", + "是否在每次保存时间点将最终小模型保存至weights文件夹": "Her kayıt zamanında son küçük modelin weights klasörüne kaydedilmesi gerekiyor mu", + "开启SoVITS训练": "SoVITS eğitimini başlat", + "终止SoVITS训练": "SoVITS eğitimini durdur", + "SoVITS训练进程输出信息": "SoVITS eğitimi işlemi çıktı bilgisi", + "1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。": "1Bb-GPT eğitimi. Paylaşım için model dosyaları GPT_weights altında çıkarılır.", + "总训练轮数total_epoch": "Toplam eğitim turu sayısı total_epoch", + "开启GPT训练": "GPT eğitimini başlat", + "终止GPT训练": "GPT eğitimini durdur", + "GPT训练进程输出信息": "GPT eğitimi işlemi çıktı bilgisi", + "1C-推理": "1C-Çıkarım", + "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。": "Eğitimi tamamlanmış ve SoVITS_weights ile GPT_weights altına kaydedilmiş modeli seçin. 
Varsayılan bir temel modeldir, 5 saniyelik Zero Shot TTS deneyimi için kullanılır.", + "*GPT模型列表": "*GPT model listesi", + "*SoVITS模型列表": "*SoVITS model listesi", + "GPU卡号,只能填1个整数": "GPU kart numarası, sadece bir tamsayı girilebilir", + "刷新模型路径": "Model yolu yenile", + "是否开启TTS推理WebUI": "TTS çıkarımı WebUI'si başlatılsın mı", + "TTS推理WebUI进程输出信息": "TTS çıkarımı WebUI işlemi çıktı bilgisi", + "2-GPT-SoVITS-变声": "2-GPT-SoVITS-Ses Değiştirme", + "施工中,请静候佳音": "Yapım aşamasında, lütfen iyi haberler için bekleyin", + "参考音频在3~10秒范围外,请更换!": "Referans ses dosyası 3~10 saniye aralığının dışında, lütfen değiştirin!", + "请上传3~10秒内参考音频,超过会报错!": "Lütfen 3~10 saniye arasında bir referans ses dosyası yükleyin, aşım durumunda hata verilecektir!", + "TTS推理进程已开启": "TTS çıkarım işlemi başlatıldı", + "TTS推理进程已关闭": "TTS çıkarım işlemi kapatıldı", + "打标工具WebUI已开启": "Etiketleme aracı WebUI'si açıldı", + "打标工具WebUI已关闭": "Etiketleme aracı WebUI'si kapatıldı", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Bu yazılım MIT lisansı ile açık kaynaklıdır, yazarın yazılım üzerinde herhangi bir kontrolü yoktur, yazılımı kullananlar ve yazılım tarafından üretilen sesleri yayınlayanlar tüm sorumluluğu üstlenir. Bu şartları kabul etmiyorsanız, yazılım paketindeki hiçbir kod veya dosyayı kullanamaz veya referans veremezsiniz. Ayrıntılar için ana dizindeki LICENSE'a bakın.", + "*请上传并填写参考信息": "*Lütfen referans bilgilerini yükleyin ve doldurun", + "*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。": "*Lütfen sentezlenmesi gereken hedef metni girin. 
Çince-İngilizce karışımı için Çince, Japonca-İngilizce karışımı için Japonca seçin, Çince-Japonca karışımı şu an desteklenmiyor, hedef dili olmayan metinler otomatik olarak atılacaktır.", + "ASR任务开启:%s": "ASR görevi başlatıldı: %s", + "GPT训练完成": "GPT eğitimi tamamlandı", + "GPT训练开始:%s": "GPT eğitimi başladı: %s", + "SSL提取进程执行中": "SSL çıkarma işlemi devam ediyor", + "SSL提取进程结束": "SSL çıkarma işlemi sona erdi", + "SoVITS训练完成": "SoVITS eğitimi tamamlandı", + "SoVITS训练开始:%s": "SoVITS eğitimi başladı: %s", + "一键三连中途报错": "Tek tuşla üçleme işlemi sırasında hata oluştu", + "一键三连进程结束": "Tek tuşla üçleme işlemi sona erdi", + "中文": "Çince", + "凑50字一切": "50 karakter birleştir ve kes", + "凑五句一切": "Beş cümle birleştir ve kes", + "切分后文本": "Bölündükten sonra metin", + "切割执行中": "Kesim işlemi devam ediyor", + "切割结束": "Kesim işlemi sona erdi", + "参考音频的文本": "Referans ses dosyasının metni", + "参考音频的语种": "Referans ses dosyasının dili", + "合成语音": "Ses sentezi", + "后续将支持混合语种编码文本输入。": "İlerleyen zamanlarda karışık dil kodlaması ile metin girişi desteklenecek.", + "已有正在进行的ASR任务,需先终止才能开启下一次任务": "Devam eden bir ASR görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的GPT训练任务,需先终止才能开启下一次任务": "Devam eden bir GPT eğitim görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的SSL提取任务,需先终止才能开启下一次任务": "Devam eden bir SSL çıkarma görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务": "Devam eden bir SoVITS eğitim görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的一键三连任务,需先终止才能开启下一次任务": "Devam eden bir tek tuşla üçleme görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的切割任务,需先终止才能开启下一次任务": "Devam eden bir kesim görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的文本任务,需先终止才能开启下一次任务": "Devam eden bir metin görevi var, bir sonraki görevi 
başlatmadan önce önceki görevi sonlandırmalısınız", + "已有正在进行的语义token提取任务,需先终止才能开启下一次任务": "Devam eden bir semantik token çıkarma görevi var, bir sonraki görevi başlatmadan önce önceki görevi sonlandırmalısınız", + "已终止ASR进程": "ASR işlemi sonlandırıldı", + "已终止GPT训练": "GPT eğitimi sonlandırıldı", + "已终止SoVITS训练": "SoVITS eğitimi sonlandırıldı", + "已终止所有1a进程": "Tüm 1a işlemleri sonlandırıldı", + "已终止所有1b进程": "Tüm 1b işlemleri sonlandırıldı", + "已终止所有一键三连进程": "Tüm tek tuşla üçleme işlemleri sonlandırıldı", + "已终止所有切割进程": "Tüm kesim işlemleri sonlandırıldı", + "已终止所有语义token进程": "Tüm semantik token işlemleri sonlandırıldı", + "按中文句号。切": "Çince nokta işaretine göre kes", + "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。": "Metin bölme aracı. Çok uzun metinlerin sentezi her zaman iyi sonuçlar vermez, bu yüzden önerilen önce kesmektir. Sentez, metnin yeni satırlarına göre ayrı ayrı yapılır ve daha sonra birleştirilir.", + "文本进程执行中": "Metin işlemi devam ediyor", + "文本进程结束": "Metin işlemi sona erdi", + "日文": "Japonca", + "英文": "İngilizce", + "语义token提取进程执行中": "Semantik token çıkarma işlemi devam ediyor", + "语义token提取进程结束": "Semantik token çıkarma işlemi sona erdi", + "请上传参考音频": "Lütfen referans ses dosyası yükleyin", + "输入路径不存在": "Giriş yolu mevcut değil", + "输入路径存在但既不是文件也不是文件夹": "Giriş yolu mevcut ama ne dosya ne de klasör", + "输出的语音": "Çıktı sesi", + "进度:1a-done": "İlerleme: 1a-tamamlandı", + "进度:1a-done, 1b-ing": "İlerleme: 1a-tamamlandı, 1b-devam ediyor", + "进度:1a-ing": "İlerleme: 1a-devam ediyor", + "进度:1a1b-done": "İlerleme: 1a1b-tamamlandı", + "进度:1a1b-done, 1cing": "İlerleme: 1a1b-tamamlandı, 1c-devam ediyor", + "进度:all-done": "İlerleme: hepsi-tamamlandı", + "需要合成的切分前文本": "Bölünmeden önce sentezlenmesi gereken metin", + "需要合成的文本": "Sentezlenmesi gereken metin", + "需要合成的语种": "Sentezlenmesi gereken dil", ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音": "Eğer >=3 ise, elde edilen pitch sonuçlarına median filtreleme uygula. 
Bu değer, filtre yarıçapını temsil eder ve nefesliliği azaltabilir.", "A模型权重": "A Modeli Ağırlığı:", "A模型路径": "A Modeli Yolu:", @@ -31,7 +208,6 @@ "保存名": "Kaydetme Adı:", "保存的文件名, 默认空为和源文件同名": "Kaydedilecek dosya adı (varsayılan: kaynak dosya ile aynı):", "保存的模型名不带后缀": "Kaydedilecek model adı (uzantı olmadan):", - "保存频率save_every_epoch": "Kaydetme sıklığı (save_every_epoch):", "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果": "Sessiz ünsüzleri ve nefes seslerini koruyarak elektronik müzikte yırtılma gibi sanal hataların oluşmasını engeller. 0.5 olarak ayarlandığında devre dışı kalır. Değerin azaltılması korumayı artırabilir, ancak indeksleme doğruluğunu azaltabilir:", "修改": "Düzenle", "修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini düzenle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)", @@ -55,9 +231,7 @@ "常见问题解答": "Sıkça Sorulan Sorular (SSS)", "常规设置": "Genel ayarlar", "开始音频转换": "Ses dönüştürmeyi başlat", - "很遗憾您这没有能用的显卡来支持您训练": "Maalesef, eğitiminizi desteklemek için uyumlu bir GPU bulunmamaktadır.", "性能设置": "Performans ayarları", - "总训练轮数total_epoch": "Toplam eğitim turu (total_epoch):", "批量推理": "批量推理", "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "Toplu dönüştür. Dönüştürülecek ses dosyalarının bulunduğu klasörü girin veya birden çok ses dosyasını yükleyin. Dönüştürülen ses dosyaları belirtilen klasöre ('opt' varsayılan olarak) dönüştürülecektir", "指定输出主人声文件夹": "Vokal için çıkış klasörünü belirtin:", @@ -68,11 +242,7 @@ "提取": "Çıkart", "提取音高和处理数据使用的CPU进程数": "Ses yüksekliği çıkartmak (Pitch) ve verileri işlemek için kullanılacak CPU işlemci sayısı:", "是": "Evet", - "是否仅保存最新的ckpt文件以节省硬盘空间": "Sadece en son '.ckpt' dosyasını kaydet:", - "是否在每次保存时间点将最终小模型保存至weights文件夹": "Her kaydetme noktasında son küçük bir modeli 'weights' klasörüne kaydetmek için:", "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "Tüm eğitim verilerini GPU belleğine önbelleğe alıp almayacağınızı belirtin. 
Küçük veri setlerini (10 dakikadan az) önbelleğe almak eğitimi hızlandırabilir, ancak büyük veri setlerini önbelleğe almak çok fazla GPU belleği tüketir ve çok fazla hız artışı sağlamaz:", - "显卡信息": "GPU Bilgisi", - "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "Bu yazılım, MIT lisansı altında açık kaynaklıdır. Yazarın yazılım üzerinde herhangi bir kontrolü yoktur. Yazılımı kullanan ve yazılım tarafından dışa aktarılan sesleri dağıtan kullanıcılar sorumludur.
Eğer bu maddeyle aynı fikirde değilseniz, yazılım paketi içindeki herhangi bir kod veya dosyayı kullanamaz veya referans göremezsiniz. Detaylar için kök dizindeki Agreement-LICENSE.txt dosyasına bakınız.", "查看": "Görüntüle", "查看模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini görüntüle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)", "检索特征占比": "Arama özelliği oranı (vurgu gücünü kontrol eder, çok yüksek olması sanal etkilere neden olur)", @@ -85,7 +255,6 @@ "模型版本型号": "Model mimari versiyonu:", "模型融合, 可用于测试音色融合": "Model birleştirme, ses rengi birleştirmesi için kullanılabilir", "模型路径": "Model Yolu:", - "每张显卡的batch_size": "Her GPU için yığın boyutu (batch_size):", "淡入淡出长度": "Geçiş (Fade) uzunluğu", "版本": "Sürüm", "特征提取": "Özellik çıkartma", diff --git a/i18n/locale/zh_HK.json b/i18n/locale/zh_HK.json index 93aaff3ec..b02575366 100644 --- a/i18n/locale/zh_HK.json +++ b/i18n/locale/zh_HK.json @@ -1,4 +1,181 @@ { + "很遗憾您这没有能用的显卡来支持您训练": "很遺憾您這沒有能用的顯卡來支持您訓練", + "UVR5已开启": "UVR5已開啟", + "UVR5已关闭": "UVR5已關閉", + "输入文件夹路径": "輸入文件夾路徑", + "输出文件夹路径": "輸出文件夾路徑", + "ASR 模型": "ASR 模型", + "ASR 模型尺寸": "ASR 模型尺寸", + "ASR 语言设置": "ASR 語言設置", + "模型切换": "模型切換", + "是否开启dpo训练选项(实验性)": "是否開啟dpo訓練選項(實驗性)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "開啟無參考文本模式。不填參考文本亦相當於開啟。", + "使用无参考文本模式时建议使用微调的GPT": "使用無參考文本模式時建議使用微調的GPT", + "后续将支持转音素、手工修改音素、语音合成分步执行。": "後續將支持轉音素、手工修改音素、語音合成分步執行。", + "gpt采样参数(无参考文本时不要太低):": "gpt採樣參數(無參考文本時不要太低):", + "按标点符号切": "按標點符號切", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟件以MIT協議開源, 作者不對軟件具備任何控制力, 使用軟件者、傳播軟件導出的聲音者自負全責.
如不認可該條款, 則不能使用或引用軟件包內任何代碼和文件. 詳見根目錄LICENSE.", + "0-前置数据集获取工具": "0-前置數據集獲取工具", + "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5人聲伴奏分離&去混響去延遲工具", + "是否开启UVR5-WebUI": "是否開啟UVR5-WebUI", + "UVR5进程输出信息": "UVR5進程輸出信息", + "0b-语音切分工具": "0b-語音切分工具", + ".list标注文件的路径": ".list標註文件的路徑", + "GPT模型列表": "GPT模型列表", + "SoVITS模型列表": "SoVITS模型列表", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "填切割後音頻所在目錄!讀取的音頻文件完整路徑=該目錄-拼接-list文件裏波形對應的文件名(不是全路徑)。", + "音频自动切分输入路径,可文件可文件夹": "音頻自動切分輸入路徑,可文件可文件夾", + "切分后的子音频的输出根目录": "切分後的子音頻的輸出根目錄", + "怎么切": "怎麼切", + "不切": "不切", + "凑四句一切": "湊四句一切", + "按英文句号.切": "按英文句號.切", + "threshold:音量小于这个值视作静音的备选切割点": "threshold:音量小於這個值視作靜音的備選切割點", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:每段最短多長,如果第一段太短一直和後面段連起來直到超過這個值", + "max_duration:每段最长多长": "max_duration:每段最長多長", + "min_interval:最短切割间隔": "min_interval:最短切割間隔", + "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:怎麼算音量曲線,越小精度越大計算量越高(不是精度越大效果越好)", + "max_sil_kept:切完后静音最多留多长": "max_sil_kept:切完後靜音最多留多長", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "對於過短音頻的處理方法,勾選則合併,不勾選則拋棄", + "目标响度": "目標響度", + "峰值响度": "峰值響度", + "是否匹配响度": "是否匹配響度", + "开启语音切割": "開啟語音切割", + "终止语音切割": "終止語音切割", + "max:归一化后最大值多少": "max:歸一化後最大值多少", + "alpha_mix:混多少比例归一化后音频进来": "alpha_mix:混多少比例歸一化後音頻進來", + "切割使用的进程数": "切割使用的進程數", + "语音切割进程输出信息": "語音切割進程輸出信息", + "0c-中文批量离线ASR工具": "0c-中文批量離線ASR工具", + "开启离线批量ASR": "開啟離線批量ASR", + "终止ASR进程": "終止ASR進程", + "批量ASR(中文only)输入文件夹路径": "批量ASR(中文only)輸入文件夾路徑", + "ASR进程输出信息": "ASR進程輸出信息", + "0d-语音文本校对标注工具": "0d-語音文本校對標注工具", + "是否开启打标WebUI": "是否開啟打標WebUI", + "打标数据标注文件路径": "打標數據標注文件路徑", + "打标工具进程输出信息": "打標工具進程輸出信息", + "1-GPT-SoVITS-TTS": "1-GPT-SoVITS-TTS", + "*实验/模型名": "*實驗/模型名", + "显卡信息": "顯卡信息", + "预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑", + "预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑", + "预训练的GPT模型路径": "預訓練的GPT模型路徑", + "1A-训练集格式化工具": "1A-訓練集格式化工具", + "输出logs/实验名目录下应有23456开头的文件和文件夹": "輸出logs/實驗名目錄下應有23456開頭的文件和文件夾", + "*文本标注文件": "*文本標注文件", + "*训练集音频文件目录": "*訓練集音頻文件目錄", + "训练集音频文件目录 拼接 list文件里波形对应的文件名。": "訓練集音頻文件目錄 拼接 
list文件裏波形對應的文件名。", + "1Aa-文本内容": "1Aa-文本內容", + "GPU卡号以-分割,每个卡号一个进程": "GPU卡號以-分割,每個卡號一個進程", + "预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑", + "开启文本获取": "開啟文本獲取", + "终止文本获取进程": "終止文本獲取進程", + "文本进程输出信息": "文本進程輸出信息", + "1Ab-SSL自监督特征提取": "1Ab-SSL自監督特徵提取", + "预训练的SSL模型路径": "預訓練的SSL模型路徑", + "开启SSL提取": "開啟SSL提取", + "终止SSL提取进程": "終止SSL提取進程", + "SSL进程输出信息": "SSL進程輸出信息", + "1Ac-语义token提取": "1Ac-語義token提取", + "开启语义token提取": "開啟語義token提取", + "终止语义token提取进程": "終止語義token提取進程", + "语义token提取进程输出信息": "語義token提取進程輸出信息", + "1Aabc-训练集格式化一键三连": "1Aabc-訓練集格式化一鍵三連", + "开启一键三连": "開啟一鍵三連", + "终止一键三连": "終止一鍵三連", + "一键三连进程输出信息": "一鍵三連進程輸出信息", + "1B-微调训练": "1B-微調訓練", + "1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。": "1Ba-SoVITS訓練。用於分享的模型文件輸出在SoVITS_weights下。", + "每张显卡的batch_size": "每張顯卡的batch_size", + "总训练轮数total_epoch,不建议太高": "總訓練輪數total_epoch,不建議太高", + "文本模块学习率权重": "文本模塊學習率權重", + "保存频率save_every_epoch": "保存頻率save_every_epoch", + "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt文件以節省硬碟空間", + "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights文件夾", + "开启SoVITS训练": "開啟SoVITS訓練", + "终止SoVITS训练": "終止SoVITS訓練", + "SoVITS训练进程输出信息": "SoVITS訓練進程輸出信息", + "1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。": "1Bb-GPT訓練。用於分享的模型文件輸出在GPT_weights下。", + "总训练轮数total_epoch": "總訓練輪數total_epoch", + "开启GPT训练": "開啟GPT訓練", + "终止GPT训练": "終止GPT訓練", + "GPT训练进程输出信息": "GPT訓練進程輸出信息", + "1C-推理": "1C-推理", + "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。": "選擇訓練完存放在SoVITS_weights和GPT_weights下的模型。默認的一個是底模,體驗5秒Zero Shot TTS用。", + "*GPT模型列表": "*GPT模型列表", + "*SoVITS模型列表": "*SoVITS模型列表", + "GPU卡号,只能填1个整数": "GPU卡號,只能填1個整數", + "刷新模型路径": "刷新模型路徑", + "是否开启TTS推理WebUI": "是否開啟TTS推理WebUI", + "TTS推理WebUI进程输出信息": "TTS推理WebUI進程輸出信息", + "2-GPT-SoVITS-变声": "2-GPT-SoVITS-變聲", + "施工中,请静候佳音": "施工中,請靜候佳音", + "参考音频在3~10秒范围外,请更换!": "參考音頻在3~10秒範圍外,請更換!", + "请上传3~10秒内参考音频,超过会报错!": "請上傳3~10秒內參考音頻,超過會報錯!", + "TTS推理进程已开启": "TTS推理進程已開啟", + "TTS推理进程已关闭": "TTS推理進程已關閉", + "打标工具WebUI已开启": "打標工具WebUI已開啟", + "打标工具WebUI已关闭": "打標工具WebUI已關閉", + "本软件以MIT协议开源, 
作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟件以MIT協議開源,作者不對軟件具備任何控制力,使用軟件者、傳播軟件導出的聲音者自負全責。如不認可該條款,則不能使用或引用軟件包內任何代碼和文件。詳見根目錄LICENSE。", + "*请上传并填写参考信息": "*請上傳並填寫參考信息", + "*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。": "*請填寫需要合成的目標文本。中英混合選中文,日英混合選日文,中日混合暫不支持,非目標語言文本自動遺棄。", + "ASR任务开启:%s": "ASR任務開啟:%s", + "GPT训练完成": "GPT訓練完成", + "GPT训练开始:%s": "GPT訓練開始:%s", + "SSL提取进程执行中": "SSL提取進程執行中", + "SSL提取进程结束": "SSL提取進程結束", + "SoVITS训练完成": "SoVITS訓練完成", + "SoVITS训练开始:%s": "SoVITS訓練開始:%s", + "一键三连中途报错": "一鍵三連中途報錯", + "一键三连进程结束": "一鍵三連進程結束", + "中文": "中文", + "凑50字一切": "湊50字一切", + "凑五句一切": "湊五句一切", + "切分后文本": "切分後文本", + "切割执行中": "切割執行中", + "切割结束": "切割結束", + "参考音频的文本": "參考音頻的文本", + "参考音频的语种": "參考音頻的語種", + "合成语音": "合成語音", + "后续将支持混合语种编码文本输入。": "後續將支持混合語種編碼文本輸入。", + "已有正在进行的ASR任务,需先终止才能开启下一次任务": "已有正在進行的ASR任務,需先終止才能開啟下一次任務", + "已有正在进行的GPT训练任务,需先终止才能开启下一次任务": "已有正在進行的GPT訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的SSL提取任务,需先终止才能开启下一次任务": "已有正在進行的SSL提取任務,需先終止才能開啟下一次任務", + "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务": "已有正在進行的SoVITS訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的一键三连任务,需先终止才能开启下一次任务": "已有正在進行的一鍵三連任務,需先終止才能開啟下一次任務", + "已有正在进行的切割任务,需先终止才能开启下一次任务": "已有正在進行的切割任務,需先終止才能開啟下一次任務", + "已有正在进行的文本任务,需先终止才能开启下一次任务": "已有正在進行的文本任務,需先終止才能開啟下一次任務", + "已有正在进行的语义token提取任务,需先终止才能开启下一次任务": "已有正在進行的語義token提取任務,需先終止才能開啟下一次任務", + "已终止ASR进程": "已終止ASR進程", + "已终止GPT训练": "已終止GPT訓練", + "已终止SoVITS训练": "已終止SoVITS訓練", + "已终止所有1a进程": "已終止所有1a進程", + "已终止所有1b进程": "已終止所有1b進程", + "已终止所有一键三连进程": "已終止所有一鍵三連進程", + "已终止所有切割进程": "已終止所有切割進程", + "已终止所有语义token进程": "已終止所有語義token進程", + "按中文句号。切": "按中文句號。切", + "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。": "文本切分工具。太長的文本合成出來效果不一定好,所以太長建議先切。合成會根據文本的換行分開合成再拼起來。", + "文本进程执行中": "文本進程執行中", + "文本进程结束": "文本進程結束", + "日文": "日文", + "英文": "英文", + "语义token提取进程执行中": "語義token提取進程執行中", + "语义token提取进程结束": "語義token提取進程結束", + "请上传参考音频": "請上傳參考音頻", + "输入路径不存在": "輸入路徑不存在", + "输入路径存在但既不是文件也不是文件夹": "輸入路徑存在但既不是文件也不是文件夾", + "输出的语音": "輸出的語音", + "进度:1a-done": "進度:1a-done", + 
"进度:1a-done, 1b-ing": "進度:1a-done, 1b-ing", + "进度:1a-ing": "進度:1a-ing", + "进度:1a1b-done": "進度:1a1b-done", + "进度:1a1b-done, 1cing": "進度:1a1b-done, 1cing", + "进度:all-done": "進度:all-done", + "需要合成的切分前文本": "需要合成的切分前文本", + "需要合成的文本": "需要合成的文本", + "需要合成的语种": "需要合成的語種", ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音": ">=3則使用對harvest音高識別的結果使用中值濾波,數值為濾波半徑,使用可以削弱啞音", "A模型权重": "A模型權重", "A模型路径": "A模型路徑", @@ -31,7 +208,6 @@ "保存名": "儲存名", "保存的文件名, 默认空为和源文件同名": "儲存的檔案名,預設空為與來源檔案同名", "保存的模型名不带后缀": "儲存的模型名不帶副檔名", - "保存频率save_every_epoch": "保存頻率save_every_epoch", "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果": "保護清輔音和呼吸聲,防止電音撕裂等artifact,拉滿0.5不開啟,調低加大保護力度但可能降低索引效果", "修改": "修改", "修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)", @@ -55,9 +231,7 @@ "常见问题解答": "常見問題解答", "常规设置": "一般設定", "开始音频转换": "開始音訊轉換", - "很遗憾您这没有能用的显卡来支持您训练": "很遗憾您这没有能用的显卡来支持您训练", "性能设置": "效能設定", - "总训练轮数total_epoch": "總訓練輪數total_epoch", "批量推理": "批量推理", "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "批量轉換,輸入待轉換音頻資料夾,或上傳多個音頻檔案,在指定資料夾(默認opt)下輸出轉換的音頻。", "指定输出主人声文件夹": "指定输出主人声文件夹", @@ -68,11 +242,7 @@ "提取": "提取", "提取音高和处理数据使用的CPU进程数": "提取音高和處理數據使用的CPU進程數", "是": "是", - "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt檔案以節省硬碟空間", - "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights檔夾", "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "是否緩存所有訓練集至 VRAM。小於10分鐘的小數據可緩存以加速訓練,大數據緩存會爆 VRAM 也加不了多少速度", - "显卡信息": "顯示卡資訊", - "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", "查看": "查看", "查看模型信息(仅支持weights文件夹下提取的小模型文件)": "查看模型資訊(僅支援weights資料夾下提取的小模型檔案)", "检索特征占比": "檢索特徵佔比", @@ -85,7 +255,6 @@ "模型版本型号": "模型版本型號", "模型融合, 可用于测试音色融合": "模型融合,可用於測試音色融合", "模型路径": "模型路徑", - "每张显卡的batch_size": "每张显卡的batch_size", "淡入淡出长度": "淡入淡出長度", "版本": "版本", "特征提取": "特徵提取", diff --git a/i18n/locale/zh_SG.json b/i18n/locale/zh_SG.json index 93aaff3ec..2eca6fd75 100644 --- a/i18n/locale/zh_SG.json +++ b/i18n/locale/zh_SG.json @@ -1,4 +1,181 @@ { + "很遗憾您这没有能用的显卡来支持您训练": "很遺憾您這沒有能用的顯卡來支持您訓練", + "UVR5已开启": "UVR5已開啟", + "UVR5已关闭": "UVR5已關閉", + "输入文件夹路径": "輸入文件夾路徑", + "输出文件夹路径": "輸出文件夾路徑", + "ASR 模型": "ASR 模型", + "ASR 模型尺寸": "ASR 模型尺寸", + "ASR 语言设置": "ASR 語言設定", + "模型切换": "模型切換", + "是否开启dpo训练选项(实验性)": "是否開啟dpo訓練選項(實驗性)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "開啟無參考文本模式。不填參考文本亦相當於開啟。", + "使用无参考文本模式时建议使用微调的GPT": "使用無參考文本模式時建議使用微調的GPT", + "后续将支持转音素、手工修改音素、语音合成分步执行。": "後續將支持轉音素、手工修改音素、語音合成分步執行。", + "gpt采样参数(无参考文本时不要太低):": "gpt採樣參數(無參考文本時不要太低):", + "按标点符号切": "按標點符號切", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟件以MIT協議開源, 作者不對軟件具備任何控制力, 使用軟件者、傳播軟件導出的聲音者自負全責.
如不認可該條款, 則不能使用或引用軟件包內任何代碼和文件. 詳見根目錄LICENSE.", + "0-前置数据集获取工具": "0-前置數據集獲取工具", + "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5人聲伴奏分離&去混響去延遲工具", + "是否开启UVR5-WebUI": "是否開啟UVR5-WebUI", + "UVR5进程输出信息": "UVR5進程輸出資訊", + "0b-语音切分工具": "0b-語音切分工具", + ".list标注文件的路径": ".list標註文件的路徑", + "GPT模型列表": "GPT模型列表", + "SoVITS模型列表": "SoVITS模型列表", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "填切割後音頻所在目錄!讀取的音頻文件完整路徑=該目錄-拼接-list文件裡波形對應的文件名(不是全路徑)。", + "音频自动切分输入路径,可文件可文件夹": "音頻自動切分輸入路徑,可文件可文件夾", + "切分后的子音频的输出根目录": "切分後的子音頻的輸出根目錄", + "怎么切": "怎麼切", + "不切": "不切", + "凑四句一切": "湊四句一切", + "按英文句号.切": "按英文句號.切", + "threshold:音量小于这个值视作静音的备选切割点": "threshold:音量小於這個值視作靜音的備選切割點", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:每段最短多長,如果第一段太短一直和後面段連起來直到超過這個值", + "max_duration:每段最长多长": "max_duration:每段最長多長", + "min_interval:最短切割间隔": "min_interval:最短切割間隔", + "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:怎麼算音量曲線,越小精度越大計算量越高(不是精度越大效果越好)", + "max_sil_kept:切完后静音最多留多长": "max_sil_kept:切完後靜音最多留多長", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "對於過短音頻的處理方法,勾選則合併,不勾選則拋棄", + "目标响度": "目標響度", + "峰值响度": "峰值響度", + "是否匹配响度": "是否匹配響度", + "开启语音切割": "開啟語音切割", + "终止语音切割": "終止語音切割", + "max:归一化后最大值多少": "max:歸一化後最大值多少", + "alpha_mix:混多少比例归一化后音频进来": "alpha_mix:混多少比例歸一化後音頻進來", + "切割使用的进程数": "切割使用的進程數", + "语音切割进程输出信息": "語音切割進程輸出資訊", + "0c-中文批量离线ASR工具": "0c-中文批量離線ASR工具", + "开启离线批量ASR": "開啟離線批量ASR", + "终止ASR进程": "終止ASR進程", + "批量ASR(中文only)输入文件夹路径": "批量ASR(中文only)輸入文件夾路徑", + "ASR进程输出信息": "ASR進程輸出資訊", + "0d-语音文本校对标注工具": "0d-語音文本校對標註工具", + "是否开启打标WebUI": "是否開啟打標WebUI", + "打标数据标注文件路径": "打標數據標註文件路徑", + "打标工具进程输出信息": "打標工具進程輸出資訊", + "1-GPT-SoVITS-TTS": "1-GPT-SoVITS-TTS", + "*实验/模型名": "*實驗/模型名", + "显卡信息": "顯卡資訊", + "预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑", + "预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑", + "预训练的GPT模型路径": "預訓練的GPT模型路徑", + "1A-训练集格式化工具": "1A-訓練集格式化工具", + "输出logs/实验名目录下应有23456开头的文件和文件夹": "輸出logs/實驗名目錄下應有23456開頭的文件和文件夾", + "*文本标注文件": "*文本標註文件", + "*训练集音频文件目录": "*訓練集音頻文件目錄", + "训练集音频文件目录 拼接 list文件里波形对应的文件名。": "訓練集音頻文件目錄 拼接 
list文件裡波形對應的文件名。", + "1Aa-文本内容": "1Aa-文本內容", + "GPU卡号以-分割,每个卡号一个进程": "GPU卡號以-分割,每個卡號一個進程", + "预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑", + "开启文本获取": "開啟文本獲取", + "终止文本获取进程": "終止文本獲取進程", + "文本进程输出信息": "文本進程輸出資訊", + "1Ab-SSL自监督特征提取": "1Ab-SSL自監督特徵提取", + "预训练的SSL模型路径": "預訓練的SSL模型路徑", + "开启SSL提取": "開啟SSL提取", + "终止SSL提取进程": "終止SSL提取進程", + "SSL进程输出信息": "SSL進程輸出資訊", + "1Ac-语义token提取": "1Ac-語義token提取", + "开启语义token提取": "開啟語義token提取", + "终止语义token提取进程": "終止語義token提取進程", + "语义token提取进程输出信息": "語義token提取進程輸出資訊", + "1Aabc-训练集格式化一键三连": "1Aabc-訓練集格式化一鍵三連", + "开启一键三连": "開啟一鍵三連", + "终止一键三连": "終止一鍵三連", + "一键三连进程输出信息": "一鍵三連進程輸出資訊", + "1B-微调训练": "1B-微調訓練", + "1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。": "1Ba-SoVITS訓練。用於分享的模型文件輸出在SoVITS_weights下。", + "每张显卡的batch_size": "每張顯卡的batch_size", + "总训练轮数total_epoch,不建议太高": "總訓練輪數total_epoch,不建議太高", + "文本模块学习率权重": "文本模塊學習率權重", + "保存频率save_every_epoch": "保存頻率save_every_epoch", + "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt文件以節省硬碟空間", + "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights文件夾", + "开启SoVITS训练": "開啟SoVITS訓練", + "终止SoVITS训练": "終止SoVITS訓練", + "SoVITS训练进程输出信息": "SoVITS訓練進程輸出資訊", + "1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。": "1Bb-GPT訓練。用於分享的模型文件輸出在GPT_weights下。", + "总训练轮数total_epoch": "總訓練輪數total_epoch", + "开启GPT训练": "開啟GPT訓練", + "终止GPT训练": "終止GPT訓練", + "GPT训练进程输出信息": "GPT訓練進程輸出資訊", + "1C-推理": "1C-推理", + "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。": "選擇訓練完存放在SoVITS_weights和GPT_weights下的模型。默認的一個是底模,體驗5秒Zero Shot TTS用。", + "*GPT模型列表": "*GPT模型列表", + "*SoVITS模型列表": "*SoVITS模型列表", + "GPU卡号,只能填1个整数": "GPU卡號,只能填1個整數", + "刷新模型路径": "刷新模型路徑", + "是否开启TTS推理WebUI": "是否開啟TTS推理WebUI", + "TTS推理WebUI进程输出信息": "TTS推理WebUI進程輸出資訊", + "2-GPT-SoVITS-变声": "2-GPT-SoVITS-變聲", + "施工中,请静候佳音": "施工中,請靜候佳音", + "参考音频在3~10秒范围外,请更换!": "參考音頻在3~10秒範圍外,請更換!", + "请上传3~10秒内参考音频,超过会报错!": "請上傳3~10秒內參考音頻,超過會報錯!", + "TTS推理进程已开启": "TTS推理進程已開啟", + "TTS推理进程已关闭": "TTS推理進程已關閉", + "打标工具WebUI已开启": "打標工具WebUI已開啟", + "打标工具WebUI已关闭": "打標工具WebUI已關閉", + "本软件以MIT协议开源, 
作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟件以MIT協議開源, 作者不對軟件具備任何控制力, 使用軟件者、傳播軟件導出的聲音者自負全責. 如不認可該條款, 則不能使用或引用軟件包內任何代碼和文件. 詳見根目錄LICENSE.", + "*请上传并填写参考信息": "*請上傳並填寫參考信息", + "*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。": "*請填寫需要合成的目標文本。中英混合選中文,日英混合選日文,中日混合暫不支持,非目標語言文本自動遺棄。", + "ASR任务开启:%s": "ASR任務開啟:%s", + "GPT训练完成": "GPT訓練完成", + "GPT训练开始:%s": "GPT訓練開始:%s", + "SSL提取进程执行中": "SSL提取進程執行中", + "SSL提取进程结束": "SSL提取進程結束", + "SoVITS训练完成": "SoVITS訓練完成", + "SoVITS训练开始:%s": "SoVITS訓練開始:%s", + "一键三连中途报错": "一鍵三連中途報錯", + "一键三连进程结束": "一鍵三連進程結束", + "中文": "中文", + "凑50字一切": "湊50字一切", + "凑五句一切": "湊五句一切", + "切分后文本": "切分後文本", + "切割执行中": "切割執行中", + "切割结束": "切割結束", + "参考音频的文本": "參考音頻的文本", + "参考音频的语种": "參考音頻的語種", + "合成语音": "合成語音", + "后续将支持混合语种编码文本输入。": "後續將支持混合語種編碼文本輸入。", + "已有正在进行的ASR任务,需先终止才能开启下一次任务": "已有正在進行的ASR任務,需先終止才能開啟下一次任務", + "已有正在进行的GPT训练任务,需先终止才能开启下一次任务": "已有正在進行的GPT訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的SSL提取任务,需先终止才能开启下一次任务": "已有正在進行的SSL提取任務,需先終止才能開啟下一次任務", + "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务": "已有正在進行的SoVITS訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的一键三连任务,需先终止才能开启下一次任务": "已有正在進行的一鍵三連任務,需先終止才能開啟下一次任務", + "已有正在进行的切割任务,需先终止才能开启下一次任务": "已有正在進行的切割任務,需先終止才能開啟下一次任務", + "已有正在进行的文本任务,需先终止才能开启下一次任务": "已有正在進行的文本任務,需先終止才能開啟下一次任務", + "已有正在进行的语义token提取任务,需先终止才能开启下一次任务": "已有正在進行的語義token提取任務,需先終止才能開啟下一次任務", + "已终止ASR进程": "已終止ASR進程", + "已终止GPT训练": "已終止GPT訓練", + "已终止SoVITS训练": "已終止SoVITS訓練", + "已终止所有1a进程": "已終止所有1a進程", + "已终止所有1b进程": "已終止所有1b進程", + "已终止所有一键三连进程": "已終止所有一鍵三連進程", + "已终止所有切割进程": "已終止所有切割進程", + "已终止所有语义token进程": "已終止所有語義token進程", + "按中文句号。切": "按中文句號。切", + "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。": "文本切分工具。太長的文本合成出來效果不一定好,所以太長建議先切。合成會根據文本的換行分開合成再拼起來。", + "文本进程执行中": "文本進程執行中", + "文本进程结束": "文本進程結束", + "日文": "日文", + "英文": "英文", + "语义token提取进程执行中": "語義token提取進程執行中", + "语义token提取进程结束": "語義token提取進程結束", + "请上传参考音频": "請上傳參考音頻", + "输入路径不存在": "輸入路徑不存在", + "输入路径存在但既不是文件也不是文件夹": "輸入路徑存在但既不是文件也不是文件夾", + "输出的语音": "輸出的語音", + "进度:1a-done": "進度:1a-done", + 
"进度:1a-done, 1b-ing": "進度:1a-done, 1b-ing", + "进度:1a-ing": "進度:1a-ing", + "进度:1a1b-done": "進度:1a1b-done", + "进度:1a1b-done, 1cing": "進度:1a1b-done, 1cing", + "进度:all-done": "進度:all-done", + "需要合成的切分前文本": "需要合成的切分前文本", + "需要合成的文本": "需要合成的文本", + "需要合成的语种": "需要合成的語種", ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音": ">=3則使用對harvest音高識別的結果使用中值濾波,數值為濾波半徑,使用可以削弱啞音", "A模型权重": "A模型權重", "A模型路径": "A模型路徑", @@ -31,7 +208,6 @@ "保存名": "儲存名", "保存的文件名, 默认空为和源文件同名": "儲存的檔案名,預設空為與來源檔案同名", "保存的模型名不带后缀": "儲存的模型名不帶副檔名", - "保存频率save_every_epoch": "保存頻率save_every_epoch", "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果": "保護清輔音和呼吸聲,防止電音撕裂等artifact,拉滿0.5不開啟,調低加大保護力度但可能降低索引效果", "修改": "修改", "修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)", @@ -55,9 +231,7 @@ "常见问题解答": "常見問題解答", "常规设置": "一般設定", "开始音频转换": "開始音訊轉換", - "很遗憾您这没有能用的显卡来支持您训练": "很遗憾您这没有能用的显卡来支持您训练", "性能设置": "效能設定", - "总训练轮数total_epoch": "總訓練輪數total_epoch", "批量推理": "批量推理", "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "批量轉換,輸入待轉換音頻資料夾,或上傳多個音頻檔案,在指定資料夾(默認opt)下輸出轉換的音頻。", "指定输出主人声文件夹": "指定输出主人声文件夹", @@ -68,11 +242,7 @@ "提取": "提取", "提取音高和处理数据使用的CPU进程数": "提取音高和處理數據使用的CPU進程數", "是": "是", - "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt檔案以節省硬碟空間", - "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights檔夾", "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "是否緩存所有訓練集至 VRAM。小於10分鐘的小數據可緩存以加速訓練,大數據緩存會爆 VRAM 也加不了多少速度", - "显卡信息": "顯示卡資訊", - "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", "查看": "查看", "查看模型信息(仅支持weights文件夹下提取的小模型文件)": "查看模型資訊(僅支援weights資料夾下提取的小模型檔案)", "检索特征占比": "檢索特徵佔比", @@ -85,7 +255,6 @@ "模型版本型号": "模型版本型號", "模型融合, 可用于测试音色融合": "模型融合,可用於測試音色融合", "模型路径": "模型路徑", - "每张显卡的batch_size": "每张显卡的batch_size", "淡入淡出长度": "淡入淡出長度", "版本": "版本", "特征提取": "特徵提取", diff --git a/i18n/locale/zh_TW.json b/i18n/locale/zh_TW.json index 93aaff3ec..dae3b4e0c 100644 --- a/i18n/locale/zh_TW.json +++ b/i18n/locale/zh_TW.json @@ -1,4 +1,181 @@ { + "很遗憾您这没有能用的显卡来支持您训练": "很遺憾您這裡沒有可用的顯卡來支持您訓練", + "UVR5已开启": "UVR5已開啟", + "UVR5已关闭": "UVR5已關閉", + "输入文件夹路径": "輸入文件夾路徑", + "输出文件夹路径": "輸出文件夾路徑", + "ASR 模型": "ASR 模型", + "ASR 模型尺寸": "ASR 模型尺寸", + "ASR 语言设置": "ASR 語言設置", + "模型切换": "模型切換", + "是否开启dpo训练选项(实验性)": "是否開啟dpo訓練選項(實驗性)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "開啟無參考文本模式。不填參考文本亦相當於開啟。", + "使用无参考文本模式时建议使用微调的GPT": "使用無參考文本模式時建議使用微調的GPT", + "后续将支持转音素、手工修改音素、语音合成分步执行。": "後續將支持轉音素、手工修改音素、語音合成分步執行。", + "gpt采样参数(无参考文本时不要太低):": "gpt採樣參數(無參考文本時不要太低):", + "按标点符号切": "按標點符號切", + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何代碼和文件。詳見根目錄LICENSE。", + "0-前置数据集获取工具": "0-前置數據集獲取工具", + "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5人聲伴奏分離&去混響去延遲工具", + "是否开启UVR5-WebUI": "是否開啟UVR5-WebUI", + "UVR5进程输出信息": "UVR5進程輸出資訊", + "0b-语音切分工具": "0b-語音切分工具", + ".list标注文件的路径": ".list標注文件的路徑", + "GPT模型列表": "GPT模型列表", + "SoVITS模型列表": "SoVITS模型列表", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "填切割後音頻所在目錄!讀取的音頻文件完整路徑=該目錄-拼接-list文件裡波形對應的文件名(不是全路徑)。", + "音频自动切分输入路径,可文件可文件夹": "音頻自動切分輸入路徑,可文件可文件夾", + "切分后的子音频的输出根目录": "切分後的子音頻的輸出根目錄", + "怎么切": "怎麼切", + "不切": "不切", + "凑四句一切": "湊四句一切", + "按英文句号.切": "按英文句號.切", + "threshold:音量小于这个值视作静音的备选切割点": "threshold:音量小於這個值視作靜音的備選切割點", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration:每段最短多長,如果第一段太短一直和後面段連起來直到超過這個值", + "max_duration:每段最长多长": "max_duration:每段最長多長", + "min_interval:最短切割间隔": "min_interval:最短切割間隔", + "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size:怎麼算音量曲線,越小精度越大計算量越高(不是精度越大效果越好)", + "max_sil_kept:切完后静音最多留多长": "max_sil_kept:切完後靜音最多留多長", + "对于过短音频的处理方法,勾选则合并,不勾选则抛弃": "對於過短音頻的處理方法,勾選則合併,不勾選則拋棄", + "目标响度": "目標響度", + "峰值响度": "峰值響度", + "是否匹配响度": "是否匹配響度", + "开启语音切割": "開啟語音切割", + "终止语音切割": "終止語音切割", + "max:归一化后最大值多少": "max:歸一化後最大值多少", + "alpha_mix:混多少比例归一化后音频进来": "alpha_mix:混多少比例歸一化後音頻進來", + "切割使用的进程数": "切割使用的進程數", + "语音切割进程输出信息": "語音切割進程輸出資訊", + "0c-中文批量离线ASR工具": "0c-中文批量離線ASR工具", + "开启离线批量ASR": "開啟離線批量ASR", + "终止ASR进程": "終止ASR進程", + "批量ASR(中文only)输入文件夹路径": "批量ASR(中文only)輸入文件夾路徑", + "ASR进程输出信息": "ASR進程輸出資訊", + "0d-语音文本校对标注工具": "0d-語音文本校對標注工具", + "是否开启打标WebUI": "是否開啟打標WebUI", + "打标数据标注文件路径": "打標數據標注文件路徑", + "打标工具进程输出信息": "打標工具進程輸出資訊", + "1-GPT-SoVITS-TTS": "1-GPT-SoVITS-TTS", + "*实验/模型名": "*實驗/模型名", + "显卡信息": "顯卡資訊", + "预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑", + "预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑", + "预训练的GPT模型路径": "預訓練的GPT模型路徑", + "1A-训练集格式化工具": "1A-訓練集格式化工具", + "输出logs/实验名目录下应有23456开头的文件和文件夹": "輸出logs/實驗名目錄下應有23456開頭的文件和文件夾", + "*文本标注文件": "*文本標注文件", + "*训练集音频文件目录": "*訓練集音頻文件目錄", + "训练集音频文件目录 拼接 list文件里波形对应的文件名。": "訓練集音頻文件目錄 拼接 
list文件裡波形對應的文件名。", + "1Aa-文本内容": "1Aa-文本內容", + "GPU卡号以-分割,每个卡号一个进程": "GPU卡號以-分割,每個卡號一個進程", + "预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑", + "开启文本获取": "開啟文本獲取", + "终止文本获取进程": "終止文本獲取進程", + "文本进程输出信息": "文本進程輸出資訊", + "1Ab-SSL自监督特征提取": "1Ab-SSL自監督特徵提取", + "预训练的SSL模型路径": "預訓練的SSL模型路徑", + "开启SSL提取": "開啟SSL提取", + "终止SSL提取进程": "終止SSL提取進程", + "SSL进程输出信息": "SSL進程輸出資訊", + "1Ac-语义token提取": "1Ac-語義token提取", + "开启语义token提取": "開啟語義token提取", + "终止语义token提取进程": "終止語義token提取進程", + "语义token提取进程输出信息": "語義token提取進程輸出資訊", + "1Aabc-训练集格式化一键三连": "1Aabc-訓練集格式化一鍵三連", + "开启一键三连": "開啟一鍵三連", + "终止一键三连": "終止一鍵三連", + "一键三连进程输出信息": "一鍵三連進程輸出資訊", + "1B-微调训练": "1B-微調訓練", + "1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。": "1Ba-SoVITS訓練。用於分享的模型文件輸出在SoVITS_weights下。", + "每张显卡的batch_size": "每張顯卡的batch_size", + "总训练轮数total_epoch,不建议太高": "總訓練輪數total_epoch,不建議太高", + "文本模块学习率权重": "文本模塊學習率權重", + "保存频率save_every_epoch": "保存頻率save_every_epoch", + "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt文件以節省硬盤空間", + "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights文件夾", + "开启SoVITS训练": "開啟SoVITS訓練", + "终止SoVITS训练": "終止SoVITS訓練", + "SoVITS训练进程输出信息": "SoVITS訓練進程輸出資訊", + "1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。": "1Bb-GPT訓練。用於分享的模型文件輸出在GPT_weights下。", + "总训练轮数total_epoch": "總訓練輪數total_epoch", + "开启GPT训练": "開啟GPT訓練", + "终止GPT训练": "終止GPT訓練", + "GPT训练进程输出信息": "GPT訓練進程輸出資訊", + "1C-推理": "1C-推理", + "选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。": "選擇訓練完存放在SoVITS_weights和GPT_weights下的模型。默認的一個是底模,體驗5秒Zero Shot TTS用。", + "*GPT模型列表": "*GPT模型列表", + "*SoVITS模型列表": "*SoVITS模型列表", + "GPU卡号,只能填1个整数": "GPU卡號,只能填1個整數", + "刷新模型路径": "刷新模型路徑", + "是否开启TTS推理WebUI": "是否開啟TTS推理WebUI", + "TTS推理WebUI进程输出信息": "TTS推理WebUI進程輸出資訊", + "2-GPT-SoVITS-变声": "2-GPT-SoVITS-變聲", + "施工中,请静候佳音": "施工中,請靜候佳音", + "参考音频在3~10秒范围外,请更换!": "參考音頻在3~10秒範圍外,請更換!", + "请上传3~10秒内参考音频,超过会报错!": "請上傳3~10秒內參考音頻,超過會報錯!", + "TTS推理进程已开启": "TTS推理進程已開啟", + "TTS推理进程已关闭": "TTS推理進程已關閉", + "打标工具WebUI已开启": "打標工具WebUI已開啟", + "打标工具WebUI已关闭": "打標工具WebUI已關閉", + "本软件以MIT协议开源, 
作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟體以MIT協議開源, 作者不對軟體具備任何控制力, 使用軟體者、傳播軟體導出的聲音者自負全責. 如不認可該條款, 則不能使用或引用軟體包內任何代碼和文件. 詳見根目錄LICENSE.", + "*请上传并填写参考信息": "*請上傳並填寫參考資訊", + "*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。": "*請填寫需要合成的目標文本。中英混合選中文,日英混合選日文,中日混合暫不支持,非目標語言文本自動遺棄。", + "ASR任务开启:%s": "ASR任務開啟:%s", + "GPT训练完成": "GPT訓練完成", + "GPT训练开始:%s": "GPT訓練開始:%s", + "SSL提取进程执行中": "SSL提取進程執行中", + "SSL提取进程结束": "SSL提取進程結束", + "SoVITS训练完成": "SoVITS訓練完成", + "SoVITS训练开始:%s": "SoVITS訓練開始:%s", + "一键三连中途报错": "一鍵三連中途報錯", + "一键三连进程结束": "一鍵三連進程結束", + "中文": "中文", + "凑50字一切": "湊50字一切", + "凑五句一切": "湊五句一切", + "切分后文本": "切分後文本", + "切割执行中": "切割執行中", + "切割结束": "切割結束", + "参考音频的文本": "參考音頻的文本", + "参考音频的语种": "參考音頻的語種", + "合成语音": "合成語音", + "后续将支持混合语种编码文本输入。": "後續將支持混合語種編碼文本輸入。", + "已有正在进行的ASR任务,需先终止才能开启下一次任务": "已有正在進行的ASR任務,需先終止才能開啟下一次任務", + "已有正在进行的GPT训练任务,需先终止才能开启下一次任务": "已有正在進行的GPT訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的SSL提取任务,需先终止才能开启下一次任务": "已有正在進行的SSL提取任務,需先終止才能開啟下一次任務", + "已有正在进行的SoVITS训练任务,需先终止才能开启下一次任务": "已有正在進行的SoVITS訓練任務,需先終止才能開啟下一次任務", + "已有正在进行的一键三连任务,需先终止才能开启下一次任务": "已有正在進行的一鍵三連任務,需先終止才能開啟下一次任務", + "已有正在进行的切割任务,需先终止才能开启下一次任务": "已有正在進行的切割任務,需先終止才能開啟下一次任務", + "已有正在进行的文本任务,需先终止才能开启下一次任务": "已有正在進行的文本任務,需先終止才能開啟下一次任務", + "已有正在进行的语义token提取任务,需先终止才能开启下一次任务": "已有正在進行的語義token提取任務,需先終止才能開啟下一次任務", + "已终止ASR进程": "已終止ASR進程", + "已终止GPT训练": "已終止GPT訓練", + "已终止SoVITS训练": "已終止SoVITS訓練", + "已终止所有1a进程": "已終止所有1a進程", + "已终止所有1b进程": "已終止所有1b進程", + "已终止所有一键三连进程": "已終止所有一鍵三連進程", + "已终止所有切割进程": "已終止所有切割進程", + "已终止所有语义token进程": "已終止所有語義token進程", + "按中文句号。切": "按中文句號。切", + "文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。": "文本切分工具。太長的文本合成出來效果不一定好,所以太長建議先切。合成會根據文本的換行分開合成再拼起來。", + "文本进程执行中": "文本進程執行中", + "文本进程结束": "文本進程結束", + "日文": "日文", + "英文": "英文", + "语义token提取进程执行中": "語義token提取進程執行中", + "语义token提取进程结束": "語義token提取進程結束", + "请上传参考音频": "請上傳參考音頻", + "输入路径不存在": "輸入路徑不存在", + "输入路径存在但既不是文件也不是文件夹": "輸入路徑存在但既不是文件也不是文件夾", + "输出的语音": "輸出的語音", + "进度:1a-done": "進度:1a-done", + 
"进度:1a-done, 1b-ing": "進度:1a-done, 1b-ing", + "进度:1a-ing": "進度:1a-ing", + "进度:1a1b-done": "進度:1a1b-done", + "进度:1a1b-done, 1cing": "進度:1a1b-done, 1cing", + "进度:all-done": "進度:all-done", + "需要合成的切分前文本": "需要合成的切分前文本", + "需要合成的文本": "需要合成的文本", + "需要合成的语种": "需要合成的語種", ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音": ">=3則使用對harvest音高識別的結果使用中值濾波,數值為濾波半徑,使用可以削弱啞音", "A模型权重": "A模型權重", "A模型路径": "A模型路徑", @@ -31,7 +208,6 @@ "保存名": "儲存名", "保存的文件名, 默认空为和源文件同名": "儲存的檔案名,預設空為與來源檔案同名", "保存的模型名不带后缀": "儲存的模型名不帶副檔名", - "保存频率save_every_epoch": "保存頻率save_every_epoch", "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果": "保護清輔音和呼吸聲,防止電音撕裂等artifact,拉滿0.5不開啟,調低加大保護力度但可能降低索引效果", "修改": "修改", "修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)", @@ -55,9 +231,7 @@ "常见问题解答": "常見問題解答", "常规设置": "一般設定", "开始音频转换": "開始音訊轉換", - "很遗憾您这没有能用的显卡来支持您训练": "很遗憾您这没有能用的显卡来支持您训练", "性能设置": "效能設定", - "总训练轮数total_epoch": "總訓練輪數total_epoch", "批量推理": "批量推理", "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "批量轉換,輸入待轉換音頻資料夾,或上傳多個音頻檔案,在指定資料夾(默認opt)下輸出轉換的音頻。", "指定输出主人声文件夹": "指定输出主人声文件夹", @@ -68,11 +242,7 @@ "提取": "提取", "提取音高和处理数据使用的CPU进程数": "提取音高和處理數據使用的CPU進程數", "是": "是", - "是否仅保存最新的ckpt文件以节省硬盘空间": "是否僅保存最新的ckpt檔案以節省硬碟空間", - "是否在每次保存时间点将最终小模型保存至weights文件夹": "是否在每次保存時間點將最終小模型保存至weights檔夾", "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "是否緩存所有訓練集至 VRAM。小於10分鐘的小數據可緩存以加速訓練,大數據緩存會爆 VRAM 也加不了多少速度", - "显卡信息": "顯示卡資訊", - "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", "查看": "查看", "查看模型信息(仅支持weights文件夹下提取的小模型文件)": "查看模型資訊(僅支援weights資料夾下提取的小模型檔案)", "检索特征占比": "檢索特徵佔比", @@ -85,7 +255,6 @@ "模型版本型号": "模型版本型號", "模型融合, 可用于测试音色融合": "模型融合,可用於測試音色融合", "模型路径": "模型路徑", - "每张显卡的batch_size": "每张显卡的batch_size", "淡入淡出长度": "淡入淡出長度", "版本": "版本", "特征提取": "特徵提取", diff --git a/webui.py b/webui.py index a44099883..a84f1dd9a 100644 --- a/webui.py +++ b/webui.py @@ -739,7 +739,7 @@ def close1abc(): hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10") if_merge_short = gr.Checkbox(label=i18n("对于过短音频的处理方法,勾选则合并,不勾选则抛弃"),show_label=True) with gr.Row(): - loudness=gr.Textbox(label=i18n("目标响度"),value="-23") + loudness=gr.Textbox(label=i18n("目标响度"),value="-18") peak=gr.Textbox(label=i18n("峰值响度"),value="-1") if_loudness_norm = gr.Checkbox(label=i18n("是否匹配响度"),show_label=True,value=True) num_worker=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) From 87a79974549117905aead496bbd10a6bdd74310c Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Wed, 10 Apr 2024 23:03:22 +0100 Subject: [PATCH 6/9] a small change for i18n --- i18n/locale/en_US.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index 004c54120..dbdc61997 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -31,7 +31,7 @@ "凑四句一切": "Slice once every 4 sentences", "按英文句号.切": "Slice by English punct", "threshold:音量小于这个值视作静音的备选切割点": "Noise gate threshold (loudness below this value will be treated as noise", - "min_duration:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: The minimum length for each segment. If the first segment is too short, it will be combined with subsequent segments until it exceeds this value.", + "min_duration:每段最短多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_duration: The minimum length for each segment. 
If the first segment is too short, it will be combined with subsequent segments until it exceeds this value.", "max_duration:每段最长多长": "Maxium duration", "min_interval:最短切割间隔": "Minumum interval for audio cutting", "hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)": "hop_size: FO hop size, the smaller the value, the higher the accuracy)", From 326f2577f02383046f281e00b021477f41b6ec53 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Wed, 10 Apr 2024 23:13:55 +0100 Subject: [PATCH 7/9] change the default loudness --- webui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webui.py b/webui.py index a84f1dd9a..3c8c057b6 100644 --- a/webui.py +++ b/webui.py @@ -737,9 +737,9 @@ def close1abc(): min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="0.3") max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="0.5") hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10") - if_merge_short = gr.Checkbox(label=i18n("对于过短音频的处理方法,勾选则合并,不勾选则抛弃"),show_label=True) + if_merge_short = gr.Checkbox(label=i18n("对于过短音频的处理方法,勾选则合并,不勾选则抛弃"),show_label=True,value=True) with gr.Row(): - loudness=gr.Textbox(label=i18n("目标响度"),value="-18") + loudness=gr.Textbox(label=i18n("目标响度"),value="-21") peak=gr.Textbox(label=i18n("峰值响度"),value="-1") if_loudness_norm = gr.Checkbox(label=i18n("是否匹配响度"),show_label=True,value=True) num_worker=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) From 9907fc1152904d4efaa710c2592a61644ee185b4 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Thu, 11 Apr 2024 01:16:14 +0100 Subject: [PATCH 8/9] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86=E5=88=87?= =?UTF-8?q?=E5=AE=8C=E6=BC=8F=E9=9F=B3=E9=A2=91=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/slice_audio.py | 52 +++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/tools/slice_audio.py 
b/tools/slice_audio.py index fb6ef413b..5451899eb 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -424,22 +424,48 @@ def slice(self, waveform): return chunks -def merge_short_chunks(chunks, max_duration, rate): - merged_chunks = [] - buffer, length = [], 0 +# def merge_short_chunks(chunks, max_duration, rate): +# merged_chunks = [] +# buffer, length = [], 0 +# lengths = [len(chunk)/rate for chunk in chunks] +# print(lengths) +# for chunk in chunks: +# if length + len(chunk) > max_duration * rate and len(buffer) > 0: +# print(len(buffer)) +# merged_chunks.append(np.concatenate(buffer)) +# buffer, length = [], 0 +# else: +# buffer.append(chunk) +# length += len(chunk) + + +# if len(buffer) > 0: +# print(len(buffer)) +# merged_chunks.append(np.concatenate(buffer)) + +# print([len(chunk)/rate for chunk in merged_chunks]) + +# return merged_chunks + - for chunk in chunks: - if length + len(chunk) > max_duration * rate and len(buffer) > 0: - merged_chunks.append(np.concatenate(buffer)) - buffer, length = [], 0 +def merge_short_chunks(chunks, max_duration, rate): + if not chunks: + return [] + + max_length = int(max_duration * rate) # 确保 max_length 是整数 + merged = [] + current = chunks[0] # 开始时 current 是第一个音频块 + for chunk in chunks[1:]: # 从第二个音频块开始遍历 + if len(current) + len(chunk) <= max_length: + current = np.concatenate((current, np.zeros(int(0.1*rate)), chunk)) # 在合并前后加入一个0.1s作为间隔 else: - buffer.append(chunk) - length += len(chunk) + merged.append(current) + current = chunk # 开始新的合并块 + + merged.append(current) # 添加最后一个块 + return merged - if len(buffer) > 0: - merged_chunks.append(np.concatenate(buffer)) - return merged_chunks @@ -472,7 +498,7 @@ def merge_short_chunks(chunks, max_duration, rate): hop_size = float(args.hop_size) max_sil_kept = float(args.max_sil_kept) num_worker = int(args.num_worker) -merge_short = bool(args.merge_short) +merge_short = eval(args.merge_short) if __name__ == "__main__": slice_audio_v2_(input_path, output_dir, num_worker, 
min_duration, max_duration, min_interval, threshold, hop_size, max_sil_kept,merge_short) From 0fe0f973a744782ebcfa71f485a9e126273c1d17 Mon Sep 17 00:00:00 2001 From: XXXXRT666 Date: Thu, 11 Apr 2024 01:19:25 +0100 Subject: [PATCH 9/9] . --- tools/slice_audio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/slice_audio.py b/tools/slice_audio.py index 5451899eb..219bca3e5 100644 --- a/tools/slice_audio.py +++ b/tools/slice_audio.py @@ -452,15 +452,15 @@ def merge_short_chunks(chunks, max_duration, rate): if not chunks: return [] - max_length = int(max_duration * rate) # 确保 max_length 是整数 + max_length = int(max_duration * rate) merged = [] - current = chunks[0] # 开始时 current 是第一个音频块 - for chunk in chunks[1:]: # 从第二个音频块开始遍历 + current = chunks[0] + for chunk in chunks[1:]: if len(current) + len(chunk) <= max_length: current = np.concatenate((current, np.zeros(int(0.1*rate)), chunk)) # 在合并前后加入一个0.1s作为间隔 else: merged.append(current) - current = chunk # 开始新的合并块 + current = chunk merged.append(current) # 添加最后一个块 return merged