-
Notifications
You must be signed in to change notification settings - Fork 477
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3ba9a49
commit a69d0a9
Showing
23 changed files
with
400 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,3 @@ | ||
module non-streaming-decode-files | ||
|
||
go 1.12 | ||
|
||
require ( | ||
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha | ||
github.com/spf13/pflag v1.0.5 | ||
github.com/youpy/go-wav v0.3.2 | ||
) |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module non-streaming-tts | ||
|
||
go 1.12 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package main | ||
|
||
import ( | ||
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
flag "github.com/spf13/pflag" | ||
"log" | ||
) | ||
|
||
func main() { | ||
log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
|
||
config := sherpa.OfflineTtsConfig{} | ||
sid := 0 | ||
filename := "./generated.wav" | ||
|
||
flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model") | ||
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt") | ||
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt") | ||
|
||
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS") | ||
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS") | ||
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower") | ||
|
||
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") | ||
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | ||
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | ||
|
||
flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models") | ||
flag.StringVar(&filename, "output-filename", "./generated.wav", "Filename to save the generated audio") | ||
|
||
flag.Parse() | ||
|
||
if len(flag.Args()) != 1 { | ||
log.Fatalf("Please provide the text to generate audios") | ||
} | ||
|
||
text := flag.Arg(0) | ||
|
||
log.Println("Input text:", text) | ||
log.Println("Speaker ID:", sid) | ||
log.Println("Output filename:", filename) | ||
|
||
log.Println("Initializing model (may take several seconds)") | ||
|
||
tts := sherpa.NewOfflineTts(&config) | ||
defer sherpa.DeleteOfflineTts(tts) | ||
|
||
log.Println("Model created!") | ||
|
||
log.Println("Start generating!") | ||
|
||
audio := tts.Generate(text, sid) | ||
|
||
log.Println("Done!") | ||
|
||
ok := audio.Save(filename) | ||
if ok != 1 { | ||
log.Fatalf("Failed to write", filename) | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env bash

# Runs the non-streaming TTS example with the single-speaker LJSpeech VITS model.
#
# Download the model first; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#ljspeech-english-single-speaker

# Collect the arguments in an array so each option sits on its own line
# without needing backslash continuations.
args=(
  --vits-model=./vits-ljs/vits-ljs.onnx
  --vits-lexicon=./vits-ljs/lexicon.txt
  --vits-tokens=./vits-ljs/tokens.txt
  --sid=0
  --debug=1
  --output-filename=./vits-ljs.wav
  "Liliana, the most beautiful and lovely assistant of our team!"
)

./non-streaming-tts "${args[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env bash

# Runs the non-streaming TTS example with the multi-speaker VCTK VITS model,
# generating one wave file per speaker ID.
#
# please refer to
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
# to download the model before you run this script

for sid in 0 10 108; do
  # Pass the loop variable as the speaker ID; a hard-coded --sid would make
  # every kennedy-$sid.wav use the same speaker.
  ./non-streaming-tts \
    --vits-model=./vits-vctk/vits-vctk.onnx \
    --vits-lexicon=./vits-vctk/lexicon.txt \
    --vits-tokens=./vits-vctk/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./kennedy-$sid.wav \
    'Ask not what your country can do for you; ask what you can do for your country.'
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env bash

# Runs the non-streaming TTS example with the multi-speaker aishell3 VITS model,
# generating one wave file per speaker ID.
#
# please refer to
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#aishell3-chinese-multi-speaker-174-speakers
# to download the model before you run this script

for sid in 10 33 99; do
  # Pass the loop variable as the speaker ID; a hard-coded --sid would make
  # every liliana-$sid.wav use the same speaker.
  ./non-streaming-tts \
    --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
    --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
    --vits-tokens=./vits-zh-aishell3/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./liliana-$sid.wav \
    "林美丽最美丽、最漂亮、最可爱!"
done
6 changes: 0 additions & 6 deletions
6
go-api-examples/real-time-speech-recognition-from-microphone/go.mod
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,3 @@ | ||
module real-time-speech-recognition-from-microphone | ||
|
||
go 1.12 | ||
|
||
require ( | ||
github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 | ||
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha | ||
github.com/spf13/pflag v1.0.5 | ||
) |
12 changes: 0 additions & 12 deletions
12
go-api-examples/real-time-speech-recognition-from-microphone/go.sum
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,3 @@ | ||
module streaming-decode-files | ||
|
||
go 1.12 | ||
|
||
require ( | ||
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha | ||
github.com/spf13/pflag v1.0.5 | ||
github.com/youpy/go-wav v0.3.2 | ||
) |
Oops, something went wrong.