Skip to content

Commit

Permalink
Add Go API for TTS (#377)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Oct 20, 2023
1 parent 3ba9a49 commit a69d0a9
Show file tree
Hide file tree
Showing 23 changed files with 400 additions and 108 deletions.
122 changes: 121 additions & 1 deletion .github/workflows/test-go-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
fetch-depth: 0
- uses: actions/setup-go@v4
with:
go-version: '>=1.20'
go-version: '>=1.12'

- name: Display go version
shell: bash
Expand All @@ -66,6 +66,121 @@ jobs:
run: |
gcc --version
# Builds the non-streaming TTS Go example and runs it against three VITS
# models on every non-Windows runner.
- name: Test non-streaming TTS (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
run: |
# Created at the workspace root (before the cd below) so that the final
# `cp` in this step has a directory to copy the generated waves into.
mkdir tts-waves
cd go-api-examples/non-streaming-tts
ls -lh
# Resolve the example's dependencies, then print go.mod so the resolved
# versions show up in the job log.
go mod tidy
cat go.mod
go build
ls -lh
# The model repositories are cloned from Hugging Face below.
# NOTE(review): presumably the model files are stored via git-lfs, hence
# the install — confirm.
git lfs install
# Each model is cloned, exercised by its run script, and removed again
# before the next one is cloned.
echo "Test vits-ljs"
git clone https://huggingface.co/csukuangfj/vits-ljs
./run-vits-ljs.sh
rm -rf vits-ljs
echo "Test vits-vctk"
git clone https://huggingface.co/csukuangfj/vits-vctk
./run-vits-vctk.sh
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
# Collect every generated wave file in the workspace-level tts-waves directory.
ls -lh *.wav
cp *.wav ../../tts-waves/
# Builds the non-streaming TTS Go example and runs it against three VITS
# models on the 64-bit Windows runner.
- name: Test non-streaming TTS (Win64)
if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
shell: bash
run: |
# Created at the workspace root (before the cd below) so that the final
# `cp` in this step has a directory to copy the generated waves into.
mkdir tts-waves
cd go-api-examples/non-streaming-tts
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
echo $PWD
# List the Go module cache entries for k2-fsa (diagnostic output only).
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
# Copy the 64-bit DLLs shipped in the sherpa-onnx-go-windows module into the
# current directory, next to the freshly built executable.
# NOTE(review): presumably required so the DLLs are found at run time — confirm.
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
ls -lh
git lfs install
# Each model is cloned, exercised by its run script, and removed again
# before the next one is cloned.
echo "Test vits-ljs"
git clone https://huggingface.co/csukuangfj/vits-ljs
./run-vits-ljs.sh
rm -rf vits-ljs
echo "Test vits-vctk"
git clone https://huggingface.co/csukuangfj/vits-vctk
./run-vits-vctk.sh
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
# Collect every generated wave file in the workspace-level tts-waves directory.
ls -lh *.wav
cp *.wav ../../tts-waves/
# Builds the non-streaming TTS Go example as a 32-bit binary and runs it
# against three VITS models on the Windows x86 runner.
- name: Test non-streaming TTS (Win32)
  if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  shell: bash
  run: |
    # The other TTS steps that create tts-waves are skipped on windows/x86,
    # so create it here; otherwise the final `cp` has no target directory.
    # -p keeps this harmless if the directory already exists.
    mkdir -p tts-waves

    cd go-api-examples/non-streaming-tts
    ls -lh

    go mod tidy
    cat go.mod
    ls -lh

    # Show the environment before and after switching the build target.
    go env GOARCH
    go env
    echo "------------------------------"
    # Target 32-bit x86 and enable cgo for the build below.
    go env -w GOARCH=386
    go env -w CGO_ENABLED=1
    go env

    go clean
    go build

    echo $PWD
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
    # Copy the 32-bit DLLs shipped in the sherpa-onnx-go-windows module into
    # the current directory, next to the freshly built executable.
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
    ls -lh

    git lfs install

    # Each model is cloned, exercised by its run script, and removed again
    # before the next one is cloned.
    echo "Test vits-ljs"
    git clone https://huggingface.co/csukuangfj/vits-ljs
    ./run-vits-ljs.sh
    rm -rf vits-ljs

    echo "Test vits-vctk"
    git clone https://huggingface.co/csukuangfj/vits-vctk
    ./run-vits-vctk.sh
    rm -rf vits-vctk

    echo "Test vits-zh-aishell3"
    git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
    ./run-vits-zh-aishell3.sh
    rm -rf vits-zh-aishell3

    # Collect every generated wave file in the workspace-level tts-waves directory.
    ls -lh *.wav
    cp *.wav ../../tts-waves/
- name: Test non-streaming decoding files (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
Expand Down Expand Up @@ -298,3 +413,8 @@ jobs:
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
- uses: actions/upload-artifact@v3
with:
name: tts-waves
path: tts-waves
36 changes: 36 additions & 0 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,42 @@ jobs:
go mod tidy
go build
# Builds the in-repo non-streaming TTS Go example and runs it against three
# VITS models.
- name: Test non-streaming TTS (macOS)
shell: bash
run: |
# Created at the workspace root (before the cd below) so that the final
# `cp` in this step has a directory to copy the generated waves into.
mkdir tts-waves
cd scripts/go/_internal/non-streaming-tts/
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
git lfs install
# Each model is cloned, exercised by its run script, and removed again
# before the next one is cloned.
echo "Test vits-ljs"
git clone https://huggingface.co/csukuangfj/vits-ljs
./run-vits-ljs.sh
rm -rf vits-ljs
echo "Test vits-vctk"
git clone https://huggingface.co/csukuangfj/vits-vctk
./run-vits-vctk.sh
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
# The example directory is four levels below the workspace root, hence the
# four `..` components.
cp *.wav ../../../../tts-waves/
- uses: actions/upload-artifact@v3
with:
name: tts-waves
path: tts-waves

- name: Test non-streaming decoding files (macOS)
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.8.3")
set(SHERPA_ONNX_VERSION "1.8.4")

# Disable warning about
#
Expand Down
2 changes: 1 addition & 1 deletion c-api-examples/offline-tts-c-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) {
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text, sid);

SherpaOnnxDestroyOfflineWriteWave(audio, filename);
SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
SherpaOnnxDestroyOfflineTts(tts);
Expand Down
6 changes: 0 additions & 6 deletions go-api-examples/non-streaming-decode-files/go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module non-streaming-decode-files

go 1.12

require (
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
github.com/youpy/go-wav v0.3.2
)
35 changes: 0 additions & 35 deletions go-api-examples/non-streaming-decode-files/go.sum

This file was deleted.

3 changes: 3 additions & 0 deletions go-api-examples/non-streaming-tts/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module non-streaming-tts

go 1.12
61 changes: 61 additions & 0 deletions go-api-examples/non-streaming-tts/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package main

import (
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
flag "github.com/spf13/pflag"
"log"
)

// main is the entry point of the non-streaming TTS example.
//
// It fills a sherpa.OfflineTtsConfig from command-line flags, requires exactly
// one positional argument (the text to synthesize), generates audio for that
// text with the requested speaker ID, and saves the result to the file given
// by --output-filename. Any failure terminates the process via log.Fatalf.
func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineTtsConfig{}
	sid := 0
	filename := "./generated.wav"

	// VITS model files.
	flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model")
	flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")

	// VITS synthesis parameters.
	flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
	flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
	flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")

	// Runtime options.
	flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
	flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
	flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")

	// Per-invocation options.
	flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
	flag.StringVar(&filename, "output-filename", "./generated.wav", "Filename to save the generated audio")

	flag.Parse()

	// The text to synthesize is the single positional argument.
	if len(flag.Args()) != 1 {
		log.Fatalf("Please provide the text to generate audios")
	}

	text := flag.Arg(0)

	log.Println("Input text:", text)
	log.Println("Speaker ID:", sid)
	log.Println("Output filename:", filename)

	log.Println("Initializing model (may take several seconds)")

	tts := sherpa.NewOfflineTts(&config)
	defer sherpa.DeleteOfflineTts(tts)

	log.Println("Model created!")

	log.Println("Start generating!")

	audio := tts.Generate(text, sid)

	log.Println("Done!")

	// Any return value other than 1 is treated as a failed write.
	ok := audio.Save(filename)
	if ok != 1 {
		// Fatalf needs a verb for its argument; without one the filename is
		// rendered as "%!(EXTRA string=...)" glued to the message.
		log.Fatalf("Failed to write %s", filename)
	}
}
14 changes: 14 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-ljs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Runs the non-streaming TTS example once with the vits-ljs model.
#
# The model must be downloaded before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#ljspeech-english-single-speaker

model_dir=./vits-ljs

# Flags first, then the text to synthesize as the only positional argument.
tts_args=(
  "--vits-model=$model_dir/vits-ljs.onnx"
  "--vits-lexicon=$model_dir/lexicon.txt"
  "--vits-tokens=$model_dir/tokens.txt"
  "--sid=0"
  "--debug=1"
  "--output-filename=./vits-ljs.wav"
  "Liliana, the most beautiful and lovely assistant of our team!"
)

./non-streaming-tts "${tts_args[@]}"
16 changes: 16 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-vctk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

# please refer to
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
# to download the model before you run this script
#
# Synthesizes the same sentence once per speaker ID in the loop below and
# writes each result to ./kennedy-<sid>.wav.

for sid in 0 10 108; do
  # Pass the loop variable as the speaker ID so that each output file is
  # actually generated with the speaker named in its filename.
  ./non-streaming-tts \
    --vits-model=./vits-vctk/vits-vctk.onnx \
    --vits-lexicon=./vits-vctk/lexicon.txt \
    --vits-tokens=./vits-vctk/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./kennedy-$sid.wav \
    'Ask not what your country can do for you; ask what you can do for your country.'
done
16 changes: 16 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-zh-aishell3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

# please refer to
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#aishell3-chinese-multi-speaker-174-speakers
# to download the model before you run this script
#
# Synthesizes the same sentence once per speaker ID in the loop below and
# writes each result to ./liliana-<sid>.wav.

for sid in 10 33 99; do
  # Pass the loop variable as the speaker ID so that each output file is
  # actually generated with the speaker named in its filename.
  ./non-streaming-tts \
    --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
    --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
    --vits-tokens=./vits-zh-aishell3/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./liliana-$sid.wav \
    "林美丽最美丽、最漂亮、最可爱!"
done
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module real-time-speech-recognition-from-microphone

go 1.12

require (
github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
)

This file was deleted.

6 changes: 0 additions & 6 deletions go-api-examples/streaming-decode-files/go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module streaming-decode-files

go 1.12

require (
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
github.com/youpy/go-wav v0.3.2
)
Loading

0 comments on commit a69d0a9

Please sign in to comment.