[Example] ChatTTS: add advanced option
grorge123 committed Jul 13, 2024
1 parent 4956272 commit 8e7c7d2
Showing 9 changed files with 72 additions and 87 deletions.
3 changes: 3 additions & 0 deletions wasmedge-chatTTS/.gitignore
@@ -0,0 +1,3 @@
asset
config
*.wav
59 changes: 58 additions & 1 deletion wasmedge-chatTTS/README.md
@@ -1,3 +1,60 @@
# ChatTTS example with WasmEdge WASI-NN ChatTTS plugin
This example demonstrates how to use the WasmEdge WASI-NN ChatTTS plugin to generate speech from text. ChatTTS is a text-to-speech model designed specifically for dialogue scenarios such as LLM assistants.

## Install WasmEdge with WASI-NN ChatTTS plugin
The ChatTTS backend relies on Python and the ChatTTS Python package; we recommend the following commands to install the dependencies.
``` bash
sudo apt update
sudo apt upgrade
sudo apt install python3-dev
pip install chattts==0.1.1
```

Then build and install WasmEdge from source.

``` bash
cd <path/to/your/wasmedge/source/folder>

cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=Release -DWASMEDGE_PLUGIN_WASI_NN_BACKEND="chatTTS"
cmake --build build

# Installing the build is required for WasmEdge to find the WASI-NN plugin.
cmake --install build
```

After installation, you will have the `wasmedge` executable under `/usr/local/bin` and the WASI-NN plug-in with the ChatTTS backend at `/usr/local/lib/wasmedge/libwasmedgePluginWasiNN.so`.

## Build wasm

Run the following command to build the WASM module; the output file will be at `target/wasm32-wasi/release/`:

```bash
cargo build --target wasm32-wasi --release
```

## Execute

Execute the WASM module with `wasmedge`:

``` bash
wasmedge --dir .:. ./target/wasm32-wasi/release/wasmedge-chattts.wasm
```

This generates `output1.wav`, the synthesized speech for the input text.
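
Under the hood, the WASM module retrieves the generated audio from the plugin and encodes it as a WAV file with the `hound` crate, reusing the `get_data_from_context` helper defined in `src/main.rs`. The sketch below illustrates that step; the mono, 24 kHz, 32-bit float sample format is an assumption about what the plugin returns, so check `src/main.rs` for the exact handling.

``` rust
// Sketch only: assumes output index 1 holds the byte length (little-endian)
// and output index 0 holds raw little-endian f32 samples.
fn write_wav(context: &wasmedge_wasi_nn::GraphExecutionContext, path: &str) {
    let len_bytes = get_data_from_context(context, 1, 4);
    let len = usize::from_le_bytes(len_bytes.as_slice().try_into().unwrap());
    let samples = get_data_from_context(context, 0, len);

    // Assumed WAV format: mono, 24 kHz, 32-bit float.
    let spec = hound::WavSpec {
        channels: 1,
        sample_rate: 24_000,
        bits_per_sample: 32,
        sample_format: hound::SampleFormat::Float,
    };
    let mut writer = hound::WavWriter::create(path, spec).expect("Failed to create wav file");
    for chunk in samples.chunks_exact(4) {
        writer
            .write_sample(f32::from_le_bytes(chunk.try_into().unwrap()))
            .expect("Failed to write sample");
    }
    writer.finalize().expect("Failed to finalize wav file");
}
```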

## Advanced Options

The `config_data` input adjusts the ChatTTS configuration.
It supports the following options:
- `prompt`: Special control tokens applied when synthesizing the text (e.g. `[oral_2][laugh_0][break_6]`).
- `spk_emb`: Speaker embedding (use `random` for a randomly sampled speaker).
- `temperature`: Sampling temperature.
- `top_k`: Top-K decoding.
- `top_p`: Top-P decoding.

For example, set `config_data` as below, rebuild the WASM module, and re-run `wasmedge --dir .:. ./target/wasm32-wasi/release/wasmedge-chattts.wasm`:
``` rust
let config_data = serde_json::to_string(&json!({"prompt": "[oral_2][laugh_0][break_6]", "spk_emb": "random", "temperature": 0.5, "top_k": 0, "top_p": 0.9}))
.unwrap()
.as_bytes()
.to_vec();
```
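
As the updated `src/main.rs` shows, the configuration travels to the plugin as a second input tensor: the prompt text is set at input index 0 and the JSON-encoded `config_data` at input index 1 before `compute()` is called. Below is a condensed sketch assembled from that file (the output-handling part is omitted here).

``` rust
use serde_json::json;
use wasmedge_wasi_nn::{ExecutionTarget, GraphBuilder, GraphEncoding, TensorType};

fn main() {
    let prompt = "It is [uv_break] test sentence [laugh] for chat T T S";
    let tensor_data = prompt.as_bytes().to_vec();
    let config_data = serde_json::to_string(
        &json!({"prompt": "[oral_2][laugh_0][break_6]", "spk_emb": "random", "temperature": 0.5, "top_k": 0, "top_p": 0.9}),
    )
    .unwrap()
    .as_bytes()
    .to_vec();

    // The ChatTTS backend needs no model bytes up front, so the graph is built from an empty vector.
    let graph = GraphBuilder::new(GraphEncoding::ChatTTS, ExecutionTarget::CPU)
        .build_from_bytes(Vec::<Vec<u8>>::new())
        .expect("Failed to build graph");
    let mut context = graph
        .init_execution_context()
        .expect("Failed to init context");

    // Input index 0: prompt text; input index 1: JSON configuration.
    context
        .set_input(0, TensorType::U8, &[1], &tensor_data)
        .expect("Failed to set input");
    context
        .set_input(1, TensorType::U8, &[1], &config_data)
        .expect("Failed to set input");
    context.compute().expect("Failed to compute");
}
```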
12 changes: 0 additions & 12 deletions wasmedge-chatTTS/config/decoder.yaml

This file was deleted.

14 changes: 0 additions & 14 deletions wasmedge-chatTTS/config/dvae.yaml

This file was deleted.

20 changes: 0 additions & 20 deletions wasmedge-chatTTS/config/gpt.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions wasmedge-chatTTS/config/path.yaml

This file was deleted.

24 changes: 0 additions & 24 deletions wasmedge-chatTTS/config/vocos.yaml

This file was deleted.

16 changes: 11 additions & 5 deletions wasmedge-chatTTS/src/main.rs
@@ -1,11 +1,10 @@
use hound;
use serde_json::json;
use wasmedge_wasi_nn::{
self, ExecutionTarget, GraphBuilder, GraphEncoding, GraphExecutionContext,
TensorType,
self, ExecutionTarget, GraphBuilder, GraphEncoding, GraphExecutionContext, TensorType,
};
use hound;

fn get_data_from_context(context: &GraphExecutionContext, index: usize, limit: usize) -> Vec<u8> {
// Preserve for 4096 tokens with average token length 8
const MAX_OUTPUT_BUFFER_SIZE: usize = 4096 * 4096;
let mut output_buffer = vec![0u8; MAX_OUTPUT_BUFFER_SIZE];
let _ = context
@@ -16,8 +15,12 @@ fn get_data_from_context(context: &GraphExecutionContext, index: usize, limit: u
}

fn main() {
let prompt = "It is a test sentence.";
let prompt = "It is [uv_break] test sentence [laugh] for chat T T S";
let tensor_data = prompt.as_bytes().to_vec();
let config_data = serde_json::to_string(&json!({"prompt": "[oral_2][laugh_0][break_6]", "spk_emb": "random", "temperature": 0.5, "top_k": 0, "top_p": 0.9}))
.unwrap()
.as_bytes()
.to_vec();
let empty_vec: Vec<Vec<u8>> = Vec::new();
let graph = GraphBuilder::new(GraphEncoding::ChatTTS, ExecutionTarget::CPU)
.build_from_bytes(empty_vec)
@@ -28,6 +31,9 @@ fn main() {
context
.set_input(0, TensorType::U8, &[1], &tensor_data)
.expect("Failed to set input");
context
.set_input(1, TensorType::U8, &[1], &config_data)
.expect("Failed to set input");
context.compute().expect("Failed to compute");
let bytes_written = get_data_from_context(&context, 1, 4);
let bytes_written = usize::from_le_bytes(bytes_written.as_slice().try_into().unwrap());
Binary file added wasmedge-chatTTS/wasmedge-chattts.wasm
