diff --git a/.github/workflows/chatTTS.yml b/.github/workflows/chatTTS.yml new file mode 100644 index 0000000..bf38b14 --- /dev/null +++ b/.github/workflows/chatTTS.yml @@ -0,0 +1,53 @@ +name: ChatTTS example + +on: + schedule: + - cron: "0 0 * * *" + push: + paths: + - ".github/workflows/chatTTS.yml" + - "wasmedge-chatTTS/**" + pull_request: + paths: + - ".github/workflows/chatTTS.yml" + - "wasmedge-chatTTS/**" +jobs: + build: + runs-on: ubuntu-22.04 + steps: + - name: Install Dependencies for building WasmEdge + run: | + sudo apt-get -y update + sudo apt-get -y install wget git curl software-properties-common build-essential python3 python3-dev python3-pip ninja-build + pip install chattts==0.1.1 + + - name: Install Rust target for wasm + run: | + rustup target add wasm32-wasi + + - name: Checkout WasmEdge + uses: actions/checkout@v4 + with: + repository: WasmEdge/WasmEdge + path: WasmEdge + - name: Build WasmEdge with WASI-NN ChatTTS plugin + run: | + cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=Release -DWASMEDGE_USE_LLVM=OFF -DWASMEDGE_PLUGIN_WASI_NN_BACKEND=ChatTTS + cmake --build build + working-directory: WasmEdge + + - name: Checkout WasmEdge-WASINN-examples + uses: actions/checkout@v4 + with: + path: WasmEdge-WASINN-examples + + - name: Build wasm + run: cargo build --target wasm32-wasi --release + working-directory: WasmEdge-WASINN-examples/wasmedge-chatTTS + + - name: Execute + run: WASMEDGE_PLUGIN_PATH=WasmEdge/build/plugins/wasi_nn WasmEdge/build/tools/wasmedge/wasmedge --dir .:. 
WasmEdge-WASINN-examples/wasmedge-chatTTS/target/wasm32-wasi/release/wasmedge-chattts.wasm + + - name: Verify output + run: test "$(file --brief output1.wav)" == 'RIFF (little-endian) data, WAVE audio, mono 24000 Hz' + diff --git a/wasmedge-chatTTS/.gitignore b/wasmedge-chatTTS/.gitignore new file mode 100644 index 0000000..8207d82 --- /dev/null +++ b/wasmedge-chatTTS/.gitignore @@ -0,0 +1,2 @@ +asset +config \ No newline at end of file diff --git a/wasmedge-chatTTS/Cargo.toml b/wasmedge-chatTTS/Cargo.toml new file mode 100644 index 0000000..149398b --- /dev/null +++ b/wasmedge-chatTTS/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "wasmedge-chattts" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde_json = "1.0" +wasmedge-wasi-nn = {git = "https://github.com/second-state/wasmedge-wasi-nn.git", branch = "ggml"} +hound = "3.4" \ No newline at end of file diff --git a/wasmedge-chatTTS/README.md b/wasmedge-chatTTS/README.md new file mode 100644 index 0000000..139e88b --- /dev/null +++ b/wasmedge-chatTTS/README.md @@ -0,0 +1,69 @@ +# ChatTTS example with WasmEdge WASI-NN ChatTTS plugin +This example demonstrates how to use the WasmEdge WASI-NN ChatTTS plugin to generate speech from text. ChatTTS is a text-to-speech model designed specifically for dialogue scenarios such as LLM assistants. This example uses the WasmEdge WASI-NN ChatTTS plugin to run ChatTTS and generate speech. + +## Install WasmEdge with WASI-NN ChatTTS plugin +The ChatTTS backend relies on the ChatTTS package and the Python development library; we recommend the following commands to install the dependencies. +``` bash +sudo apt update +sudo apt upgrade +sudo apt install python3-dev +pip install chattts==0.1.1 +``` + +Then build and install WasmEdge from source. + +``` bash +cd <path/to/your/wasmedge/source/folder> + +cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=Release -DWASMEDGE_PLUGIN_WASI_NN_BACKEND="chatTTS" +cmake --build build + +# For the WASI-NN plugin, you should install this project.
+cmake --install build +``` + +Then you will have an executable `wasmedge` runtime under `/usr/local/bin` and the WASI-NN plug-in with the ChatTTS backend under `/usr/local/lib/wasmedge/libwasmedgePluginWasiNN.so` after installation. + +## Build wasm + +Run the following command to build the WASM file; the output will be placed in `target/wasm32-wasi/release/`. + +```bash +cargo build --target wasm32-wasi --release +``` + +## Execute + +Execute the WASM file with `wasmedge`. + +``` bash +wasmedge --dir .:. ./target/wasm32-wasi/release/wasmedge-chattts.wasm +``` + +This generates the `output1.wav` file, which contains the speech synthesized from the input text. + +## Advanced Options + +The `config_data` is used to adjust the configuration of ChatTTS. +It supports the following options: +- `prompt`: Generate the special token in the text to synthesize. +- `spk_emb`: Sampled speaker (use `random` for a random speaker). +- `temperature`: Custom temperature. +- `top_k`: Top K decode. +- `top_p`: Top P decode. + +``` rust +let config_data = serde_json::to_string(&json!({"prompt": "[oral_2][laugh_0][break_6]", "spk_emb": "random", "temperature": 0.5, "top_k": 0, "top_p": 0.9})) + .unwrap() + .as_bytes() + .to_vec(); +``` + + + + +
+ +[demo.webm](https://github.com/user-attachments/assets/377e0487-9107-41db-9c22-31962ce53f88) + +
\ No newline at end of file diff --git a/wasmedge-chatTTS/assets/demo.wav b/wasmedge-chatTTS/assets/demo.wav new file mode 100644 index 0000000..f9b38d1 Binary files /dev/null and b/wasmedge-chatTTS/assets/demo.wav differ diff --git a/wasmedge-chatTTS/assets/demo.webm b/wasmedge-chatTTS/assets/demo.webm new file mode 100644 index 0000000..9a1f52c Binary files /dev/null and b/wasmedge-chatTTS/assets/demo.webm differ diff --git a/wasmedge-chatTTS/src/main.rs b/wasmedge-chatTTS/src/main.rs new file mode 100644 index 0000000..7af21f5 --- /dev/null +++ b/wasmedge-chatTTS/src/main.rs @@ -0,0 +1,55 @@ +use hound; +use serde_json::json; +use wasmedge_wasi_nn::{ + self, ExecutionTarget, GraphBuilder, GraphEncoding, GraphExecutionContext, TensorType, +}; + +fn get_data_from_context(context: &GraphExecutionContext, index: usize) -> Vec<u8> { + const MAX_OUTPUT_BUFFER_SIZE: usize = 4096 * 4096; + let mut output_buffer = vec![0u8; MAX_OUTPUT_BUFFER_SIZE]; + let bytes_written = context + .get_output(index, &mut output_buffer) + .expect("Failed to get output"); + + return output_buffer[..bytes_written].to_vec(); +} + +fn main() { + let prompt = "It is test sentence [uv_break] for chat T T S."; + let tensor_data = prompt.as_bytes().to_vec(); + let config_data = serde_json::to_string(&json!({"prompt": "[oral_2][laugh_0][break_6]", "spk_emb": "random", "temperature": 0.5, "top_k": 0, "top_p": 0.9})) + .unwrap() + .as_bytes() + .to_vec(); + let empty_vec: Vec<Vec<u8>> = Vec::new(); + let graph = GraphBuilder::new(GraphEncoding::ChatTTS, ExecutionTarget::CPU) + .build_from_bytes(empty_vec) + .expect("Failed to build graph"); + let mut context = graph + .init_execution_context() + .expect("Failed to init context"); + context + .set_input(0, TensorType::U8, &[1], &tensor_data) + .expect("Failed to set input"); + context + .set_input(1, TensorType::U8, &[1], &config_data) + .expect("Failed to set input"); + context.compute().expect("Failed to compute"); + let output_bytes = 
get_data_from_context(&context, 0); + let spec = hound::WavSpec { + channels: 1, + sample_rate: 24000, + bits_per_sample: 32, + sample_format: hound::SampleFormat::Float, + }; + let mut writer = hound::WavWriter::create("output1.wav", spec).unwrap(); + let samples: Vec<f32> = output_bytes + .chunks_exact(4) + .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]])) + .collect(); + for sample in samples { + writer.write_sample(sample).unwrap(); + } + writer.finalize().unwrap(); + graph.unload().expect("Failed to free resource"); +} diff --git a/wasmedge-chatTTS/wasmedge-chattts.wasm b/wasmedge-chatTTS/wasmedge-chattts.wasm new file mode 100755 index 0000000..0a6ada3 Binary files /dev/null and b/wasmedge-chatTTS/wasmedge-chattts.wasm differ