-
Notifications
You must be signed in to change notification settings - Fork 2
/
onnx-simple.html
53 lines (45 loc) · 8.95 KB
/
onnx-simple.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>Lyra v2 (SoundStream) - ONNX Runtime Web</title>
</head>
<body>
<p>See browser console for results. This is a WIP - see README for details.</p>
<select id="modelTypeSelectEl">
<option value="ort">ORT</option>
<option value="onnx">ONNX (not working correctly)</option>
</select>
<button onclick="start(); this.disabled=true;">start</button>
<p><a href="https://github.com/josephrocca/lyra-v2-soundstream-web">github repo</a> - <a href="https://huggingface.co/rocca/lyra-v2-soundstream">huggingface repo</a></p>
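<!-- NOTE(review): the package spec in this URL had been mangled to "[email protected]" by e-mail obfuscation. "onnxruntime-web@1.13.1" is a reconstruction - confirm the pinned version against the README. -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.13.1/dist/ort.js"></script>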
<script>
/**
 * Runs one hard-coded 320-sample audio frame through the four Lyra v2
 * (SoundStream) models - encoder, quantizer encoder, quantizer decoder and
 * LyraGAN decoder - with ONNX Runtime Web, logging the input and each stage's
 * output data to the browser console.
 *
 * Reads the model format ("ort" or anything else, treated as "onnx") from the
 * `#modelTypeSelectEl` <select> and relies on the global `ort` object provided
 * by the ONNX Runtime Web script.
 *
 * @returns {Promise<void>} Resolves after the final decoder output is logged.
 * @throws {Error} If the sample array does not hold exactly 320 values; also
 *   rejects with whatever error ONNX Runtime raises while loading or running
 *   a model.
 */
async function start() {
  // Frame length fed to the encoder as a [1, 320] tensor.
  const FRAME_SIZE = 320;

  const modelType = document.getElementById("modelTypeSelectEl").value;
  const modelSuffix = modelType === "ort" ? "with_runtime_opt.ort" : "onnx";

  // Creates a WASM-backed session for one of the four model files.
  const loadModel = (name) =>
    ort.InferenceSession.create(
      `https://huggingface.co/rocca/lyra-v2-soundstream/resolve/main/${modelType}/1.3.0/${name}.${modelSuffix}`,
      { executionProviders: ["wasm"] },
    );

  // Every literal must sit on a single line: a line break inside a number
  // splits it into two tokens and makes the whole script fail to parse.
  const inputAudioSamples = [
    -0.005340576171875, -0.00921630859375, -0.00848388671875, -0.009307861328125, -0.00897216796875,
    -0.009002685546875, -0.008270263671875, -0.008087158203125, -0.007781982421875, -0.007720947265625,
    -0.007476806640625, -0.007080078125, -0.006744384765625, -0.0067138671875, -0.006561279296875,
    -0.0064697265625, -0.006591796875, -0.00665283203125, -0.006866455078125, -0.007171630859375,
    -0.0074462890625, -0.0079345703125, -0.007965087890625, -0.008026123046875, -0.007720947265625,
    -0.007720947265625, -0.008148193359375, -0.008087158203125, -0.008392333984375, -0.0086669921875,
    -0.00933837890625, -0.010009765625, -0.010772705078125, -0.011260986328125, -0.011322021484375,
    -0.01092529296875, -0.010284423828125, -0.010040283203125, -0.009735107421875, -0.00970458984375,
    -0.009765625, -0.010009765625, -0.010345458984375, -0.0103759765625, -0.009918212890625,
    -0.0087890625, -0.008056640625, -0.007720947265625, -0.007537841796875, -0.007659912109375,
    -0.00775146484375, -0.008270263671875, -0.00885009765625, -0.009429931640625, -0.009765625,
    -0.010101318359375, -0.010345458984375, -0.010406494140625, -0.010498046875, -0.010284423828125,
    -0.010345458984375, -0.010223388671875, -0.01007080078125, -0.0101318359375, -0.009979248046875,
    -0.009918212890625, -0.010040283203125, -0.009857177734375, -0.009735107421875, -0.00909423828125,
    -0.008758544921875, -0.00933837890625, -0.009246826171875, -0.00970458984375, -0.0098876953125,
    -0.00970458984375, -0.009521484375, -0.0093994140625, -0.009063720703125, -0.00860595703125,
    -0.008575439453125, -0.008697509765625, -0.0091552734375, -0.00946044921875, -0.009613037109375,
    -0.00970458984375, -0.009765625, -0.010162353515625, -0.010467529296875, -0.01055908203125,
    -0.0108642578125, -0.010986328125, -0.011566162109375, -0.011505126953125, -0.0111083984375,
    -0.01080322265625, -0.01043701171875, -0.01031494140625, -0.00982666015625, -0.009307861328125,
    -0.008636474609375, -0.00860595703125, -0.00848388671875, -0.008453369140625, -0.008636474609375,
    -0.008758544921875, -0.00885009765625, -0.00872802734375, -0.00860595703125, -0.008148193359375,
    -0.007720947265625, -0.007476806640625, -0.007537841796875, -0.0076904296875, -0.008209228515625,
    -0.008697509765625, -0.00885009765625, -0.009033203125, -0.00946044921875, -0.009735107421875,
    -0.00946044921875, -0.008941650390625, -0.008392333984375, -0.008026123046875, -0.007720947265625,
    -0.007598876953125, -0.00762939453125, -0.00762939453125, -0.007659912109375, -0.0076904296875,
    -0.00762939453125, -0.007415771484375, -0.006988525390625, -0.006439208984375, -0.00579833984375,
    -0.005218505859375, -0.004791259765625, -0.004302978515625, -0.0040283203125, -0.00421142578125,
    -0.004608154296875, -0.0050048828125, -0.005462646484375, -0.005828857421875, -0.0057373046875,
    -0.0057373046875, -0.005340576171875, -0.00482177734375, -0.00457763671875, -0.004638671875,
    -0.00482177734375, -0.0047607421875, -0.005096435546875, -0.00518798828125, -0.00537109375,
    -0.00555419921875, -0.005279541015625, -0.0048828125, -0.004547119140625, -0.004058837890625,
    -0.00335693359375, -0.002838134765625, -0.002777099609375, -0.002471923828125, -0.002197265625,
    -0.001953125, -0.00164794921875, -0.00164794921875, -0.001617431640625, -0.00115966796875,
    -0.0006103515625, -0.000244140625, 0, 0.000030518509447574615, 0.0004882961511611938,
    0.00079348124563694, 0.0008545182645320892, 0.0007629627361893654, 0.00039674062281847, 0.0002136295661330223,
    0.0001831110566854477, -0.000213623046875, -0.00030517578125, -0.000274658203125, -0.000244140625,
    0.000030518509447574615, 0.00030518509447574615, 0.0007019257172942162, 0.0008545182645320892, 0.0008545182645320892,
    0.0007324442267417908, 0.0007324442267417908, 0.00045777764171361923, 0.0004882961511611938, 0.0004882961511611938,
    0.0007019257172942162, 0.0012817773967981339, 0.0014648884534835815, 0.00198370311409235, 0.002136295661330223,
    0.0026551103219389915, 0.003021332435309887, 0.002929776906967163, 0.00277718435972929, 0.0027161473408341408,
    0.002685628831386566, 0.00238044373691082, 0.0023499252274632454, 0.0023194067180156708, 0.0024414807558059692,
    0.0025635547935962677, 0.002533036284148693, 0.002533036284148693, 0.0024414807558059692, 0.0023499252274632454,
    0.0025940733030438423, 0.002288888208568096, 0.002288888208568096, 0.0025025177747011185, 0.002533036284148693,
    0.002533036284148693, 0.002288888208568096, 0.0024414807558059692, 0.002624591812491417, 0.0030518509447574615,
    0.003326517529785633, 0.0036011841148138046, 0.003814813680946827, 0.003875850699841976, 0.004028443247079849,
    0.004181035794317722, 0.0040894802659749985, 0.004028443247079849, 0.0040894802659749985, 0.004150517284870148,
    0.004211554303765297, 0.004150517284870148, 0.004425183869898319, 0.004730368964374065, 0.0050355540588498116,
    0.005005035549402237, 0.005005035549402237, 0.0048829615116119385, 0.004608294926583767, 0.004181035794317722,
    0.0035096285864710808, 0.0032349620014429092, 0.0029602954164147377, 0.00277718435972929, 0.0027161473408341408,
    0.0025940733030438423, 0.0025940733030438423, 0.0025940733030438423, 0.0024109622463583946, 0.002471999265253544,
    0.0024109622463583946, 0.002288888208568096, 0.0021973326802253723, 0.0021057771518826485, 0.0020447401329874992,
    0.0021973326802253723, 0.0021973326802253723, 0.0021057771518826485, 0.0021668141707777977, 0.002533036284148693,
    0.002929776906967163, 0.0030518509447574615, 0.0029602954164147377, 0.002624591812491417, 0.0024109622463583946,
    0.0021057771518826485, 0.00198370311409235, 0.0016785180196166039, 0.0014954069629311562, 0.00158696249127388,
    0.0016785180196166039, 0.00158696249127388, 0.00158696249127388, 0.0013733329251408577, 0.0010986663401126862,
    0.0008545182645320892, 0.0006408886983990669, 0.0006714072078466415, 0.0004272591322660446, 0.00033570360392332077,
    0.0001831110566854477, 0.00045777764171361923, 0.0006103701889514923, 0.0004272591322660446, 0.00045777764171361923,
    0.0005493331700563431, 0.0004882961511611938, 0.0003662221133708954, 0.0004272591322660446, 0.0006103701889514923,
    0.0011291848495602608, 0.0014648884534835815, 0.0014648884534835815, 0.0015564439818263054, 0.0016785180196166039,
    0.001739555038511753, 0.0017700735479593277, 0.0015564439818263054, 0.0016174810007214546, 0.0017700735479593277,
    0.0017700735479593277, 0.001831110566854477, 0.0017090365290641785, 0.0016174810007214546, 0.001434369944036007,
    0.0012207403779029846, 0.0011597033590078354, 0.0009460737928748131, 0.0006408886983990669, 0.0007629627361893654,
    0.00119022186845541, 0.0012512588873505592, 0.001342814415693283, 0.0017090365290641785, 0.0019226660951972008,
  ];

  // Fail with a readable message instead of an opaque tensor-shape error if
  // the sample list is ever edited to the wrong length.
  if (inputAudioSamples.length !== FRAME_SIZE) {
    throw new Error(`Expected ${FRAME_SIZE} input audio samples but found ${inputAudioSamples.length}.`);
  }
  console.log(inputAudioSamples);

  // Stage 1: raw audio frame -> encoder features.
  const encoderModel = await loadModel("soundstream_encoder");
  const encoderResults = await encoderModel.run({
    "serving_default_input_audio:0": new ort.Tensor("float32", new Float32Array(inputAudioSamples), [1, FRAME_SIZE]),
  });
  const encodedFeatures = encoderResults["StatefulPartitionedCall:0"]; // float32[1,1,64]
  console.log(encodedFeatures.data);

  // Stage 2: features -> quantized indices. The quantizer count is passed as
  // a scalar int32 tensor (empty dims).
  const quantizerEncoderModel = await loadModel("quantizer_encoder");
  const quantizerEncoderResults = await quantizerEncoderModel.run({
    "encode_input_frames:0": encodedFeatures,
    "encode_num_quantizers:0": new ort.Tensor("int32", new Int32Array([46]), []),
  });
  const quantizedIndices = quantizerEncoderResults["StatefulPartitionedCall_1:0"]; // int32[46,1,1]
  console.log(quantizedIndices.data);

  // Stage 3: quantized indices -> reconstructed features.
  const quantizerDecoderModel = await loadModel("quantizer_decoder");
  const quantizerDecoderResults = await quantizerDecoderModel.run({
    "decode_encoding_indices:0": quantizedIndices,
  });
  const decodedFeatures = quantizerDecoderResults["StatefulPartitionedCall:0"]; // float32[1,1,64]
  console.log(decodedFeatures.data);

  // Stage 4: reconstructed features -> output audio frame.
  const decoderModel = await loadModel("lyragan");
  const decoderResults = await decoderModel.run({ "serving_default_input_audio:0": decodedFeatures });
  const outputAudio = decoderResults["StatefulPartitionedCall:0"]; // float32[1,320]
  console.log(outputAudio.data);
}
</script>
</body>
</html>