diff --git a/notebooks/408-openvoice/408-openvoice.ipynb b/notebooks/408-openvoice/408-openvoice.ipynb index a16fc386d39..563eba5cbd3 100644 --- a/notebooks/408-openvoice/408-openvoice.ipynb +++ b/notebooks/408-openvoice/408-openvoice.ipynb @@ -8,73 +8,73 @@ ] }, { - "cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#!git clone https://github.com/myshell-ai/OpenVoice" + "OpenVoice is a versatile instant voice cloning approach that requires only a short audio clip from the reference speaker to replicate their voice and generate speech in multiple languages. OpenVoice enables granular control over voice styles, including emotion, accent, rhythm, pauses, and intonation, in addition to replicating the tone color of the reference speaker. OpenVoice also achieves zero-shot cross-lingual voice cloning for languages not included in the massive-speaker training set.\n", + "\n", + "This notebook provides an example of converting the original OpenVoice model (https://github.com/myshell-ai/OpenVoice) to the OpenVINO IR format for faster inference." 
] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# pip install openvino\n", - "\n", - "# todo: unfreeze dependencies\n", - "# %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \"torch>=2.1.0\" \"torchaudio>=2.1.0\"\n", - "# wavmark==0.0.2 also installs torch" + "clone the repository and install all dependencies" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# %pip install librosa==0.9.1 \\\n", - "# faster-whisper==0.9.0 \\\n", - "# pydub==0.25.1 \\\n", - "# whisper-timestamped==1.14.2 \\\n", - "# tqdm \\\n", - "# inflect==7.0.0 \\\n", - "# unidecode==1.3.7 \\\n", - "# eng_to_ipa==0.0.2 \\\n", - "# wavmark==0.0.2 \\\n", - "# pypinyin==0.50.0 \\\n", - "# cn2an==0.5.22 \\\n", - "# jieba==0.42.1 \\\n", - "# langid==1.1.6\n", - "# gradio==3.48.0 \\" + "!git clone https://github.com/myshell-ai/OpenVoice" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# %pip install gradio==3.48.0" + "%pip install openvino==2023.2\n", + "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \"torch>=2.1.0\" \"torchaudio>=2.1.0\"\n", + "# wavmark==0.0.2 also installs torch\n", + "\n", + "# todo: try to unfreeze dependencies\n", + "%pip install librosa==0.9.1 \\\n", + "faster-whisper==0.9.0 \\\n", + "pydub==0.25.1 \\\n", + "whisper-timestamped==1.14.2 \\\n", + "tqdm \\\n", + "inflect==7.0.0 \\\n", + "unidecode==1.3.7 \\\n", + "eng_to_ipa==0.0.2 \\\n", + "wavmark==0.0.2 \\\n", + "pypinyin==0.50.0 \\\n", + "cn2an==0.5.22 \\\n", + "jieba==0.42.1 \\\n", + "langid==1.1.6 \\\n", + "gradio==4.15 \\\n", + "ipywebrtc \\\n", + "ipywidgets" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "import openvino as ov\n", "import os\n", "import torch\n", - "from 
openvoice_utils import get_tts_forward, get_converter_forward, OVOpenVoiceTTS, OVOpenVoiceConvert" + "from openvoice_utils import OVOpenVoiceTTS, OVOpenVoiceConvert" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -94,120 +94,53 @@ } ], "source": [ + "# cd to the original repo to save original data paths and imports\n", "%cd OpenVoice" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "download all resources from HF Hub" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2024-01-22 18:47:29-- https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/checkpoint.pth\n", - "Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.102.248.16\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 
302 Found\n", - "Location: https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/de9fb0eb749f3254130fe0172fcbb20e75f88a9b16b54dd0b73cac0dc40da7d9?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27checkpoint.pth%3B+filename%3D%22checkpoint.pth%22%3B&Expires=1706204855&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg1NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvZGU5ZmIwZWI3NDlmMzI1NDEzMGZlMDE3MmZjYmIyMGU3NWY4OGE5YjE2YjU0ZGQwYjczY2FjMGRjNDBkYTdkOT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=g05Mq2b4B-jTfmZ1o5wZ67TcWOuqSyGp2CUV27L%7EiahZduyiT1R8LAyvTrrNC5i7s3yJ2xaPytGUXHStac4MB6vklQVSbpmmPBO0nZ9Fi%7EGTFHr5n89XWc1WFu6kR9Wn2PrXwadXB47XNAe-nqmEPI8ppaozpl0QSwbKWV6UT4076foFxvKmVd2tUo9zXfiwQG3JsE1VYCHslkH3idKw7w4GgbzLIKf5j0RSqPCjLSAzWvi1NRXY6WvW2-DfpxF2fldX3f73hQga5PZqvOKpEHmcmyYdjDEnGJZzeuXf8A0GrfbkRII%7Egbmcj106hq0CecrvG1XJGC9acMeeCRAASQ__&Key-Pair-Id=KCD77M1F0VK2B [following]\n", - "--2024-01-22 18:47:29-- 
https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/de9fb0eb749f3254130fe0172fcbb20e75f88a9b16b54dd0b73cac0dc40da7d9?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27checkpoint.pth%3B+filename%3D%22checkpoint.pth%22%3B&Expires=1706204855&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg1NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvZGU5ZmIwZWI3NDlmMzI1NDEzMGZlMDE3MmZjYmIyMGU3NWY4OGE5YjE2YjU0ZGQwYjczY2FjMGRjNDBkYTdkOT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=g05Mq2b4B-jTfmZ1o5wZ67TcWOuqSyGp2CUV27L%7EiahZduyiT1R8LAyvTrrNC5i7s3yJ2xaPytGUXHStac4MB6vklQVSbpmmPBO0nZ9Fi%7EGTFHr5n89XWc1WFu6kR9Wn2PrXwadXB47XNAe-nqmEPI8ppaozpl0QSwbKWV6UT4076foFxvKmVd2tUo9zXfiwQG3JsE1VYCHslkH3idKw7w4GgbzLIKf5j0RSqPCjLSAzWvi1NRXY6WvW2-DfpxF2fldX3f73hQga5PZqvOKpEHmcmyYdjDEnGJZzeuXf8A0GrfbkRII%7Egbmcj106hq0CecrvG1XJGC9acMeeCRAASQ__&Key-Pair-Id=KCD77M1F0VK2B\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 200 OK\n", - "Length: 160467309 (153M) [application/zip]\n", - "Saving to: ‘checkpoints/base_speakers/ZH/checkpoint.pth’\n", - "\n", - "checkpoints/base_sp 100%[===================>] 153,03M 3,98MB/s in 39s \n", - "\n", - "2024-01-22 18:48:08 (3,96 MB/s) - ‘checkpoints/base_speakers/ZH/checkpoint.pth’ saved [160467309/160467309]\n", - "\n", - "--2024-01-22 18:48:08-- https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/base_speakers/ZH/config.json\n", - "Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.102.248.16\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 
200 OK\n", - "Length: 1828 (1,8K) [text/plain]\n", - "Saving to: ‘checkpoints/base_speakers/ZH/config.json’\n", - "\n", - "checkpoints/base_sp 100%[===================>] 1,79K --.-KB/s in 0s \n", - "\n", - "2024-01-22 18:48:09 (5,62 GB/s) - ‘checkpoints/base_speakers/ZH/config.json’ saved [1828/1828]\n", - "\n", - "--2024-01-22 18:48:09-- https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/en_style_se.pth\n", - "Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.102.248.16\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 302 Found\n", - "Location: https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/6f698153be5004b90a8642d1157c89cae7dd296752a3276450ced6a17b8b98a9?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27en_style_se.pth%3B+filename%3D%22en_style_se.pth%22%3B&Expires=1706204895&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvNmY2OTgxNTNiZTUwMDRiOTBhODY0MmQxMTU3Yzg5Y2FlN2RkMjk2NzUyYTMyNzY0NTBjZWQ2YTE3YjhiOThhOT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=ZTLflxpGZhaVLw7m2Z1yazyw9imi1828LU3PHyTaxGdkRRq%7E3JZwA5Uj%7ETuEICCCR0jLjAhKkywWyRQpZg6uhJzAe7vvQvsRJizpj5y9%7E1SsVszgBhkazxdkcxlHyo3kdOKqI0vaPKe9soQxAKq3KYDrc4LwshsIbrumvRmUuwquiVzZeWqKh-ILriFQfoy9gpbyaHWJt4dzeZUcbUOqVUxjgMFVMHWwiACFeFs5ISiA7glH8y4yhR59FfzyvLKoic3wyoQLvW6kvEiDPDrjumk%7EMlYhoWhKbrZrKUaKu%7ELaD57dPorz2P%7E48dCnIXkKmwRUJtSQfTSORLd%7EhVLAnQ__&Key-Pair-Id=KCD77M1F0VK2B [following]\n", - "--2024-01-22 18:48:09-- 
https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/6f698153be5004b90a8642d1157c89cae7dd296752a3276450ced6a17b8b98a9?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27en_style_se.pth%3B+filename%3D%22en_style_se.pth%22%3B&Expires=1706204895&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvNmY2OTgxNTNiZTUwMDRiOTBhODY0MmQxMTU3Yzg5Y2FlN2RkMjk2NzUyYTMyNzY0NTBjZWQ2YTE3YjhiOThhOT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=ZTLflxpGZhaVLw7m2Z1yazyw9imi1828LU3PHyTaxGdkRRq%7E3JZwA5Uj%7ETuEICCCR0jLjAhKkywWyRQpZg6uhJzAe7vvQvsRJizpj5y9%7E1SsVszgBhkazxdkcxlHyo3kdOKqI0vaPKe9soQxAKq3KYDrc4LwshsIbrumvRmUuwquiVzZeWqKh-ILriFQfoy9gpbyaHWJt4dzeZUcbUOqVUxjgMFVMHWwiACFeFs5ISiA7glH8y4yhR59FfzyvLKoic3wyoQLvW6kvEiDPDrjumk%7EMlYhoWhKbrZrKUaKu%7ELaD57dPorz2P%7E48dCnIXkKmwRUJtSQfTSORLd%7EhVLAnQ__&Key-Pair-Id=KCD77M1F0VK2B\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 200 OK\n", - "Length: 1783 (1,7K) [application/zip]\n", - "Saving to: ‘checkpoints/base_speakers/EN/en_style_se.pth’\n", - "\n", - "checkpoints/base_sp 100%[===================>] 1,74K --.-KB/s in 0s \n", - "\n", - "2024-01-22 18:48:10 (87,7 MB/s) - ‘checkpoints/base_speakers/EN/en_style_se.pth’ saved [1783/1783]\n", - "\n", - "--2024-01-22 18:48:10-- https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/zh_default_se.pth\n", - "Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.102.248.16\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 
302 Found\n", - "Location: https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/3b62e8264962059b8a84dd00b29e2fcccc92f5d3be90eec67dfa082c0cf58ccf?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27zh_default_se.pth%3B+filename%3D%22zh_default_se.pth%22%3B&Expires=1706204895&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvM2I2MmU4MjY0OTYyMDU5YjhhODRkZDAwYjI5ZTJmY2NjYzkyZjVkM2JlOTBlZWM2N2RmYTA4MmMwY2Y1OGNjZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=g85yifinD1PSumgzQizzqdT1D1aHeVX-rhOQ63enKxx%7EjHPMScJ7wX-TxZVhU62KRtBCIExnTslWo%7E2xIHKrCN-4u8UjBxRrURtwrVKaJjqnhcoe2gzVHtlX0w1HYpqPX8LzGhliSWIlLSbcjeeXSMqSKvU7KXj8Bx73aruoz1E-Au6biP3AiWpsPFqyx8XMdjtZzf0m-qrzp4uDGClqr6qtMWuy8hFD4WkhehZ5IUcP5YC81oqCSRk4Hr7yad58Gc0ApsFPKEjtLmY1xmVXJwSsew1xCWMDO4Ca4Fsk9HzOySkmzzW-JRhNefZZZQOhtbpCzNsT1munxY7qa3yIfg__&Key-Pair-Id=KCD77M1F0VK2B [following]\n", - "--2024-01-22 18:48:10-- 
https://cdn-lfs-us-1.huggingface.co/repos/c4/4f/c44ff1065a97d8c91e31c6989e0b1f15abb8c70de9951f7f5b9adda9a9c3a4f5/3b62e8264962059b8a84dd00b29e2fcccc92f5d3be90eec67dfa082c0cf58ccf?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27zh_default_se.pth%3B+filename%3D%22zh_default_se.pth%22%3B&Expires=1706204895&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjIwNDg5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2M0LzRmL2M0NGZmMTA2NWE5N2Q4YzkxZTMxYzY5ODllMGIxZjE1YWJiOGM3MGRlOTk1MWY3ZjViOWFkZGE5YTljM2E0ZjUvM2I2MmU4MjY0OTYyMDU5YjhhODRkZDAwYjI5ZTJmY2NjYzkyZjVkM2JlOTBlZWM2N2RmYTA4MmMwY2Y1OGNjZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=g85yifinD1PSumgzQizzqdT1D1aHeVX-rhOQ63enKxx%7EjHPMScJ7wX-TxZVhU62KRtBCIExnTslWo%7E2xIHKrCN-4u8UjBxRrURtwrVKaJjqnhcoe2gzVHtlX0w1HYpqPX8LzGhliSWIlLSbcjeeXSMqSKvU7KXj8Bx73aruoz1E-Au6biP3AiWpsPFqyx8XMdjtZzf0m-qrzp4uDGClqr6qtMWuy8hFD4WkhehZ5IUcP5YC81oqCSRk4Hr7yad58Gc0ApsFPKEjtLmY1xmVXJwSsew1xCWMDO4Ca4Fsk9HzOySkmzzW-JRhNefZZZQOhtbpCzNsT1munxY7qa3yIfg__&Key-Pair-Id=KCD77M1F0VK2B\n", - "Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.102.248.16|:912... connected.\n", - "Proxy request sent, awaiting response... 
200 OK\n", - "Length: 1789 (1,7K) [application/zip]\n", - "Saving to: ‘checkpoints/base_speakers/ZH/zh_default_se.pth’\n", - "\n", - "checkpoints/base_sp 100%[===================>] 1,75K --.-KB/s in 0s \n", - "\n", - "2024-01-22 18:48:10 (87,8 MB/s) - ‘checkpoints/base_speakers/ZH/zh_default_se.pth’ saved [1789/1789]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "# !mkdir -p checkpoints/converter/\n", - "# !mkdir -p checkpoints/base_speakers/EN/\n", - "# !mkdir -p checkpoints/base_speakers/ZH/\n", + "!mkdir -p checkpoints/converter/\n", + "!mkdir -p checkpoints/base_speakers/EN/\n", + "!mkdir -p checkpoints/base_speakers/ZH/\n", "\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth -O checkpoints/converter/checkpoint.pth\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/converter/config.json -O checkpoints/converter/config.json\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth -O checkpoints/converter/checkpoint.pth\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/converter/config.json -O checkpoints/converter/config.json\n", "\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/checkpoint.pth -O checkpoints/base_speakers/EN/checkpoint.pth\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/base_speakers/EN/config.json -O checkpoints/base_speakers/EN/config.json\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/checkpoint.pth -O checkpoints/base_speakers/EN/checkpoint.pth\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/base_speakers/EN/config.json -O checkpoints/base_speakers/EN/config.json\n", "\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/checkpoint.pth -O 
checkpoints/base_speakers/ZH/checkpoint.pth\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/base_speakers/ZH/config.json -O checkpoints/base_speakers/ZH/config.json\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/checkpoint.pth -O checkpoints/base_speakers/ZH/checkpoint.pth\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/raw/main/checkpoints/base_speakers/ZH/config.json -O checkpoints/base_speakers/ZH/config.json\n", "\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/en_default_se.pth -O checkpoints/base_speakers/EN/en_default_se.pth\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/en_style_se.pth -O checkpoints/base_speakers/EN/en_style_se.pth\n", - "# !wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/zh_default_se.pth -O checkpoints/base_speakers/ZH/zh_default_se.pth" + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/en_default_se.pth -O checkpoints/base_speakers/EN/en_default_se.pth\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/EN/en_style_se.pth -O checkpoints/base_speakers/EN/en_style_se.pth\n", + "!wget https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/base_speakers/ZH/zh_default_se.pth -O checkpoints/base_speakers/ZH/zh_default_se.pth" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Importing the dtw module. When using in academic works please cite:\n", - " T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.\n", - " J. Stat. 
Soft., doi:10.18637/jss.v031.i07.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "import se_extractor\n", "from api import BaseSpeakerTTS, ToneColorConverter" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -252,7 +185,35 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e0dff3e511e847ce829a8d7bb6ee7943", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Dropdown(options=('CPU', 'GPU', 'AUTO'), value='CPU')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "devices = ['CPU', 'GPU', 'AUTO']\n", + "device = widgets.Dropdown(options=devices, value=devices[0], disabled=False)\n", + "display(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -260,183 +221,300 @@ "zh_tts_model = OVOpenVoiceTTS(en_base_speaker_tts, ir_path='zh_openvoice_tts.xml')\n", "color_convert_model = OVOpenVoiceConvert(tone_color_converter, ir_path='openvoice_converter.xml')\n", "\n", - "en_tts_model.compile()\n", - "zh_tts_model.compile()\n", - "color_convert_model.compile()" + "en_tts_model.compile(device.value)\n", + "zh_tts_model.compile(device.value)\n", + "color_convert_model.compile(device.value)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# load speaker embeddings\n", - "en_source_default_se = torch.load(f'{en_ckpt_base}/en_default_se.pth').to(device)\n", - "en_source_style_se = torch.load(f'{en_ckpt_base}/en_style_se.pth').to(device)\n", - "zh_source_se = torch.load(f'{zh_ckpt_base}/zh_default_se.pth').to(device)\n", - "\n", - "# source_se = torch.load(f'{ckpt_base}/en_default_se.pth').to(device)\n", - "\n", - "# need to install ffmpeg in the 
system\n", - "reference_speaker = 'resources/example_reference.mp3'\n", - "target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, target_dir='processed', vad=True)" + "en_source_default_se = torch.load(f'{en_ckpt_base}/en_default_se.pth')\n", + "en_source_style_se = torch.load(f'{en_ckpt_base}/en_style_se.pth')\n", + "zh_source_se = torch.load(f'{zh_ckpt_base}/zh_default_se.pth')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Inference" + "First of all, select the reference tone of voice to which the generated text will be converted: you can select from existing ones or record your own by selecting 'record_manually'" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/epavel/devel/openvino_notebooks/.venv/lib/python3.10/site-packages/gradio/components/dropdown.py:90: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running on local URL: http://0.0.0.0:7860\n", - "\n", - "To create a public link, set `share=True` in `launch()`.\n" - ] - }, { "data": { "text/html": [ - "
" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "e3068acf38a94a12ad85c87f346d9a14", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "