From 6dbf16ed05bb74ad5526198add2b0e20f61c6601 Mon Sep 17 00:00:00 2001 From: mcneilrp1 <132184899+mcneilrp1@users.noreply.github.com> Date: Tue, 9 May 2023 08:11:48 -0400 Subject: [PATCH 1/4] Audio GPT run.md pip install -r requirements.txt } --- .dockerignore | 27 +++++++++++++++++++++++++++ .vscode/launch.json | 44 ++++++++++++++++++++++++++++++++++++++++++++ Audio | 1 + Dockerfile | 19 +++++++++++++++++++ audio-chatgpt.py | 10 +++++----- download.sh | 45 ++++++++++++++++++++++++++++++++++++++++++++- run.md | 5 ++++- 7 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 .dockerignore create mode 100644 .vscode/launch.json create mode 160000 Audio create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0b1e1e7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +**/__pycache__ +**/.venv +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..878d794 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,44 @@ +{ + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Docker: Python - General", + "type": "docker", + "request": "launch", + "preLaunchTask": "docker-run: debug", + "python": { + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "/app" + } + ], + "projectType": "general" + } + } + ] +} \ No newline at end of file diff --git a/Audio b/Audio new file mode 160000 index 0000000..5adaf66 --- /dev/null +++ b/Audio @@ -0,0 +1 @@ +Subproject commit 5adaf66b92defff4061672a3d35ce70b52f600c3 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..405027e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +# For more information, please refer to https://aka.ms/vscode-docker-python +FROM python:3.10-slim + +ENV PYTHONUNBUFFERED=1 + +# Install pip requirements +COPY requirements.txt . +RUN python -m pip install -r requirements.txt + +WORKDIR /app +COPY . /app + +# Creates a non-root user with an explicit UID and adds permission to access the /app folder +# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers +RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app +USER appuser + +# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug +CMD ["python", "-m", "Audio GPt"] diff --git a/audio-chatgpt.py b/audio-chatgpt.py index 280534b..8679369 100644 --- a/audio-chatgpt.py +++ b/audio-chatgpt.py @@ -3,12 +3,12 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__))) sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'NeuralSeq')) -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio')) +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio')) # noqa: E501 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'audio_detection')) -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural')) -import gradio as gr -import matplotlib -import librosa +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural')) # noqa: E501 +import gradio as gr # noqa: E402 +import matplotlib # noqa: E402 +import librosa # noqa: E402 import torch from langchain.agents.initialize import initialize_agent from langchain.agents.tools import Tool diff --git a/download.sh b/download.sh index c23cc9f..083bc51 100644 --- a/download.sh +++ b/download.sh @@ -40,4 +40,47 @@ wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz tar -zxvf tsd.tar.gz ./ rm tsd.tar.gz cd sound_extraction/useful_ckpts -wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt \ No newline at end of file +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt +mkdir checkpoints +mkdir audio +mkdir image +mkdir text_to_audio +# Text to sing +wget -P checkpoints/0831_opencpop_ds1000/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/model_ckpt_steps_320000.ckpt +wget -P checkpoints/0109_hifigan_bigpopcs_hop128/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt +wget -P checkpoints/0102_xiaoma_pe/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0102_xiaoma_pe/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +# Text to audio +cd text_to_audio +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/ta40multi_epoch=000085.ckpt +wget -P text_to_audio/Make_An_Audio/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/CLAP/CLAP_weights_2022.pth +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt +# Text to speech +wget -P checkpoints/GenerSpeech/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/GenerSpeech/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/GenerSpeech/model_ckpt_steps_300000.ckpt +wget -P checkpoints/trainset_hifigan/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/trainset_hifigan/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt +wget -P checkpoints/ https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/Emotion_encoder.pt +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_dict.txt +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_model.zip +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/phone_set.json +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/spk_map.json +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/train_f0s_mean_std.npy +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/word_set.json +wget -P text_to_speech/checkpoints/hifi_lj -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/hifi_lj/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/text_to_speech/checkpoints/hifi_lj/model_ckpt_steps_2076000.ckpt +wget -P text_to_speech/checkpoints/ljspeech/ps_adv_baseline -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/ljspeech/ps_adv_baseline/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160000.ckpt https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160001.ckpt +# Audio to text +wget -P audio_to_text/audiocaps_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/swa.pth +wget -P audio_to_text/clotho_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/swa.pth +wget -P audio_to_text/pretrained_feature_extractors https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/pretrained_feature_extractors/contrastive_pretrain_cnn14_bertm.pth +# Audio detection +cd audio_detection/audio_infer/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/audio_detection.pth +cd mono2binaural/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/m2b.tar.gz +tar -zxvf m2b.tar.gz ./ +rm m2b.tar.gz +cd audio_detection/target_sound_detection/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz +tar -zxvf tsd.tar.gz ./ +rm tsd.tar.gz +cd sound_extraction/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt diff --git a/run.md b/run.md index 742c88b..0017a27 100644 --- a/run.md +++ b/run.md @@ -10,10 +10,13 @@ pip install -r requirements.txt bash download.sh # prepare your private openAI private key -export OPENAI_API_KEY={Your_Private_Openai_Key} +export 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 + # Start AudioGPT ! python audio-chatgpt.py ``` +make pop song + From f6ba9e1491014878e16fe6e5dcf93a0bdf150da9 Mon Sep 17 00:00:00 2001 From: mcneilrp1 <132184899+mcneilrp1@users.noreply.github.com> Date: Tue, 9 May 2023 08:11:50 -0400 Subject: [PATCH 2/4] Update Dockerfile --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index 405027e..3149a9c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,10 @@ # For more information, please refer to https://aka.ms/vscode-docker-python FROM python:3.10-slim +# Keeps Python from generating .pyc files in the container +ENV PYTHONDONTWRITEBYTECODE=1 + +# Turns off buffering for easier container logging ENV PYTHONUNBUFFERED=1 # Install pip requirements From 8243a98a981470e14009e83cc714ffb73dc0e621 Mon Sep 17 00:00:00 2001 From: mcneilrp1 <132184899+mcneilrp1@users.noreply.github.com> Date: Tue, 9 May 2023 08:13:24 -0400 Subject: [PATCH 3/4] Audio GPt Audio GPt Co-Authored-By: Mileta Avramovic <92308650+MiletaA@users.noreply.github.com> Co-Authored-By: Jinglin Liu <32165188+MoonInTheRiver@users.noreply.github.com> Co-Authored-By: Zhiqing Hong Co-Authored-By: Darius-H <49788663+Darius-H@users.noreply.github.com> Co-Authored-By: Yuning Wu <38801033+A-Quarter-Mile@users.noreply.github.com> --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 8274353..72fb137 100755 --- a/.gitignore +++ b/.gitignore @@ -142,3 +142,7 @@ wandb/ nohup.out multirun outputs +run.md +*.md +.vscode/tasks.json +*.sh From e45b2b70a743fe678d6ed73cb0e282f0c44d8b98 Mon Sep 17 00:00:00 2001 From: mcneilrp1 <132184899+mcneilrp1@users.noreply.github.com> Date: Tue, 9 May 2023 12:57:41 +0000 Subject: [PATCH 4/4] new file: .vscode/.xxluadata --- .vscode/.xxluadata | 1 + compose-dev.yaml | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 .vscode/.xxluadata create mode 100644 compose-dev.yaml diff --git a/.vscode/.xxluadata b/.vscode/.xxluadata new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.vscode/.xxluadata @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/compose-dev.yaml b/compose-dev.yaml new file mode 100644 index 0000000..a92f701 --- /dev/null +++ b/compose-dev.yaml @@ -0,0 +1,12 @@ +services: + app: + entrypoint: + - sleep + - infinity + image: docker/dev-environments-default:stable-1 + init: true + volumes: + - type: bind + source: /var/run/docker.sock + target: /var/run/docker.sock +