diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0b1e1e7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +**/__pycache__ +**/.venv +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/.gitignore b/.gitignore index 8274353..72fb137 100755 --- a/.gitignore +++ b/.gitignore @@ -142,3 +142,7 @@ wandb/ nohup.out multirun outputs +run.md +*.md +.vscode/tasks.json +*.sh diff --git a/.vscode/.xxluadata b/.vscode/.xxluadata new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.vscode/.xxluadata @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..878d794 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,44 @@ +{ + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "Docker: Python - General", + "type": "docker", + "request": "launch", + "preLaunchTask": "docker-run: debug", + "python": { + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "/app" + } + ], + "projectType": "general" + } + } + ] +} \ No newline at end of file diff --git a/Audio b/Audio new file mode 160000 index 0000000..5adaf66 --- /dev/null +++ b/Audio @@ -0,0 +1 @@ +Subproject commit 5adaf66b92defff4061672a3d35ce70b52f600c3 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3149a9c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# For more information, please refer to https://aka.ms/vscode-docker-python +FROM python:3.10-slim + +# Keeps Python from generating .pyc files in the container +ENV PYTHONDONTWRITEBYTECODE=1 + +# Turns off buffering for easier container logging +ENV PYTHONUNBUFFERED=1 + +# Install pip requirements +COPY requirements.txt . +RUN python -m pip install -r requirements.txt + +WORKDIR /app +COPY . /app + +# Creates a non-root user with an explicit UID and adds permission to access the /app folder +# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers +RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app +USER appuser + +# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug +CMD ["python", "-m", "Audio GPt"] diff --git a/audio-chatgpt.py b/audio-chatgpt.py index 280534b..8679369 100644 --- a/audio-chatgpt.py +++ b/audio-chatgpt.py @@ -3,12 +3,12 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__))) sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'NeuralSeq')) -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio')) +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio')) # noqa: E501 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'audio_detection')) -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural')) -import gradio as gr -import matplotlib -import librosa +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural')) # noqa: E501 +import gradio as gr # noqa: E402 +import matplotlib # noqa: E402 +import librosa # noqa: E402 import torch from langchain.agents.initialize import initialize_agent from langchain.agents.tools import Tool diff --git a/compose-dev.yaml b/compose-dev.yaml new file mode 100644 index 0000000..a92f701 --- /dev/null +++ b/compose-dev.yaml @@ -0,0 +1,12 @@ +services: + app: + entrypoint: + - sleep + - infinity + image: docker/dev-environments-default:stable-1 + init: true + volumes: + - type: bind + source: /var/run/docker.sock + target: /var/run/docker.sock + diff --git a/download.sh b/download.sh index c23cc9f..083bc51 100644 --- a/download.sh +++ b/download.sh @@ -40,4 +40,47 @@ wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz tar -zxvf tsd.tar.gz ./ rm tsd.tar.gz cd sound_extraction/useful_ckpts -wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt \ No newline at end of file +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt +mkdir checkpoints +mkdir audio +mkdir image +mkdir text_to_audio +# Text to sing +wget -P checkpoints/0831_opencpop_ds1000/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/model_ckpt_steps_320000.ckpt +wget -P checkpoints/0109_hifigan_bigpopcs_hop128/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt +wget -P checkpoints/0102_xiaoma_pe/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0102_xiaoma_pe/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +# Text to audio +cd text_to_audio +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/ta40multi_epoch=000085.ckpt +wget -P text_to_audio/Make_An_Audio/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/CLAP/CLAP_weights_2022.pth +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt +wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt +# Text to speech +wget -P checkpoints/GenerSpeech/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/GenerSpeech/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/GenerSpeech/model_ckpt_steps_300000.ckpt +wget -P checkpoints/trainset_hifigan/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/trainset_hifigan/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt +wget -P checkpoints/ https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/Emotion_encoder.pt +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_dict.txt +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_model.zip +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/phone_set.json +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/spk_map.json +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/train_f0s_mean_std.npy +wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/word_set.json +wget -P text_to_speech/checkpoints/hifi_lj -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/hifi_lj/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/text_to_speech/checkpoints/hifi_lj/model_ckpt_steps_2076000.ckpt +wget -P text_to_speech/checkpoints/ljspeech/ps_adv_baseline -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/ljspeech/ps_adv_baseline/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160000.ckpt https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160001.ckpt +# Audio to text +wget -P audio_to_text/audiocaps_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/swa.pth +wget -P audio_to_text/clotho_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/swa.pth +wget -P audio_to_text/pretrained_feature_extractors https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/pretrained_feature_extractors/contrastive_pretrain_cnn14_bertm.pth +# Audio detection +cd audio_detection/audio_infer/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/audio_detection.pth +cd mono2binaural/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/m2b.tar.gz +tar -zxvf m2b.tar.gz ./ +rm m2b.tar.gz +cd audio_detection/target_sound_detection/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz +tar -zxvf tsd.tar.gz ./ +rm tsd.tar.gz +cd sound_extraction/useful_ckpts +wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt diff --git a/run.md b/run.md index 742c88b..0017a27 100644 --- a/run.md +++ b/run.md @@ -10,10 +10,13 @@ pip install -r requirements.txt bash download.sh # prepare your private openAI private key -export OPENAI_API_KEY={Your_Private_Openai_Key} +export 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 + # Start AudioGPT ! python audio-chatgpt.py ``` +make pop song +