AIGC-Audio · mcneilrp1 · May 9, 2023 · May 9, 2023 · May 9, 2023 · May 9, 2023
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,27 @@
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,7 @@ wandb/
 nohup.out
 multirun
 outputs
+run.md
+*.md
+.vscode/tasks.json
+*.sh
diff --git a/.vscode/.xxluadata b/.vscode/.xxluadata
@@ -0,0 +1 @@
+{}
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,44 @@
+{
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        },
+
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        },
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        },
+        {
+            "name": "Docker: Python - General",
+            "type": "docker",
+            "request": "launch",
+            "preLaunchTask": "docker-run: debug",
+            "python": {
+                "pathMappings": [
+                    {
+                        "localRoot": "${workspaceFolder}",
+                        "remoteRoot": "/app"
+                    }
+                ],
+                "projectType": "general"
+            }
+        }
+    ]
+}
diff --git a/Audio b/Audio
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,23 @@
+# For more information, please refer to https://aka.ms/vscode-docker-python
+FROM python:3.10-slim
+
+# Keeps Python from generating .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
+
+# Install pip requirements
+COPY requirements.txt .
+RUN python -m pip install -r requirements.txt
+
+WORKDIR /app
+COPY . /app
+
+# Creates a non-root user with an explicit UID and adds permission to access the /app folder
+# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
+RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
+USER appuser
+
+# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
+CMD ["python", "-m", "Audio GPt"]
diff --git a/audio-chatgpt.py b/audio-chatgpt.py
@@ -3,12 +3,12 @@
 sys.path.append(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'NeuralSeq'))
-sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio'))
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio'))  # noqa: E501
 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'audio_detection'))
-sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural'))
-import gradio as gr
-import matplotlib
-import librosa
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural'))  # noqa: E501
+import gradio as gr  # noqa: E402
+import matplotlib  # noqa: E402
+import librosa  # noqa: E402
 import torch
 from langchain.agents.initialize import initialize_agent
 from langchain.agents.tools import Tool

diff --git a/compose-dev.yaml b/compose-dev.yaml
@@ -0,0 +1,12 @@
+services:
+  app:
+    entrypoint:
+    - sleep
+    - infinity
+    image: docker/dev-environments-default:stable-1
+    init: true
+    volumes:
+    - type: bind
+      source: /var/run/docker.sock
+      target: /var/run/docker.sock
+
diff --git a/download.sh b/download.sh
@@ -40,4 +40,47 @@ wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz
 tar -zxvf tsd.tar.gz ./
 rm tsd.tar.gz
 cd sound_extraction/useful_ckpts
-wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
+wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
+mkdir checkpoints
+mkdir audio
+mkdir image
+mkdir text_to_audio
+#  Text to sing
+wget -P checkpoints/0831_opencpop_ds1000/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/model_ckpt_steps_320000.ckpt
+wget -P checkpoints/0109_hifigan_bigpopcs_hop128/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt
+wget -P checkpoints/0102_xiaoma_pe/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0102_xiaoma_pe/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
+#  Text to audio
+cd text_to_audio
+wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/ta40multi_epoch=000085.ckpt
+wget -P text_to_audio/Make_An_Audio/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/CLAP/CLAP_weights_2022.pth
+wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt
+wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt
+#  Text to speech
+wget -P checkpoints/GenerSpeech/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/GenerSpeech/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/GenerSpeech/model_ckpt_steps_300000.ckpt
+wget -P checkpoints/trainset_hifigan/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/trainset_hifigan/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt
+wget -P checkpoints/ https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/Emotion_encoder.pt
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_dict.txt
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_model.zip
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/phone_set.json
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/spk_map.json
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/train_f0s_mean_std.npy
+wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/word_set.json
+wget -P text_to_speech/checkpoints/hifi_lj -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/hifi_lj/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/text_to_speech/checkpoints/hifi_lj/model_ckpt_steps_2076000.ckpt
+wget -P text_to_speech/checkpoints/ljspeech/ps_adv_baseline -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/ljspeech/ps_adv_baseline/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160000.ckpt https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160001.ckpt
+# Audio to text
+wget -P audio_to_text/audiocaps_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/swa.pth
+wget -P audio_to_text/clotho_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/swa.pth
+wget -P audio_to_text/pretrained_feature_extractors https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/pretrained_feature_extractors/contrastive_pretrain_cnn14_bertm.pth
+# Audio detection
+cd audio_detection/audio_infer/useful_ckpts
+wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/audio_detection.pth
+cd mono2binaural/useful_ckpts
+wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/m2b.tar.gz
+tar -zxvf m2b.tar.gz ./
+rm m2b.tar.gz
+cd audio_detection/target_sound_detection/useful_ckpts
+wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz
+tar -zxvf tsd.tar.gz ./
+rm tsd.tar.gz
+cd sound_extraction/useful_ckpts
+wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
diff --git a/run.md b/run.md
@@ -10,10 +10,13 @@ pip install -r requirements.txt
 bash download.sh
 
 # prepare your private openAI private key
-export OPENAI_API_KEY={Your_Private_Openai_Key}
+export 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
+
 
 # Start AudioGPT !
 python audio-chatgpt.py
 ```
+make pop song
+