Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio gpt #69

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,7 @@ wandb/
nohup.out
multirun
outputs
run.md
*.md
.vscode/tasks.json
*.sh
1 change: 1 addition & 0 deletions .vscode/.xxluadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
44 changes: 44 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
},

{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "Docker: Python - General",
"type": "docker",
"request": "launch",
"preLaunchTask": "docker-run: debug",
"python": {
"pathMappings": [
{
"localRoot": "${workspaceFolder}",
"remoteRoot": "/app"
}
],
"projectType": "general"
}
}
]
}
1 change: 1 addition & 0 deletions Audio
Submodule Audio added at 5adaf6
23 changes: 23 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# For more information, please refer to https://aka.ms/vscode-docker-python
FROM python:3.10-slim

# Keeps Python from generating .pyc files in the container
ENV PYTHONDONTWRITEBYTECODE=1

# Turns off buffering for easier container logging
ENV PYTHONUNBUFFERED=1

# Install pip requirements
COPY requirements.txt .
RUN python -m pip install -r requirements.txt

WORKDIR /app
COPY . /app

# Creates a non-root user with an explicit UID and adds permission to access the /app folder
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
USER appuser

# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
CMD ["python", "-m", "Audio GPt"]
10 changes: 5 additions & 5 deletions audio-chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'NeuralSeq'))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio'))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'text_to_audio/Make_An_Audio')) # noqa: E501
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'audio_detection'))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural'))
import gradio as gr
import matplotlib
import librosa
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mono2binaural')) # noqa: E501
import gradio as gr # noqa: E402
import matplotlib # noqa: E402
import librosa # noqa: E402
import torch
from langchain.agents.initialize import initialize_agent
from langchain.agents.tools import Tool
Expand Down
12 changes: 12 additions & 0 deletions compose-dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
services:
app:
entrypoint:
- sleep
- infinity
image: docker/dev-environments-default:stable-1
init: true
volumes:
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock

45 changes: 44 additions & 1 deletion download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,47 @@ wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz
tar -zxvf tsd.tar.gz ./
rm tsd.tar.gz
cd sound_extraction/useful_ckpts
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
mkdir checkpoints
mkdir audio
mkdir image
mkdir text_to_audio
# Text to sing
wget -P checkpoints/0831_opencpop_ds1000/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0831_opencpop_ds1000/model_ckpt_steps_320000.ckpt
wget -P checkpoints/0109_hifigan_bigpopcs_hop128/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt
wget -P checkpoints/0102_xiaoma_pe/ -i https://huggingface.co/spaces/Silentlin/DiffSinger/blob/main/checkpoints/0102_xiaoma_pe/config.yaml https://huggingface.co/spaces/Silentlin/DiffSinger/resolve/main/checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
# Text to audio
cd text_to_audio
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/ta40multi_epoch=000085.ckpt
wget -P text_to_audio/Make_An_Audio/useful_ckpts/CLAP/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio/resolve/main/useful_ckpts/CLAP/CLAP_weights_2022.pth
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_img/resolve/main/useful_ckpts/ta54_epoch=000216.ckpt
wget -P text_to_audio/Make_An_Audio/useful_ckpts/ -i https://huggingface.co/spaces/DiffusionSpeech/Make_An_Audio_inpaint/resolve/main/useful_ckpts/inpaint7_epoch00047.ckpt
# Text to speech
wget -P checkpoints/GenerSpeech/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/GenerSpeech/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/GenerSpeech/model_ckpt_steps_300000.ckpt
wget -P checkpoints/trainset_hifigan/ -i https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/blob/main/checkpoints/trainset_hifigan/config.yaml https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/trainset_hifigan/model_ckpt_steps_1000000.ckpt
wget -P checkpoints/ https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/checkpoints/Emotion_encoder.pt
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_dict.txt
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/mfa_model.zip
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/phone_set.json
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/spk_map.json
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/train_f0s_mean_std.npy
wget -P data/binary/training_set https://huggingface.co/spaces/Rongjiehuang/GenerSpeech/resolve/main/data/binary/training_set/word_set.json
wget -P text_to_speech/checkpoints/hifi_lj -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/hifi_lj/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/text_to_speech/checkpoints/hifi_lj/model_ckpt_steps_2076000.ckpt
wget -P text_to_speech/checkpoints/ljspeech/ps_adv_baseline -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/text_to_speech/checkpoints/ljspeech/ps_adv_baseline/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160000.ckpt https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/checkpoints/ljspeech/ps_adv_baseline/model_ckpt_steps_160001.ckpt
# Audio to text
wget -P audio_to_text/audiocaps_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/audiocaps_cntrstv_cnn14rnn_trm/swa.pth
wget -P audio_to_text/clotho_cntrstv_cnn14rnn_trm -i https://huggingface.co/AIGC-Audio/AudioGPT/blob/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/config.yaml https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/clotho_cntrstv_cnn14rnn_trm/swa.pth
wget -P audio_to_text/pretrained_feature_extractors https://huggingface.co/AIGC-Audio/AudioGPT/resolve/main/audio_to_text/pretrained_feature_extractors/contrastive_pretrain_cnn14_bertm.pth
# Audio detection
cd audio_detection/audio_infer/useful_ckpts
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/audio_detection.pth
cd mono2binaural/useful_ckpts
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/m2b.tar.gz
tar -zxvf m2b.tar.gz ./
rm m2b.tar.gz
cd audio_detection/target_sound_detection/useful_ckpts
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/tsd.tar.gz
tar -zxvf tsd.tar.gz ./
rm tsd.tar.gz
cd sound_extraction/useful_ckpts
wget https://huggingface.co/Dongchao/pre_trained_model/resolve/main/LASSNet.pt
5 changes: 4 additions & 1 deletion run.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ pip install -r requirements.txt
bash download.sh

# prepare your private openAI private key
export OPENAI_API_KEY={Your_Private_Openai_Key}
export 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


# Start AudioGPT !
python audio-chatgpt.py
```
make pop song