fix musicgen ignoring the melody (#154)

rsxdalv committed on Aug 27, 2023
commit be447b5
1 parent 4d39af3
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -23,8 +23,7 @@ Google Colab demo: [![Open In Colab](https://colab.research.google.com/assets/co
 
 ## Examples
 
-[audio__bark__continued_generation__2023-05-04_16-07-49_long.webm](https://user-images.githubuserc
-ontent.com/6757283/236218842-b9dc253e-05de-49e5-ada9-e714e1e2cbd4.webm)
+[audio__bark__continued_generation__2023-05-04_16-07-49_long.webm](https://user-images.githubusercontent.com/6757283/236218842-b9dc253e-05de-49e5-ada9-e714e1e2cbd4.webm)
 
 [audio__bark__continued_generation__2023-05-04_16-09-21_long.webm](https://user-images.githubusercontent.com/6757283/236219228-518d2b70-51a3-4175-af44-b24c01d14932.webm)
 
@@ -36,6 +35,9 @@ https://rsxdalv.github.io/bark-speaker-directory/
 
 ## Changelog
 
+Aug 27:
+* Fix MusicGen ignoring the melody https://github.com/rsxdalv/tts-generation-webui/issues/153
+
 Aug 26:
 * Add Send to RVC, Demucs, Vocos buttons to Bark and Vocos
 

diff --git a/src/musicgen/musicgen_tab.py b/src/musicgen/musicgen_tab.py
@@ -27,7 +27,7 @@
 from importlib.metadata import version
 
 AUDIOCRAFT_VERSION = version("audiocraft")
-
+FB_MUSICGEN_MELODY = "facebook/musicgen-melody"
 
 class MusicGenGeneration(TypedDict):
     model: str
@@ -138,7 +138,7 @@ def generate(params: MusicGenGeneration, melody_in: Optional[Tuple[int, np.ndarr
     model = params["model"]
     text = params["text"]
     # due to JSON serialization limitations
-    params["melody"] = None if model != "melody" else melody_in
+    params["melody"] = None if model != FB_MUSICGEN_MELODY else melody_in
     melody = params["melody"]
 
     global MODEL
@@ -165,7 +165,7 @@ def generate(params: MusicGenGeneration, melody_in: Optional[Tuple[int, np.ndarr
     params["seed"] = parse_or_set_seed(params["seed"], 0)
     # generator = torch.Generator(device=MODEL.device).manual_seed(params["seed"])
     log_generation_musicgen(params)
-    if melody:
+    if model == FB_MUSICGEN_MELODY and melody is not None:
         sr, melody = melody[0], torch.from_numpy(melody[1]).to(
             MODEL.device
         ).float().t().unsqueeze(0)
@@ -181,21 +181,19 @@ def generate(params: MusicGenGeneration, melody_in: Optional[Tuple[int, np.ndarr
             return_tokens=True,
             # generator=generator,
         )
+    elif model == "facebook/audiogen-medium":
+        output = MODEL.generate(
+            descriptions=[text],
+            progress=True,
+            # generator=generator,
+        )
     else:
-        # if AudioGen then don't return tokens
-        if model == "facebook/audiogen-medium":
-            output = MODEL.generate(
-                descriptions=[text],
-                progress=True,
-                # generator=generator,
-            )
-        else:
-            output, tokens = MODEL.generate(
-                descriptions=[text],
-                progress=True,
-                return_tokens=True,
-                # generator=generator,
-            )
+        output, tokens = MODEL.generate(
+            descriptions=[text],
+            progress=True,
+            return_tokens=True,
+            # generator=generator,
+        )
     set_seed(-1)
 
     elapsed = time.time() - start
@@ -258,7 +256,7 @@ def generation_tab_musicgen():
                 )
                 model = gr.Radio(
                     [
-                        "facebook/musicgen-melody",
+                        FB_MUSICGEN_MELODY,
                         # "musicgen-melody",
                         "facebook/musicgen-medium",
                         # "musicgen-medium",