Fix background modules to take unflattened (B, H, W, 3) ray directions, so that random_aug draws one random color per batch element

threestudio-project · Jul 7, 2023 · 2e026bd
1 parent e81fd69
commit 2e026bd
Show file tree

Hide file tree

Showing 11 changed files with 42 additions and 31 deletions.
diff --git a/configs/zero123-geometry.yaml b/configs/zero123-geometry.yaml
@@ -76,7 +76,8 @@ system:
 
   background_type: "solid-color-background" # unused
   background:
-    random_aug: True
+    random_aug: false
+    random_aug_prob: 0.5
 
   # renderer_type: "nerf-volume-renderer"
   # renderer:

diff --git a/configs/zero123.yaml b/configs/zero123.yaml
@@ -20,8 +20,8 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
     width: [64, 128, 256]
     batch_size: [12, 4, 2]
     resolution_milestones: [200, 300]
-    eval_height: 256
-    eval_width: 256
+    eval_height: 512
+    eval_width: 512
     eval_batch_size: 1
     elevation_range: [-10, 80]
     azimuth_range: [-180, 180]
@@ -88,6 +88,11 @@ system:
   #   color_activation: sigmoid
 
   background_type: "solid-color-background" # unused
+  background:
+    random_aug: false
+    random_aug_prob: 0.5
+
+  amb_ratio_min: 0.5
 
   renderer_type: "nerf-volume-renderer"
   renderer:

diff --git a/configs/zero123_64.yaml b/configs/zero123_64.yaml
@@ -83,6 +83,11 @@ system:
     albedo_activation: sigmoid
 
   background_type: "solid-color-background" # unused
+  background:
+    random_aug: false
+    random_aug_prob: 0.5
+
+  amb_ratio_min: 0.5
 
   renderer_type: "nerf-volume-renderer"
   renderer:

diff --git a/threestudio/models/background/base.py b/threestudio/models/background/base.py
@@ -20,5 +20,5 @@ class Config(BaseModule.Config):
     def configure(self):
         pass
 
-    def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 3"]:
+    def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
         raise NotImplementedError
diff --git a/threestudio/models/background/neural_environment_map_background.py b/threestudio/models/background/neural_environment_map_background.py
@@ -43,13 +43,12 @@ def configure(self) -> None:
             self.cfg.mlp_network_config,
         )
 
-    def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 3"]:
+    def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
         if not self.training and self.cfg.eval_color is not None:
             return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(
                 dirs
             ) * torch.as_tensor(self.cfg.eval_color).to(dirs)
         # viewdirs must be normalized before passing to this function
-        squeezed_dim = dirs.view(-1, 3).shape[0]
         dirs = (dirs + 1.0) / 2.0  # (-1, 1) => (0, 1)
         dirs_embd = self.encoding(dirs.view(-1, 3))
         color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims)
@@ -61,9 +60,8 @@ def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B 3"]:
         ):
             # use random background color with probability random_aug_prob
             color = color * 0 + (  # prevent checking for unused parameters in DDP
-                torch.rand(self.cfg.n_output_dims)
-                .to(dirs)[None, :]
-                .expand(squeezed_dim, -1)
-                .view(*dirs.shape[:-1], -1)
+                torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims)
+                .to(dirs)
+                .expand(*dirs.shape[:-1], -1)
             )
         return color
diff --git a/threestudio/models/background/solid_color_background.py b/threestudio/models/background/solid_color_background.py
@@ -33,7 +33,7 @@ def configure(self) -> None:
                 "env_color", torch.as_tensor(self.cfg.color, dtype=torch.float32)
             )
 
-    def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]:
+    def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
         color = (
             torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(dirs)
             * self.env_color
@@ -45,9 +45,8 @@ def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]:
         ):
             # use random background color with probability random_aug_prob
             color = color * 0 + (  # prevent checking for unused parameters in DDP
-                torch.rand(self.cfg.n_output_dims)
-                .to(dirs)[None, :]
-                .expand(dirs.reshape(-1, 3).shape[0], -1)
-                .reshape(*dirs.shape[:-1], -1)
+                torch.rand(dirs.shape[0], 1, 1, self.cfg.n_output_dims)
+                .to(dirs)
+                .expand(*dirs.shape[:-1], -1)
             )
         return color
diff --git a/threestudio/models/background/textured_background.py b/threestudio/models/background/textured_background.py
@@ -36,7 +36,7 @@ def spherical_xyz_to_uv(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B
 
     def forward(self, dirs: Float[Tensor, "*B 3"]) -> Float[Tensor, "*B Nc"]:
         dirs_shape = dirs.shape[:-1]
-        uv = self.spherical_xyz_to_uv(dirs)
+        uv = self.spherical_xyz_to_uv(dirs.reshape(-1, 3))
         uv = 2 * uv - 1  # rescale to [-1, 1] for grid_sample
         uv = uv.reshape(1, -1, 1, 2)
         color = (

diff --git a/threestudio/models/renderers/nerf_volume_renderer.py b/threestudio/models/renderers/nerf_volume_renderer.py
@@ -126,7 +126,7 @@ def sigma_fn(t_starts, t_ends, ray_indices):
                 **geo_out,
                 **kwargs
             )
-            comp_rgb_bg = self.background(dirs=rays_d_flatten)
+            comp_rgb_bg = self.background(dirs=rays_d)
         else:
             geo_out = chunk_batch(
                 self.geometry,
@@ -143,7 +143,7 @@ def sigma_fn(t_starts, t_ends, ray_indices):
                 **geo_out
             )
             comp_rgb_bg = chunk_batch(
-                self.background, self.cfg.eval_chunk_size, dirs=rays_d_flatten
+                self.background, self.cfg.eval_chunk_size, dirs=rays_d
             )
 
         weights: Float[Tensor, "Nr 1"]
@@ -184,8 +184,8 @@ def sigma_fn(t_starts, t_ends, ray_indices):
                 #        -> [bs, height, width, 3]):
                 bg_color = bg_color.expand(-1, height, width, -1)
 
-            if bg_color.shape == (batch_size, height, width, 3):
-                bg_color = bg_color.reshape(-1, 3)
+        if bg_color.shape == (batch_size, height, width, 3):
+            bg_color = bg_color.reshape(-1, 3)
 
         comp_rgb = comp_rgb_fg + bg_color * (1.0 - opacity)
 

diff --git a/threestudio/models/renderers/neus_volume_renderer.py b/threestudio/models/renderers/neus_volume_renderer.py
@@ -187,7 +187,7 @@ def alpha_fn(t_starts, t_ends, ray_indices):
                 **geo_out,
                 **kwargs
             )
-            comp_rgb_bg = self.background(dirs=rays_d_flatten)
+            comp_rgb_bg = self.background(dirs=rays_d)
         else:
             geo_out = chunk_batch(
                 self.geometry,
@@ -204,7 +204,7 @@ def alpha_fn(t_starts, t_ends, ray_indices):
                 **geo_out
             )
             comp_rgb_bg = chunk_batch(
-                self.background, self.cfg.eval_chunk_size, dirs=rays_d_flatten
+                self.background, self.cfg.eval_chunk_size, dirs=rays_d
             )
 
         # grad or normal?
@@ -231,9 +231,9 @@ def alpha_fn(t_starts, t_ends, ray_indices):
 
         if bg_color is None:
             bg_color = comp_rgb_bg
-        else:
-            if bg_color.shape == (batch_size, height, width, 3):
-                bg_color = bg_color.reshape(-1, 3)
+
+        if bg_color.shape == (batch_size, height, width, 3):
+            bg_color = bg_color.reshape(-1, 3)
 
         comp_rgb = comp_rgb_fg + bg_color * (1.0 - opacity)
 

diff --git a/threestudio/scripts/run_zero123.sh b/threestudio/scripts/run_zero123.sh
@@ -1,7 +1,7 @@
 NAME="dragon2"
 
 # Phase 1 - 64x64
-python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png system.guidance.pretrained_model_name_or_path="./load/zero123/XL_20230604.ckpt" use_timestamp=False name=${NAME} tag="Phase1_64" # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name=${NAME}_Phase1
+python launch.py --config configs/zero123.yaml --train --gpu 7 data.image_path=./load/images/${NAME}_rgba.png system.guidance.pretrained_model_name_or_path="./load/zero123/XL_20230604.ckpt" use_timestamp=False name=${NAME}_EXPS tag="Phase1" # system.freq.guidance_eval=0 system.loggers.wandb.enable=false system.loggers.wandb.project="voletiv-anya-new" system.loggers.wandb.name=${NAME}_Phase1
 
 # python threestudio/scripts/make_training_vid.py --exp /admin/home-vikram/git/threestudio/outputs/zero123/64_dragon2_rgba.png@20230628-152734 --frames_per_vid 30 --fps 20 --max_iters 200
 

diff --git a/threestudio/systems/zero123.py b/threestudio/systems/zero123.py
@@ -20,6 +20,7 @@ class Zero123(BaseLift3DSystem):
     class Config(BaseLift3DSystem.Config):
         freq: dict = field(default_factory=dict)
         refinement: bool = False
+        amb_ratio_min: float = 0.5
 
     cfg: Config
 
@@ -62,13 +63,13 @@ def training_substep(self, batch, batch_idx, guidance: str):
             ambient_ratio = 1.0
             shading = "diffuse"
             batch["shading"] = shading
-            bg_color = None
+            # bg_color = None
         elif guidance == "zero123":
             batch = batch["random_camera"]
             # claforte: surely there's a cleaner way to get batch size
-            bs = batch["rays_o"].shape[0]
+            # bs = batch["rays_o"].shape[0]
 
-            bg_color = torch.ones(bs, 3).to(self.device)
+            # bg_color = torch.ones(bs, 3).to(self.device)
             # bg_color = torch.rand(bs, 3).to(self.device)  # claforte: use dtype
 
             # # Override 50% of the bgcolors with white.
@@ -82,9 +83,11 @@ def training_substep(self, batch, batch_idx, guidance: str):
 
             # bg_color = bg_color * (1.0 - is_white) + white * is_white
 
-            ambient_ratio = 0.1 + 0.9 * random.random()
+            ambient_ratio = (
+                self.cfg.amb_ratio_min + (1 - self.cfg.amb_ratio_min) * random.random()
+            )
 
-        batch["bg_color"] = bg_color
+        batch["bg_color"] = None
         batch["ambient_ratio"] = ambient_ratio
 
         out = self(batch)