Merge pull request #505 from jadevaibhav/effstabledreamfusion

Effstabledreamfusion
threestudio-project · Oct 2, 2024 · dd19ea6 · dd19ea6
2 parents a80b37a + 5f4f664
commit dd19ea6
Show file tree

Hide file tree

Showing 7 changed files with 706 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -55,6 +55,8 @@ coverage.xml
 .pytest_cache/
 cover/
 
+# Slurm logs
+slurm*
 # Translations
 *.mo
 *.pot

diff --git a/configs/dreamfusion-sd-eff.yaml b/configs/dreamfusion-sd-eff.yaml
@@ -0,0 +1,115 @@
+name: "dreamfusion-sd"
+tag: "${rmspace:${system.prompt_processor.prompt},_}"
+exp_root_dir: "outputs"
+seed: 0
+
+data_type: "eff-random-camera-datamodule"
+data:
+  batch_size: 1
+  width: 128
+  height: 128
+  sample_width: 64
+  sample_height: 64
+  camera_distance_range: [1.5, 2.0]
+  fovy_range: [40, 70]
+  elevation_range: [-10, 45]
+  light_sample_strategy: "dreamfusion"
+  eval_camera_distance: 2.0
+  eval_fovy_deg: 70.
+
+system_type: "efficient-dreamfusion-system"
+system:
+  geometry_type: "implicit-volume"
+  geometry:
+    radius: 2.0
+    normal_type: "analytic"
+
+    # the density initialization proposed in the DreamFusion paper
+    # does not work very well
+    # density_bias: "blob_dreamfusion"
+    # density_activation: exp
+    # density_blob_scale: 5.
+    # density_blob_std: 0.2
+
+    # use Magic3D density initialization instead
+    density_bias: "blob_magic3d"
+    density_activation: softplus
+    density_blob_scale: 10.
+    density_blob_std: 0.5
+
+    # coarse to fine hash grid encoding
+    # to ensure smooth analytic normals
+    pos_encoding_config:
+      otype: ProgressiveBandHashGrid
+      n_levels: 16
+      n_features_per_level: 2
+      log2_hashmap_size: 19
+      base_resolution: 16
+      per_level_scale: 1.447269237440378 # max resolution 4096
+      start_level: 8 # resolution ~200
+      start_step: 2000
+      update_steps: 500
+
+  material_type: "diffuse-with-point-light-material"
+  material:
+    ambient_only_steps: 2001
+    albedo_activation: sigmoid
+
+  background_type: "neural-environment-map-background"
+  background:
+    color_activation: sigmoid
+
+  renderer_type: "nerf-volume-renderer"
+  renderer:
+    radius: ${system.geometry.radius}
+    num_samples_per_ray: 512
+
+  prompt_processor_type: "stable-diffusion-prompt-processor"
+  prompt_processor:
+    pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+    prompt: ???
+
+  guidance_type: "stable-diffusion-guidance"
+  guidance:
+    pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+    guidance_scale: 100.
+    weighting_strategy: sds
+    min_step_percent: 0.02
+    max_step_percent: 0.98
+
+  loggers:
+    wandb:
+      enable: false
+      project: "threestudio"
+      name: None
+
+  loss:
+    lambda_sds: 1.
+    lambda_orient: [0, 10., 1000., 5000]
+    lambda_sparsity: 1.
+    lambda_opaque: 0.
+
+  optimizer:
+    name: Adam
+    args:
+      lr: 0.01
+      betas: [0.9, 0.99]
+      eps: 1.e-15
+    params:
+      geometry:
+        lr: 0.01
+      background:
+        lr: 0.001
+
+trainer:
+  max_steps: 10000
+  log_every_n_steps: 1
+  num_sanity_val_steps: 0
+  val_check_interval: 200
+  enable_progress_bar: true
+  precision: 16-mixed
+
+checkpoint:
+  save_last: true # save at each validation time
+  save_top_k: -1
+  every_n_train_steps: ${trainer.max_steps}
diff --git a/threestudio/data/__init__.py b/threestudio/data/__init__.py
@@ -1 +1 @@
-from . import co3d, image, multiview, uncond
+from . import co3d, image, multiview, uncond, uncond_eff