Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove image size limits. #3062

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
7 changes: 5 additions & 2 deletions nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
get_train_eval_split_interval,
)
from nerfstudio.process_data.colmap_utils import parse_colmap_camera_params
from nerfstudio.utils.misc import set_pil_image_size_limit
from nerfstudio.utils.rich_utils import CONSOLE, status
from nerfstudio.utils.scripts import run_command

Expand Down Expand Up @@ -481,7 +482,8 @@ def calculate_scaled_size(original_width, original_height, downscale_factor, mod
# Using %05d ffmpeg commands appears to be unreliable (skips images).
for path in paths:
# Compute image-wise rescaled width/height.
img = Image.open(path)
with set_pil_image_size_limit(None):
img = Image.open(path)
w, h = img.size
w_scaled, h_scaled = calculate_scaled_size(w, h, downscale_factor, downscale_rounding_mode)
# Downscale images using ffmpeg.
Expand Down Expand Up @@ -514,7 +516,8 @@ def get_fname(parent: Path, filepath: Path) -> Path:
filepath = next(iter(image_filenames))
if self._downscale_factor is None:
if self.config.downscale_factor is None:
test_img = Image.open(filepath)
with set_pil_image_size_limit(None):
test_img = Image.open(filepath)
w, h = test_img.size
max_res = max(h, w)
df = 0
Expand Down
4 changes: 3 additions & 1 deletion nerfstudio/data/dataparsers/nerfstudio_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
get_train_eval_split_interval,
)
from nerfstudio.utils.io import load_from_json
from nerfstudio.utils.misc import set_pil_image_size_limit
from nerfstudio.utils.rich_utils import CONSOLE

MAX_AUTO_RESOLUTION = 1600
Expand Down Expand Up @@ -469,7 +470,8 @@ def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="i

if self.downscale_factor is None:
if self.config.downscale_factor is None:
test_img = Image.open(data_dir / filepath)
with set_pil_image_size_limit(None):
test_img = Image.open(data_dir / filepath)
h, w = test_img.size
max_res = max(h, w)
df = 0
Expand Down
4 changes: 3 additions & 1 deletion nerfstudio/data/datasets/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from nerfstudio.cameras.cameras import Cameras
from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
from nerfstudio.data.utils.data_utils import get_image_mask_tensor_from_path
from nerfstudio.utils.misc import set_pil_image_size_limit


class InputDataset(Dataset):
Expand Down Expand Up @@ -66,7 +67,8 @@ def get_numpy_image(self, image_idx: int) -> npt.NDArray[np.uint8]:
image_idx: The image index in the dataset.
"""
image_filename = self._dataparser_outputs.image_filenames[image_idx]
pil_image = Image.open(image_filename)
with set_pil_image_size_limit(None):
pil_image = Image.open(image_filename)
if self.scale_factor != 1.0:
width, height = pil_image.size
newsize = (int(width * self.scale_factor), int(height * self.scale_factor))
Expand Down
5 changes: 3 additions & 2 deletions nerfstudio/data/datasets/depth_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from nerfstudio.data.datasets.base_dataset import InputDataset
from nerfstudio.data.utils.data_utils import get_depth_image_from_path
from nerfstudio.model_components import losses
from nerfstudio.utils.misc import torch_compile
from nerfstudio.utils.misc import set_pil_image_size_limit, torch_compile
from nerfstudio.utils.rich_utils import CONSOLE


Expand Down Expand Up @@ -79,7 +79,8 @@ def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float =

for i in track(range(len(filenames)), description="Generating depth images"):
image_filename = filenames[i]
pil_image = Image.open(image_filename)
with set_pil_image_size_limit(None):
pil_image = Image.open(image_filename)
image = np.array(pil_image, dtype="uint8") # shape is (h, w) or (h, w, 3 or 4)
if len(image.shape) == 2:
image = image[:, :, None].repeat(3, axis=2)
Expand Down
8 changes: 6 additions & 2 deletions nerfstudio/data/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
import torch
from PIL import Image

from nerfstudio.utils.misc import set_pil_image_size_limit


def get_image_mask_tensor_from_path(filepath: Path, scale_factor: float = 1.0) -> torch.Tensor:
"""
Utility function to read a mask image from the given path and return a boolean tensor
"""
pil_mask = Image.open(filepath)
with set_pil_image_size_limit(None):
pil_mask = Image.open(filepath)
if scale_factor != 1.0:
width, height = pil_mask.size
newsize = (int(width * scale_factor), int(height * scale_factor))
Expand All @@ -47,7 +50,8 @@ def get_semantics_and_mask_tensors_from_path(
"""
if isinstance(mask_indices, List):
mask_indices = torch.tensor(mask_indices, dtype=torch.int64).view(1, 1, -1)
pil_image = Image.open(filepath)
with set_pil_image_size_limit(None):
pil_image = Image.open(filepath)
if scale_factor != 1.0:
width, height = pil_image.size
newsize = (int(width * scale_factor), int(height * scale_factor))
Expand Down
5 changes: 2 additions & 3 deletions nerfstudio/generative/deepfloyd.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,15 @@ def prompt_to_image(
Returns:
The generated image.
"""

from diffusers import DiffusionPipeline, IFPipeline as IFOrig
from diffusers import DiffusionPipeline, IFPipeline
from diffusers.pipelines.deepfloyd_if import IFPipelineOutput as IFOutputOrig

prompts = [prompts] if isinstance(prompts, str) else prompts
negative_prompts = [negative_prompts] if isinstance(negative_prompts, str) else negative_prompts
assert isinstance(self.pipe, DiffusionPipeline)
prompt_embeds, negative_embeds = self.pipe.encode_prompt(prompts, negative_prompt=negative_prompts)

assert isinstance(self.pipe, IFOrig)
assert isinstance(self.pipe, IFPipeline)
model_output = self.pipe(
prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, generator=generator
)
Expand Down
4 changes: 3 additions & 1 deletion nerfstudio/process_data/realitycapture_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from PIL import Image

from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
from nerfstudio.utils.misc import set_pil_image_size_limit
from nerfstudio.utils.rich_utils import CONSOLE


Expand Down Expand Up @@ -71,7 +72,8 @@ def realitycapture_to_json(
continue

frame = {}
img = np.array(Image.open(output_dir / image_filename_map[basename]))
with set_pil_image_size_limit(None):
img = np.array(Image.open(output_dir / image_filename_map[basename]))
height, width, _ = img.shape
frame["h"] = int(height)
frame["w"] = int(width)
Expand Down
16 changes: 16 additions & 0 deletions nerfstudio/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
Miscellaneous helper code.
"""

import contextlib
import platform
import typing
import warnings
from inspect import currentframe
from typing import Any, Callable, Dict, List, Optional, TypeVar, Union

import torch
from PIL import Image

T = TypeVar("T")
TKey = TypeVar("TKey")
Expand Down Expand Up @@ -218,3 +220,17 @@ def get_orig_class(obj, default=None):
finally:
del frame
return default


@contextlib.contextmanager
def set_pil_image_size_limit(max_pixels: Optional[int]):
    """Temporarily override PIL's maximum image size limit.

    By default PIL limits the max image size, preventing processing or training
    with high resolution images. Use this context manager to disable the limit or
    set a custom one for the duration of a ``with`` block. The previous limit is
    restored when the block exits, including when the block raises.

    Note that this assigns to the module-level ``Image.MAX_IMAGE_PIXELS``
    attribute, so the override is visible to all code using PIL while the block
    is active (including other threads).

    Args:
        max_pixels: Max number of pixels for image processing in PIL, or ``None``
            to disable the limit.
    """
    orig = Image.MAX_IMAGE_PIXELS
    Image.MAX_IMAGE_PIXELS = max_pixels
    try:
        yield
    finally:
        # Restore unconditionally: without this, an exception raised inside the
        # ``with`` body (e.g. a failed Image.open) would leave the override in
        # place for the rest of the process.
        Image.MAX_IMAGE_PIXELS = orig
Loading