# -*- coding: utf-8 -*-
"""Copy of intelisl_midas_v2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/10StkaUd5O0-jW8viKfsCrzX1Ol32uJ22
### This notebook is optionally accelerated with a GPU runtime.
### If you would like to use this acceleration, please select the menu option "Runtime" -> "Change runtime type", select "Hardware Accelerator" -> "GPU" and click "SAVE"
----------------------------------------------------------------------
# MiDaS
*Author: Intel ISL*
**The MiDaS v2.1 model for computing relative depth from a single image.**
<img src="https://pytorch.org/assets/images/midas_samples.png" alt="alt" width="50%"/>
"""
import torch
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
midas.eval()
"""will load the MiDaS v2.1 model. The model expects 3-channel RGB images of shape ```(3 x H x W)```. Images are expected to be normalized using
`mean=[0.485, 0.456, 0.406]` and `std=[0.229, 0.224, 0.225]`.
`H` and `W` need to be divisible by `32`. For optimal results `H` and `W` should be close to `384` (the training resolution).
We provide a custom transformation that performs resizing while maintaining aspect ratio; a rough sketch of these constraints follows after this docstring.
### Model Description
[MiDaS](https://arxiv.org/abs/1907.01341) computes relative inverse depth from a single image. The model has been trained on 10 distinct datasets using
multi-objective optimization to ensure high quality on a wide range of inputs.
### Example Usage
Download an image from the PyTorch homepage
"""
import os
import urllib.request

import cv2
import matplotlib.pyplot as plt
import torch
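"""The "Example Usage" text above mentions downloading a test image. The PyTorch Hub
examples commonly use the sample dog image below; the URL is taken from those examples
and is an assumption here, not part of the original script:"""
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)  # fetch the sample image next to the script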

def generate_depth_map(input_dir, output_dir, use_large_model=True):
    # Load the large or small model
    if use_large_model:
        midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
    else:
        midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")

    # Move model to GPU if available
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    midas.to(device)
    midas.eval()

    # Load transforms to resize and normalize the image for the large or small model
    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
    if use_large_model:
        transform = midas_transforms.default_transform
    else:
        transform = midas_transforms.small_transform

    for f in os.listdir(input_dir):
        inputfile = f'{input_dir}/{f}'

        # Load image and apply transforms
        img = cv2.imread(inputfile)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_batch = transform(img).to(device)

        # Predict and resize to the original resolution
        with torch.no_grad():
            prediction = midas(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        output = prediction.cpu().numpy()

        # Save result
        plt.imsave(f'{output_dir}/{f}_depth_map.png', output)
        # plt.show()

if __name__ == '__main__':
    from configargparse import ArgumentParser

    parser = ArgumentParser(
        description='Generate MiDaS depth maps for a directory of images'
    )
    parser.add_argument('--input',
                        required=True,
                        help='directory containing input images')
    parser.add_argument('--output', '-o',
                        required=True,
                        help='directory for the output depth maps')
    parser.add_argument('--use_large_model',
                        action='store_true',
                        help='use the large MiDaS model instead of MiDaS_small')
    args = parser.parse_args()

    # verify/create the output directory
    os.makedirs(args.output, exist_ok=True)

    generate_depth_map(args.input, args.output, args.use_large_model)
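
# Example invocation (paths are hypothetical):
#   python intelisl_midas_v2.py --input ./panos --output ./depth --use_large_model
# Omit --use_large_model to fall back to the lighter MiDaS_small model.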