# -*- coding: utf-8 -*-
"""Copy of intelisl_midas_v2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/10StkaUd5O0-jW8viKfsCrzX1Ol32uJ22
### This notebook is optionally accelerated with a GPU runtime.
### If you would like to use this acceleration, please select the menu option "Runtime" -> "Change runtime type", select "Hardware Accelerator" -> "GPU" and click "SAVE"
----------------------------------------------------------------------
# MiDaS
*Author: Intel ISL*
**The MiDaS v2.1 model for computing relative depth from a single image.**
<img src="https://pytorch.org/assets/images/midas_samples.png" alt="alt" width="50%"/>
"""
import torch
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
midas.eval()
"""will load the MiDaS v2.1 model. The model expects 3-channel RGB images of shape ```(3 x H x W)```. Images are expected to be normalized using
`mean=[0.485, 0.456, 0.406]` and `std=[0.229, 0.224, 0.225]`.
`H` and `W` need to be divisible by `32`. For optimal results `H` and `W` should be close to `384` (the training resolution).
We provide a custom transformation that performs resizing while maintaining aspect ratio; a rough sketch of these constraints follows after this docstring.
### Model Description
[MiDaS](https://arxiv.org/abs/1907.01341) computes relative inverse depth from a single image. The model has been trained on 10 distinct datasets using
multi-objective optimization to ensure high quality on a wide range of inputs.
### Example Usage
Download an image from the PyTorch homepage
"""
import os
import urllib.request

import cv2
import matplotlib.pyplot as plt
import torch
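"""The "Example Usage" text above mentions downloading a test image. The PyTorch Hub
examples commonly use the sample dog image below; the URL is taken from those examples
and is an assumption here, not part of the original script:"""
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)  # fetch the sample image next to the script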

def generate_depth_map(input_dir, output_dir, use_large_model=True):
    # Load the large or small model
    if use_large_model:
        midas = torch.hub.load("intel-isl/MiDaS", "MiDaS")
    else:
        midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")

    # Move model to GPU if available
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    midas.to(device)
    midas.eval()

    # Load transforms to resize and normalize the image for the large or small model
    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
    if use_large_model:
        transform = midas_transforms.default_transform
    else:
        transform = midas_transforms.small_transform

    for f in os.listdir(input_dir):
        inputfile = f'{input_dir}/{f}'

        # Load image and apply transforms
        img = cv2.imread(inputfile)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_batch = transform(img).to(device)

        # Predict and resize to the original resolution
        with torch.no_grad():
            prediction = midas(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        output = prediction.cpu().numpy()

        # Save result
        plt.imsave(f'{output_dir}/{f}_depth_map.png', output)
        # plt.show()

if __name__ == '__main__':
    from configargparse import ArgumentParser

    parser = ArgumentParser(
        description='Generate MiDaS depth maps for a directory of images'
    )
    parser.add_argument('--input',
                        required=True,
                        help='directory containing input images')
    parser.add_argument('--output', '-o',
                        required=True,
                        help='directory for the output depth maps')
    parser.add_argument('--use_large_model',
                        action='store_true',
                        help='use the large MiDaS model instead of MiDaS_small')
    args = parser.parse_args()

    # verify/create the output directory
    os.makedirs(args.output, exist_ok=True)

    generate_depth_map(args.input, args.output, args.use_large_model)
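
# Example invocation (paths are hypothetical):
#   python intelisl_midas_v2.py --input ./panos --output ./depth --use_large_model
# Omit --use_large_model to fall back to the lighter MiDaS_small model.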