From 0cddfd53b4295ffd076a4a4294370b423810c13d Mon Sep 17 00:00:00 2001
From: Jet Kwok <jetkwok827@gmail.com>
Date: Thu, 25 Jan 2024 15:23:48 +0800
Subject: [PATCH] Add option to save the depth image results

---
 README.md |  9 +++++++--
 run.py    | 53 +++++++++++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/README.md b/README.md
index 5e5ac4f8..ebd803e5 100644
--- a/README.md
+++ b/README.md
@@ -109,16 +109,21 @@ pip install -r requirements.txt
 ### Running
 
 ```bash
-python run.py --encoder <vits | vitb | vitl> --img-path <img-directory | single-img | txt-file> --outdir <outdir>
+python run.py --encoder <vits | vitb | vitl> --img-path <img-directory | single-img | txt-file> --outdir <outdir> [--save_depth_img]
 ```
 For the ``img-path``, you can either 1) point it to an image directory storing all interested images, 2) point it to a single image, or 3) point it to a text file storing all image paths.
 
-For example:
+Example 1:
 ```bash
 python run.py --encoder vitl --img-path assets/examples --outdir depth_visualization
 ```
 
+Example 2:
 
+To save only the depth map as a 16-bit PNG instead of the side-by-side visualization, add `--save_depth_img`:
+```bash
+python run.py --encoder vitl --img-path assets/examples --outdir depth_visualization --save_depth_img
+```
 ### Gradio demo
 
 To use our gradio demo locally:
diff --git a/run.py b/run.py
index a0d07ae5..b3580c97 100644
--- a/run.py
+++ b/run.py
@@ -16,7 +16,8 @@
     parser.add_argument('--img-path', type=str)
     parser.add_argument('--outdir', type=str, default='./vis_depth')
     parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl'])
-    
+    parser.add_argument('--save_depth_img', default=False, action='store_true')
+
     args = parser.parse_args()
     
     margin_width = 50
@@ -73,30 +74,34 @@
             depth = depth_anything(image)
         
         depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
-        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-        
-        depth = depth.cpu().numpy().astype(np.uint8)
-        depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
-        
+
         os.makedirs(args.outdir, exist_ok=True)
         filename = os.path.basename(filename)
-        
-        split_region = np.ones((raw_image.shape[0], margin_width, 3), dtype=np.uint8) * 255
-        combined_results = cv2.hconcat([raw_image, split_region, depth_color])
-        
-        caption_space = np.ones((caption_height, combined_results.shape[1], 3), dtype=np.uint8) * 255
-        captions = ['Raw image', 'Depth Anything']
-        segment_width = w + margin_width
-        for i, caption in enumerate(captions):
-            # Calculate text size
-            text_size = cv2.getTextSize(caption, font, font_scale, font_thickness)[0]
 
-            # Calculate x-coordinate to center the text
-            text_x = int((segment_width * i) + (w - text_size[0]) / 2)
+        if args.save_depth_img:
+            cv2.imwrite(os.path.join(args.outdir, filename[:filename.find('.')] + '_img_depth.png'), depth.cpu().numpy().astype(np.uint16))
+        else:
+            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+            depth = depth.cpu().numpy().astype(np.uint8)
+            depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
+                        
+            split_region = np.ones((raw_image.shape[0], margin_width, 3), dtype=np.uint8) * 255
+            combined_results = cv2.hconcat([raw_image, split_region, depth_color])
+            
+            caption_space = np.ones((caption_height, combined_results.shape[1], 3), dtype=np.uint8) * 255
 
-            # Add text caption
-            cv2.putText(caption_space, caption, (text_x, 40), font, font_scale, (0, 0, 0), font_thickness)
-        
-        final_result = cv2.vconcat([caption_space, combined_results])
-        
-        cv2.imwrite(os.path.join(args.outdir, filename[:filename.find('.')] + '_img_depth.png'), final_result)
+            captions = ['Raw image', 'Depth Anything']
+            segment_width = w + margin_width
+            for i, caption in enumerate(captions):
+                # Calculate text size
+                text_size = cv2.getTextSize(caption, font, font_scale, font_thickness)[0]
+
+                # Calculate x-coordinate to center the text
+                text_x = int((segment_width * i) + (w - text_size[0]) / 2)
+
+                # Add text caption
+                cv2.putText(caption_space, caption, (text_x, 40), font, font_scale, (0, 0, 0), font_thickness)
+            
+            final_result = cv2.vconcat([caption_space, combined_results])
+            
+            cv2.imwrite(os.path.join(args.outdir, filename[:filename.find('.')] + '_img_depth.png'), final_result)
\ No newline at end of file