luxonis · njezersek · Jun 16, 2021 · Jul 1, 2021 · Jul 1, 2021 · Jul 1, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 .DS_Store
 __pycache__
+*vscode
diff --git a/depth-color-alignment-test/.gitignore b/depth-color-alignment-test/.gitignore
@@ -0,0 +1 @@
+.venv
diff --git a/depth-color-alignment-test/README.md b/depth-color-alignment-test/README.md
@@ -0,0 +1,49 @@
+# Depth-color alignment test
+## Controls
+| key | action |
+| :--- | :--- |
+| `q` | quit |
+| `s` | select the elevated plane area on the RGB image |
+
+## Usage
+The camera should look at a scene with a ground plane and an elevated plane of different colors.
+
+Press the `s` key to select the elevated area. Check if the segmentation looks correct. 
+
+The running average of the error ratio will be displayed. 
+
+## How it works
+When the user makes a selection, the average depth of the elevated and ground planes is computed. Based on that measurement the depth image is segmented into an _elevated area_ (pixels between _top padding_ and _mid depth_) and a _ground area_ (pixels between _mid depth_ and _bottom padding_).
+
+Similarly the color image is segmented based on the measured hue on the elevated and ground area.
+
+```                                                                                                                                                                                  
+                 ┌────────┐                                     
+                 │ camera │                                     
+                 └────────┘                                     
+                    /  \                                        
+                   /    \                                       
+                  /      \                                      
+                 /        \                                     
+                /          \                                    
+               /            \                                   
+              /              \                                  
+             /                \                                 
+            /                  \                                
+ ----------/--------------------\------------- top padding      
+          /                      \                              
+ ========/=======▒▒▒▒▒▒▒▒▒▒=======\=========== elevated plane   
+        /                          \                            
+       /                            \                           
+ -----/------------------------------\-------- mid depth        
+     /                                \                         
+    /                                  \                        
+ ██████████████████████████████████████████=== ground plane     
+
+ --------------------------------------------- bottom padding   
+
+```
+
+After segmentation the error ratio is computed.
+
+![test](demo.png)
diff --git a/depth-color-alignment-test/alignment_test.py b/depth-color-alignment-test/alignment_test.py
@@ -0,0 +1,132 @@
+import cv2
+import numpy as np
+from typing import Tuple, Optional
+from collections import deque
+import config
+
+class AlignmentTest:
+
+	def __init__(self):
+		self.alignments = deque(maxlen=20)
+		self.border_widths = deque(maxlen=20)
+		self.center_offsets = deque(maxlen=20)
+		self.roi: Optional[Tuple[int, int, int, int]] = None
+
+	def fit_rect(self, bin_img):
+		closing = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE, np.ones((3,3), np.uint8))
+		contours, hierarchy = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
+		if len(contours) == 0:
+			return None
+		cnt = np.vstack(contours).squeeze()
+		if len(cnt) < 4:
+			return None
+		rect = cv2.minAreaRect(cnt)
+		box = cv2.boxPoints(rect)
+		box = np.int0(box)
+
+		return box
+
+	def update(self, depth_frame, image_frame):
+		image_elevated, image_floor = self.image_threshold(image_frame)
+		depth_elevated, depth_floor = self.depth_threshold(depth_frame)
+
+
+		error = (image_elevated != depth_elevated) & (image_floor != depth_floor)
+		selection = (depth_elevated & image_elevated) | (depth_floor & image_floor)
+
+		error_area = np.count_nonzero(error)
+		total_area = np.count_nonzero(selection)
+
+		if total_area != 0:
+			error_ratio = error_area / total_area
+			self.alignments.append(error_ratio)
+
+		# visualization
+		image_segmentation = np.stack((image_floor, np.zeros_like(image_elevated), image_elevated), axis=-1).astype(np.uint8) * 255
+		depth_segmentation = np.stack((depth_floor, np.zeros_like(depth_elevated), depth_elevated), axis=-1).astype(np.uint8) * 255
+
+		image_box = self.fit_rect(image_elevated.astype(np.uint8)*255)
+		depth_box = self.fit_rect(depth_elevated.astype(np.uint8)*255)
+
+		error_vis = np.stack((error, )*3, axis=-1).astype(np.uint8) * 255
+		selection_vis = np.stack((selection, )*3, axis=-1).astype(np.uint8) * 255
+
+
+		if image_box is not None and depth_box is not None:
+			cv2.drawContours(image_segmentation, [image_box], 0, (0,255,0), 2)
+			cv2.drawContours(depth_segmentation, [depth_box], 0, (0,255,0), 2)
+
+
+			# calculate border width
+			rectangle_perimeter_px = cv2.arcLength(image_box, True)
+			px_to_mm = config.rectangle_perimeter_mm / rectangle_perimeter_px
+			d_px = error_area / rectangle_perimeter_px
+			d_mm = d_px * px_to_mm
+			self.border_widths.append(d_mm)
+
+			# calculate center offset
+			d = np.linalg.norm(image_box.mean(axis=0) - depth_box.mean(axis=0))*px_to_mm
+			self.center_offsets.append(d)
+
+
+		print(f"Center offset: {np.mean(self.center_offsets):.2f} mm")
+		print(f"Border width: {np.mean(self.border_widths):.2f} mm")
+
+
+		return image_segmentation, depth_segmentation, error_vis, selection_vis
+
+	def image_threshold(self, image_frame: np.ndarray):
+		zeros = np.zeros(image_frame.shape[:2], dtype=np.bool_)
+		if self.roi is None: return zeros, zeros
+
+		hsv = cv2.cvtColor(image_frame, cv2.COLOR_RGB2HSV)
+		hue = hsv[:,:,0]
+
+		elevated = (hue < (self.avg_hue + 10)) & (hue > (self.avg_hue - 10))
+		floor = ~elevated
+
+		return elevated, floor
+
+
+	def depth_threshold(self, depth_frame: np.ndarray):
+		zeros = np.zeros(depth_frame.shape[:2], dtype=np.bool_)
+		if self.roi is None: return zeros, zeros
+
+		padding_top = 100 # mm
+		padding_bottom = 100 # mm
+		mid_depth = (self.avg_depth_elevated + self.avg_depth_floor) / 2
+		elevated = (mid_depth > depth_frame) & (depth_frame > (self.avg_depth_elevated - padding_top))
+		floor = ((self.avg_depth_floor + padding_bottom) > depth_frame) & (depth_frame > mid_depth)
+
+		return elevated, floor
+
+	def reset(self):
+		self.alignments = []
+
+	def get_results(self):
+		if len(self.alignments) == 0: 
+			print("No test frames added yet. Press `c` to capture a frame.")
+			return None
+		avg_alignment = sum(self.alignments) / len(self.alignments)
+		return avg_alignment
+
+	def set_roi(self, roi, image_frame: np.ndarray, depth_frame: np.ndarray):
+		self.roi = roi
+
+		hsv = cv2.cvtColor(image_frame, cv2.COLOR_RGB2HSV)
+		hue = hsv[:,:,0]
+
+		# Crop image and depth frame
+		hue_elevated = hue[roi[1]:roi[1]+roi[3], roi[0]:roi[0]+roi[2]]
+		depth_elevated = depth_frame[roi[1]:roi[1]+roi[3], roi[0]:roi[0]+roi[2]]
+
+		depth_elevated_sum = np.sum(depth_elevated)
+		depth_sum = np.sum(depth_frame)
+		depth_elevated_count = depth_elevated.shape[0] * depth_elevated.shape[1] - np.count_nonzero(depth_elevated == 0)
+		depth_count = depth_frame.shape[0] * depth_frame.shape[1] - np.count_nonzero(depth_frame == 0)
+
+		self.avg_depth_elevated = depth_elevated_sum / depth_elevated_count
+		self.avg_depth_floor = (depth_sum - depth_elevated_sum) / (depth_count - depth_elevated_count)
+
+
+		self.avg_hue = np.mean(hue_elevated)
diff --git a/depth-color-alignment-test/camera.py b/depth-color-alignment-test/camera.py
@@ -0,0 +1,148 @@
+import depthai as dai
+import cv2
+import numpy as np
+from typing import List
+from host_sync import HostSync
+import config
+
+class Camera:
+    def __init__(self, device_info: dai.DeviceInfo, friendly_id: int, show_video: bool = True):
+        self.show_video = show_video
+        self.show_depth = False
+        self.device_info = device_info
+        self.friendly_id = friendly_id
+        self.mxid = device_info.getMxId()
+        self._create_pipeline()
+        self.device = dai.Device(self.pipeline, self.device_info)
+        # self.roi = (100, 100, 200, 400)
+        self.roi = None
+
+        self.device.setIrLaserDotProjectorBrightness(600)
+
+        self.image_queue = self.device.getOutputQueue(name="image", maxSize=10, blocking=False)
+        self.depth_queue = self.device.getOutputQueue(name="depth", maxSize=10, blocking=False)
+        self.mono_queue = self.device.getOutputQueue(name="mono", maxSize=10, blocking=False)
+        self.host_sync = HostSync(["image", "depth", "mono"])
+
+        self.image_frame = None
+        self.depth_frame = None
+        self.depth_visualization_frame = None
+
+        # camera window
+        self.image_window_name = f"[{self.friendly_id}] Camera RGB - mxid: {self.mxid}"
+        self.depth_window_name = f"[{self.friendly_id}] Camera depth - mxid: {self.mxid}"
+        if show_video:
+            cv2.namedWindow(self.image_window_name, cv2.WINDOW_NORMAL)
+            cv2.namedWindow(self.depth_window_name, cv2.WINDOW_NORMAL)
+            cv2.resizeWindow(self.image_window_name, 640, 360)
+            cv2.resizeWindow(self.depth_window_name, 640, 360)
+
+        self._load_calibration()
+
+        print("=== Connected to " + self.device_info.getMxId())
+
+    def __del__(self):
+        self.device.close()
+        print("=== Closed " + self.device_info.getMxId())
+
+    def _load_calibration(self):
+        calibration = self.device.readCalibration()
+        self.intrinsics = calibration.getCameraIntrinsics(
+            dai.CameraBoardSocket.RGB if config.COLOR else dai.CameraBoardSocket.RIGHT, 
+            dai.Size2f(*self.image_size)
+        )
+
+    def save_point_cloud_alignment(self):
+        np.save(f"{config.calibration_data_dir}/point_cloud_alignment_{self.mxid}.npy", self.point_cloud_alignment)
+
+
+    def _create_pipeline(self):
+        pipeline = dai.Pipeline()
+
+        # Depth cam -> 'depth'
+        mono_left = pipeline.createMonoCamera()
+        mono_right = pipeline.createMonoCamera()
+        mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
+        mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
+        mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
+        mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
+        cam_stereo = pipeline.createStereoDepth()
+        cam_stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
+        cam_stereo.initialConfig.setMedianFilter(config.median)
+        cam_stereo.initialConfig.setConfidenceThreshold(config.confidence_threshold)
+        cam_stereo.setLeftRightCheck(config.lrcheck)
+        cam_stereo.setExtendedDisparity(config.extended)
+        cam_stereo.setSubpixel(config.subpixel)
+        mono_left.out.link(cam_stereo.left)
+        mono_right.out.link(cam_stereo.right)
+
+        init_config = cam_stereo.initialConfig.get()
+        # init_config.postProcessing.speckleFilter.enable = False
+        # init_config.postProcessing.speckleFilter.speckleRange = 50
+        # init_config.postProcessing.temporalFilter.enable = True
+        # init_config.postProcessing.spatialFilter.enable = True
+        # init_config.postProcessing.spatialFilter.holeFillingRadius = 2
+        # init_config.postProcessing.spatialFilter.numIterations = 1
+        # init_config.postProcessing.thresholdFilter.minRange = config.min_range
+        # init_config.postProcessing.thresholdFilter.maxRange = config.max_range
+        # init_config.postProcessing.decimationFilter.decimationFactor = 1
+        cam_stereo.initialConfig.set(init_config)
+
+        xout_depth = pipeline.createXLinkOut()
+        xout_depth.setStreamName("depth")
+        cam_stereo.depth.link(xout_depth.input)
+
+        xout_mono = pipeline.createXLinkOut()
+        xout_mono.setStreamName("mono")
+        mono_left.out.link(xout_mono.input)
+
+
+        # RGB cam or mono right -> 'image'
+        xout_image = pipeline.createXLinkOut()
+        xout_image.setStreamName("image")
+        if config.COLOR:
+            cam_rgb = pipeline.createColorCamera()
+            cam_rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
+            cam_rgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
+            cam_rgb.setIspScale(1, 3)
+            cam_rgb.initialControl.setManualFocus(130)
+            cam_rgb.initialControl.setAutoWhiteBalanceLock(True)
+            cam_rgb.initialControl.setManualWhiteBalance(200)
+            cam_rgb.initialControl.setAutoExposureLock(True)
+            cam_rgb.initialControl.setManualExposure(10000, 1000)
+            cam_stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
+            cam_rgb.isp.link(xout_image.input)
+            self.image_size = cam_rgb.getIspSize()
+        else:
+            cam_stereo.rectifiedRight.link(xout_image.input)
+            self.image_size = mono_right.getResolutionSize()
+
+        self.pipeline = pipeline
+
+    def update(self):
+        for queue in [self.depth_queue, self.image_queue, self.mono_queue]:
+            new_msgs = queue.tryGetAll()
+            if new_msgs is not None:
+                for new_msg in new_msgs:
+                    self.host_sync.add(queue.getName(), new_msg)
+
+        msg_sync = self.host_sync.get()
+        if msg_sync is None:
+            return
+
+        self.depth_frame = msg_sync["depth"].getFrame()
+        self.image_frame = msg_sync["image"].getCvFrame()
+        self.mono_frame = msg_sync["mono"].getCvFrame()
+        r = self.roi
+        if r is not None:
+            self.depth_frame = self.depth_frame[r[0]:(r[0]+r[2]), r[1]:(r[1]+r[3])]
+            self.image_frame = self.image_frame[r[0]:(r[0]+r[2]), r[1]:(r[1]+r[3]), :]
+            self.mono_frame = self.mono_frame[r[0]:(r[0]+r[2]), r[1]:(r[1]+r[3])]
+
+        # scale the depth frame from 0-2000 to 0-255
+        self.depth_visualization_frame = np.interp(self.depth_frame, [0, 1000], [0, 255]).astype(np.uint8)
+        self.depth_visualization_frame = cv2.applyColorMap(self.depth_visualization_frame, cv2.COLORMAP_MAGMA)
+
+        if self.show_video:
+            cv2.imshow(self.depth_window_name, self.depth_visualization_frame)
+            cv2.imshow(self.image_window_name, self.image_frame)
diff --git a/depth-color-alignment-test/config.py b/depth-color-alignment-test/config.py
@@ -0,0 +1,18 @@
+import depthai as dai
+
+# Source of the "image" stream: True = color camera, False = right mono camera
+COLOR = True 		# Use color camera or mono camera
+
+# DEPTH CONFIG
+lrcheck  = True   			# Better handling for occlusions
+extended = False  			# Closer-in minimum depth, disparity range is doubled
+subpixel = False   			# Better accuracy for longer distance, fractional disparity 32-levels
+confidence_threshold = 255 	# 0-255, 255 = low confidence, 0 = high confidence
+# Depth limits for the threshold filter; in the visible code they are only
+# referenced from commented-out lines in camera.py
+min_range = 100 			# mm
+max_range = 2000			# mm
+
+# Median filter
+# Options: MEDIAN_OFF, KERNEL_3x3, KERNEL_5x5, KERNEL_7x7
+median   = dai.StereoDepthProperties.MedianFilter.MEDIAN_OFF
+
+
+# Perimeter of the reference rectangle (sides 148 and 208) in mm; used by
+# alignment_test.py to convert pixel measurements to millimetres
+rectangle_perimeter_mm = (148+208)*2
diff --git a/depth-color-alignment-test/host_sync.py b/depth-color-alignment-test/host_sync.py
@@ -0,0 +1,21 @@
+import numpy as np
+from functools import reduce
+from collections import deque
+from typing import List
+
+class HostSync:
+    def __init__(self, streams: List[str], maxlen=50):
+        self.queues = {stream: deque(maxlen=maxlen) for stream in streams}
+
+    def add(self, stream: str, msg):
+        self.queues[stream].append({'msg': msg, 'seq': msg.getSequenceNum()})
+
+    def get(self):
+        seqs = [np.array([msg['seq'] for msg in msgs]) for msgs in self.queues.values()]
+        matching_seqs = reduce(np.intersect1d, seqs)
+        if len(matching_seqs) == 0:
+            return None
+        seq = np.max(matching_seqs)
+        res = {stream: next(msg['msg'] for msg in msgs if msg['seq'] == seq) for stream, msgs in self.queues.items()}
+        self.queues = {stream: deque([msg for msg in msgs if msg['seq'] > seq], maxlen=msgs.maxlen) for stream, msgs in self.queues.items()}
+        return res