diff --git a/configs/_base_/models/multiview_dfm.py b/configs/_base_/models/multiview_dfm.py
index f20ab6fd84..7fa5376d1e 100644
--- a/configs/_base_/models/multiview_dfm.py
+++ b/configs/_base_/models/multiview_dfm.py
@@ -89,8 +89,7 @@
             pos_iou_thr=0.6,
             neg_iou_thr=0.45,
             min_pos_iou=0.45,
-            ignore_iof_thr=-1),
-
+            ignore_iof_thr=-1)
     ],
     allowed_border=0,
     pos_weight=-1,
diff --git a/configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py b/configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py
index 12fc725df9..2247aa44d8 100644
--- a/configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py
+++ b/configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py
@@ -108,4 +108,4 @@
 train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-auto_scale_lr = dict(enable=False, base_batch_size=48)
\ No newline at end of file
+auto_scale_lr = dict(enable=False, base_batch_size=48)
diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py
index 66dba29117..cb4d97e137 100644
--- a/mmdet3d/datasets/convert_utils.py
+++ b/mmdet3d/datasets/convert_utils.py
@@ -10,7 +10,7 @@
 from shapely.geometry import MultiPoint, box
 from shapely.geometry.polygon import Polygon
 
-from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes, points_cam2img
+from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
 from mmdet3d.structures.ops import box_np_ops
 
 kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
@@ -318,11 +318,8 @@ def get_kitti_style_2d_boxes(info: dict,
 def convert_annos(info: dict, cam_idx: int) -> dict:
     """Convert front-cam anns to i-th camera (KITTI-style info)."""
     rect = info['calib']['R0_rect'].astype(np.float32)
-    if cam_idx == 0:
-        lidar2cami = info['calib']['Tr_velo_to_cam'].astype(np.float32)
-    else:
-        lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(
-            np.float32)
+    lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+    lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
     annos = info['annos']
     converted_annos = copy.deepcopy(annos)
     loc = annos['location']
@@ -330,11 +327,12 @@
     rots = annos['rotation_y']
     gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                   axis=1).astype(np.float32)
-    # BC-breaking: gt_bboxes_3d is already in lidar coordinates
+    # convert gt_bboxes_3d to velodyne coordinates
+    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
+        Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
     # convert gt_bboxes_3d to cam coordinates
-    gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to(
+    gt_bboxes_3d = gt_bboxes_3d.convert_to(
         Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy()
-
     converted_annos['location'] = gt_bboxes_3d[:, :3]
     converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
     converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index 18b7e50c7b..94275d42de 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -2071,7 +2071,6 @@ def _crop_data(self,
             offset_w = np.random.randint(
                 self.rel_offset_w[0] * margin_w,
                 self.rel_offset_w[1] * margin_w + 1)
-            # offset_h, offset_w = 0, 0
         else:
             offset_w, offset_h = results['img_crop_offset']
 
diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py
index 7fcb361eea..cda27e42e5 100644
--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
@@ -249,7 +249,6 @@ def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
             new_image_info = {}
             new_image_info[self.default_cam_key] = \
                 info['images'][self.default_cam_key]
-            # cam_prefix = self.data_prefix[self.default_cam_key]
             info['images'] = new_image_info
             info['instances'] = info['cam_instances'][self.default_cam_key]
             return Det3DDataset.parse_data_info(self, info)
@@ -258,7 +257,6 @@ def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
             # Convert frame-based infos to multi-view image-based
             data_list = []
             for (cam_key, img_info) in info['images'].items():
-                # camera_info = deepcopy(info)
                 camera_info = dict()
                 camera_info['sample_idx'] = info['sample_idx']
                 camera_info['timestamp'] = info['timestamp']
diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py
index 12cc954307..cdbc4a58db 100644
--- a/mmdet3d/evaluation/metrics/waymo_metric.py
+++ b/mmdet3d/evaluation/metrics/waymo_metric.py
@@ -61,7 +61,6 @@ def __init__(self,
         self.load_type = load_type
         self.result_prefix = result_prefix
         self.format_only = format_only
-        self.result_prefix = result_prefix
         if self.format_only:
             assert result_prefix is not None, 'result_prefix must be not '
             'None when format_only is True, otherwise the result files will '
diff --git a/mmdet3d/models/detectors/dfm.py b/mmdet3d/models/detectors/dfm.py
index 3f6118dcaf..9655b06023 100644
--- a/mmdet3d/models/detectors/dfm.py
+++ b/mmdet3d/models/detectors/dfm.py
@@ -57,7 +57,8 @@ def __init__(self,
                  test_cfg=None,
                  pretrained=None,
                  init_cfg=None):
-        super().__init__(data_preprocessor= data_preprocessor,init_cfg=init_cfg)
+        super().__init__(
+            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
         self.backbone = MODELS.build(backbone)
         self.neck = MODELS.build(neck)
         if backbone_stereo is not None:
diff --git a/mmdet3d/models/detectors/multiview_dfm.py b/mmdet3d/models/detectors/multiview_dfm.py
index c195fbc47f..81446d30f2 100644
--- a/mmdet3d/models/detectors/multiview_dfm.py
+++ b/mmdet3d/models/detectors/multiview_dfm.py
@@ -1,18 +1,19 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Union
+
 import numpy as np
 import torch
+from mmengine.structures import InstanceData
 from torch import Tensor
-from typing import Union
 
 from mmdet3d.models.layers.fusion_layers.point_fusion import (point_sample,
                                                               voxel_sample)
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet3d.structures.bbox_3d.utils import get_lidar2img
 from mmdet3d.structures.det3d_data_sample import SampleList
-from mmengine.structures import InstanceData
-from mmdet3d.utils import ConfigType, OptConfigType
+from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList
 from .dfm import DfM
-from mmdet3d.utils.typing_utils import OptConfigType, OptInstanceList
+
 
 @MODELS.register_module()
 class MultiViewDfM(DfM):
@@ -356,7 +357,7 @@ def feature_transformation(self, batch_feats, batch_img_metas, num_views,
         return transform_feats
 
     def loss(self, batch_inputs: Tensor,
-            batch_data_samples: SampleList) -> Union[dict, tuple]:
+             batch_data_samples: SampleList) -> Union[dict, tuple]:
         """Calculate losses from a batch of inputs dict and data samples.
 
         Args:
@@ -393,7 +394,7 @@ def predict(self, batch_inputs: Tensor,
             batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                 samples. It usually includes information such as
                 `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
-
+
         Returns:
             list[:obj:`Det3DDataSample`]: Detection results of the input
                 samples. Each Det3DDataSample usually contain
@@ -409,14 +410,15 @@ def predict(self, batch_inputs: Tensor,
         """
         feats = self.extract_feat(batch_inputs, batch_data_samples)
         bev_feat = feats[0]
-        results_list = self.bbox_head_3d.predict([bev_feat], batch_data_samples)
+        results_list = self.bbox_head_3d.predict([bev_feat],
+                                                 batch_data_samples)
         predictions = self.add_pred_to_datasample(batch_data_samples,
                                                   results_list)
         return predictions
 
     def _forward(self,
-            batch_inputs: Tensor,
-            batch_data_samples: SampleList = None):
+                 batch_inputs: Tensor,
+                 batch_data_samples: SampleList = None):
         """Network forward process.
 
         Usually includes backbone, neck and head forward without any post-
@@ -426,12 +428,11 @@ def _forward(self,
         bev_feat = feats[0]
         self.bbox_head.forward(bev_feat, batch_data_samples)
 
-
     def add_pred_to_datasample(
-            self,
-            data_samples: SampleList,
-            data_instances_3d: OptInstanceList = None,
-            data_instances_2d: OptInstanceList = None,
+        self,
+        data_samples: SampleList,
+        data_instances_3d: OptInstanceList = None,
+        data_instances_2d: OptInstanceList = None,
     ) -> SampleList:
         """Convert results list to `Det3DDataSample`.
 
diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
index 7b3daa7cee..1c7b342013 100644
--- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
+++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
@@ -228,56 +228,9 @@
     result_prefix='./dsvt_pred')
 test_evaluator = val_evaluator
 
-# vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]
 vis_backends = [dict(type='LocalVisBackend')]
 visualizer = dict(
     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
 
-lr = 1e-5
-# This schedule is mainly used by models on nuScenes dataset
-# max_norm=10 is better for SECOND
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.05, betas=(0.9, 0.99)),
-    clip_grad=dict(max_norm=10, norm_type=2))
-# learning rate
-param_scheduler = [
-    dict(
-        type='CosineAnnealingLR',
-        T_max=1.2,
-        eta_min=lr * 100,
-        begin=0,
-        end=1.2,
-        by_epoch=True,
-        convert_to_iter_based=True),
-    dict(
-        type='CosineAnnealingLR',
-        T_max=10.8,
-        eta_min=lr * 1e-4,
-        begin=1.2,
-        end=12,
-        by_epoch=True,
-        convert_to_iter_based=True),
-    # momentum scheduler
-    dict(
-        type='CosineAnnealingMomentum',
-        T_max=1.2,
-        eta_min=0.85,
-        begin=0,
-        end=1.2,
-        by_epoch=True,
-        convert_to_iter_based=True),
-    dict(
-        type='CosineAnnealingMomentum',
-        T_max=10.8,
-        eta_min=0.95,
-        begin=1.2,
-        end=12,
-        by_epoch=True,
-        convert_to_iter_based=True)
-]
-
-# runtime settings
-train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=1)
 # schedules
 lr = 1e-5
diff --git a/tools/dataset_converters/kitti_converter.py b/tools/dataset_converters/kitti_converter.py
index e904918f60..367cfd7ba9 100644
--- a/tools/dataset_converters/kitti_converter.py
+++ b/tools/dataset_converters/kitti_converter.py
@@ -5,7 +5,6 @@
 import mmcv
 import mmengine
 import numpy as np
-from mmengine import logging, print_log
 from nuscenes.utils.geometry_utils import view_points
 
 from mmdet3d.structures import points_cam2img
@@ -250,12 +249,6 @@ def create_waymo_info_file(data_path,
         max_sweeps (int, optional): Max sweeps before the detection frame
             to be used. Default: 5.
     """
-    print_log(
-        'Deprecation Warning: related functions has been migrated to '
-        '`Waymo2KITTI.create_waymo_info_file`. It will be removed in '
-        'the future!',
-        logger='current',
-        level=logging.WARNING)
     imageset_folder = Path(data_path) / 'ImageSets'
     train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
     val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
diff --git a/tools/dataset_converters/kitti_data_utils.py b/tools/dataset_converters/kitti_data_utils.py
index ae1a858355..64c3bc415b 100644
--- a/tools/dataset_converters/kitti_data_utils.py
+++ b/tools/dataset_converters/kitti_data_utils.py
@@ -6,7 +6,6 @@
 
 import mmengine
 import numpy as np
-from mmengine import logging, print_log
 from PIL import Image
 from skimage import io
 
@@ -350,12 +349,6 @@ def __init__(self,
         self.relative_path = relative_path
         self.with_imageshape = with_imageshape
         self.max_sweeps = max_sweeps
-        print_log(
-            'Deprecation Warning: `WaymoInfoGatherer` has been migrated to '
-            '`Waymo2KITTI.create_waymo_info_file`. It will be removed in '
-            'the future!',
-            logger='current',
-            level=logging.WARNING)
 
     def gather_single(self, idx):
         root_path = Path(self.path)
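
Note on the `convert_utils.py` hunk above: the old code fed camera-frame KITTI boxes into `LiDARInstance3DBoxes`, treating them as if they were already in velodyne coordinates; the fix first maps the front-camera (cam0) boxes into the velodyne frame and only then into the i-th camera frame. Below is a minimal NumPy sketch of that two-step transform chain, restricted to box centers (the patched code also handles dimensions and yaw via `convert_to(..., correct_yaw=True)`). The helper name and the identity-matrix smoke test are hypothetical illustrations, not part of the patch.

```python
import numpy as np


def centers_cam0_to_cami(centers_cam0: np.ndarray, rect: np.ndarray,
                         lidar2cam0: np.ndarray,
                         lidar2cami: np.ndarray) -> np.ndarray:
    """Map (N, 3) box centers from the rectified front-camera (cam0) frame
    to the i-th camera frame via the LiDAR (velodyne) frame, mirroring the
    two ``convert_to()`` calls in the patch.

    All calibration inputs are 4x4 homogeneous matrices, as stored in
    KITTI-style infos (``R0_rect``, ``Tr_velo_to_cam``, ``Tr_velo_to_cam{i}``).
    """
    num = centers_cam0.shape[0]
    homo = np.hstack([centers_cam0, np.ones((num, 1))])  # (N, 4)
    # cam0 -> lidar: invert the lidar -> cam0 chain (rect @ Tr_velo_to_cam)
    cam0_to_lidar = np.linalg.inv(rect @ lidar2cam0)
    # lidar -> cam_i: rect @ Tr_velo_to_cam{i}
    lidar_to_cami = rect @ lidar2cami
    # compose, apply to all centers at once, drop the homogeneous column
    return (homo @ (lidar_to_cami @ cam0_to_lidar).T)[:, :3]


if __name__ == '__main__':
    # smoke test: with identity calibration, cam0 and cam_i coincide,
    # so the centers must come back unchanged
    eye = np.eye(4, dtype=np.float32)
    pts = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
    assert np.allclose(centers_cam0_to_cami(pts, eye, eye, eye), pts)
```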