diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..67a05c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Compiled python modules. +*.pyc + +# Python egg metadata, regenerated from source files by setuptools. +/*.egg-info +/*.egg + +# Data +*.npy +*.npz +*.txt diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..045e216 --- /dev/null +++ b/LICENSE @@ -0,0 +1,194 @@ +WaffleIron + +Copyright 2022 Valeo + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..d8329d4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,234 @@
+# WaffleIron
+
+![](./illustration.png)
+
+[**Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation**]()
+[*Gilles Puy*1](https://sites.google.com/site/puygilles/home),
+[*Alexandre Boulch*1](http://boulch.eu),
+[*Renaud Marlet*1,2](http://imagine.enpc.fr/~marletr/)
+1*valeo.ai, France* and 2*LIGM, Ecole des Ponts, Univ Gustave Eiffel, CNRS, France*.
+
+If you find this code or work useful, please cite the following [paper]():
+```
+@article{puy23waffleiron,
+  title={Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation},
+  author={Puy, Gilles and Boulch, Alexandre and Marlet, Renaud},
+  journal={arxiv:2301.xxxx},
+  year={2023}
+}
+```
+
+## Installation
+
+```
+pip install pyaml==6.0 tqdm==4.63.0 scipy==1.8.0 torch==1.11.0 tensorboard==2.8.0
+git clone https://github.com/valeoai/WaffleIron
+cd WaffleIron
+pip install -e ./
+```
+
+Download the pretrained models:
+```
+wget [ADD LINK]
+tar -xvzf pretrained_models_and_data.tar.gz
+```
+
+Finally, indicate where the nuScenes and SemanticKITTI datasets are located on your system:
+```
+export PATH_NUSCENES="/PATH/TO/NUSCENES"
+export PATH_KITTI="/PATH/TO/KITTI/"
+```
+
+If you want to uninstall this package, type `pip uninstall waffleiron`.
+
+
+## Testing pretrained models
+
+### Option 1: Using this code
+
+To evaluate the pre-trained model on the val set of nuScenes used in Table 1 of our paper, type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--fp16 \
+--gpu 0 \
+--restart \
+--eval
+```
+
+To evaluate, on the val set of SemanticKITTI, the pre-trained model trained with instance cutmix augmentation, type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--restart \
+--eval
+```
+
+**Remark:** *On SemanticKITTI, the code above will extract object instances on the train set (even though this is
+not necessary for validation) because this augmentation is activated for training on this dataset (and this code
+re-uses the training script). This can be bypassed by editing the `yaml` config file and changing the entry
+`instance_cutmix` to `False`. The instances are saved automatically in `/tmp/semantic_kitti_instances/`.*
+
+### Option 2: Using the official APIs
+
+The second option writes the predictions on disk so that the results can be computed using the official
+nuScenes or SemanticKITTI APIs. This option also allows you to perform test time augmentations, which is not possible
+with Option 1 above. These scripts should be usable for submission to the official benchmarks.
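+
+For reference, the voting scheme used by `eval_nuscenes.py` and `eval_kitti.py` simply averages the class
+probabilities predicted over several augmented versions of the same point cloud. A minimal sketch of this idea
+(the `net` and `augment` callables below are placeholders, not functions of this repository):
+```python
+import torch
+
+def predict_with_votes(net, pc, augment, num_votes=10):
+    # Accumulate softmax probabilities over several augmented passes
+    # of the same point cloud, then pick the best class per point.
+    vote = None
+    with torch.inference_mode():
+        for _ in range(num_votes):
+            prob = torch.softmax(net(augment(pc)), dim=1)  # N x C probabilities
+            vote = prob if vote is None else vote + prob
+    return vote.argmax(dim=1)  # one class index per point
+```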
+
+#### nuScenes
+
+To extract the predictions with the pre-trained model on nuScenes, type
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12
+```
+or, if you want to use, e.g., 10 votes with test time augmentations,
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12 \
+--num_votes 10 \
+--batch_size 5
+```
+You can reduce `batch_size` to 2 or 1 depending on the available memory.
+
+These predictions can be evaluated using the official nuScenes API as follows:
+```
+git clone https://github.com/nutonomy/nuscenes-devkit.git
+python nuscenes-devkit/python-sdk/nuscenes/eval/lidarseg/evaluate.py \
+--result_path ./predictions_nuscenes \
+--eval_set val \
+--version v1.0-trainval \
+--dataroot $PATH_NUSCENES \
+--verbose True
+```
+
+#### SemanticKITTI
+
+To evaluate the pre-trained model on SemanticKITTI, type
+```
+python eval_kitti.py \
+--path_dataset $PATH_KITTI \
+--ckpt ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ckpt_last.pth \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--result_folder ./predictions_kitti \
+--phase val \
+--num_workers 12
+```
+
+The predictions can be evaluated using the official API by typing
+```
+git clone https://github.com/PRBonn/semantic-kitti-api.git
+cd semantic-kitti-api/
+python evaluate_semantics.py \
+--dataset $PATH_KITTI/dataset \
+--predictions ../predictions_kitti \
+--split valid
+```
+
+## Training
+
+### nuScenes
+
+To train a WaffleIron-48-256 backbone on nuScenes with
+- 2D cells of 60 cm,
+- the baseline sequence of projections along the z-axis, then the y-axis, then the x-axis, etc., until the last layer,
+
+type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./logs/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--gpu 0 \
+--fp16
+```
+
+For example, with `--seed 1` as an additional argument to `launch_train.py`, we obtain 76.2% mIoU at the last
+training epoch (using one Nvidia Tesla V100S-PCIE-32GB for training).
+
+Note: for multi-GPU training, you can remove `--gpu 0` and the code will use all available GPUs via PyTorch
+DataParallel. You can add the argument `--multiprocessing-distributed` to use DistributedDataParallel instead.
+
+
+### SemanticKITTI
+
+To retrain a WaffleIron-48-256 backbone on SemanticKITTI with
+- 2D cells of 40 cm,
+- projection along the z-axis at all layers,
+- **instance cutmix augmentations**,
+
+type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./logs/WaffleIron-48-256__40cm-BEV-cutmix-kitti \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--multiprocessing-distributed
+```
+
+The instances for cutmix augmentation are saved in `/tmp/semantic_kitti_instances/`. You can disable the instance
+cutmix augmentations by editing the `yaml` config file to set `instance_cutmix` to `False`.
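+
+The 2D grid sizes in the config files follow from the field of view and the cell size: the values in the provided
+configs are consistent with flooring (FOV extent) / (cell size). A quick sanity check of the values used above
+(this helper is only an illustration, not part of the repository):
+```python
+def grid_shape(fov_min, fov_max, cell_size):
+    # Number of 2D cells covering the field of view at the given resolution
+    return int((fov_max - fov_min) / cell_size)
+
+# SemanticKITTI: x, y in [-50, 50] m with 40 cm cells -> grid [250, 250]
+assert grid_shape(-50, 50, 0.40) == 250
+# nuScenes: 60 cm cells -> [166, 166] on (x, y), and [166, 16] on (x, z) or (y, z)
+assert grid_shape(-50, 50, 0.60) == 166
+assert grid_shape(-5, 5, 0.60) == 16
+```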
+
+For submission to the official benchmark on the test set of SemanticKITTI, we also trained the network on both the
+train and val sets (argument `--trainval` in `launch_train.py`), used the checkpoint at the last epoch, and applied
+10 test time augmentations during inference.
+
+
+## Creating your own network
+
+### Config file
+
+You can refer to `./configs/WaffleIron-template-BEV-projection.yaml` and
+`./configs/WaffleIron-template-baseline-projection.yaml`, where we describe the role of each parameter.
+In particular, you can adjust `nb_channels` and `depth` to increase or decrease the capacity of WaffleIron.
+You can also adjust the memory required to train a network by adjusting `max_points` in `dataloader`, but
+too small a value might degrade the performance.
+
+### Models
+
+The WaffleIron backbone is defined in `waffleiron/backbone.py` and can be imported in your project by typing
+```python
+from waffleiron import WaffleIron
+```
+It needs to be combined with an embedding layer to provide point tokens and a pointwise classification layer, as we do
+in `waffleiron/segmenter.py`. You can define your own embedding and classification layers instead (see the sketch at
+the end of this README).
+
+
+## Acknowledgements
+We thank the authors of
+```
+@inproceedings{berman18lovasz,
+author = {Berman, Maxim and Triki, Amal Rannen and Blaschko, Matthew B.},
+title = {The Lovász-Softmax Loss: A Tractable Surrogate for the Optimization of the Intersection-Over-Union Measure
+in Neural Networks},
+booktitle = {CVPR},
+year = {2018}
+}
+```
+for making their [implementation](https://github.com/bermanmaxim/LovaszSoftmax) of the Lovász loss publicly available.
+
+
+## License
+WaffleIron is released under the [Apache 2.0 license](./LICENSE).
+
+The implementation of the Lovász loss in `utils/lovasz.py` is released under
+[MIT Licence](https://github.com/bermanmaxim/LovaszSoftmax/blob/master/LICENSE).
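+
+As a complement to the *Models* section above, the full segmenter (embedding layer, WaffleIron backbone, and
+pointwise classification layer) can be instantiated directly from one of the provided config files, as done in
+`eval_nuscenes.py` and `eval_kitti.py`. A minimal sketch:
+```python
+import yaml
+from waffleiron import Segmenter
+
+# Load one of the config files describing the architecture
+with open("configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml") as f:
+    config = yaml.safe_load(f)
+
+# Embedding layer + WaffleIron backbone + pointwise classification layer
+net = Segmenter(
+    input_channels=config["embedding"]["size_input"],
+    feat_channels=config["waffleiron"]["nb_channels"],
+    depth=config["waffleiron"]["depth"],
+    grid_shape=config["waffleiron"]["grids_size"],
+    nb_class=config["classif"]["nb_class"],
+)
+```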
diff --git a/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
new file mode 100644
index 0000000..b60fa67
--- /dev/null
+++ b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 3 # max value on z-axis: 3 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1 (and then the same at all layers)
+  grids_size: # Define here the size of the 2D grids
+    - [250, 250] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [250, 250] -> resolution 40cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 19 # Number of classes on SemanticKITTI (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: True
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.003
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
new file mode 100644
index 0000000..b631db1
--- /dev/null
+++ b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+    - 1 # Project along the y axis at \ell = 2
+    - 0 # Project along the x axis at \ell = 3
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+    - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+    - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.003
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-template-BEV-projection.yaml b/configs/WaffleIron-template-BEV-projection.yaml
new file mode 100644
index 0000000..5ac6296
--- /dev/null
+++ b/configs/WaffleIron-template-BEV-projection.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.001
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-template-baseline-projection.yaml b/configs/WaffleIron-template-baseline-projection.yaml
new file mode 100644
index 0000000..216b397
--- /dev/null
+++ b/configs/WaffleIron-template-baseline-projection.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+    - 1 # Project along the y axis at \ell = 2
+    - 0 # Project along the x axis at \ell = 3
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+    - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+    - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.001
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000..2cf6775
--- /dev/null
+++ b/datasets/__init__.py
@@ -0,0 +1,21 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .pc_dataset import Collate
+from .nuscenes import NuScenesSemSeg
+from .semantic_kitti import SemanticKITTI
+
+__all__ = ["SemanticKITTI", "NuScenesSemSeg", "Collate"]
+LIST_DATASETS = {"nuscenes": NuScenesSemSeg, "semantic_kitti": SemanticKITTI}
diff --git a/datasets/nuscenes.py b/datasets/nuscenes.py
new file mode 100755
index 0000000..617d7db
--- /dev/null
+++ b/datasets/nuscenes.py
@@ -0,0 +1,97 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
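+# nuScenes semantic segmentation dataset: loads the keyframe point clouds and labels listed in
+# list_files_nuscenes.npz, remaps the raw label indices to the 16 training classes, and maps
+# the ignore class to 255.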
+ + +import os +import numpy as np +from .pc_dataset import PCDataset + + +class ClassMapper: + def __init__(self): + current_folder = os.path.dirname(os.path.realpath(__file__)) + self.mapping = np.load( + os.path.join(current_folder, "mapping_class_index_nuscenes.npy") + ) + + def get_index(self, x): + return self.mapping[x] + + +class NuScenesSemSeg(PCDataset): + + CLASS_NAME = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", + ] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + # Class mapping + current_folder = os.path.dirname(os.path.realpath(__file__)) + self.mapper = np.vectorize(ClassMapper().get_index) + + # List all keyframes + self.list_frames = np.load( + os.path.join(current_folder, "list_files_nuscenes.npz") + )[self.phase] + if self.phase == "train": + assert len(self) == 28130 + elif self.phase == "val": + assert len(self) == 6019 + elif self.phase == "test": + assert len(self) == 6008 + else: + raise ValueError(f"Unknown phase {self.phase}.") + + assert not self.instance_cutmix, "Instance CutMix not implemented on nuscenes" + + def __len__(self): + return len(self.list_frames) + + def load_pc(self, index): + # Load point cloud + pc = np.fromfile( + os.path.join(self.rootdir, self.list_frames[index][0]), + dtype=np.float32, + ) + pc = pc.reshape((-1, 5))[:, :4] + + # Load segmentation labels + labels = np.fromfile( + os.path.join(self.rootdir, self.list_frames[index][1]), + dtype=np.uint8, + ) + labels = self.mapper(labels) + + # Label 0 should be ignored + labels = labels - 1 + labels[labels == -1] = 255 + + return pc, labels, self.list_frames[index][2] diff --git a/datasets/pc_dataset.py b/datasets/pc_dataset.py new file mode 100644 index 0000000..99175c4 --- /dev/null +++ b/datasets/pc_dataset.py @@ -0,0 +1,276 @@ +# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
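+# Shared dataset machinery: PCDataset handles voxel downsampling, FOV cropping, computation of
+# the 2D cell index for each projection plane, and kNN neighborhoods for the point embedding
+# layer; Collate zero-pads the point clouds of a batch to a common size.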
+
+
+import torch
+import numpy as np
+import utils.transforms as tr
+from torch.utils.data import Dataset
+from scipy.spatial import cKDTree as KDTree
+
+
+class PCDataset(Dataset):
+    def __init__(
+        self,
+        rootdir=None,
+        phase="train",
+        input_feat="intensity",
+        voxel_size=0.1,
+        train_augmentations=None,
+        dim_proj=[
+            0,
+        ],
+        grids_shape=[(256, 256)],
+        fov_xyz=(
+            (
+                -1.0,
+                -1.0,
+                -1.0,
+            ),
+            (1.0, 1.0, 1.0),
+        ),
+        num_neighbors=16,
+        tta=False,
+        instance_cutmix=False,
+    ):
+        super().__init__()
+
+        # Dataset split
+        self.phase = phase
+        assert self.phase in ["train", "val", "trainval", "test"]
+
+        # Root directory of dataset
+        self.rootdir = rootdir
+
+        # Input features to compute for each point
+        self.input_feat = input_feat
+
+        # Downsample input point cloud by small voxelization
+        self.downsample = tr.Voxelize(
+            dims=(0, 1, 2),
+            voxel_size=voxel_size,
+            random=(self.phase == "train" or self.phase == "trainval"),
+        )
+
+        # Field of view
+        assert len(fov_xyz[0]) == len(
+            fov_xyz[1]
+        ), "Min and Max FOV must have the same length."
+        for i, (fov_min, fov_max) in enumerate(zip(*fov_xyz)):
+            assert (
+                fov_min < fov_max
+            ), f"Field of view: min ({fov_min}) < max ({fov_max}) is expected on dimension {i}."
+        self.fov_xyz = np.concatenate([np.array(f)[None] for f in fov_xyz], axis=0)
+        self.crop_to_fov = tr.Crop(dims=(0, 1, 2), fov=fov_xyz)
+
+        # Grid shape for projection in 2D
+        assert len(grids_shape) == len(dim_proj)
+        self.dim_proj = dim_proj
+        self.grids_shape = [np.array(g) for g in grids_shape]
+        self.lut_axis_plane = {0: (1, 2), 1: (0, 2), 2: (0, 1)}
+
+        # Number of neighbors for embedding layer
+        assert num_neighbors > 0
+        self.num_neighbors = num_neighbors
+
+        # Test time augmentation
+        if tta:
+            assert self.phase in ["test", "val"]
+            self.tta = tr.Compose(
+                (
+                    tr.Rotation(inplace=True, dim=2),
+                    tr.RandomApply(tr.FlipXY(inplace=True), prob=2.0 / 3.0),
+                    tr.Scale(inplace=True, dims=(0, 1, 2), range=0.1),
+                )
+            )
+        else:
+            self.tta = None
+
+        # Train time augmentations
+        if train_augmentations is not None:
+            assert self.phase in ["train", "trainval"]
+        self.train_augmentations = train_augmentations
+
+        # Flag for instance cutmix
+        self.instance_cutmix = instance_cutmix
+
+    def get_occupied_2d_cells(self, pc):
+        """Return mapping between 3D points and corresponding 2D cells"""
+        cell_ind = []
+        for dim, grid in zip(self.dim_proj, self.grids_shape):
+            # Get plane on which to project
+            dims = self.lut_axis_plane[dim]
+            # Compute grid resolution
+            res = (self.fov_xyz[1, dims] - self.fov_xyz[0, dims]) / grid[None]
+            # Shift and quantize point cloud
+            pc_quant = ((pc[:, dims] - self.fov_xyz[0, dims]) / res).astype("int")
+            # Check that the point cloud fits on the grid
+            mins, maxs = pc_quant.min(0), pc_quant.max(0)
+            assert mins[0] >= 0 and mins[1] >= 0, (
+                f"Some points are outside the FOV: {pc[:, :3].min(0)} vs. {self.fov_xyz}"
+            )
+            assert maxs[0] < grid[0] and maxs[1] < grid[1], (
+                f"Some points are outside the FOV: {pc[:, :3].max(0)} vs. {self.fov_xyz}"
+            )
+            # Transform quantized coordinates to cell indices for projection on 2D plane
+            temp = pc_quant[:, 0] * grid[1] + pc_quant[:, 1]
+            cell_ind.append(temp[None])
+        return np.vstack(cell_ind)
+
+    def prepare_input_features(self, pc_orig):
+        # Concatenate desired input features to coordinates
+        pc = [pc_orig[:, :3]]  # Initialize with coordinates
+        for feat_type in self.input_feat:
+            if feat_type == "intensity":
+                pc.append(pc_orig[:, 3:])
+            elif feat_type == "height":
+                pc.append(pc_orig[:, 2:3])
+            elif feat_type == "radius":
+                r_xyz = np.linalg.norm(pc_orig[:, :3], axis=1, keepdims=True)
+                pc.append(r_xyz)
+            else:
+                raise ValueError(f"Unknown feature: {feat_type}")
+        return np.concatenate(pc, 1)
+
+    def load_pc(self, index):
+        raise NotImplementedError()
+
+    def __len__(self):
+        raise NotImplementedError()
+
+    def __getitem__(self, index):
+        # Load original point cloud
+        pc_orig, labels_orig, filename = self.load_pc(index)
+
+        # Prepare input features
+        pc_orig = self.prepare_input_features(pc_orig)
+
+        # Test time augmentation
+        if self.tta is not None:
+            pc_orig, labels_orig = self.tta(pc_orig, labels_orig)
+
+        # Voxelization
+        pc, labels = self.downsample(pc_orig, labels_orig)
+
+        # Augment data
+        if self.train_augmentations is not None:
+            pc, labels = self.train_augmentations(pc, labels)
+
+        # Crop to fov
+        pc, labels = self.crop_to_fov(pc, labels)
+
+        # For each point, get index of corresponding 2D cells on projected grid
+        cell_ind = self.get_occupied_2d_cells(pc)
+
+        # Get neighbors for point embedding layer providing tokens to waffleiron backbone
+        kdtree = KDTree(pc[:, :3])
+        assert pc.shape[0] > self.num_neighbors
+        _, neighbors_emb = kdtree.query(pc[:, :3], k=self.num_neighbors + 1)
+
+        # Nearest neighbor interpolation to undo cropping & voxelization at validation time
+        if self.phase in ["train", "trainval"]:
+            upsample = np.arange(pc.shape[0])
+        else:
+            _, upsample = kdtree.query(pc_orig[:, :3], k=1)
+
+        # Output to return
+        out = (
+            # Point features
+            pc[:, 3:].T[None],
+            # Point labels of original entire point cloud
+            labels if self.phase in ["train", "trainval"] else labels_orig,
+            # Projection 2D -> 3D: index of 2D cells for each point
+            cell_ind[None],
+            # Neighborhood for point embedding layer, which provides tokens to waffleiron backbone
+            neighbors_emb.T[None],
+            # For interpolation from voxelized & cropped point cloud to original point cloud
+            upsample,
+            # Filename of original point cloud
+            filename,
+        )
+
+        return out
+
+
+def zero_pad(feat, neighbors_emb, cell_ind, Nmax):
+    N = feat.shape[-1]
+    assert N <= Nmax
+    occupied_cells = np.ones((1, Nmax))
+    if N < Nmax:
+        # Zero-pad with null features
+        feat = np.concatenate((feat, np.zeros((1, feat.shape[1], Nmax - N))), axis=2)
+        # For zero-padded points, use the last zero-padded point as neighbor
+        neighbors_emb = np.concatenate(
+            (
+                neighbors_emb,
+                (Nmax - 1) * np.ones((1, neighbors_emb.shape[1], Nmax - N)),
+            ),
+            axis=2,
+        )
+        # Associate zero-padded points to first 2D cell...
+        cell_ind = np.concatenate(
+            (cell_ind, np.zeros((1, cell_ind.shape[1], Nmax - N))), axis=2
+        )
+        # ... and at the same time mark zero-padded points as unoccupied
+        occupied_cells[:, N:] = 0
+    return feat, neighbors_emb, cell_ind, occupied_cells
+
+
+class Collate:
+    def __init__(self, num_points=None):
+        self.num_points = num_points
+        assert num_points is None or num_points > 0
+
+    def __call__(self, list_data):
+
+        # Extract all data
+        list_of_data = (list(data) for data in zip(*list_data))
+        feat, label_orig, cell_ind, neighbors_emb, upsample, filename = list_of_data
+
+        # Zero-pad point clouds
+        Nmax = np.max([f.shape[-1] for f in feat])
+        if self.num_points is not None:
+            assert Nmax <= self.num_points
+        occupied_cells = []
+        for i in range(len(feat)):
+            feat[i], neighbors_emb[i], cell_ind[i], temp = zero_pad(
+                feat[i],
+                neighbors_emb[i],
+                cell_ind[i],
+                Nmax if self.num_points is None else self.num_points,
+            )
+            occupied_cells.append(temp)
+
+        # Concatenate along batch dimension
+        feat = torch.from_numpy(np.vstack(feat)).float()  # B x C x Nmax
+        neighbors_emb = torch.from_numpy(np.vstack(neighbors_emb)).long()  # B x (num_neighbors + 1) x Nmax
+        cell_ind = torch.from_numpy(
+            np.vstack(cell_ind)
+        ).long()  # B x nb_projections x Nmax
+        occupied_cells = torch.from_numpy(np.vstack(occupied_cells)).float()  # B x Nmax
+        labels_orig = torch.from_numpy(np.hstack(label_orig)).long()
+        upsample = [torch.from_numpy(u) for u in upsample]
+
+        # Prepare output variables
+        out = {
+            "feat": feat,
+            "neighbors_emb": neighbors_emb,
+            "upsample": upsample,
+            "labels_orig": labels_orig,
+            "cell_ind": cell_ind,
+            "occupied_cells": occupied_cells,
+            "filename": filename,
+        }
+
+        return out
diff --git a/datasets/semantic-kitti.yaml b/datasets/semantic-kitti.yaml
new file mode 100755
index 0000000..6281065
--- /dev/null
+++ b/datasets/semantic-kitti.yaml
@@ -0,0 +1,211 @@
+# This file is covered by the LICENSE file in the root of this project.
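+# Class names, color map, class frequencies, label remapping and split as in the official
+# semantic-kitti-api repository (https://github.com/PRBonn/semantic-kitti-api).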
+labels: + 0 : "unlabeled" + 1 : "outlier" + 10: "car" + 11: "bicycle" + 13: "bus" + 15: "motorcycle" + 16: "on-rails" + 18: "truck" + 20: "other-vehicle" + 30: "person" + 31: "bicyclist" + 32: "motorcyclist" + 40: "road" + 44: "parking" + 48: "sidewalk" + 49: "other-ground" + 50: "building" + 51: "fence" + 52: "other-structure" + 60: "lane-marking" + 70: "vegetation" + 71: "trunk" + 72: "terrain" + 80: "pole" + 81: "traffic-sign" + 99: "other-object" + 252: "moving-car" + 253: "moving-bicyclist" + 254: "moving-person" + 255: "moving-motorcyclist" + 256: "moving-on-rails" + 257: "moving-bus" + 258: "moving-truck" + 259: "moving-other-vehicle" +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 10: [245, 150, 100] + 11: [245, 230, 100] + 13: [250, 80, 100] + 15: [150, 60, 30] + 16: [255, 0, 0] + 18: [180, 30, 80] + 20: [255, 0, 0] + 30: [30, 30, 255] + 31: [200, 40, 255] + 32: [90, 30, 150] + 40: [255, 0, 255] + 44: [255, 150, 255] + 48: [75, 0, 75] + 49: [75, 0, 175] + 50: [0, 200, 255] + 51: [50, 120, 255] + 52: [0, 150, 255] + 60: [170, 255, 150] + 70: [0, 175, 0] + 71: [0, 60, 135] + 72: [80, 240, 150] + 80: [150, 240, 255] + 81: [0, 0, 255] + 99: [255, 255, 50] + 252: [245, 150, 100] + 256: [255, 0, 0] + 253: [200, 40, 255] + 254: [30, 30, 255] + 255: [90, 30, 150] + 257: [250, 80, 100] + 258: [180, 30, 80] + 259: [255, 0, 0] +content: # as a ratio with the total number of points + 0: 0.018889854628292943 + 1: 0.0002937197336781505 + 10: 0.040818519255974316 + 11: 0.00016609538710764618 + 13: 2.7879693665067774e-05 + 15: 0.00039838616015114444 + 16: 0.0 + 18: 0.0020633612104619787 + 20: 0.0016218197275284021 + 30: 0.00017698551338515307 + 31: 1.1065903904919655e-08 + 32: 5.532951952459828e-09 + 40: 0.1987493871255525 + 44: 0.014717169549888214 + 48: 0.14392298360372 + 49: 0.0039048553037472045 + 50: 0.1326861944777486 + 51: 0.0723592229456223 + 52: 0.002395131480328884 + 60: 4.7084144280367186e-05 + 70: 0.26681502148037506 + 71: 0.006035012012626033 + 72: 0.07814222006271769 + 80: 0.002855498193863172 + 81: 0.0006155958086189918 + 99: 0.009923127583046915 + 252: 0.001789309418528068 + 253: 0.00012709999297008662 + 254: 0.00016059776092534436 + 255: 3.745553104802113e-05 + 256: 0.0 + 257: 0.00011351574470342043 + 258: 0.00010157861367183268 + 259: 4.3840131989471124e-05 +# classes that are indistinguishable from single scan or inconsistent in +# ground truth are mapped to their closest equivalent +learning_map: + 0 : 0 # "unlabeled" + 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 1 # "car" + 11: 2 # "bicycle" + 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 3 # "motorcycle" + 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 4 # "truck" + 20: 5 # "other-vehicle" + 30: 6 # "person" + 31: 7 # "bicyclist" + 32: 8 # "motorcyclist" + 40: 9 # "road" + 44: 10 # "parking" + 48: 11 # "sidewalk" + 49: 12 # "other-ground" + 50: 13 # "building" + 51: 14 # "fence" + 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 9 # "lane-marking" to "road" ---------------------------------mapped + 70: 15 # "vegetation" + 71: 16 # "trunk" + 72: 17 # "terrain" + 80: 18 # "pole" + 81: 19 # "traffic-sign" + 99: 0 # "other-object" to "unlabeled" ----------------------------mapped + 252: 1 # "moving-car" to "car" ------------------------------------mapped + 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped + 254: 6 # "moving-person" to "person" 
------------------------------mapped
+  255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped
+  256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped
+  257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped
+  258: 4 # "moving-truck" to "truck" --------------------------------mapped
+  259: 5 # "moving-other-vehicle" to "other-vehicle" ----------------mapped
+learning_map_inv: # inverse of previous map
+  0: 0 # "unlabeled", and others ignored
+  1: 10 # "car"
+  2: 11 # "bicycle"
+  3: 15 # "motorcycle"
+  4: 18 # "truck"
+  5: 20 # "other-vehicle"
+  6: 30 # "person"
+  7: 31 # "bicyclist"
+  8: 32 # "motorcyclist"
+  9: 40 # "road"
+  10: 44 # "parking"
+  11: 48 # "sidewalk"
+  12: 49 # "other-ground"
+  13: 50 # "building"
+  14: 51 # "fence"
+  15: 70 # "vegetation"
+  16: 71 # "trunk"
+  17: 72 # "terrain"
+  18: 80 # "pole"
+  19: 81 # "traffic-sign"
+learning_ignore: # Ignore classes
+  0: True # "unlabeled", and others ignored
+  1: False # "car"
+  2: False # "bicycle"
+  3: False # "motorcycle"
+  4: False # "truck"
+  5: False # "other-vehicle"
+  6: False # "person"
+  7: False # "bicyclist"
+  8: False # "motorcyclist"
+  9: False # "road"
+  10: False # "parking"
+  11: False # "sidewalk"
+  12: False # "other-ground"
+  13: False # "building"
+  14: False # "fence"
+  15: False # "vegetation"
+  16: False # "trunk"
+  17: False # "terrain"
+  18: False # "pole"
+  19: False # "traffic-sign"
+split: # sequence numbers
+  train:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 9
+    - 10
+  valid:
+    - 8
+  test:
+    - 11
+    - 12
+    - 13
+    - 14
+    - 15
+    - 16
+    - 17
+    - 18
+    - 19
+    - 20
+    - 21
diff --git a/datasets/semantic_kitti.py b/datasets/semantic_kitti.py
new file mode 100644
index 0000000..86436b6
--- /dev/null
+++ b/datasets/semantic_kitti.py
@@ -0,0 +1,260 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
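+# SemanticKITTI dataset with optional instance cutmix: instances of the rare classes bicycle,
+# motorcycle, person and bicyclist are extracted once, cached on disk, and pasted at random
+# locations on road, parking or sidewalk surfaces during training.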
+
+
+import os
+import yaml
+import torch
+import warnings
+import numpy as np
+from glob import glob
+from tqdm import tqdm
+import utils.transforms as tr
+from .pc_dataset import PCDataset
+
+
+class InstanceCutMix:
+    def __init__(self, phase="train", temp_dir="/tmp/semantic_kitti_instances/"):
+
+        # Train or Trainval
+        self.phase = phase
+        assert self.phase in ["train", "trainval"]
+
+        # List of files containing instances for bicycle, motorcycle, person, bicyclist
+        self.bank = {1: [], 2: [], 5: [], 6: []}
+
+        # Directory where to store instances
+        self.rootdir = os.path.join(temp_dir, self.phase)
+        for id_class in self.bank.keys():
+            os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+
+        # Load instances
+        for key in self.bank.keys():
+            self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+        self.__loaded__ = self.test_loaded()
+        if not self.__loaded__:
+            warnings.warn(
+                "Instances must be extracted and saved on disk before training"
+            )
+
+        # Augmentations applied to instances
+        self.rot = tr.Compose(
+            (
+                tr.FlipXY(inplace=True),
+                tr.Rotation(inplace=True),
+                tr.Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+            )
+        )
+
+        # For each class, maximum number of instances to add
+        self.num_to_add = 40
+
+        # Voxelization of 1m to downsample the point cloud and ensure that
+        # the centers of the instances are at least 1m away from each other
+        self.vox = tr.Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+
+    def test_loaded(self):
+        self.__loaded__ = False
+        if self.phase == "train":
+            if len(self.bank[1]) != 5083:
+                print(f"Expected 5083 instances but got {len(self.bank[1])}.")
+                return False
+            if len(self.bank[2]) != 3092:
+                print(f"Expected 3092 instances but got {len(self.bank[2])}.")
+                return False
+            if len(self.bank[5]) != 8084:
+                print(f"Expected 8084 instances but got {len(self.bank[5])}.")
+                return False
+            if len(self.bank[6]) != 1551:
+                print(f"Expected 1551 instances but got {len(self.bank[6])}.")
+                return False
+        elif self.phase == "trainval":
+            if len(self.bank[1]) != 8213:
+                print(f"Expected 8213 instances but got {len(self.bank[1])}.")
+                return False
+            if len(self.bank[2]) != 4169:
+                print(f"Expected 4169 instances but got {len(self.bank[2])}.")
+                return False
+            if len(self.bank[5]) != 12190:
+                print(f"Expected 12190 instances but got {len(self.bank[5])}.")
+                return False
+            if len(self.bank[6]) != 2943:
+                print(f"Expected 2943 instances but got {len(self.bank[6])}.")
+                return False
+        self.__loaded__ = True
+        return True
+
+    def cut(self, pc, class_label, instance_label):
+        for id_class in self.bank.keys():
+            where_class = class_label == id_class
+            all_instances = np.unique(instance_label[where_class])
+            for id_instance in all_instances:
+                # Segment instance
+                where_ins = instance_label == id_instance
+                if where_ins.sum() <= 5:
+                    continue
+                instance = pc[where_ins, :]
+                # Center instance
+                instance[:, :2] -= instance[:, :2].mean(0, keepdims=True)
+                instance[:, 2] -= instance[:, 2].min(0, keepdims=True)
+                # Save instance
+                pathfile = os.path.join(
+                    self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+                )
+                instance.tofile(pathfile)
+                self.bank[id_class].append(pathfile)
+
+    def mix(self, pc, class_label):
+
+        # Find potential locations where to add new objects (on road, parking or sidewalk surfaces)
+        pc_vox, class_label_vox = self.vox(pc, class_label)
+        where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+        where_surface = where_surface[torch.randperm(len(where_surface))]
+
+        # Add instances of each class in bank
+        id_tot = 0
+        new_pc, new_label = [pc], [class_label]
+        for id_class in self.bank.keys():
+            nb_to_add = torch.randint(self.num_to_add, (1,))[0]
+            which_one = torch.randint(len(self.bank[id_class]), (nb_to_add,))
+            for ii in range(nb_to_add):
+                # Point p where to add the instance
+                p = pc_vox[where_surface[id_tot]]
+                # Extract instance
+                obj = self.bank[id_class][which_one[ii]]
+                obj = np.fromfile(obj, dtype=np.float32).reshape((-1, 4))
+                # Augment instance
+                label = np.ones((obj.shape[0],), dtype=np.int64) * id_class
+                obj, label = self.rot(obj, label)
+                # Move instance to point p
+                obj[:, :3] += p[:3][None]
+                # Add instance to the point cloud
+                new_pc.append(obj)
+                # Add corresponding labels
+                new_label.append(label)
+                id_tot += 1
+
+        return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+    def __call__(self, pc, class_label, instance_label):
+        if not self.__loaded__:
+            self.cut(pc, class_label, instance_label)
+            return None, None
+
+        return self.mix(pc, class_label)
+
+
+class SemanticKITTI(PCDataset):
+
+    CLASS_NAME = [
+        "car",  # 0
+        "bicycle",  # 1
+        "motorcycle",  # 2
+        "truck",  # 3
+        "other-vehicle",  # 4
+        "person",  # 5
+        "bicyclist",  # 6
+        "motorcyclist",  # 7
+        "road",  # 8
+        "parking",  # 9
+        "sidewalk",  # 10
+        "other-ground",  # 11
+        "building",  # 12
+        "fence",  # 13
+        "vegetation",  # 14
+        "trunk",  # 15
+        "terrain",  # 16
+        "pole",  # 17
+        "traffic-sign",  # 18
+    ]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        # Config file and class mapping
+        current_folder = os.path.dirname(os.path.realpath(__file__))
+        with open(os.path.join(current_folder, "semantic-kitti.yaml")) as stream:
+            semkittiyaml = yaml.safe_load(stream)
+        self.learning_map = semkittiyaml["learning_map"]
+
+        # Split
+        if self.phase == "train":
+            split = semkittiyaml["split"]["train"]
+        elif self.phase == "val":
+            split = semkittiyaml["split"]["valid"]
+        elif self.phase == "test":
+            split = semkittiyaml["split"]["test"]
+        elif self.phase == "trainval":
+            split = semkittiyaml["split"]["train"] + semkittiyaml["split"]["valid"]
+        else:
+            raise ValueError(f"Unknown split {self.phase}")
+
+        # Find all files
+        self.im_idx = []
+        for i_folder in np.sort(split):
+            self.im_idx.extend(
+                glob(
+                    os.path.join(
+                        self.rootdir,
+                        "dataset",
+                        "sequences",
+                        str(i_folder).zfill(2),
+                        "velodyne",
+                        "*.bin",
+                    )
+                )
+            )
+        self.im_idx = np.sort(self.im_idx)
+
+        # Training with instance cutmix
+        if self.instance_cutmix:
+            assert (
+                self.phase != "test" and self.phase != "val"
+            ), "Instance cutmix should not be applied at test or val time"
+            self.cutmix = InstanceCutMix(phase=self.phase)
+            if not self.cutmix.test_loaded():
+                print("Extracting instances before training...")
+                for index in tqdm(range(len(self))):
+                    self.load_pc(index)
+                print("Done.")
+                assert self.cutmix.test_loaded(), "Instances not extracted correctly"
+
+    def __len__(self):
+        return len(self.im_idx)
+
+    def load_pc(self, index):
+        # Load point cloud
+        pc = np.fromfile(self.im_idx[index], dtype=np.float32).reshape((-1, 4))
+
+        # Extract labels
+        if self.phase == "test":
+            labels = np.zeros((pc.shape[0], 1), dtype=np.uint8)
+        else:
+            labels_inst = np.fromfile(
+                self.im_idx[index].replace("velodyne", "labels")[:-3] + "label",
+                dtype=np.uint32,
+            ).reshape((-1, 1))
+            labels = labels_inst & 0xFFFF  # lower 16 bits hold the semantic label
+            labels = np.vectorize(self.learning_map.__getitem__)(labels).astype(
+                np.int32
+            )
+
+        # Map ignore index (0) to 255
+        labels = labels[:, 0] - 1
+        labels[labels == -1] = 255
+
+        # Instance CutMix
+        if self.instance_cutmix:
+            pc, labels = self.cutmix(pc, labels, labels_inst[:, 0])
+
+        return pc, labels, self.im_idx[index]
diff --git a/eval_kitti.py b/eval_kitti.py
new file mode 100644
index 0000000..be172aa
--- /dev/null
+++ b/eval_kitti.py
@@ -0,0 +1,156 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import SemanticKITTI, Collate
+
+
+if __name__ == "__main__":
+
+    # --- Arguments
+    parser = argparse.ArgumentParser(description="Evaluation")
+    parser.add_argument("--config", type=str, help="Path to config file")
+    parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+    parser.add_argument("--path_dataset", type=str, help="Path to SemanticKITTI dataset")
+    parser.add_argument("--result_folder", type=str, help="Path to folder where predictions are saved")
+    parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+    parser.add_argument("--num_workers", type=int, default=6)
+    parser.add_argument("--phase", required=True, help="val or test")
+    args = parser.parse_args()
+    assert args.num_votes % args.batch_size == 0
+    os.makedirs(args.result_folder, exist_ok=True)
+
+    # --- Load config file
+    with open(args.config, "r") as f:
+        config = yaml.safe_load(f)
+
+    # --- SemanticKITTI (from https://github.com/PRBonn/semantic-kitti-api/blob/master/remap_semantic_labels.py)
+    with open("./datasets/semantic-kitti.yaml") as stream:
+        semkittiyaml = yaml.safe_load(stream)
+    remapdict = semkittiyaml["learning_map_inv"]
+    maxkey = max(remapdict.keys())
+    remap_lut = np.zeros((maxkey + 100), dtype=np.int32)
+    remap_lut[list(remapdict.keys())] = list(remapdict.values())
+
+    # --- Dataloader
+    dataset = SemanticKITTI(
+        rootdir=args.path_dataset,
+        input_feat=config["embedding"]["input_feat"],
+        voxel_size=config["embedding"]["voxel_size"],
+        num_neighbors=config["embedding"]["neighbors"],
+        dim_proj=config["waffleiron"]["dim_proj"],
+        grids_shape=config["waffleiron"]["grids_size"],
+        fov_xyz=config["waffleiron"]["fov_xyz"],
+        phase=args.phase,
+        tta=(args.num_votes > 1),
+    )
+    if args.num_votes > 1:
+        new_list = []
+        for f in dataset.im_idx:
+            for v in range(args.num_votes):
+                new_list.append(f)
+        dataset.im_idx = new_list
+    loader = DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.num_workers,
+        pin_memory=True,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+    args.num_votes = args.num_votes // args.batch_size
+
+    # --- Build network
+    net = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+    net = net.cuda()
+
+    # --- Load weights
+    ckpt = torch.load(args.ckpt, map_location="cuda:0")
+    try:
+        net.load_state_dict(ckpt["net"])
+    except RuntimeError:
+        # If the model was trained using DataParallel or DistributedDataParallel,
+        # strip the "module." prefix from the parameter names
+        state_dict = {}
+        for key in ckpt["net"].keys():
+            state_dict[key[len("module."):]] = ckpt["net"][key]
+        net.load_state_dict(state_dict)
+    net = net.eval()
+
+    # --- Evaluation
+    id_vote = 0
+    for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+        # Reset vote
+        if id_vote == 0:
+            vote = None
+
+        # Network inputs
+        feat = batch["feat"].cuda(non_blocking=True)
+        labels = batch["labels_orig"].cuda(non_blocking=True)
+        batch["upsample"] = [
+            up.cuda(non_blocking=True) for up in batch["upsample"]
+        ]
+        cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+        occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+        neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+        net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+        # Get prediction
+        with torch.autocast("cuda", enabled=True):
+            with torch.inference_mode():
+                out = net(*net_inputs)
+                for b in range(out.shape[0]):
+                    temp = out[b, :, batch["upsample"][b]].T
+                    if vote is None:
+                        vote = torch.softmax(temp, dim=1)
+                    else:
+                        vote += torch.softmax(temp, dim=1)
+        id_vote += 1
+
+        # Save prediction
+        if id_vote == args.num_votes:
+            # Convert label
+            pred_label = vote.max(1)[1] + 1  # Shift by 1 because of ignore_label at index 0
+            label = pred_label.cpu().numpy().reshape((-1)).astype(np.uint32)
+            upper_half = label >> 16  # get upper half for instances
+            lower_half = label & 0xFFFF  # get lower half for semantics
+            lower_half = remap_lut[lower_half]  # do the remapping of semantics
+            label = (upper_half << 16) + lower_half  # reconstruct full label
+            label = label.astype(np.uint32)
+            # Save result
+            assert batch["filename"][0] == batch["filename"][-1]
+            label_file = batch["filename"][0][len(dataset.rootdir) + len("/dataset"):]
+            label_file = label_file.replace("velodyne", "predictions")[:-3] + "label"
+            label_file = os.path.join(args.result_folder, label_file)
+            os.makedirs(os.path.split(label_file)[0], exist_ok=True)
+            label.tofile(label_file)
+            # Reset count of votes
+            id_vote = 0
\ No newline at end of file
diff --git a/eval_nuscenes.py b/eval_nuscenes.py
new file mode 100644
index 0000000..676b38b
--- /dev/null
+++ b/eval_nuscenes.py
@@ -0,0 +1,139 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
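+# Inference on nuScenes: accumulates softmax votes over test time augmentations and writes
+# one uint8 label per point in the format expected by the official lidarseg evaluation.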
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import NuScenesSemSeg, Collate
+
+
+if __name__ == "__main__":
+
+    # --- Arguments
+    parser = argparse.ArgumentParser(description="Evaluation")
+    parser.add_argument("--config", type=str, help="Path to config file")
+    parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+    parser.add_argument("--path_dataset", type=str, help="Path to nuScenes dataset")
+    parser.add_argument("--result_folder", type=str, help="Path to result folder")
+    parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+    parser.add_argument("--num_workers", type=int, default=6)
+    parser.add_argument("--phase", required=True, help="val or test")
+    args = parser.parse_args()
+    assert args.num_votes % args.batch_size == 0
+    args.result_folder = os.path.join(args.result_folder, "lidarseg", args.phase)
+    os.makedirs(args.result_folder, exist_ok=True)
+
+    # --- Load config file
+    with open(args.config, "r") as f:
+        config = yaml.safe_load(f)
+
+    # --- Dataloader
+    dataset = NuScenesSemSeg(
+        rootdir=args.path_dataset,
+        input_feat=config["embedding"]["input_feat"],
+        voxel_size=config["embedding"]["voxel_size"],
+        num_neighbors=config["embedding"]["neighbors"],
+        dim_proj=config["waffleiron"]["dim_proj"],
+        grids_shape=config["waffleiron"]["grids_size"],
+        fov_xyz=config["waffleiron"]["fov_xyz"],
+        phase=args.phase,
+        tta=(args.num_votes > 1),
+    )
+    if args.num_votes > 1:
+        # Repeat each scan num_votes times for test-time augmentation
+        new_list = []
+        for f in dataset.list_frames:
+            for v in range(args.num_votes):
+                new_list.append(f)
+        dataset.list_frames = new_list
+    loader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.num_workers,
+        pin_memory=True,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+    args.num_votes = args.num_votes // args.batch_size
+
+    # --- Build network
+    net = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+    net = net.cuda()
+
+    # --- Load weights
+    ckpt = torch.load(args.ckpt, map_location="cuda:0")
+    try:
+        net.load_state_dict(ckpt["net"])
+    except RuntimeError:
+        # If the model was trained with DataParallel or DistributedDataParallel,
+        # strip the "module." prefix from the parameter names before loading
+        state_dict = {}
+        for key in ckpt["net"].keys():
+            state_dict[key[len("module."):]] = ckpt["net"][key]
+        net.load_state_dict(state_dict)
+    net = net.eval()
+
+    # --- Evaluation
+    id_vote = 0
+    for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+        # Reset vote
+        if id_vote == 0:
+            vote = None
+
+        # Network inputs
+        feat = batch["feat"].cuda(non_blocking=True)
+        labels = batch["labels_orig"].cuda(non_blocking=True)
+        batch["upsample"] = [
+            up.cuda(non_blocking=True) for up in batch["upsample"]
+        ]
+        cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+        occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+        neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+        net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+        # Accumulate softmax predictions over test-time augmentations
+        with torch.autocast("cuda", enabled=True):
+            with torch.inference_mode():
+                out = net(*net_inputs)
+                for b in range(out.shape[0]):
+                    temp = out[b, :, batch["upsample"][b]].T
+                    if vote is None:
+                        vote = torch.softmax(temp, dim=1)
+                    else:
+                        vote += torch.softmax(temp, dim=1)
+        id_vote += 1
+
+        # Save prediction
+        if id_vote == args.num_votes:
+            # Get label
+            pred_label = vote.max(1)[1] + 1  # Shift by 1 because of ignore_label at index 0
+            # Save result
+            bin_file_path = os.path.join(args.result_folder, batch["filename"][0] + "_lidarseg.bin")
+            pred_label.cpu().numpy().astype(np.uint8).tofile(bin_file_path)
+            # Reset count of votes
+            id_vote = 0
\ No newline at end of file
diff --git a/illustration.png b/illustration.png
new file mode 100755
index 0000000..7c6ba7d
Binary files /dev/null and b/illustration.png differ
diff --git a/launch_train.py b/launch_train.py
new file mode 100644
index 0000000..935aeca
--- /dev/null
+++ b/launch_train.py
@@ -0,0 +1,374 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import random
+import warnings
+import argparse
+import numpy as np
+import utils.transforms as tr
+from utils.metrics import SemSegLoss
+from utils.scheduler import WarmupCosine
+from utils.trainer import TrainingManager
+from waffleiron.segmenter import Segmenter
+from datasets import LIST_DATASETS, Collate
+
+
+def load_model_config(file):
+    with open(file, "r") as f:
+        config = yaml.safe_load(f)
+    return config
+
+
+def get_train_augmentations(config):
+
+    list_of_transf = []
+
+    # Transformation shared across all datasets
+    list_of_transf.append(
+        tr.LimitNumPoints(
+            dims=(0, 1, 2),
+            max_point=config["dataloader"]["max_points"],
+            random=True,
+        )
+    )
+
+    # Optional augmentations
+    for aug_name in config["augmentations"].keys():
+        if aug_name == "rotation_z":
+            list_of_transf.append(tr.Rotation(inplace=True, dim=2))
+        elif aug_name == "flip_xy":
+            list_of_transf.append(tr.RandomApply(tr.FlipXY(inplace=True), prob=2 / 3))
+        elif aug_name == "scale":
+            dims = config["augmentations"]["scale"][0]
+            scale = config["augmentations"]["scale"][1]
+            list_of_transf.append(tr.Scale(inplace=True, dims=dims, range=scale))
+        elif aug_name == "instance_cutmix":
+            # Do nothing here, directly handled in the SemanticKITTI dataset
+            continue
+        else:
+            raise ValueError("Unknown transformation")
+
+    print("List of transformations:", list_of_transf)
+
+    return tr.Compose(list_of_transf)
+
+
+def get_datasets(config, args):
+
+    # Shared parameters
+    kwargs = {
+        "rootdir": os.path.join("/datasets_local/", args.path_dataset),
+        "input_feat": config["embedding"]["input_feat"],
+        "voxel_size": config["embedding"]["voxel_size"],
+        "num_neighbors": config["embedding"]["neighbors"],
+        "dim_proj": config["waffleiron"]["dim_proj"],
+        "grids_shape": config["waffleiron"]["grids_size"],
+        "fov_xyz": config["waffleiron"]["fov_xyz"],
+    }
+
+    # Get dataset
+    DATASET = LIST_DATASETS.get(args.dataset.lower())
+    if DATASET is None:
+        raise ValueError(f"Dataset {args.dataset.lower()} not available.")
+
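+    # Note: kwargs above holds the constructor arguments shared by the train
+    # and validation splits; only the phase and the augmentations differ below.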
+    # Train dataset
+    train_dataset = DATASET(
+        phase="trainval" if args.trainval else "train",
+        train_augmentations=get_train_augmentations(config),
+        instance_cutmix=config["augmentations"]["instance_cutmix"],
+        **kwargs,
+    )
+
+    # Validation dataset
+    val_dataset = DATASET(
+        phase="val",
+        **kwargs,
+    )
+
+    return train_dataset, val_dataset
+
+
+def get_dataloader(train_dataset, val_dataset, args):
+
+    if args.distributed:
+        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
+    else:
+        train_sampler = None
+        val_sampler = None
+
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset,
+        batch_size=args.batch_size,
+        shuffle=(train_sampler is None),
+        num_workers=args.workers,
+        pin_memory=True,
+        sampler=train_sampler,
+        drop_last=True,
+        collate_fn=Collate(),
+    )
+    val_loader = torch.utils.data.DataLoader(
+        val_dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.workers,
+        pin_memory=True,
+        sampler=val_sampler,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+
+    return train_loader, val_loader, train_sampler
+
+
+def get_optimizer(parameters, config):
+    return torch.optim.AdamW(
+        parameters,
+        lr=config["optim"]["lr"],
+        weight_decay=config["optim"]["weight_decay"],
+    )
+
+
+def get_scheduler(optimizer, config, len_train_loader):
+    scheduler = torch.optim.lr_scheduler.LambdaLR(
+        optimizer,
+        WarmupCosine(
+            config["scheduler"]["epoch_warmup"] * len_train_loader,
+            config["scheduler"]["max_epoch"] * len_train_loader,
+            config["scheduler"]["min_lr"] / config["optim"]["lr"],
+        ),
+    )
+    return scheduler
+
+
+def distributed_training(gpu, ngpus_per_node, args, config):
+
+    # --- Init. distributed training
+    args.gpu = gpu
+    if args.gpu is not None:
+        print(f"Use GPU: {args.gpu} for training")
+    if args.distributed:
+        args.rank = args.rank * ngpus_per_node + gpu
+        torch.distributed.init_process_group(
+            backend=args.dist_backend,
+            init_method=args.dist_url,
+            world_size=args.world_size,
+            rank=args.rank,
+        )
+
+    # --- Build network
+    model = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+
+    # --- Per-process batch size and number of workers
+    args.batch_size = config["dataloader"]["batch_size"]
+    args.workers = config["dataloader"]["num_workers"]
+    if args.distributed:
+        # For multiprocessing distributed, DistributedDataParallel constructor
+        # should always set the single device scope, otherwise,
+        # DistributedDataParallel will use all available devices.
+        torch.cuda.set_device(args.gpu)
+        model.cuda(args.gpu)
+        # When using a single GPU per process and per
+        # DistributedDataParallel, we need to divide the batch size
+        # ourselves based on the total number of GPUs of the current node.
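+        # e.g. a config batch size of 8 on a node with 4 GPUs gives each
+        # process a local batch size of 2.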
+        args.batch_size = int(config["dataloader"]["batch_size"] / ngpus_per_node)
+        args.workers = int(
+            (config["dataloader"]["num_workers"] + ngpus_per_node - 1) / ngpus_per_node
+        )
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
+    elif args.gpu is not None:
+        # Training on one GPU
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+    else:
+        # DataParallel will divide and allocate batch_size to all available GPUs
+        model = torch.nn.DataParallel(model).cuda()
+    if args.gpu == 0 or args.gpu is None:
+        print(f"Model:\n{model}")
+        nb_param = sum([p.numel() for p in model.parameters()]) / 1e6
+        print(f"{nb_param} x 10^6 trainable parameters")
+
+    # --- Optimizer
+    optim = get_optimizer(model.parameters(), config)
+
+    # --- Dataset
+    train_dataset, val_dataset = get_datasets(config, args)
+    train_loader, val_loader, train_sampler = get_dataloader(
+        train_dataset, val_dataset, args
+    )
+
+    # --- Loss function
+    loss = SemSegLoss(
+        config["classif"]["nb_class"],
+        lovasz_weight=config["loss"]["lovasz"],
+    ).cuda(args.gpu)
+
+    # --- Learning rate scheduler: linear warmup followed by cosine annealing
+    scheduler = get_scheduler(optim, config, len(train_loader))
+
+    # --- Training
+    mng = TrainingManager(
+        model,
+        loss,
+        train_loader,
+        val_loader,
+        train_sampler,
+        optim,
+        scheduler,
+        config["scheduler"]["max_epoch"],
+        args.log_path,
+        args.gpu,
+        args.world_size,
+        args.fp16,
+        LIST_DATASETS.get(args.dataset.lower()).CLASS_NAME,
+        tensorboard=(not args.eval),
+    )
+    if args.restart:
+        mng.load_state()
+    if args.eval:
+        mng.one_epoch(training=False)
+    else:
+        mng.train()
+
+
+def main(args, config):
+
+    # --- Fixed args
+    # Device
+    args.device = "cuda"
+    # Node rank for distributed training
+    args.rank = 0
+    # Number of nodes for distributed training
+    args.world_size = 1
+    # URL used to set up distributed training
+    args.dist_url = "tcp://127.0.0.1:4444"
+    # Distributed backend
+    args.dist_backend = "nccl"
+    # Distributed processing
+    args.distributed = args.multiprocessing_distributed
+
+    # Create log directory
+    os.makedirs(args.log_path, exist_ok=True)
+    if args.seed is not None:
+        random.seed(args.seed)
+        np.random.seed(args.seed)
+        torch.manual_seed(args.seed)
+        torch.cuda.manual_seed(args.seed)
+        os.environ["PYTHONHASHSEED"] = str(args.seed)
+
+    if args.gpu is not None:
+        args.gpu = 0
+        args.distributed = False
+        args.multiprocessing_distributed = False
+        warnings.warn(
+            "You have chosen a specific GPU. This will completely disable data parallelism."
+        )
+
+    # Extract instances for cutmix
+    if config["augmentations"]["instance_cutmix"]:
+        get_datasets(config, args)
+
+    ngpus_per_node = torch.cuda.device_count()
+    if args.multiprocessing_distributed:
+        # Since we have ngpus_per_node processes per node, the total world_size
+        # needs to be adjusted accordingly
+        args.world_size = ngpus_per_node * args.world_size
+        # Use torch.multiprocessing.spawn to launch distributed processes: the
+        # main_worker process function
+        torch.multiprocessing.spawn(
+            distributed_training,
+            nprocs=ngpus_per_node,
+            args=(ngpus_per_node, args, config),
+        )
+    else:
+        # Simply call main_worker function
+        distributed_training(args.gpu, ngpus_per_node, args, config)
+
+
+def get_default_parser():
+    parser = argparse.ArgumentParser(description="Training")
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        help="Name of the dataset",
+        default="nuscenes",
+    )
+    parser.add_argument(
+        "--path_dataset",
+        type=str,
+        help="Path to dataset",
+        default="/datasets_local/nuscenes/",
+    )
+    parser.add_argument(
+        "--log_path", type=str, required=True, help="Path to log folder"
+    )
+    parser.add_argument(
+        "-r", "--restart", action="store_true", default=False, help="Restart training"
+    )
+    parser.add_argument(
+        "--seed", default=None, type=int, help="Seed for initializing training"
+    )
+    parser.add_argument(
+        "--gpu", default=None, type=int, help="Set to any number to use gpu 0"
+    )
+    parser.add_argument(
+        "--multiprocessing-distributed",
+        action="store_true",
+        help="Use multi-processing distributed training to launch "
+        "N processes per node, which has N GPUs. This is the "
+        "fastest way to use PyTorch for either single node or "
+        "multi node data parallel training",
+    )
+    parser.add_argument(
+        "--fp16",
+        action="store_true",
+        default=False,
+        help="Enable autocast for mixed precision training",
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to model config"
+    )
+    parser.add_argument(
+        "--trainval",
+        action="store_true",
+        default=False,
+        help="Use train + val as train set",
+    )
+    parser.add_argument(
+        "--eval",
+        action="store_true",
+        default=False,
+        help="Run validation only",
+    )
+
+    return parser
+
+
+if __name__ == "__main__":
+
+    parser = get_default_parser()
+    args = parser.parse_args()
+    config = load_model_config(args.config)
+    main(args, config)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0fe28c7
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
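+#
+# Assumed workflow (not stated elsewhere in this diff): install the package in
+# editable mode with `pip install -e .` so that `from waffleiron import
+# Segmenter` resolves in the training and evaluation scripts.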
+
+
+from setuptools import setup
+from setuptools import find_packages
+
+setup(name="waffleiron", packages=find_packages())
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..f78a20f
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
diff --git a/utils/lovasz.py b/utils/lovasz.py
new file mode 100755
index 0000000..5adc74b
--- /dev/null
+++ b/utils/lovasz.py
@@ -0,0 +1,346 @@
+"""
+Lovasz-Softmax and Jaccard hinge loss in PyTorch
+Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
+Code downloaded from:
+https://github.com/edwardzhou130/PolarSeg/blob/master/network/lovasz_losses.py
+"""
+
+
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import numpy as np
+
+try:
+    from itertools import ifilterfalse
+except ImportError:  # py3k
+    from itertools import filterfalse as ifilterfalse
+
+
+def lovasz_grad(gt_sorted):
+    """
+    Computes gradient of the Lovasz extension w.r.t sorted errors
+    See Alg. 1 in paper
+    """
+    p = len(gt_sorted)
+    gts = gt_sorted.sum()
+    intersection = gts - gt_sorted.float().cumsum(0)
+    union = gts + (1 - gt_sorted).float().cumsum(0)
+    jaccard = 1.0 - intersection / union
+    if p > 1:  # cover 1-pixel case
+        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
+    return jaccard
+
+
+def iou_binary(preds, labels, EMPTY=1.0, ignore=None, per_image=True):
+    """
+    IoU for foreground class
+    binary: 1 foreground, 0 background
+    """
+    if not per_image:
+        preds, labels = (preds,), (labels,)
+    ious = []
+    for pred, label in zip(preds, labels):
+        intersection = ((label == 1) & (pred == 1)).sum()
+        union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()
+        if not union:
+            iou = EMPTY
+        else:
+            iou = float(intersection) / float(union)
+        ious.append(iou)
+    iou = mean(ious)  # mean across images if per_image
+    return 100 * iou
+
+
+def iou(preds, labels, C, EMPTY=1.0, ignore=None, per_image=False):
+    """
+    Array of IoU for each (non ignored) class
+    """
+    if not per_image:
+        preds, labels = (preds,), (labels,)
+    ious = []
+    for pred, label in zip(preds, labels):
+        iou = []
+        for i in range(C):
+            if (
+                i != ignore
+            ):  # The ignored label is sometimes among predicted classes (ENet - CityScapes)
+                intersection = ((label == i) & (pred == i)).sum()
+                union = ((label == i) | ((pred == i) & (label != ignore))).sum()
+                if not union:
+                    iou.append(EMPTY)
+                else:
+                    iou.append(float(intersection) / float(union))
+        ious.append(iou)
+    ious = [mean(iou) for iou in zip(*ious)]  # mean across images if per_image
+    return 100 * np.array(ious)
+
+
+# --------------------------- BINARY LOSSES ---------------------------
+
+
+def lovasz_hinge(logits, labels, per_image=True, ignore=None):
+    r"""
+    Binary Lovasz hinge loss
+    logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+    labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+    per_image: compute the loss per image instead of per batch
+    ignore: void class id
+    """
+    if per_image:
+        loss = mean(
+            lovasz_hinge_flat(
+                *flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)
+            )
+            for log, lab in zip(logits, labels)
+        )
+    else:
+        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
+    return loss
+
+
+def lovasz_hinge_flat(logits, labels):
+    r"""
+    Binary Lovasz hinge loss
+    logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
+    labels: [P] Tensor, binary ground truth labels (0 or 1)
+    """
+    if len(labels) == 0:
+        # only void pixels, the gradients should be 0
+        return logits.sum() * 0.0
+    signs = 2.0 * labels.float() - 1.0
+    errors = 1.0 - logits * Variable(signs)
+    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
+    perm = perm.data
+    gt_sorted = labels[perm]
+    grad = lovasz_grad(gt_sorted)
+    loss = torch.dot(F.relu(errors_sorted), Variable(grad))
+    return loss
+
+
+def flatten_binary_scores(scores, labels, ignore=None):
+    """
+    Flattens predictions in the batch (binary case)
+    Remove labels equal to 'ignore'
+    """
+    scores = scores.view(-1)
+    labels = labels.view(-1)
+    if ignore is None:
+        return scores, labels
+    valid = labels != ignore
+    vscores = scores[valid]
+    vlabels = labels[valid]
+    return vscores, vlabels
+
+
+class StableBCELoss(torch.nn.modules.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input, target):
+        neg_abs = -input.abs()
+        loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
+        return loss.mean()
+
+
+def binary_xloss(logits, labels, ignore=None):
+    r"""
+    Binary Cross entropy loss
+    logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+    labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+    ignore: void class id
+    """
+    logits, labels = flatten_binary_scores(logits, labels, ignore)
+    loss = StableBCELoss()(logits, Variable(labels.float()))
+    return loss
+
+
+# --------------------------- MULTICLASS LOSSES ---------------------------
+
+
+def lovasz_softmax(probas, labels, classes="present", per_image=False, ignore=None):
+    """
+    Multi-class Lovasz-Softmax loss
+    probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
+            Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
+    labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
+    classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+    per_image: compute the loss per image instead of per batch
+    ignore: void class labels
+    """
+    if per_image:
+        loss = mean(
+            lovasz_softmax_flat(
+                *flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore),
+                classes=classes
+            )
+            for prob, lab in zip(probas, labels)
+        )
+    else:
+        loss = lovasz_softmax_flat(
+            *flatten_probas(probas, labels, ignore), classes=classes
+        )
+    return loss
+
+
+def lovasz_softmax_flat(probas, labels, classes="present"):
+    """
+    Multi-class Lovasz-Softmax loss
+    probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
+    labels: [P] Tensor, ground truth labels (between 0 and C - 1)
+    classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + elif probas.dim() == 5: + # 3D segmentation + B, C, L, H, W = probas.size() + probas = probas.contiguous().view(B, C, L, H * W) + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = labels != ignore + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + +def xloss(logits, labels, ignore=None): + """ + Cross entropy loss + """ + return F.cross_entropy(logits, Variable(labels), ignore_index=255) + + +def jaccard_loss(probas, labels, ignore=None, smooth=100, bk_class=None): + """ + Something wrong with this loss + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + vprobas, vlabels = flatten_probas(probas, labels, ignore) + + true_1_hot = torch.eye(vprobas.shape[1])[vlabels] + + if bk_class: + one_hot_assignment = torch.ones_like(vlabels) + one_hot_assignment[vlabels == bk_class] = 0 + one_hot_assignment = one_hot_assignment.float().unsqueeze(1) + true_1_hot = true_1_hot * one_hot_assignment + + true_1_hot = true_1_hot.to(vprobas.device) + intersection = torch.sum(vprobas * true_1_hot) + cardinality = torch.sum(vprobas + true_1_hot) + loss = (intersection + smooth / (cardinality - intersection + smooth)).mean() + return (1 - loss) * smooth + + +def hinge_jaccard_loss( + probas, labels, ignore=None, classes="present", hinge=0.1, smooth=100 +): + """ + Multi-class Hinge Jaccard loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 
+ ignore: void class labels + """ + vprobas, vlabels = flatten_probas(probas, labels, ignore) + C = vprobas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + for c in class_to_sum: + if c in vlabels: + c_sample_ind = vlabels == c + cprobas = vprobas[c_sample_ind, :] + non_c_ind = np.array([a for a in class_to_sum if a != c]) + class_pred = cprobas[:, c] + max_non_class_pred = torch.max(cprobas[:, non_c_ind], dim=1)[0] + TP = ( + torch.sum(torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0) + + smooth + ) + FN = torch.sum( + torch.clamp(max_non_class_pred - class_pred, min=-hinge) + hinge + ) + + if (~c_sample_ind).sum() == 0: + FP = 0 + else: + nonc_probas = vprobas[~c_sample_ind, :] + class_pred = nonc_probas[:, c] + max_non_class_pred = torch.max(nonc_probas[:, non_c_ind], dim=1)[0] + FP = torch.sum( + torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0 + ) + + losses.append(1 - TP / (TP + FP + FN)) + + if len(losses) == 0: + return 0 + return mean(losses) + + +# --------------------------- HELPER FUNCTIONS --------------------------- +def isnan(x): + return x != x + + +def mean(l, ignore_nan=False, empty=0): + """ + nanmean compatible with generators. + """ + l = iter(l) + if ignore_nan: + l = ifilterfalse(isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == "raise": + raise ValueError("Empty mean") + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000..7821e4f --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,63 @@ +# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
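+#
+# fast_hist builds a confusion matrix with rows indexed by the true label and
+# columns by the prediction. A quick sanity check (an illustration, not part
+# of the module):
+#   fast_hist(pred=torch.tensor([0, 1, 1]), label=torch.tensor([0, 1, 0]), n=2)
+#   -> tensor([[1, 1],
+#              [0, 1]])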
+
+
+import torch
+import numpy as np
+from .lovasz import lovasz_softmax_flat
+from torch.nn.functional import softmax
+from torch.nn import Module, CrossEntropyLoss
+
+
+def fast_hist(pred, label, n):
+    assert torch.all(label > -1) & torch.all(pred > -1)
+    assert torch.all(label < n) & torch.all(pred < n)
+    return torch.bincount(n * label + pred, minlength=n**2).reshape(n, n)
+
+
+def per_class_iu(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
+
+
+def overall_accuracy(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist).sum() / hist.sum()
+
+
+def per_class_accuracy(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist) / hist.sum(1)
+
+
+class SemSegLoss(Module):
+    def __init__(self, nb_class, lovasz_weight=1.0, ignore_index=255):
+        super().__init__()
+        self.nb_class = nb_class
+        self.ignore_index = ignore_index
+        self.lovasz_weight = lovasz_weight
+        self.ce = CrossEntropyLoss(ignore_index=ignore_index)
+
+    def forward(self, pred, true):
+        # Cross-entropy term
+        loss = self.ce(pred, true)
+
+        # Optional Lovasz-Softmax term, computed on non-ignored points only
+        if self.lovasz_weight > 0:
+            where = true != self.ignore_index
+            if where.sum() > 0:
+                loss += self.lovasz_weight * lovasz_softmax_flat(
+                    softmax(pred[where], dim=1),
+                    true[where],
+                )
+
+        return loss
diff --git a/utils/scheduler.py b/utils/scheduler.py
new file mode 100644
index 0000000..4b50196
--- /dev/null
+++ b/utils/scheduler.py
@@ -0,0 +1,33 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+
+class WarmupCosine:
+    def __init__(self, warmup_end, max_iter, factor_min):
+        self.max_iter = max_iter
+        self.warmup_end = warmup_end
+        self.factor_min = factor_min
+
+    def __call__(self, step):
+        if step < self.warmup_end:
+            # Linear warmup from 0 to 1
+            factor = step / self.warmup_end
+        else:
+            # Cosine annealing from 1 down to factor_min
+            step = step - self.warmup_end
+            max_iter = self.max_iter - self.warmup_end
+            step = (step / max_iter) * np.pi
+            factor = self.factor_min + 0.5 * (1 - self.factor_min) * (np.cos(step) + 1)
+        return factor
diff --git a/utils/trainer.py b/utils/trainer.py
new file mode 100644
index 0000000..e73e61d
--- /dev/null
+++ b/utils/trainer.py
@@ -0,0 +1,306 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
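+#
+# TrainingManager is driven from launch_train.py; a minimal single-GPU usage
+# sketch (hypothetical values, shown for orientation only):
+#   mng = TrainingManager(net, loss, train_loader, val_loader, None,
+#                         optim, scheduler, max_epoch=45, path="./logs",
+#                         rank=None, world_size=1, fp16=True)
+#   mng.train()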
+
+
+import torch
+import warnings
+import numpy as np
+from tqdm import tqdm
+from torch.cuda.amp import GradScaler
+from torch.utils.tensorboard import SummaryWriter
+from utils.metrics import overall_accuracy, fast_hist, per_class_iu, per_class_accuracy
+
+
+class TrainingManager:
+    def __init__(
+        self,
+        net,
+        loss,
+        loader_train,
+        loader_val,
+        train_sampler,  # If provided, we assume distributed training
+        optim,
+        scheduler,
+        max_epoch,
+        path,
+        rank,
+        world_size,
+        fp16=True,
+        class_names=None,
+        tensorboard=True,
+    ):
+
+        # Optim. methods
+        self.optim = optim
+        self.fp16 = fp16
+        self.scaler = GradScaler() if fp16 else None
+        self.scheduler = scheduler
+
+        # Dataloaders
+        self.max_epoch = max_epoch
+        self.loader_train = loader_train
+        self.loader_val = loader_val
+        self.train_sampler = train_sampler
+        self.class_names = class_names
+
+        # Network
+        self.net = net
+        self.rank = rank
+        self.world_size = world_size
+        print(f"Trainer on gpu: {self.rank}. World size: {self.world_size}.")
+
+        # Loss
+        self.loss = loss
+
+        # Checkpoints
+        self.best_miou = 0
+        self.current_epoch = 0
+        self.path_to_ckpt = path
+
+        # Monitoring
+        if tensorboard and (self.rank == 0 or self.rank is None):
+            self.writer_train = SummaryWriter(
+                path + "/tensorboard/train/",
+                purge_step=self.current_epoch * len(self.loader_train),
+                flush_secs=30,
+            )
+            self.writer_val = SummaryWriter(
+                path + "/tensorboard/val/",
+                purge_step=self.current_epoch,
+                flush_secs=30,
+            )
+        else:
+            self.writer_val = None
+            self.writer_train = None
+
+    def print_log(self, running_loss, oAcc, mAcc, mIoU, ious):
+        if self.rank == 0 or self.rank is None:
+            # Global score
+            log = (
+                f"\nEpoch: {self.current_epoch:d} :\n"
+                + f" Loss = {running_loss:.3f}"
+                + f" - oAcc = {oAcc:.1f}"
+                + f" - mAcc = {mAcc:.1f}"
+                + f" - mIoU = {mIoU:.1f}"
+            )
+            print(log)
+            # Per class score
+            log = ""
+            for i, s in enumerate(ious):
+                if self.class_names is None:
+                    log += f"Class {i}: {100 * s:.1f} - "
+                else:
+                    log += f"{self.class_names[i]}: {100 * s:.1f} - "
+            print(log[:-3])
+            # Recall best mIoU
+            print(f"Best mIoU was {self.best_miou:.1f}.")
+
+    def gather_scores(self, list_tensors):
+        if self.rank == 0:
+            tensor_reduced = [
+                [torch.empty_like(t) for _ in range(self.world_size)]
+                for t in list_tensors
+            ]
+            for t, t_reduced in zip(list_tensors, tensor_reduced):
+                torch.distributed.gather(t, t_reduced)
+            tensor_reduced = [sum(t).cpu() for t in tensor_reduced]
+            return tensor_reduced
+        else:
+            for t in list_tensors:
+                torch.distributed.gather(t)
+
+    def one_epoch(self, training=True):
+
+        # Train or eval mode
+        if training:
+            net = self.net.train()
+            loader = self.loader_train
+            if self.rank == 0 or self.rank is None:
+                print("\nTraining: %d/%d epochs" % (self.current_epoch, self.max_epoch))
+            writer = self.writer_train
+            if self.train_sampler is not None:
+                self.train_sampler.set_epoch(self.current_epoch)
+        else:
+            net = self.net.eval()
+            loader = self.loader_val
+            if self.rank == 0 or self.rank is None:
+                print(
+                    "\nValidation: %d/%d epochs" % (self.current_epoch, self.max_epoch)
+                )
+            writer = self.writer_val
+        print_freq = np.max((len(loader) // 10, 1))
+
+        # Stats
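+        # running_loss accumulates the (detached) loss over mini-batches;
+        # confusion_matrix accumulates class-vs-class counts and is reduced
+        # across processes before computing oAcc / mAcc / mIoU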
+ running_loss = 0.0 + confusion_matrix = 0 + + # Loop over mini-batches + if self.rank == 0 or self.rank is None: + bar_format = "{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}" + loader = tqdm(loader, bar_format=bar_format) + for it, batch in enumerate(loader): + + # Network inputs + feat = batch["feat"].cuda(self.rank, non_blocking=True) + labels = batch["labels_orig"].cuda(self.rank, non_blocking=True) + batch["upsample"] = [ + up.cuda(self.rank, non_blocking=True) for up in batch["upsample"] + ] + cell_ind = batch["cell_ind"].cuda(self.rank, non_blocking=True) + occupied_cell = batch["occupied_cells"].cuda(self.rank, non_blocking=True) + neighbors_emb = batch["neighbors_emb"].cuda(self.rank, non_blocking=True) + net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb) + + # Get prediction and loss + with torch.autocast("cuda", enabled=self.fp16): + # Logits + if training: + out = net(*net_inputs) + else: + with torch.no_grad(): + out = net(*net_inputs) + # Upsample to original resolution + out_upsample = [] + for id_b, closest_point in enumerate(batch["upsample"]): + temp = out[id_b, :, closest_point] + out_upsample.append(temp.T) + out = torch.cat(out_upsample, dim=0) + # Loss + loss = self.loss(out, labels) + running_loss += loss.detach() + + # Confusion matrix + with torch.no_grad(): + nb_class = out.shape[1] + pred_label = out.max(1)[1] + where = labels != 255 + confusion_matrix += fast_hist( + pred_label[where], labels[where], nb_class + ) + + # Logs + if it % print_freq == print_freq - 1 or it == len(loader) - 1: + # Gather scores + if self.train_sampler is not None: + out = self.gather_scores([running_loss, confusion_matrix]) + else: + out = [running_loss.cpu(), confusion_matrix.cpu()] + if self.rank == 0 or self.rank is None: + # Compute scores + oAcc = 100 * overall_accuracy(out[1]) + mAcc = 100 * np.nanmean(per_class_accuracy(out[1])) + ious = per_class_iu(out[1]) + mIoU = 100 * np.nanmean(ious) + running_loss_reduced = out[0].item() / self.world_size / (it + 1) + # Print score + self.print_log(running_loss_reduced, oAcc, mAcc, mIoU, ious) + # Save in tensorboard + if (writer is not None) and (training or it == len(loader) - 1): + header = "Train" if training else "Test" + step = ( + self.current_epoch * len(loader) + it + if training + else self.current_epoch + ) + writer.add_scalar(header + "/loss", running_loss_reduced, step) + writer.add_scalar(header + "/oAcc", oAcc, step) + writer.add_scalar(header + "/mAcc", mAcc, step) + writer.add_scalar(header + "/mIoU", mIoU, step) + writer.add_scalar( + header + "/lr", self.optim.param_groups[0]["lr"], step + ) + + # Gradient step + if training: + self.optim.zero_grad(set_to_none=True) + if self.fp16: + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + loss.backward() + self.optim.step() + if self.scheduler is not None: + self.scheduler.step() + + # Return score + if self.rank == 0 or self.rank is None: + return mIoU + else: + return None + + def load_state(self, best=False): + filename = self.path_to_ckpt + filename += "/ckpt_best.pth" if best else "/ckpt_last.pth" + rank = 0 if self.rank is None else self.rank + ckpt = torch.load( + filename, + map_location=f"cuda:{rank}", + ) + self.net.load_state_dict(ckpt["net"]) + if ckpt.get("optim") is None: + warnings.warn("Optimizer state not available") + else: + self.optim.load_state_dict(ckpt["optim"]) + if self.scheduler is not None: + if ckpt.get("scheduler") is None: + warnings.warn("Scheduler state not available") + else: + 
self.scheduler.load_state_dict(ckpt["scheduler"])
+        if self.fp16:
+            if ckpt.get("scaler") is None:
+                warnings.warn("Scaler state not available")
+            else:
+                self.scaler.load_state_dict(ckpt["scaler"])
+        if ckpt.get("best_miou") is not None:
+            self.best_miou = ckpt["best_miou"]
+        if ckpt.get("epoch") is not None:
+            self.current_epoch = ckpt["epoch"] + 1
+        print(f"Checkpoint loaded on cuda:{rank}: {self.path_to_ckpt}")
+
+    def save_state(self, best=False):
+        if self.rank == 0 or self.rank is None:
+            dict_to_save = {
+                "epoch": self.current_epoch,
+                "net": self.net.state_dict(),
+                "optim": self.optim.state_dict(),
+                "scheduler": self.scheduler.state_dict()
+                if self.scheduler is not None
+                else None,
+                "scaler": self.scaler.state_dict() if self.fp16 else None,
+                "best_miou": self.best_miou,
+            }
+            filename = self.path_to_ckpt
+            filename += "/ckpt_best.pth" if best else "/ckpt_last.pth"
+            torch.save(dict_to_save, filename)
+
+    def train(self):
+        for _ in range(self.current_epoch, self.max_epoch):
+            # Train
+            self.one_epoch(training=True)
+            # Val
+            miou = self.one_epoch(training=False)
+            # Save best checkpoint
+            if miou is not None and miou > self.best_miou:
+                self.best_miou = miou
+                self.save_state(best=True)
+                print(f"\n\n*** New best mIoU: {self.best_miou:.1f}.\n")
+            # Save last checkpoint
+            self.save_state()
+            # Increase epoch number
+            self.current_epoch += 1
+        if self.rank == 0 or self.rank is None:
+            print("Finished Training")
diff --git a/utils/transforms.py b/utils/transforms.py
new file mode 100644
index 0000000..f4a6625
--- /dev/null
+++ b/utils/transforms.py
@@ -0,0 +1,311 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
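+#
+# The transforms below follow a common (pcloud, labels) -> (pcloud, labels)
+# interface. A typical composition, mirroring get_train_augmentations in
+# launch_train.py, would be:
+#   augment = Compose((
+#       LimitNumPoints(dims=(0, 1, 2), max_point=30000, random=True),
+#       RandomApply(FlipXY(inplace=True), prob=2 / 3),
+#       Rotation(inplace=True, dim=2),
+#   ))
+#   pc, labels = augment(pc, labels)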
+
+
+import os
+import torch
+import numpy as np
+from glob import glob
+
+
+class Compose:
+    def __init__(self, transformations):
+        self.transformations = transformations
+
+    def __call__(self, pcloud, labels):
+        for t in self.transformations:
+            pcloud, labels = t(pcloud, labels)
+        return pcloud, labels
+
+
+class RandomApply:
+    def __init__(self, transformation, prob=0.5):
+        self.prob = prob
+        self.transformation = transformation
+
+    def __call__(self, pcloud, labels):
+        if torch.rand(1) < self.prob:
+            pcloud, labels = self.transformation(pcloud, labels)
+        return pcloud, labels
+
+
+class Transformation:
+    def __init__(self, inplace=False):
+        self.inplace = inplace
+
+    def __call__(self, pcloud, labels):
+        if labels is None:
+            return pcloud if self.inplace else np.array(pcloud, copy=True)
+
+        out = (
+            (pcloud, labels)
+            if self.inplace
+            else (np.array(pcloud, copy=True), np.array(labels, copy=True))
+        )
+        return out
+
+
+class Identity(Transformation):
+    def __init__(self, inplace=False):
+        super().__init__(inplace)
+
+    def __call__(self, pcloud, labels):
+        return super().__call__(pcloud, labels)
+
+
+class Rotation(Transformation):
+    def __init__(self, dim=2, range=np.pi, inplace=False):
+        super().__init__(inplace)
+        self.range = range
+        self.inplace = inplace
+        if dim == 2:
+            self.dims = (0, 1)
+        elif dim == 1:
+            self.dims = (0, 2)
+        elif dim == 0:
+            self.dims = (1, 2)
+
+    def __call__(self, pcloud, labels):
+        # Draw a random rotation angle
+        theta = (2 * torch.rand(1)[0] - 1) * self.range
+        # Build rotation matrix
+        rot = np.array(
+            [
+                [np.cos(theta), np.sin(theta)],
+                [-np.sin(theta), np.cos(theta)],
+            ]
+        )
+        # Apply rotation
+        pcloud, labels = super().__call__(pcloud, labels)
+        pcloud[:, self.dims] = pcloud[:, self.dims] @ rot
+        return pcloud, labels
+
+
+class Scale(Transformation):
+    def __init__(self, dims=(0, 1), range=0.05, inplace=False):
+        super().__init__(inplace)
+        self.dims = dims
+        self.range = range
+
+    def __call__(self, pcloud, labels):
+        pcloud, labels = super().__call__(pcloud, labels)
+        scale = 1 + (2 * torch.rand(1).item() - 1) * self.range
+        pcloud[:, self.dims] *= scale
+        return pcloud, labels
+
+
+class FlipXY(Transformation):
+    def __init__(self, inplace=False):
+        super().__init__(inplace=inplace)
+
+    def __call__(self, pcloud, labels):
+        pcloud, labels = super().__call__(pcloud, labels)
+        # Flip either the x or the y axis, picked at random
+        dim = torch.randint(2, (1,))[0]
+        pcloud[:, dim] *= -1.0
+        return pcloud, labels
+
+
+class LimitNumPoints(Transformation):
+    def __init__(self, dims=(0, 1, 2), max_point=30000, random=False):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.max_points = max_point
+        self.random = random
+        assert max_point > 0
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+        if pc.shape[0] > self.max_points:
+            if self.random:
+                center = torch.randint(pc.shape[0], (1,))[0]
+                center = pc[center : center + 1, self.dims]
+            else:
+                center = np.zeros((1, len(self.dims)))
+            idx = np.argsort(np.square(pc[:, self.dims] - center).sum(axis=1))[
+                : self.max_points
+            ]
+            pc, labels = pc[idx], labels[idx]
+        return pc, labels
+
+
+class Crop(Transformation):
+    def __init__(self, dims=(0, 1, 2), fov=((-5, -5, -5), (5, 5, 5)), eps=1e-4):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.fov = fov
+        self.eps = eps
+        assert len(fov[0]) == len(fov[1]), "Min and Max FOV must have the same length."
+        for i, (vmin, vmax) in enumerate(zip(*fov)):
+            assert (
+                vmin < vmax
+            ), f"Field of view: min ({vmin}) < max ({vmax}) is expected on dimension {i}."
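+    # A point is kept only if it lies strictly inside (min + eps, max - eps)
+    # on every cropped dimension.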
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+
+        where = None
+        for i, d in enumerate(self.dims):
+            # Keep points strictly inside the field of view on dimension d
+            temp = (pc[:, d] > self.fov[0][i] + self.eps) & (
+                pc[:, d] < self.fov[1][i] - self.eps
+            )
+            where = temp if where is None else where & temp
+
+        return pc[where], labels[where]
+
+
+class Voxelize(Transformation):
+    def __init__(self, dims=(0, 1, 2), voxel_size=0.1, random=False):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.voxel_size = voxel_size
+        self.random = random
+        assert voxel_size >= 0
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+        if self.voxel_size <= 0:
+            return pc, labels
+
+        if self.random:
+            permute = torch.randperm(pc.shape[0])
+            pc, labels = pc[permute], labels[permute]
+
+        pc_shift = pc[:, self.dims] - pc[:, self.dims].min(0, keepdims=True)
+
+        _, ind = np.unique(
+            (pc_shift / self.voxel_size).astype("int"), return_index=True, axis=0
+        )
+
+        return pc[ind, :], None if labels is None else labels[ind]
+
+
+class InstanceCutMix(Transformation):
+
+    # Expected number of instances per class id in the pre-extracted bank,
+    # used to check whether the bank has already been built
+    EXPECTED_BANK_SIZE = {
+        "train": {1: 5083, 2: 3092, 5: 8084, 6: 1551, 7: 560},
+        "trainval": {1: 8213, 2: 4169, 5: 12190, 6: 2943, 7: 701},
+    }
+
+    def __init__(self, phase="train"):
+        """Instance cutmix coded only for SemanticKITTI"""
+        super().__init__(inplace=True)
+
+        raise NotImplementedError("Include latest version")
+
+        self.phase = phase
+        self.rootdir = "/root/local_storage/semantic_kitti_instance_" + self.phase
+        self.bank = {1: [], 2: [], 5: [], 6: [], 7: []}
+        for key in self.bank.keys():
+            self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+        self.loaded = self.test_loaded()
+        # v2
+        self.rot = Compose(
+            (
+                FlipXY(inplace=True),
+                Rotation(inplace=True),
+                Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+            )
+        )
+        self.nb_to_add = 40
+        self.vox = Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+        """ v1
+        self.rot = Rotation(inplace=False)
+        self.max_size = 100  # Unused
+        self.nb_to_add = 20
+        self.vox = Voxelize(dims=(0, 1, 2), voxel_size=.1, random=True)
+        """
+
+    def test_loaded(self):
+        expected = self.EXPECTED_BANK_SIZE.get(self.phase, {})
+        for key, size in expected.items():
+            if len(self.bank[key]) != size:
+                print(len(self.bank[key]), size)
+                return False
+        return True
+
+    def add_in_bank(self, pc, class_label, instance_label):
+        for id_class in self.bank.keys():
+            where_class = class_label == id_class
+            all_instances = np.unique(instance_label[where_class])
+            for id_instance in all_instances:
+                # Segment instance
+                where_ins = instance_label == id_instance
+                if where_ins.sum() <= 5:
+                    continue
+                pc_to_add = pc[where_ins, :]
+                # Center instance
+                pc_to_add[:, :2] -= pc_to_add[:, :2].mean(0, keepdims=True)
+                pc_to_add[:, 2] -= pc_to_add[:, 2].min(0, keepdims=True)
+                # Store instance on disk
+                pathfile = os.path.join(
+                    self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+                )
+                os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+                pc_to_add.tofile(pathfile)
+                self.bank[id_class].append(pathfile)
+
+    def add_in_pc(self, pc, class_label):
+        new_pc = [pc]
+        new_label = [class_label]
+        # Find locations where to add new objects (on a surface)
+        pc_vox, class_label_vox = self.vox(pc, class_label)
+
+        # v2
+        where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+
+        """ v1
+        where_surface = np.where( ( (class_label_vox>=8) & (class_label_vox<=11) ) | (class_label_vox==16) )[0]
+        """
+
+        where_surface = where_surface[torch.randperm(len(where_surface))]
+        id_tot = 0
+        for id_class in self.bank.keys():
+            which_one = torch.randint(len(self.bank[id_class]), (self.nb_to_add,))
+            for ii in range(self.nb_to_add):
+                p = pc_vox[where_surface[id_tot]]
+                obj = self.bank[id_class][which_one[ii]]
+                obj = np.fromfile(obj, dtype=np.float32).reshape((-1, 4))
+                obj, _ = self.rot(obj, 1)
+                obj[:, :3] += p[:3][None]
+                new_pc.append(obj)
+                new_label.append(np.ones((obj.shape[0],), dtype=np.int64) * id_class)
+                id_tot += 1
+        return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+    def __call__(self, pc, class_label, instance_label):
+        if not self.loaded:
+            self.add_in_bank(pc, class_label, instance_label)
+            return np.zeros((2, 4)), None
+        return self.add_in_pc(pc, class_label)
diff --git a/waffleiron/__init__.py b/waffleiron/__init__.py
new file mode 100644
index 0000000..ee370e5
--- /dev/null
+++ b/waffleiron/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .backbone import WaffleIron
+from .segmenter import Segmenter
+
+__all__ = ["WaffleIron", "Segmenter"]
diff --git a/waffleiron/backbone.py b/waffleiron/backbone.py
new file mode 100644
index 0000000..a6f6abd
--- /dev/null
+++ b/waffleiron/backbone.py
@@ -0,0 +1,146 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
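+#
+# Shape conventions used in this file:
+#   tokens:    B x C x N (one feature vector per 3D point)
+#   "flatten": sparse matrix averaging the N point tokens into H*W 2D cells
+#   "inflate": sparse matrix copying each cell feature back onto its points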
+
+
+import torch
+import numpy as np
+import torch.nn as nn
+from torch import autocast
+
+
+def build_proj_matrix(indices_non_zeros, occupied_cell, batch_size, num_2d_cells):
+    num_points = indices_non_zeros.shape[1] // batch_size
+    matrix_shape = (batch_size, num_2d_cells, num_points)
+
+    # Sparse projection matrix for Inflate step
+    inflate = torch.sparse_coo_tensor(
+        indices_non_zeros, occupied_cell.reshape(-1), matrix_shape
+    ).transpose(1, 2)
+
+    # Count the number of points in each cell (used in the flatten step)
+    with autocast("cuda", enabled=False):
+        num_points_per_cells = torch.bmm(
+            inflate, torch.bmm(inflate.transpose(1, 2), occupied_cell.unsqueeze(-1))
+        )
+
+    # Sparse projection matrix for Flatten step (projection & average in each 2D cell)
+    weight_per_point = 1.0 / (num_points_per_cells.reshape(-1) + 1e-6)
+    weight_per_point *= occupied_cell.reshape(-1)
+    flatten = torch.sparse_coo_tensor(indices_non_zeros, weight_per_point, matrix_shape)
+
+    return {"flatten": flatten, "inflate": inflate}
+
+
+class ChannelMix(nn.Module):
+    def __init__(self, channels):
+        super().__init__()
+        self.norm = nn.BatchNorm1d(channels)
+        self.mlp = nn.Sequential(
+            nn.Conv1d(channels, channels, 1),
+            nn.ReLU(inplace=True),
+            nn.Conv1d(channels, channels, 1),
+        )
+        self.scale = nn.Conv1d(
+            channels, channels, 1, bias=False, groups=channels
+        )  # Implements LayerScale
+
+    def forward(self, tokens):
+        """tokens <- tokens + LayerScale( MLP( BN(tokens) ) )"""
+        return tokens + self.scale(self.mlp(self.norm(tokens)))
+
+
+class SpatialMix(nn.Module):
+    def __init__(self, channels, grid_shape):
+        super().__init__()
+        self.H, self.W = grid_shape
+        self.norm = nn.BatchNorm1d(channels)
+        self.ffn = nn.Sequential(
+            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+        )
+        self.scale = nn.Conv1d(
+            channels, channels, 1, bias=False, groups=channels
+        )  # Implements LayerScale
+        self.grid_shape = grid_shape
+
+    def extra_repr(self):
+        return f"(grid): [{self.grid_shape[0]}, {self.grid_shape[1]}]"
+
+    def forward(self, tokens, sp_mat):
+        """tokens <- tokens + LayerScale( Inflate( FFN( Flatten( BN(tokens) ) ) ) )"""
+        B, C, N = tokens.shape
+        residual = self.norm(tokens)
+        # Flatten
+        with autocast("cuda", enabled=False):
+            residual = torch.bmm(
+                sp_mat["flatten"], residual.transpose(1, 2).float()
+            ).transpose(1, 2)
+        residual = residual.reshape(B, C, self.H, self.W)
+        # FFN
+        residual = self.ffn(residual)
+        # Inflate
+        residual = residual.reshape(B, C, self.H * self.W)
+        with autocast("cuda", enabled=False):
+            residual = torch.bmm(
+                sp_mat["inflate"], residual.transpose(1, 2).float()
+            ).transpose(1, 2)
+        residual = residual.reshape(B, C, N)
+        return tokens + self.scale(residual)
+
+
+class WaffleIron(nn.Module):
+    def __init__(self, channels, depth, grids_shape):
+        super().__init__()
+        self.grids_shape = grids_shape
+        self.channel_mix = nn.ModuleList([ChannelMix(channels) for _ in range(depth)])
+        self.spatial_mix = nn.ModuleList(
+            [
+                SpatialMix(channels, grids_shape[d % len(grids_shape)])
+                for d in range(depth)
+            ]
+        )
+
+    def forward(self, tokens, cell_ind, occupied_cell):
+
+        # Build projection matrices
+        batch_size, num_points = tokens.shape[0], tokens.shape[-1]
+        point_ind = (
+            torch.arange(num_points, device=tokens.device)
+            .unsqueeze(0)
+            .expand(batch_size, -1)
+            .reshape(1, -1)
+        )
+        batch_ind = (
+            torch.arange(batch_size, device=tokens.device)
+            .unsqueeze(1)
+            .expand(-1, num_points)
+            .reshape(1, -1)
+        )
+        non_zeros_ind = []
+        for i in range(cell_ind.shape[1]):
+            non_zeros_ind.append(
+                torch.cat((batch_ind, cell_ind[:, i].reshape(1, -1), point_ind), dim=0)
+            )
+        sp_mat = [
+            build_proj_matrix(ind, occupied_cell, batch_size, np.prod(sh))
+            for ind, sh in zip(non_zeros_ind, self.grids_shape)
+        ]
+
+        # Actual backbone
+        for d, (smix, cmix) in enumerate(zip(self.spatial_mix, self.channel_mix)):
+            tokens = smix(tokens, sp_mat[d % len(sp_mat)])
+            tokens = cmix(tokens)
+
+        return tokens
diff --git a/waffleiron/embedding.py b/waffleiron/embedding.py
new file mode 100644
index 0000000..6bd837e
--- /dev/null
+++ b/waffleiron/embedding.py
@@ -0,0 +1,64 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import torch.nn as nn
+
+
+class Embedding(nn.Module):
+    def __init__(self, channels_in, channels_out):
+        super().__init__()
+
+        # Normalize inputs
+        self.norm = nn.BatchNorm1d(channels_in)
+
+        # Point Embedding
+        self.conv1 = nn.Conv1d(channels_in, channels_out, 1)
+
+        # Neighborhood embedding
+        self.conv2 = nn.Sequential(
+            nn.BatchNorm2d(channels_in),
+            nn.Conv2d(channels_in, channels_out, 1, bias=False),
+            nn.BatchNorm2d(channels_out),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(channels_out, channels_out, 1, bias=False),
+        )
+
+        # Merge point and neighborhood embeddings
+        self.final = nn.Conv1d(2 * channels_out, channels_out, 1, bias=True, padding=0)
+
+    def forward(self, x, neighbors):
+        """x: B x C_in x N. neighbors: B x K x N. Output: B x C_out x N"""
+        # Normalize input
+        x = self.norm(x)
+
+        # Point embedding
+        point_emb = self.conv1(x)
+
+        # Neighborhood embedding
+        gather = []
+        # Gather neighbors around each center point, skipping the first
+        # neighbor, which is the center point itself
+        for ind_nn in range(1, neighbors.shape[1]):
+            temp = neighbors[:, ind_nn : ind_nn + 1, :].expand(-1, x.shape[1], -1)
+            gather.append(torch.gather(x, 2, temp).unsqueeze(-1))
+        # Relative coordinates
+        neigh_emb = torch.cat(gather, -1) - x.unsqueeze(-1)  # Size: B x C_in x N x (K - 1)
+        # Embedding
+        neigh_emb = self.conv2(neigh_emb).max(-1)[0]
+
+        # Merge both embeddings
+        return self.final(torch.cat((point_emb, neigh_emb), dim=1))
diff --git a/waffleiron/segmenter.py b/waffleiron/segmenter.py
new file mode 100644
index 0000000..c08baa2
--- /dev/null
+++ b/waffleiron/segmenter.py
@@ -0,0 +1,34 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch.nn as nn +from .backbone import WaffleIron +from .embedding import Embedding + + +class Segmenter(nn.Module): + def __init__(self, input_channels, feat_channels, nb_class, depth, grid_shape): + super().__init__() + # Embedding layer + self.embed = Embedding(input_channels, feat_channels) + # WaffleIron backbone + self.waffleiron = WaffleIron(feat_channels, depth, grid_shape) + # Classification layer + self.classif = nn.Conv1d(feat_channels, nb_class, 1) + + def forward(self, feats, cell_ind, occupied_cell, neighbors): + tokens = self.embed(feats, neighbors) + tokens = self.waffleiron(tokens, cell_ind, occupied_cell) + return self.classif(tokens)
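+
+
+if __name__ == "__main__":
+    # Minimal smoke test: a hypothetical sketch, not part of the training
+    # pipeline. All sizes below are made up; they only respect the shape
+    # contract of forward() documented above.
+    import torch
+
+    B, N, K = 2, 1024, 16  # batch size, number of points, neighbors per point
+    grids = [[32, 32], [32, 8]]  # two 2D projection grids
+    model = Segmenter(
+        input_channels=5, feat_channels=64, nb_class=19, depth=4, grid_shape=grids
+    )
+    feats = torch.randn(B, 5, N)
+    # One flat 2D-cell index per point and per grid, in [0, H * W)
+    cell_ind = torch.stack(
+        [torch.randint(g[0] * g[1], (B, N)) for g in grids], dim=1
+    )
+    occupied_cell = torch.ones(B, N)
+    neighbors = torch.randint(N, (B, K, N))
+    out = model(feats, cell_ind, occupied_cell, neighbors)
+    print(out.shape)  # expected: torch.Size([2, 19, 1024])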