diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..67a05c6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Compiled python modules.
+*.pyc
+
+# Python egg metadata, regenerated from source files by setuptools.
+/*.egg-info
+/*.egg
+
+# Data
+*.npy
+*.npz
+*.txt
diff --git a/LICENSE b/LICENSE
new file mode 100755
index 0000000..045e216
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,194 @@
+WaffleIron
+
+Copyright 2022 Valeo
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+
+ Apache License
+ Version 2.0, January 2004
+ https://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..d8329d4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,234 @@
+# WaffleIron
+
+![](./illustration.png)
+
+[**Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation**]()
+[*Gilles Puy*<sup>1</sup>](https://sites.google.com/site/puygilles/home),
+[*Alexandre Boulch*<sup>1</sup>](http://boulch.eu),
+[*Renaud Marlet*<sup>1,2</sup>](http://imagine.enpc.fr/~marletr/)
+<sup>1</sup>*valeo.ai, France* and <sup>2</sup>*LIGM, Ecole des Ponts, Univ Gustave Eiffel, CNRS, France*.
+
+If you find this code or work useful, please cite the following [paper]():
+```
+@article{puy23waffleiron,
+ title={Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation},
+ author={Puy, Gilles and Boulch, Alexandre and Marlet, Renaud},
+ journal={arxiv:2301.xxxx},
+ year={2023}
+}
+```
+
+## Installation
+
+```
+pip install pyyaml==6.0 tqdm==4.63.0 scipy==1.8.0 torch==1.11.0 tensorboard==2.8.0
+git clone https://github.com/valeoai/WaffleIron
+cd WaffleIron
+pip install -e ./
+```
+
+Download the pretrained models:
+```
+wget [ADD LINK]
+tar -xvzf pretrained_models_and_data.tar.gz
+```
+
+Finally, indicate where the nuScenes and SemanticKITTI datasets are located on your system:
+```
+export PATH_NUSCENES="/PATH/TO/NUSCENES"
+export PATH_KITTI="/PATH/TO/KITTI/"
+```
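+
+You can check that the installation succeeded by running the import used by the evaluation scripts below:
+```python
+# This should run without error after `pip install -e ./`
+from waffleiron import Segmenter
+```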
+
+If you want to uninstall this package, type `pip uninstall waffleiron`.
+
+
+## Testing pretrained models
+
+### Option 1: Using this code
+
+To evaluate the model pre-trained on the train set of nuScenes and used in Table 1 of our paper, type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--fp16 \
+--gpu 0 \
+--restart \
+--eval
+```
+
+To evaluate the model pre-trained on the train set of SemanticKITTI with instance cutmix augmentation, type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--restart \
+--eval
+```
+
+**Remark:** *On SemanticKITTI, the code above will extract object instances from the train set (even though this is
+not necessary for validation) because this augmentation is activated for training on this dataset (and the code above
+re-uses the training script). You can bypass this step by editing the `yaml` config file and setting the entry
+`instance_cutmix` to `False`. The instances are saved automatically in `/tmp/semantic_kitti_instances/`.*
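+
+For example, the following sketch (using PyYAML, installed above) sets this entry programmatically instead of editing the file by hand; note that `yaml.safe_dump` rewrites the file without its comments:
+```python
+import yaml
+
+# Disable instance cutmix in the training config used above
+path = "./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml"
+with open(path) as f:
+    config = yaml.safe_load(f)
+config["augmentations"]["instance_cutmix"] = False
+with open(path, "w") as f:
+    yaml.safe_dump(config, f)
+```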
+
+### Option 2: Using the official APIs
+
+The second option writes the predictions on disk, and the results can then be computed using the official
+nuScenes or SemanticKITTI APIs. This option also lets you apply test time augmentations, which is not possible
+with Option 1 above. These scripts should also be usable for submission to the official benchmarks.
+
+#### nuScenes
+
+To extract the predictions of the pre-trained model on nuScenes, type
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12
+```
+or, if you want to use, e.g., 10 votes with test time augmentations,
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12 \
+--num_votes 10 \
+--batch_size 5
+```
+You can reduce `batch_size` to 2 or 1 depending on the available memory; note that `num_votes` must be a multiple of `batch_size`.
+
+These predictions can be evaluated using the official nuScenes API as follows
+```
+git clone https://github.com/nutonomy/nuscenes-devkit.git
+python nuscenes-devkit/python-sdk/nuscenes/eval/lidarseg/evaluate.py \
+--result_path ./predictions_nuscenes \
+--eval_set val \
+--version v1.0-trainval \
+--dataroot $PATH_NUSCENES \
+--verbose True
+```
+
+#### SemanticKITTI
+
+To evaluate the pre-trained model on SemanticKITTI, type
+```
+python eval_kitti.py \
+--path_dataset $PATH_KITTI \
+--ckpt ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ckpt_last.pth \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--result_folder ./predictions_kitti \
+--phase val \
+--num_workers 12
+```
+
+The predictions can be evaluated using the official APIs by typing
+```
+git clone https://github.com/PRBonn/semantic-kitti-api.git
+cd semantic-kitti-api/
+python evaluate_semantics.py \
+--dataset $PATH_KITTI/dataset \
+--predictions ../predictions_kitti \
+--split valid
+```
+
+## Training
+
+### nuScenes
+
+To train a WaffleIron-48-256 backbone on nuScenes with
+- 2D cells of 60 cm,
+- the baseline sequence of projections along the z-axis, then the y-axis, then the x-axis, etc., until the last layer,
+
+type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./logs/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--gpu 0 \
+--fp16
+```
+
+For example, with `--seed 1` as an additional argument to `launch_train.py`, I obtain a mIoU of 76.2% at the last
+training epoch (using one Nvidia Tesla V100S-PCIE-32GB for training).
+
+Note: for multi-GPU training, you can remove `--gpu 0`, in which case the code uses all available GPUs with PyTorch DataParallel
+for parallelism. You can add the argument `--multiprocessing-distributed` to use DistributedDataParallel instead.
+
+
+### SemanticKITTI
+
+To retrain a WaffleIron-48-256 backbone on SemanticKITTI with
+- 2D cells of 40 cm,
+- projection along the z-axis at all layers,
+- **instance cutmix augmentations**,
+
+type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./logs/WaffleIron-48-256__40cm-BEV-cutmix-kitti \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--multiprocessing-distributed
+```
+
+The instances for cutmix augmentation are saved in `/tmp/semantic_kitti_instances/`. You can disable the instance
+cutmix augmentations by editing the `yaml` config file to set `instance_cutmix` to `False`.
+
+For submission to the official benchmark on the test set of SemanticKITTI, we also trained the network on both the
+val and train sets (argument `--trainval` in `launch_train.py`), used the checkpoint saved at the last epoch, and
+applied 10 test time augmentations during inference.
+
+
+## Creating your own network
+
+### Config file
+
+You can refer to the template config files in `./configs/`, e.g., `./configs/WaffleIron-template-BEV-projection.yaml`,
+where we describe the role of each parameter.
+In particular, you can adjust `nb_channels` and `depth` to increase or decrease the capacity of WaffleIron.
+You can also reduce the memory required to train a network by lowering `max_points` in `dataloader`, but
+too small a value might degrade performance.
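+
+As an illustration of how these entries are consumed, the evaluation scripts of this repository build the network from the config file as follows:
+```python
+import yaml
+from waffleiron import Segmenter
+
+with open("./configs/WaffleIron-template-BEV-projection.yaml", "r") as f:
+    config = yaml.safe_load(f)
+
+# Same mapping from config entries to model arguments as in eval_kitti.py
+net = Segmenter(
+    input_channels=config["embedding"]["size_input"],
+    feat_channels=config["waffleiron"]["nb_channels"],
+    depth=config["waffleiron"]["depth"],
+    grid_shape=config["waffleiron"]["grids_size"],
+    nb_class=config["classif"]["nb_class"],
+)
+```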
+
+### Models
+
+The WaffleIron backbone is defined in `waffleiron/backbone.py` and can be imported in your project by typing
+```python
+from waffleiron import WaffleIron
+```
+It needs to be combined with an embedding layer providing the point tokens and a pointwise classification layer, as we do
+in `waffleiron/segmenter.py`. You can define your own embedding and classification layers instead.
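+
+As a minimal sketch, a custom segmenter could look as follows. The constructor and forward signatures of `WaffleIron` below are assumptions made for illustration only; refer to `waffleiron/backbone.py` and `waffleiron/segmenter.py` for the actual ones.
+```python
+import torch.nn as nn
+from waffleiron import WaffleIron
+
+
+class MySegmenter(nn.Module):
+    def __init__(self, input_channels, feat_channels, depth, grids_size, nb_class):
+        super().__init__()
+        # Custom embedding layer providing the point tokens: here a simple
+        # pointwise linear layer (the embedding of waffleiron/segmenter.py
+        # also exploits the neighborhood of each point)
+        self.embed = nn.Conv1d(input_channels, feat_channels, 1)
+        # Assumed constructor signature, for illustration only
+        self.backbone = WaffleIron(feat_channels, depth, grids_size)
+        # Custom pointwise classification layer
+        self.classif = nn.Conv1d(feat_channels, nb_class, 1)
+
+    def forward(self, feat, cell_ind, occupied_cells):
+        tokens = self.embed(feat)  # B x F x N point tokens
+        # Assumed forward signature, for illustration only
+        tokens = self.backbone(tokens, cell_ind, occupied_cells)
+        return self.classif(tokens)  # B x nb_class x N pointwise predictions
+```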
+
+
+## Acknowledgements
+We thank the authors of
+```
+@inproceedings{berman18lovasz,
+author = {Berman, Maxim and Triki, Amal Rannen and Blaschko, Matthew B.},
+title = {The Lovász-Softmax Loss: A Tractable Surrogate for the Optimization of the Intersection-Over-Union Measure
+in Neural Networks},
+booktitle = {CVPR},
+year = {2018}
+}
+```
+for making their [implementation](https://github.com/bermanmaxim/LovaszSoftmax) of the Lovász loss publicly available.
+
+
+## License
+WaffleIron is released under the [Apache 2.0 license](./LICENSE).
+
+The implementation of the Lovász loss in `utils/lovasz.py` is released under the
+[MIT License](https://github.com/bermanmaxim/LovaszSoftmax/blob/master/LICENSE).
diff --git a/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
new file mode 100644
index 0000000..b60fa67
--- /dev/null
+++ b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+ nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+ depth: 48 # Define L = the depth on the network
+ fov_xyz: # Define the FOV in meters
+ - - -50 # min value on x-axis: -50 m
+ - -50 # min value on y-axis: -50 m
+ - -5 # min value on z-axis: -5 m
+ - - 50 # max value on x-axis: 50 m
+ - 50 # max value on y-axis: 50 m
+ - 3 # max value on z-axis: 3 m
+ dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+ - 2 # Project along the z axis at \ell = 1 (and then the same at all layers)
+ grids_size: # Define here the size of the 2D grids
+ - [250, 250] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [250, 250] -> resolution 40cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+ nb_class: 19 # Number of classes on SemanticKITTI (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+ input_feat: # List of features on each point
+ - "intensity"
+ - "height"
+ - "radius"
+ size_input: 3 # Input feature size on each point
+ neighbors: 16 # Neighborhood for embedding layer
+ voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+ batch_size: 4
+ num_workers: 12
+ max_points: 20000
+
+augmentations:
+ rotation_z: null
+ flip_xy: null
+ scale:
+ - [0, 1, 2]
+ - 0.1
+ instance_cutmix: True
+
+loss:
+ lovasz: 1.0
+
+optim:
+ lr: .001
+ weight_decay: 0.003
+
+scheduler:
+ min_lr: 0.00001
+ max_epoch: 45
+ epoch_warmup: 4
diff --git a/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
new file mode 100644
index 0000000..b631db1
--- /dev/null
+++ b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+ nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+ depth: 48 # Define L = the depth on the network
+ fov_xyz: # Define the FOV in meters
+ - - -50 # min value on x-axis: -50 m
+ - -50 # min value on y-axis: -50 m
+ - -5 # min value on z-axis: -5 m
+ - - 50 # max value on x-axis: 50 m
+ - 50 # max value on y-axis: 50 m
+ - 5 # max value on z-axis: 5 m
+ dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+ - 2 # Project along the z axis at \ell = 1
+ - 1 # Project along the y axis at \ell = 2
+ - 0 # Project along the x axis at \ell = 3
+ grids_size: # Define here the size of the 2D grids
+ - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+ - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+ - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+ nb_class: 16 # Number of classes on nuscenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+ input_feat: # List of features on each point
+ - "intensity"
+ - "height"
+ - "radius"
+ size_input: 3 # Input feature size on each point
+ neighbors: 16 # Neighborhood for embedding layer
+ voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+ batch_size: 4
+ num_workers: 12
+ max_points: 20000
+
+augmentations:
+ rotation_z: null
+ flip_xy: null
+ scale:
+ - [0, 1, 2]
+ - 0.1
+ instance_cutmix: False
+
+loss:
+ lovasz: 1.0
+
+optim:
+ lr: .001
+ weight_decay: 0.003
+
+scheduler:
+ min_lr: 0.00001
+ max_epoch: 45
+ epoch_warmup: 4
diff --git a/configs/WaffleIron-template-BEV-projection.yaml b/configs/WaffleIron-template-BEV-projection.yaml
new file mode 100644
index 0000000..5ac6296
--- /dev/null
+++ b/configs/WaffleIron-template-BEV-projection.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+ nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+ depth: 48 # Define L = the depth on the network
+ fov_xyz: # Define the FOV in meters
+ - - -50 # min value on x-axis: -50 m
+ - -50 # min value on y-axis: -50 m
+ - -5 # min value on z-axis: -5 m
+ - - 50 # max value on x-axis: 50 m
+ - 50 # max value on y-axis: 50 m
+ - 5 # max value on z-axis: 5 m
+ dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+ - 2 # Project along the z axis at \ell = 1
+ grids_size: # Define here the size of the 2D grids
+ - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+ nb_class: 16 # Number of classes on nuscenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+ input_feat: # List of features on each point
+ - "intensity"
+ - "height"
+ - "radius"
+ size_input: 3 # Input feature size on each point
+ neighbors: 16 # Neighborhood for embedding layer
+ voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+ batch_size: 4
+ num_workers: 12
+ max_points: 20000
+
+augmentations:
+ rotation_z: null
+ flip_xy: null
+ scale:
+ - [0, 1, 2]
+ - 0.1
+ instance_cutmix: False
+
+loss:
+ lovasz: 1.0
+
+optim:
+ lr: .001
+ weight_decay: 0.001
+
+scheduler:
+ min_lr: 0.00001
+ max_epoch: 45
+ epoch_warmup: 4
diff --git a/configs/WaffleIron-template-baseline-projection.yaml b/configs/WaffleIron-template-baseline-projection.yaml
new file mode 100644
index 0000000..216b397
--- /dev/null
+++ b/configs/WaffleIron-template-baseline-projection.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+ nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+ depth: 48 # Define L = the depth on the network
+ fov_xyz: # Define the FOV in meters
+ - - -50 # min value on x-axis: -50 m
+ - -50 # min value on y-axis: -50 m
+ - -5 # min value on z-axis: -5 m
+ - - 50 # max value on x-axis: 50 m
+ - 50 # max value on y-axis: 50 m
+ - 5 # max value on z-axis: 5 m
+ dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+ - 2 # Project along the z axis at \ell = 1
+ - 1 # Project along the y axis at \ell = 2
+ - 0 # Project along the x axis at \ell = 3
+ grids_size: # Define here the size of the 2D grids
+ - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+ - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+ - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+ nb_class: 16 # Number of classes on nuscenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+ input_feat: # List of features on each point
+ - "intensity"
+ - "height"
+ - "radius"
+ size_input: 3 # Input feature size on each point
+ neighbors: 16 # Neighborhood for embedding layer
+ voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+ batch_size: 4
+ num_workers: 12
+ max_points: 20000
+
+augmentations:
+ rotation_z: null
+ flip_xy: null
+ scale:
+ - [0, 1, 2]
+ - 0.1
+ instance_cutmix: False
+
+loss:
+ lovasz: 1.0
+
+optim:
+ lr: .001
+ weight_decay: 0.001
+
+scheduler:
+ min_lr: 0.00001
+ max_epoch: 45
+ epoch_warmup: 4
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000..2cf6775
--- /dev/null
+++ b/datasets/__init__.py
@@ -0,0 +1,21 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .pc_dataset import Collate
+from .nuscenes import NuScenesSemSeg
+from .semantic_kitti import SemanticKITTI
+
+__all__ = ["SemanticKITTI", "NuScenesSemSeg", "Collate"]
+LIST_DATASETS = {"nuscenes": NuScenesSemSeg, "semantic_kitti": SemanticKITTI}
diff --git a/datasets/nuscenes.py b/datasets/nuscenes.py
new file mode 100755
index 0000000..617d7db
--- /dev/null
+++ b/datasets/nuscenes.py
@@ -0,0 +1,97 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import numpy as np
+from .pc_dataset import PCDataset
+
+
+class ClassMapper:
+ def __init__(self):
+ current_folder = os.path.dirname(os.path.realpath(__file__))
+ self.mapping = np.load(
+ os.path.join(current_folder, "mapping_class_index_nuscenes.npy")
+ )
+
+ def get_index(self, x):
+ return self.mapping[x]
+
+
+class NuScenesSemSeg(PCDataset):
+
+ CLASS_NAME = [
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+ ]
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ # Class mapping
+ current_folder = os.path.dirname(os.path.realpath(__file__))
+ self.mapper = np.vectorize(ClassMapper().get_index)
+
+ # List all keyframes
+ self.list_frames = np.load(
+ os.path.join(current_folder, "list_files_nuscenes.npz")
+ )[self.phase]
+ if self.phase == "train":
+ assert len(self) == 28130
+ elif self.phase == "val":
+ assert len(self) == 6019
+ elif self.phase == "test":
+ assert len(self) == 6008
+ else:
+ raise ValueError(f"Unknown phase {self.phase}.")
+
+ assert not self.instance_cutmix, "Instance CutMix not implemented on nuscenes"
+
+ def __len__(self):
+ return len(self.list_frames)
+
+ def load_pc(self, index):
+ # Load point cloud
+ pc = np.fromfile(
+ os.path.join(self.rootdir, self.list_frames[index][0]),
+ dtype=np.float32,
+ )
+ pc = pc.reshape((-1, 5))[:, :4]
+
+ # Load segmentation labels
+ labels = np.fromfile(
+ os.path.join(self.rootdir, self.list_frames[index][1]),
+ dtype=np.uint8,
+ )
+ labels = self.mapper(labels)
+
+ # Label 0 should be ignored
+ labels = labels - 1
+ labels[labels == -1] = 255
+
+ return pc, labels, self.list_frames[index][2]
diff --git a/datasets/pc_dataset.py b/datasets/pc_dataset.py
new file mode 100644
index 0000000..99175c4
--- /dev/null
+++ b/datasets/pc_dataset.py
@@ -0,0 +1,276 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import numpy as np
+import utils.transforms as tr
+from torch.utils.data import Dataset
+from scipy.spatial import cKDTree as KDTree
+
+
+class PCDataset(Dataset):
+ def __init__(
+ self,
+ rootdir=None,
+ phase="train",
+ input_feat="intensity",
+ voxel_size=0.1,
+ train_augmentations=None,
+ dim_proj=[
+ 0,
+ ],
+ grids_shape=[(256, 256)],
+ fov_xyz=(
+ (
+ -1.0,
+ -1.0,
+ -1.0,
+ ),
+ (1.0, 1.0, 1.0),
+ ),
+ num_neighbors=16,
+ tta=False,
+ instance_cutmix=False,
+ ):
+ super().__init__()
+
+ # Dataset split
+ self.phase = phase
+ assert self.phase in ["train", "val", "trainval", "test"]
+
+ # Root directory of dataset
+ self.rootdir = rootdir
+
+ # Input features to compute for each point
+ self.input_feat = input_feat
+
+ # Downsample input point cloud by small voxelization
+ self.downsample = tr.Voxelize(
+ dims=(0, 1, 2),
+ voxel_size=voxel_size,
+ random=(self.phase == "train" or self.phase == "trainval"),
+ )
+
+ # Field of view
+ assert len(fov_xyz[0]) == len(
+ fov_xyz[1]
+ ), "Min and Max FOV must have the same length."
+ for i, (min, max) in enumerate(zip(*fov_xyz)):
+ assert (
+ min < max
+ ), f"Field of view: min ({min}) < max ({max}) is expected on dimension {i}."
+ self.fov_xyz = np.concatenate([np.array(f)[None] for f in fov_xyz], axis=0)
+ self.crop_to_fov = tr.Crop(dims=(0, 1, 2), fov=fov_xyz)
+
+ # Grid shape for projection in 2D
+ assert len(grids_shape) == len(dim_proj)
+ self.dim_proj = dim_proj
+ self.grids_shape = [np.array(g) for g in grids_shape]
+ self.lut_axis_plane = {0: (1, 2), 1: (0, 2), 2: (0, 1)}
+
+ # Number of neighbors for embedding layer
+ assert num_neighbors > 0
+ self.num_neighbors = num_neighbors
+
+ # Test time augmentation
+ if tta:
+ assert self.phase in ["test", "val"]
+ self.tta = tr.Compose(
+ (
+ tr.Rotation(inplace=True, dim=2),
+ tr.RandomApply(tr.FlipXY(inplace=True), prob=2.0 / 3.0),
+ tr.Scale(inplace=True, dims=(0, 1, 2), range=0.1),
+ )
+ )
+ else:
+ self.tta = None
+
+ # Train time augmentations
+ if train_augmentations is not None:
+ assert self.phase in ["train", "trainval"]
+ self.train_augmentations = train_augmentations
+
+ # Flag for instance cutmix
+ self.instance_cutmix = instance_cutmix
+
+ def get_occupied_2d_cells(self, pc):
+ """Return mapping between 3D point and corresponding 2D cell"""
+ cell_ind = []
+ for dim, grid in zip(self.dim_proj, self.grids_shape):
+ # Get plane of which to project
+ dims = self.lut_axis_plane[dim]
+ # Compute grid resolution
+ res = (self.fov_xyz[1, dims] - self.fov_xyz[0, dims]) / grid[None]
+ # Shift and quantize point cloud
+ pc_quant = ((pc[:, dims] - self.fov_xyz[0, dims]) / res).astype("int")
+ # Check that the point cloud fits on the grid
+ min, max = pc_quant.min(0), pc_quant.max(0)
+ assert min[0] >= 0 and min[1] >= 0, print(
+ "Some points are outside the FOV:", pc[:, :3].min(0), self.fov_xyz
+ )
+ assert max[0] < grid[0] and max[1] < grid[1], print(
+ "Some points are outside the FOV:", pc[:, :3].min(0), self.fov_xyz
+ )
+ # Transform quantized coordinates to cell indices for projection on 2D plane
+ temp = pc_quant[:, 0] * grid[1] + pc_quant[:, 1]
+ cell_ind.append(temp[None])
+ return np.vstack(cell_ind)
+
+ def prepare_input_features(self, pc_orig):
+ # Concatenate desired input features to coordinates
+ pc = [pc_orig[:, :3]] # Initialize with coordinates
+ for type in self.input_feat:
+ if type == "intensity":
+ pc.append(pc_orig[:, 3:])
+ elif type == "height":
+ pc.append(pc_orig[:, 2:3])
+ elif type == "radius":
+ r_xyz = np.linalg.norm(pc_orig[:, :3], axis=1, keepdims=True)
+ pc.append(r_xyz)
+ else:
+ raise ValueError(f"Unknown feature: {type}")
+ return np.concatenate(pc, 1)
+
+ def load_pc(self, index):
+ raise NotImplementedError()
+
+ def __len__(self):
+ raise NotImplementedError()
+
+ def __getitem__(self, index):
+ # Load original point cloud
+ pc_orig, labels_orig, filename = self.load_pc(index)
+
+ # Prepare input feature
+ pc_orig = self.prepare_input_features(pc_orig)
+
+ # Test time augmentation
+ if self.tta is not None:
+ pc_orig, labels_orig = self.tta(pc_orig, labels_orig)
+
+ # Voxelization
+ pc, labels = self.downsample(pc_orig, labels_orig)
+
+ # Augment data
+ if self.train_augmentations is not None:
+ pc, labels = self.train_augmentations(pc, labels)
+
+ # Crop to fov
+ pc, labels = self.crop_to_fov(pc, labels)
+
+ # For each point, get index of corresponding 2D cells on projected grid
+ cell_ind = self.get_occupied_2d_cells(pc)
+
+ # Get neighbors for point embedding layer providing tokens to waffleiron backbone
+ kdtree = KDTree(pc[:, :3])
+ assert pc.shape[0] > self.num_neighbors
+ _, neighbors_emb = kdtree.query(pc[:, :3], k=self.num_neighbors + 1)
+
+ # Nearest neighbor interpolation to undo cropping & voxelisation at validation time
+ if self.phase in ["train", "trainval"]:
+ upsample = np.arange(pc.shape[0])
+ else:
+ _, upsample = kdtree.query(pc_orig[:, :3], k=1)
+
+ # Output to return
+ out = (
+ # Point features
+ pc[:, 3:].T[None],
+ # Point labels (downsampled labels at train time, labels of the original entire point cloud otherwise)
+ labels if self.phase in ["train", "trainval"] else labels_orig,
+ # Projection 2D -> 3D: index of 2D cells for each point
+ cell_ind[None],
+ # Neighborhood for point embedding layer, which provides tokens to waffleiron backbone
+ neighbors_emb.T[None],
+ # For interpolation from voxelized & cropped point cloud to original point cloud
+ upsample,
+ # Filename of original point cloud
+ filename,
+ )
+
+ return out
+
+
+def zero_pad(feat, neighbors_emb, cell_ind, Nmax):
+ N = feat.shape[-1]
+ assert N <= Nmax
+ occupied_cells = np.ones((1, Nmax))
+ if N < Nmax:
+ # Zero-pad with null features
+ feat = np.concatenate((feat, np.zeros((1, feat.shape[1], Nmax - N))), axis=2)
+ # For zero-padded points, use the last zero-padded point as their neighbor
+ neighbors_emb = np.concatenate(
+ (
+ neighbors_emb,
+ (Nmax - 1) * np.ones((1, neighbors_emb.shape[1], Nmax - N)),
+ ),
+ axis=2,
+ )
+ # Associate zero-padded points to first 2D cell...
+ cell_ind = np.concatenate(
+ (cell_ind, np.zeros((1, cell_ind.shape[1], Nmax - N))), axis=2
+ )
+ # ... and at the same time mark zero-padded points as unoccupied
+ occupied_cells[:, N:] = 0
+ return feat, neighbors_emb, cell_ind, occupied_cells
+
+
+class Collate:
+ def __init__(self, num_points=None):
+ self.num_points = num_points
+ assert num_points is None or num_points > 0
+
+ def __call__(self, list_data):
+
+ # Extract all data
+ list_of_data = (list(data) for data in zip(*list_data))
+ feat, label_orig, cell_ind, neighbors_emb, upsample, filename = list_of_data
+
+ # Zero-pad point clouds
+ Nmax = np.max([f.shape[-1] for f in feat])
+ if self.num_points is not None:
+ assert Nmax <= self.num_points
+ occupied_cells = []
+ for i in range(len(feat)):
+ feat[i], neighbors_emb[i], cell_ind[i], temp = zero_pad(
+ feat[i],
+ neighbors_emb[i],
+ cell_ind[i],
+ Nmax if self.num_points is None else self.num_points,
+ )
+ occupied_cells.append(temp)
+
+ # Concatenate along batch dimension
+ feat = torch.from_numpy(np.vstack(feat)).float() # B x C x Nmax
+ neighbors_emb = torch.from_numpy(np.vstack(neighbors_emb)).long() # B x Nmax
+ cell_ind = torch.from_numpy(
+ np.vstack(cell_ind)
+ ).long() # B x nb_2d_cells x Nmax
+ occupied_cells = torch.from_numpy(np.vstack(occupied_cells)).float() # B x Nmax
+ labels_orig = torch.from_numpy(np.hstack(label_orig)).long()
+ upsample = [torch.from_numpy(u) for u in upsample]
+
+ # Prepare output variables
+ out = {
+ "feat": feat,
+ "neighbors_emb": neighbors_emb,
+ "upsample": upsample,
+ "labels_orig": labels_orig,
+ "cell_ind": cell_ind,
+ "occupied_cells": occupied_cells,
+ "filename": filename,
+ }
+
+ return out
diff --git a/datasets/semantic-kitti.yaml b/datasets/semantic-kitti.yaml
new file mode 100755
index 0000000..6281065
--- /dev/null
+++ b/datasets/semantic-kitti.yaml
@@ -0,0 +1,211 @@
+# This file is covered by the LICENSE file in the root of this project.
+labels:
+ 0 : "unlabeled"
+ 1 : "outlier"
+ 10: "car"
+ 11: "bicycle"
+ 13: "bus"
+ 15: "motorcycle"
+ 16: "on-rails"
+ 18: "truck"
+ 20: "other-vehicle"
+ 30: "person"
+ 31: "bicyclist"
+ 32: "motorcyclist"
+ 40: "road"
+ 44: "parking"
+ 48: "sidewalk"
+ 49: "other-ground"
+ 50: "building"
+ 51: "fence"
+ 52: "other-structure"
+ 60: "lane-marking"
+ 70: "vegetation"
+ 71: "trunk"
+ 72: "terrain"
+ 80: "pole"
+ 81: "traffic-sign"
+ 99: "other-object"
+ 252: "moving-car"
+ 253: "moving-bicyclist"
+ 254: "moving-person"
+ 255: "moving-motorcyclist"
+ 256: "moving-on-rails"
+ 257: "moving-bus"
+ 258: "moving-truck"
+ 259: "moving-other-vehicle"
+color_map: # bgr
+ 0 : [0, 0, 0]
+ 1 : [0, 0, 255]
+ 10: [245, 150, 100]
+ 11: [245, 230, 100]
+ 13: [250, 80, 100]
+ 15: [150, 60, 30]
+ 16: [255, 0, 0]
+ 18: [180, 30, 80]
+ 20: [255, 0, 0]
+ 30: [30, 30, 255]
+ 31: [200, 40, 255]
+ 32: [90, 30, 150]
+ 40: [255, 0, 255]
+ 44: [255, 150, 255]
+ 48: [75, 0, 75]
+ 49: [75, 0, 175]
+ 50: [0, 200, 255]
+ 51: [50, 120, 255]
+ 52: [0, 150, 255]
+ 60: [170, 255, 150]
+ 70: [0, 175, 0]
+ 71: [0, 60, 135]
+ 72: [80, 240, 150]
+ 80: [150, 240, 255]
+ 81: [0, 0, 255]
+ 99: [255, 255, 50]
+ 252: [245, 150, 100]
+ 256: [255, 0, 0]
+ 253: [200, 40, 255]
+ 254: [30, 30, 255]
+ 255: [90, 30, 150]
+ 257: [250, 80, 100]
+ 258: [180, 30, 80]
+ 259: [255, 0, 0]
+content: # as a ratio with the total number of points
+ 0: 0.018889854628292943
+ 1: 0.0002937197336781505
+ 10: 0.040818519255974316
+ 11: 0.00016609538710764618
+ 13: 2.7879693665067774e-05
+ 15: 0.00039838616015114444
+ 16: 0.0
+ 18: 0.0020633612104619787
+ 20: 0.0016218197275284021
+ 30: 0.00017698551338515307
+ 31: 1.1065903904919655e-08
+ 32: 5.532951952459828e-09
+ 40: 0.1987493871255525
+ 44: 0.014717169549888214
+ 48: 0.14392298360372
+ 49: 0.0039048553037472045
+ 50: 0.1326861944777486
+ 51: 0.0723592229456223
+ 52: 0.002395131480328884
+ 60: 4.7084144280367186e-05
+ 70: 0.26681502148037506
+ 71: 0.006035012012626033
+ 72: 0.07814222006271769
+ 80: 0.002855498193863172
+ 81: 0.0006155958086189918
+ 99: 0.009923127583046915
+ 252: 0.001789309418528068
+ 253: 0.00012709999297008662
+ 254: 0.00016059776092534436
+ 255: 3.745553104802113e-05
+ 256: 0.0
+ 257: 0.00011351574470342043
+ 258: 0.00010157861367183268
+ 259: 4.3840131989471124e-05
+# classes that are indistinguishable from single scan or inconsistent in
+# ground truth are mapped to their closest equivalent
+learning_map:
+ 0 : 0 # "unlabeled"
+ 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped
+ 10: 1 # "car"
+ 11: 2 # "bicycle"
+ 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped
+ 15: 3 # "motorcycle"
+ 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped
+ 18: 4 # "truck"
+ 20: 5 # "other-vehicle"
+ 30: 6 # "person"
+ 31: 7 # "bicyclist"
+ 32: 8 # "motorcyclist"
+ 40: 9 # "road"
+ 44: 10 # "parking"
+ 48: 11 # "sidewalk"
+ 49: 12 # "other-ground"
+ 50: 13 # "building"
+ 51: 14 # "fence"
+ 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped
+ 60: 9 # "lane-marking" to "road" ---------------------------------mapped
+ 70: 15 # "vegetation"
+ 71: 16 # "trunk"
+ 72: 17 # "terrain"
+ 80: 18 # "pole"
+ 81: 19 # "traffic-sign"
+ 99: 0 # "other-object" to "unlabeled" ----------------------------mapped
+ 252: 1 # "moving-car" to "car" ------------------------------------mapped
+ 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped
+ 254: 6 # "moving-person" to "person" ------------------------------mapped
+ 255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped
+ 256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped
+ 257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped
+ 258: 4 # "moving-truck" to "truck" --------------------------------mapped
+ 259: 5 # "moving-other"-vehicle to "other-vehicle" ----------------mapped
+learning_map_inv: # inverse of previous map
+ 0: 0 # "unlabeled", and others ignored
+ 1: 10 # "car"
+ 2: 11 # "bicycle"
+ 3: 15 # "motorcycle"
+ 4: 18 # "truck"
+ 5: 20 # "other-vehicle"
+ 6: 30 # "person"
+ 7: 31 # "bicyclist"
+ 8: 32 # "motorcyclist"
+ 9: 40 # "road"
+ 10: 44 # "parking"
+ 11: 48 # "sidewalk"
+ 12: 49 # "other-ground"
+ 13: 50 # "building"
+ 14: 51 # "fence"
+ 15: 70 # "vegetation"
+ 16: 71 # "trunk"
+ 17: 72 # "terrain"
+ 18: 80 # "pole"
+ 19: 81 # "traffic-sign"
+learning_ignore: # Ignore classes
+ 0: True # "unlabeled", and others ignored
+ 1: False # "car"
+ 2: False # "bicycle"
+ 3: False # "motorcycle"
+ 4: False # "truck"
+ 5: False # "other-vehicle"
+ 6: False # "person"
+ 7: False # "bicyclist"
+ 8: False # "motorcyclist"
+ 9: False # "road"
+ 10: False # "parking"
+ 11: False # "sidewalk"
+ 12: False # "other-ground"
+ 13: False # "building"
+ 14: False # "fence"
+ 15: False # "vegetation"
+ 16: False # "trunk"
+ 17: False # "terrain"
+ 18: False # "pole"
+ 19: False # "traffic-sign"
+split: # sequence numbers
+ train:
+ - 0
+ - 1
+ - 2
+ - 3
+ - 4
+ - 5
+ - 6
+ - 7
+ - 9
+ - 10
+ valid:
+ - 8
+ test:
+ - 11
+ - 12
+ - 13
+ - 14
+ - 15
+ - 16
+ - 17
+ - 18
+ - 19
+ - 20
+ - 21
diff --git a/datasets/semantic_kitti.py b/datasets/semantic_kitti.py
new file mode 100644
index 0000000..86436b6
--- /dev/null
+++ b/datasets/semantic_kitti.py
@@ -0,0 +1,260 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import warnings
+import numpy as np
+from glob import glob
+from tqdm import tqdm
+import utils.transforms as tr
+from .pc_dataset import PCDataset
+
+
+class InstanceCutMix:
+ def __init__(self, phase="train", temp_dir="/tmp/semantic_kitti_instances/"):
+
+ # Train or Trainval
+ self.phase = phase
+ assert self.phase in ["train", "trainval"]
+
+ # List of files containing instances for bicycle, motorcycle, person, bicyclist
+ self.bank = {1: [], 2: [], 5: [], 6: []}
+
+ # Directory where to store instances
+ self.rootdir = os.path.join(temp_dir, self.phase)
+ for id_class in self.bank.keys():
+ os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+
+ # Load instances
+ for key in self.bank.keys():
+ self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+ self.__loaded__ = self.test_loaded()
+ if not self.__loaded__:
+ warnings.warn(
+ "Instances must be extracted and saved on disk before training"
+ )
+
+ # Augmentations applied on Instances
+ self.rot = tr.Compose(
+ (
+ tr.FlipXY(inplace=True),
+ tr.Rotation(inplace=True),
+ tr.Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+ )
+ )
+
+ # For each class, maximum number of instances to add
+ self.num_to_add = 40
+
+ # Voxelization of 1 m to downsample the point cloud, ensuring that the
+ # centers of the instances are at least 1 m apart
+ self.vox = tr.Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+
+ def test_loaded(self):
+ self.__loaded__ = False
+ if self.phase == "train":
+ if len(self.bank[1]) != 5083:
+ print(f"Expected 5083 instances but got {len(self.bank[1])}.")
+ return False
+ if len(self.bank[2]) != 3092:
+ print(f"Expected 3092 instances but got {len(self.bank[2])}.")
+ return False
+ if len(self.bank[5]) != 8084:
+ print(f"Expected 8084 instances but got {len(self.bank[5])}.")
+ return False
+ if len(self.bank[6]) != 1551:
+ print(f"Expected 1551 instances but got {len(self.bank[6])}.")
+ return False
+ elif self.phase == "trainval":
+ if len(self.bank[1]) != 8213:
+ print(f"Expected 8213 instances but got {len(self.bank[1])}.")
+ return False
+ if len(self.bank[2]) != 4169:
+ print(f"Expected 4169 instances but got {len(self.bank[2])}.")
+ return False
+ if len(self.bank[5]) != 12190:
+ print(f"Expected 12190 instances but got {len(self.bank[5])}.")
+ return False
+ if len(self.bank[6]) != 2943:
+ print(f"Expected 2943 instances but got {len(self.bank[6])}.")
+ return False
+ self.__loaded__ = True
+ return True
+
+ def cut(self, pc, class_label, instance_label):
+ for id_class in self.bank.keys():
+ where_class = (class_label == id_class)
+ all_instances = np.unique(instance_label[where_class])
+ for id_instance in all_instances:
+ # Segment instance
+ where_ins = (instance_label == id_instance)
+ if where_ins.sum() <= 5: continue
+ instance = pc[where_ins, :]
+ # Center instance
+ instance[:, :2] -= instance[:, :2].mean(0, keepdims=True)
+ instance[:, 2] -= instance[:, 2].min(0, keepdims=True)
+ # Save instance
+ pathfile = os.path.join(
+ self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+ )
+ instance.tofile(pathfile)
+ self.bank[id_class].append(pathfile)
+
+ def mix(self, pc, class_label):
+
+ # Find potential locations where to add new objects (on a surface)
+ pc_vox, class_label_vox = self.vox(pc, class_label)
+ where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+ where_surface = where_surface[torch.randperm(len(where_surface))]
+
+ # Add instances of each class in bank
+ id_tot = 0
+ new_pc, new_label = [pc], [class_label]
+ for id_class in self.bank.keys():
+ nb_to_add = torch.randint(self.num_to_add, (1,))[0]
+ which_one = torch.randint(len(self.bank[id_class]), (nb_to_add,))
+ for ii in range(nb_to_add):
+ # Point p where to add the instance
+ p = pc_vox[where_surface[id_tot]]
+ # Extract instance
+ object = self.bank[id_class][which_one[ii]]
+ object = np.fromfile(object, dtype=np.float32).reshape((-1, 4))
+ # Augment instance
+ label = np.ones((object.shape[0],), dtype=np.int64) * id_class
+ object, label = self.rot(object, label)
+ # Move instance at point p
+ object[:, :3] += p[:3][None]
+ # Add instance in the point cloud
+ new_pc.append(object)
+ # Add corresponding label
+ new_label.append(label)
+ id_tot += 1
+
+ return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+ def __call__(self, pc, class_label, instance_label):
+ if not self.__loaded__:
+ self.cut(pc, class_label, instance_label)
+ return None, None
+
+ return self.mix(pc, class_label)
+
+
+class SemanticKITTI(PCDataset):
+
+ CLASS_NAME = [
+ "car", # 0
+ "bicycle", # 1
+ "motorcycle", # 2
+ "truck", # 3
+ "other-vehicle", # 4
+ "person", # 5
+ "bicyclist", # 6
+ "motorcyclist", # 7
+ "road", # 8
+ "parking", # 9
+ "sidewalk", # 10
+ "other-ground", # 11
+ "building", # 12
+ "fence", # 13
+ "vegetation", # 14
+ "trunk", # 15
+ "terrain", # 16
+ "pole", # 17
+ "traffic-sign", # 18
+ ]
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ # Config file and class mapping
+ current_folder = os.path.dirname(os.path.realpath(__file__))
+ with open(os.path.join(current_folder, "semantic-kitti.yaml")) as stream:
+ semkittiyaml = yaml.safe_load(stream)
+ self.learning_map = semkittiyaml["learning_map"]
+
+ # Split
+ if self.phase == "train":
+ split = semkittiyaml["split"]["train"]
+ elif self.phase == "val":
+ split = semkittiyaml["split"]["valid"]
+ elif self.phase == "test":
+ split = semkittiyaml["split"]["test"]
+ elif self.phase == "trainval":
+ split = semkittiyaml["split"]["train"] + semkittiyaml["split"]["valid"]
+ else:
+ raise Exception(f"Unknown split {self.phase}")
+
+ # Find all files
+ self.im_idx = []
+ for i_folder in np.sort(split):
+ self.im_idx.extend(
+ glob(
+ os.path.join(
+ self.rootdir,
+ "dataset",
+ "sequences",
+ str(i_folder).zfill(2),
+ "velodyne",
+ "*.bin",
+ )
+ )
+ )
+ self.im_idx = np.sort(self.im_idx)
+
+ # Training with instance cutmix
+ if self.instance_cutmix:
+ assert (
+ self.phase != "test" and self.phase != "val"
+ ), "Instance cutmix should not be applied at test or val time"
+ self.cutmix = InstanceCutMix(phase=self.phase)
+ if not self.cutmix.test_loaded():
+ print("Extracting instances before training...")
+ for index in tqdm(range(len(self))):
+ self.load_pc(index)
+ print("Done.")
+ assert self.cutmix.test_loaded(), "Instances not extracted correctly"
+
+ def __len__(self):
+ return len(self.im_idx)
+
+ def load_pc(self, index):
+ # Load point cloud
+ pc = np.fromfile(self.im_idx[index], dtype=np.float32).reshape((-1, 4))
+
+ # Extract Label
+ if self.phase == "test":
+ labels = np.zeros((pc.shape[0], 1), dtype=np.uint8)
+ else:
+ labels_inst = np.fromfile(
+ self.im_idx[index].replace("velodyne", "labels")[:-3] + "label",
+ dtype=np.uint32,
+ ).reshape((-1, 1))
+ labels = labels_inst & 0xFFFF # keep the lower 16 bits, i.e., the semantic label
+ labels = np.vectorize(self.learning_map.__getitem__)(labels).astype(
+ np.int32
+ )
+
+ # Map ignore index (0) to 255
+ labels = labels[:, 0] - 1
+ labels[labels == -1] = 255
+
+ # Instance CutMix
+ if self.instance_cutmix:
+ pc, labels = self.cutmix(pc, labels, labels_inst[:, 0])
+
+ return pc, labels, self.im_idx[index]
diff --git a/eval_kitti.py b/eval_kitti.py
new file mode 100644
index 0000000..be172aa
--- /dev/null
+++ b/eval_kitti.py
@@ -0,0 +1,156 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import SemanticKITTI, Collate
+
+
+if __name__ == "__main__":
+
+ # --- Arguments
+ parser = argparse.ArgumentParser(description="Evaluation")
+ parser.add_argument("--config", type=str, help="Path to config file")
+ parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+ parser.add_argument("--path_dataset", type=str, help="Path to SemanticKITTI dataset")
+ parser.add_argument("--result_folder", type=str, help="Path to where result folder")
+ parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+ parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+ parser.add_argument("--num_workers", type=int, default=6)
+ parser.add_argument("--phase", required=True, help="val or test")
+ args = parser.parse_args()
+ assert args.num_votes % args.batch_size == 0
+ os.makedirs(args.result_folder, exist_ok=True)
+
+ # --- Load config file
+ with open(args.config, "r") as f:
+ config = yaml.safe_load(f)
+
+ # --- SemanticKITTI (from https://github.com/PRBonn/semantic-kitti-api/blob/master/remap_semantic_labels.py)
+ with open("./datasets/semantic-kitti.yaml") as stream:
+ semkittiyaml = yaml.safe_load(stream)
+ remapdict = semkittiyaml["learning_map_inv"]
+ maxkey = max(remapdict.keys())
+ remap_lut = np.zeros((maxkey + 100), dtype=np.int32)
+ remap_lut[list(remapdict.keys())] = list(remapdict.values())
+
+ # --- Dataloader
+ dataset = SemanticKITTI(
+ rootdir=args.path_dataset,
+ input_feat=config["embedding"]["input_feat"],
+ voxel_size=config["embedding"]["voxel_size"],
+ num_neighbors=config["embedding"]["neighbors"],
+ dim_proj=config["waffleiron"]["dim_proj"],
+ grids_shape=config["waffleiron"]["grids_size"],
+ fov_xyz=config["waffleiron"]["fov_xyz"],
+ phase=args.phase,
+ tta=(args.num_votes > 1),
+ )
+ if args.num_votes > 1:
+ new_list = []
+ for f in dataset.im_idx:
+ for v in range(args.num_votes):
+ new_list.append(f)
+ dataset.im_idx = new_list
+ loader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=args.batch_size,
+ shuffle=False,
+ num_workers=args.num_workers,
+ pin_memory=True,
+ drop_last=False,
+ collate_fn=Collate(),
+ )
+ # Each batch contains batch_size augmented copies of the same scan
+ args.num_votes = args.num_votes // args.batch_size
+
+ # --- Build network
+ net = Segmenter(
+ input_channels=config["embedding"]["size_input"],
+ feat_channels=config["waffleiron"]["nb_channels"],
+ depth=config["waffleiron"]["depth"],
+ grid_shape=config["waffleiron"]["grids_size"],
+ nb_class=config["classif"]["nb_class"],
+ )
+ net = net.cuda()
+
+ # --- Load weights
+ ckpt = torch.load(args.ckpt, map_location="cuda:0")
+ try:
+ net.load_state_dict(ckpt["net"])
+ except RuntimeError:
+ # If model was trained using DataParallel or DistributedDataParallel
+ state_dict = {}
+ for key in ckpt["net"].keys():
+ state_dict[key[len("module."):]] = ckpt["net"][key]
+ net.load_state_dict(state_dict)
+ net = net.eval()
+
+ # --- Evaluation
+ id_vote = 0
+ for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+ # Reset vote
+ if id_vote == 0:
+ vote = None
+
+ # Network inputs
+ feat = batch["feat"].cuda(non_blocking=True)
+ labels = batch["labels_orig"].cuda(non_blocking=True)
+ batch["upsample"] = [
+ up.cuda(non_blocking=True) for up in batch["upsample"]
+ ]
+ cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+ occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+ neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+ net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+ # Get prediction
+ with torch.autocast("cuda", enabled=True):
+ with torch.inference_mode():
+ # Get prediction
+ out = net(*net_inputs)
+ for b in range(out.shape[0]):
+ temp = out[b, :, batch["upsample"][b]].T
+ if vote is None:
+ vote = torch.softmax(temp, dim=1)
+ else:
+ vote += torch.softmax(temp, dim=1)
+ id_vote += 1
+
+ # Save prediction
+ if id_vote == args.num_votes:
+ # Convert label
+ pred_label = vote.max(1)[1] + 1 # Shift by 1 because of ignore_label at index 0
+ label = pred_label.cpu().numpy().reshape((-1)).astype(np.uint32)
+ upper_half = label >> 16 # get upper half for instances
+ lower_half = label & 0xFFFF # get lower half for semantics
+ lower_half = remap_lut[lower_half] # do the remapping of semantics
+ label = (upper_half << 16) + lower_half # reconstruct full label
+ label = label.astype(np.uint32)
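+            # The packing mirrors the SemanticKITTI label format
+            # (instance_id << 16 | semantic_id); for predictions the upper
+            # 16 bits are always zero.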
+ # Save result
+ assert batch["filename"][0] == batch["filename"][-1]
+ label_file = batch["filename"][0][len(dataset.rootdir) + len("/dataset"):]
+ label_file = label_file.replace("velodyne", "predictions")[:-3] + "label"
+ label_file = os.path.join(args.result_folder, label_file)
+ os.makedirs(os.path.split(label_file)[0], exist_ok=True)
+ label.tofile(label_file)
+ # Reset count of votes
+ id_vote = 0
\ No newline at end of file
diff --git a/eval_nuscenes.py b/eval_nuscenes.py
new file mode 100644
index 0000000..676b38b
--- /dev/null
+++ b/eval_nuscenes.py
@@ -0,0 +1,139 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import NuScenesSemSeg, Collate
+
+
+if __name__ == "__main__":
+
+ # --- Arguments
+ parser = argparse.ArgumentParser(description="Evaluation")
+ parser.add_argument("--config", type=str, help="Path to config file")
+ parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+ parser.add_argument("--path_dataset", type=str, help="Path to SemanticKITTI dataset")
+ parser.add_argument("--result_folder", type=str, help="Path to where result folder")
+ parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+ parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+ parser.add_argument("--num_workers", type=int, default=6)
+ parser.add_argument("--phase", required=True, help="val or test")
+ args = parser.parse_args()
+ assert args.num_votes % args.batch_size == 0
+ args.result_folder = os.path.join(args.result_folder, "lidarseg", args.phase)
+ os.makedirs(args.result_folder, exist_ok=True)
+
+ # --- Load config file
+ with open(args.config, "r") as f:
+ config = yaml.safe_load(f)
+
+ # --- Dataloader
+ dataset = NuScenesSemSeg(
+ rootdir=args.path_dataset,
+ input_feat=config["embedding"]["input_feat"],
+ voxel_size=config["embedding"]["voxel_size"],
+ num_neighbors=config["embedding"]["neighbors"],
+ dim_proj=config["waffleiron"]["dim_proj"],
+ grids_shape=config["waffleiron"]["grids_size"],
+ fov_xyz=config["waffleiron"]["fov_xyz"],
+ phase=args.phase,
+ tta=(args.num_votes > 1),
+ )
+ if args.num_votes > 1:
+ new_list = []
+ for f in dataset.list_frames:
+ for v in range(args.num_votes):
+ new_list.append(f)
+ dataset.list_frames = new_list
+ loader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=args.batch_size,
+ shuffle=False,
+ num_workers=args.num_workers,
+ pin_memory=True,
+ drop_last=False,
+ collate_fn=Collate(),
+ )
+ args.num_votes = args.num_votes // args.batch_size
+
+ # --- Build network
+ net = Segmenter(
+ input_channels=config["embedding"]["size_input"],
+ feat_channels=config["waffleiron"]["nb_channels"],
+ depth=config["waffleiron"]["depth"],
+ grid_shape=config["waffleiron"]["grids_size"],
+ nb_class=config["classif"]["nb_class"],
+ )
+ net = net.cuda()
+
+ # --- Load weights
+ ckpt = torch.load(args.ckpt, map_location="cuda:0")
+    try:
+        net.load_state_dict(ckpt["net"])
+    except RuntimeError:
+        # The model was trained with DataParallel or DistributedDataParallel:
+        # strip the "module." prefix from each key before loading
+        state_dict = {}
+        for key in ckpt["net"].keys():
+            state_dict[key[len("module."):]] = ckpt["net"][key]
+        net.load_state_dict(state_dict)
+ net = net.eval()
+
+ # --- Evaluation
+ id_vote = 0
+ for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+ # Reset vote
+ if id_vote == 0:
+ vote = None
+
+ # Network inputs
+ feat = batch["feat"].cuda(non_blocking=True)
+ labels = batch["labels_orig"].cuda(non_blocking=True)
+ batch["upsample"] = [
+ up.cuda(non_blocking=True) for up in batch["upsample"]
+ ]
+ cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+ occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+ neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+ net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+ # Get prediction
+ with torch.autocast("cuda", enabled=True):
+ with torch.inference_mode():
+ # Get prediction
+ out = net(*net_inputs)
+ for b in range(out.shape[0]):
+ temp = out[b, :, batch["upsample"][b]].T
+ if vote is None:
+ vote = torch.softmax(temp, dim=1)
+ else:
+ vote += torch.softmax(temp, dim=1)
+ id_vote += 1
+
+ # Save prediction
+ if id_vote == args.num_votes:
+ # Get label
+ pred_label = vote.max(1)[1] + 1 # Shift by 1 because of ignore_label at index 0
+ # Save result
+ bin_file_path = os.path.join(args.result_folder, batch["filename"][0] + "_lidarseg.bin")
+            pred_label.cpu().numpy().astype(np.uint8).tofile(bin_file_path)
+ # Reset count of votes
+ id_vote = 0
\ No newline at end of file
diff --git a/illustration.png b/illustration.png
new file mode 100755
index 0000000..7c6ba7d
Binary files /dev/null and b/illustration.png differ
diff --git a/launch_train.py b/launch_train.py
new file mode 100644
index 0000000..935aeca
--- /dev/null
+++ b/launch_train.py
@@ -0,0 +1,374 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import random
+import warnings
+import argparse
+import numpy as np
+import utils.transforms as tr
+from utils.metrics import SemSegLoss
+from utils.scheduler import WarmupCosine
+from utils.trainer import TrainingManager
+from waffleiron.segmenter import Segmenter
+from datasets import LIST_DATASETS, Collate
+
+
+def load_model_config(file):
+ with open(file, "r") as f:
+ config = yaml.safe_load(f)
+ return config
+
+
+def get_train_augmentations(config):
+
+ list_of_transf = []
+
+ # Two transformations shared across all datasets
+ list_of_transf.append(
+ tr.LimitNumPoints(
+ dims=(0, 1, 2),
+ max_point=config["dataloader"]["max_points"],
+ random=True,
+ )
+ )
+
+ # Optional augmentations
+ for aug_name in config["augmentations"].keys():
+ if aug_name == "rotation_z":
+ list_of_transf.append(tr.Rotation(inplace=True, dim=2))
+ elif aug_name == "flip_xy":
+ list_of_transf.append(tr.RandomApply(tr.FlipXY(inplace=True), prob=2 / 3))
+ elif aug_name == "scale":
+ dims = config["augmentations"]["scale"][0]
+ scale = config["augmentations"]["scale"][1]
+ list_of_transf.append(tr.Scale(inplace=True, dims=dims, range=scale))
+ elif aug_name == "instance_cutmix":
+ # Do nothing here, directly handled in semantic kitti dataset
+ continue
+ else:
+ raise ValueError("Unknown transformation")
+
+ print("List of transformations:", list_of_transf)
+
+ return tr.Compose(list_of_transf)
+
+
+def get_datasets(config, args):
+
+ # Shared parameters
+ kwargs = {
+ "rootdir": os.path.join("/datasets_local/", args.path_dataset),
+ "input_feat": config["embedding"]["input_feat"],
+ "voxel_size": config["embedding"]["voxel_size"],
+ "num_neighbors": config["embedding"]["neighbors"],
+ "dim_proj": config["waffleiron"]["dim_proj"],
+ "grids_shape": config["waffleiron"]["grids_size"],
+ "fov_xyz": config["waffleiron"]["fov_xyz"],
+ }
+
+    # Get dataset
+ DATASET = LIST_DATASETS.get(args.dataset.lower())
+ if DATASET is None:
+ raise ValueError(f"Dataset {args.dataset.lower()} not available.")
+
+ # Train dataset
+ train_dataset = DATASET(
+ phase="trainval" if args.trainval else "train",
+ train_augmentations=get_train_augmentations(config),
+ instance_cutmix=config["augmentations"]["instance_cutmix"],
+ **kwargs,
+ )
+
+ # Validation dataset
+ val_dataset = DATASET(
+ phase="val",
+ **kwargs,
+ )
+
+ return train_dataset, val_dataset
+
+
+def get_dataloader(train_dataset, val_dataset, args):
+
+ if args.distributed:
+ train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+ val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
+ else:
+ train_sampler = None
+ val_sampler = None
+
+ train_loader = torch.utils.data.DataLoader(
+ train_dataset,
+ batch_size=args.batch_size,
+ shuffle=(train_sampler is None),
+ num_workers=args.workers,
+ pin_memory=True,
+ sampler=train_sampler,
+ drop_last=True,
+ collate_fn=Collate(),
+ )
+ val_loader = torch.utils.data.DataLoader(
+ val_dataset,
+ batch_size=args.batch_size,
+ shuffle=False,
+ num_workers=args.workers,
+ pin_memory=True,
+ sampler=val_sampler,
+ drop_last=False,
+ collate_fn=Collate(),
+ )
+
+ return train_loader, val_loader, train_sampler
+
+
+def get_optimizer(parameters, config):
+ return torch.optim.AdamW(
+ parameters,
+ lr=config["optim"]["lr"],
+ weight_decay=config["optim"]["weight_decay"],
+ )
+
+
+def get_scheduler(optimizer, config, len_train_loader):
+ scheduler = torch.optim.lr_scheduler.LambdaLR(
+ optimizer,
+ WarmupCosine(
+ config["scheduler"]["epoch_warmup"] * len_train_loader,
+ config["scheduler"]["max_epoch"] * len_train_loader,
+ config["scheduler"]["min_lr"] / config["optim"]["lr"],
+ ),
+ )
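+    # Note: the scheduler is stepped once per iteration (see utils/trainer.py),
+    # so epoch counts are converted to iteration counts with len_train_loader,
+    # and min_lr is expressed as a fraction of the base learning rate.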
+ return scheduler
+
+
+def distributed_training(gpu, ngpus_per_node, args, config):
+
+ # --- Init. distributing training
+ args.gpu = gpu
+ if args.gpu is not None:
+ print(f"Use GPU: {args.gpu} for training")
+ if args.distributed:
+ args.rank = args.rank * ngpus_per_node + gpu
+ torch.distributed.init_process_group(
+ backend=args.dist_backend,
+ init_method=args.dist_url,
+ world_size=args.world_size,
+ rank=args.rank,
+ )
+
+ # --- Build network
+ model = Segmenter(
+ input_channels=config["embedding"]["size_input"],
+ feat_channels=config["waffleiron"]["nb_channels"],
+ depth=config["waffleiron"]["depth"],
+ grid_shape=config["waffleiron"]["grids_size"],
+ nb_class=config["classif"]["nb_class"],
+ )
+
+ # ---
+ args.batch_size = config["dataloader"]["batch_size"]
+ args.workers = config["dataloader"]["num_workers"]
+ if args.distributed:
+ # For multiprocessing distributed, DistributedDataParallel constructor
+ # should always set the single device scope, otherwise,
+ # DistributedDataParallel will use all available devices.
+ torch.cuda.set_device(args.gpu)
+ model.cuda(args.gpu)
+ # When using a single GPU per process and per
+ # DistributedDataParallel, we need to divide the batch size
+ # ourselves based on the total number of GPUs of the current node.
+ args.batch_size = int(config["dataloader"]["batch_size"] / ngpus_per_node)
+ args.workers = int(
+ (config["dataloader"]["num_workers"] + ngpus_per_node - 1) / ngpus_per_node
+ )
+ model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
+ elif args.gpu is not None:
+ # Training on one GPU
+ torch.cuda.set_device(args.gpu)
+ model = model.cuda(args.gpu)
+ else:
+ # DataParallel will divide and allocate batch_size to all available GPUs
+ model = torch.nn.DataParallel(model).cuda()
+ if args.gpu == 0 or args.gpu is None:
+ print(f"Model:\n{model}")
+ nb_param = sum([p.numel() for p in model.parameters()]) / 1e6
+ print(f"{nb_param} x 10^6 trainable parameters ")
+
+ # --- Optimizer
+ optim = get_optimizer(model.parameters(), config)
+
+ # --- Dataset
+ train_dataset, val_dataset = get_datasets(config, args)
+ train_loader, val_loader, train_sampler = get_dataloader(
+ train_dataset, val_dataset, args
+ )
+
+ # --- Loss function
+ loss = SemSegLoss(
+ config["classif"]["nb_class"],
+ lovasz_weight=config["loss"]["lovasz"],
+ ).cuda(args.gpu)
+
+    # --- Scheduler: linear warmup followed by cosine annealing of the learning rate
+ scheduler = get_scheduler(optim, config, len(train_loader))
+
+ # --- Training
+ mng = TrainingManager(
+ model,
+ loss,
+ train_loader,
+ val_loader,
+ train_sampler,
+ optim,
+ scheduler,
+ config["scheduler"]["max_epoch"],
+ args.log_path,
+ args.gpu,
+ args.world_size,
+ args.fp16,
+ LIST_DATASETS.get(args.dataset.lower()).CLASS_NAME,
+ tensorboard=(not args.eval)
+ )
+ if args.restart:
+ mng.load_state()
+ if args.eval:
+ mng.one_epoch(training=False)
+ else:
+ mng.train()
+
+
+def main(args, config):
+
+ # --- Fixed args
+ # Device
+ args.device = "cuda"
+ # Node rank for distributed training
+ args.rank = 0
+    # Number of nodes for distributed training
+ args.world_size = 1
+ # URL used to set up distributed training
+ args.dist_url = "tcp://127.0.0.1:4444"
+    # Distributed backend
+ args.dist_backend = "nccl"
+ # Distributed processing
+ args.distributed = args.multiprocessing_distributed
+
+ # Create log directory
+ os.makedirs(args.log_path, exist_ok=True)
+ if args.seed is not None:
+ random.seed(args.seed)
+ np.random.seed(args.seed)
+ torch.manual_seed(args.seed)
+ torch.cuda.manual_seed(args.seed)
+ os.environ["PYTHONHASHSEED"] = str(args.seed)
+
+ if args.gpu is not None:
+ args.gpu = 0
+ args.distributed = False
+ args.multiprocessing_distributed = False
+ warnings.warn(
+ "You have chosen a specific GPU. This will completely disable data parallelism."
+ )
+
+ # Extract instances for cutmix
+ if config["augmentations"]["instance_cutmix"]:
+ get_datasets(config, args)
+
+ ngpus_per_node = torch.cuda.device_count()
+ if args.multiprocessing_distributed:
+ # Since we have ngpus_per_node processes per node, the total world_size
+ # needs to be adjusted accordingly
+ args.world_size = ngpus_per_node * args.world_size
+        # Use torch.multiprocessing.spawn to launch distributed processes:
+        # one distributed_training process per GPU
+ torch.multiprocessing.spawn(
+ distributed_training,
+ nprocs=ngpus_per_node,
+ args=(ngpus_per_node, args, config),
+ )
+ else:
+        # Single-process path: call the training function directly
+ distributed_training(args.gpu, ngpus_per_node, args, config)
+
+
+def get_default_parser():
+ parser = argparse.ArgumentParser(description="Training")
+ parser.add_argument(
+ "--dataset",
+ type=str,
+ help="Path to dataset",
+ default="nuscenes",
+ )
+ parser.add_argument(
+ "--path_dataset",
+ type=str,
+ help="Path to dataset",
+ default="/datasets_local/nuscenes/",
+ )
+ parser.add_argument(
+ "--log_path", type=str, required=True, help="Path to log folder"
+ )
+ parser.add_argument(
+ "-r", "--restart", action="store_true", default=False, help="Restart training"
+ )
+ parser.add_argument(
+ "--seed", default=None, type=int, help="Seed for initializing training"
+ )
+ parser.add_argument(
+ "--gpu", default=None, type=int, help="Set to any number to use gpu 0"
+ )
+ parser.add_argument(
+ "--multiprocessing-distributed",
+ action="store_true",
+ help="Use multi-processing distributed training to launch "
+ "N processes per node, which has N GPUs. This is the "
+ "fastest way to use PyTorch for either single node or "
+ "multi node data parallel training",
+ )
+ parser.add_argument(
+ "--fp16",
+ action="store_true",
+ default=False,
+ help="Enable autocast for mix precision training",
+ )
+ parser.add_argument(
+ "--config", type=str, required=True, help="Path to model config"
+ )
+ parser.add_argument(
+ "--trainval",
+ action="store_true",
+ default=False,
+ help="Use train + val as train set",
+ )
+ parser.add_argument(
+ "--eval",
+ action="store_true",
+ default=False,
+ help="Run validation only",
+ )
+
+ return parser
+
+
+if __name__ == "__main__":
+
+ parser = get_default_parser()
+ args = parser.parse_args()
+ config = load_model_config(args.config)
+ main(args, config)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0fe28c7
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from setuptools import setup
+from setuptools import find_packages
+
+setup(name="waffleiron", packages=find_packages())
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..f78a20f
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
diff --git a/utils/lovasz.py b/utils/lovasz.py
new file mode 100755
index 0000000..5adc74b
--- /dev/null
+++ b/utils/lovasz.py
@@ -0,0 +1,346 @@
+"""
+Lovasz-Softmax and Jaccard hinge loss in PyTorch
+Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
+Code downloaded from:
+https://github.com/edwardzhou130/PolarSeg/blob/master/network/lovasz_losses.py
+"""
+
+
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import numpy as np
+
+try:
+ from itertools import ifilterfalse
+except ImportError: # py3k
+ from itertools import filterfalse as ifilterfalse
+
+
+def lovasz_grad(gt_sorted):
+ """
+ Computes gradient of the Lovasz extension w.r.t sorted errors
+ See Alg. 1 in paper
+ """
+ p = len(gt_sorted)
+ gts = gt_sorted.sum()
+ intersection = gts - gt_sorted.float().cumsum(0)
+ union = gts + (1 - gt_sorted).float().cumsum(0)
+ jaccard = 1.0 - intersection / union
+ if p > 1: # cover 1-pixel case
+ jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
+ return jaccard
+
+
+def iou_binary(preds, labels, EMPTY=1.0, ignore=None, per_image=True):
+ """
+ IoU for foreground class
+ binary: 1 foreground, 0 background
+ """
+ if not per_image:
+ preds, labels = (preds,), (labels,)
+ ious = []
+ for pred, label in zip(preds, labels):
+ intersection = ((label == 1) & (pred == 1)).sum()
+ union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()
+ if not union:
+ iou = EMPTY
+ else:
+ iou = float(intersection) / float(union)
+ ious.append(iou)
+    iou = mean(ious)  # mean across images if per_image
+ return 100 * iou
+
+
+def iou(preds, labels, C, EMPTY=1.0, ignore=None, per_image=False):
+ """
+ Array of IoU for each (non ignored) class
+ """
+ if not per_image:
+ preds, labels = (preds,), (labels,)
+ ious = []
+ for pred, label in zip(preds, labels):
+ iou = []
+ for i in range(C):
+ if (
+ i != ignore
+ ): # The ignored label is sometimes among predicted classes (ENet - CityScapes)
+ intersection = ((label == i) & (pred == i)).sum()
+ union = ((label == i) | ((pred == i) & (label != ignore))).sum()
+ if not union:
+ iou.append(EMPTY)
+ else:
+ iou.append(float(intersection) / float(union))
+ ious.append(iou)
+    ious = [mean(iou) for iou in zip(*ious)]  # mean across images if per_image
+ return 100 * np.array(ious)
+
+
+# --------------------------- BINARY LOSSES ---------------------------
+
+
+def lovasz_hinge(logits, labels, per_image=True, ignore=None):
+ r"""
+ Binary Lovasz hinge loss
+ logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+ labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+ per_image: compute the loss per image instead of per batch
+ ignore: void class id
+ """
+ if per_image:
+ loss = mean(
+ lovasz_hinge_flat(
+ *flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)
+ )
+ for log, lab in zip(logits, labels)
+ )
+ else:
+ loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
+ return loss
+
+
+def lovasz_hinge_flat(logits, labels):
+ r"""
+ Binary Lovasz hinge loss
+ logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
+ labels: [P] Tensor, binary ground truth labels (0 or 1)
+ ignore: label to ignore
+ """
+ if len(labels) == 0:
+ # only void pixels, the gradients should be 0
+ return logits.sum() * 0.0
+ signs = 2.0 * labels.float() - 1.0
+ errors = 1.0 - logits * Variable(signs)
+ errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
+ perm = perm.data
+ gt_sorted = labels[perm]
+ grad = lovasz_grad(gt_sorted)
+ loss = torch.dot(F.relu(errors_sorted), Variable(grad))
+ return loss
+
+
+def flatten_binary_scores(scores, labels, ignore=None):
+ """
+ Flattens predictions in the batch (binary case)
+ Remove labels equal to 'ignore'
+ """
+ scores = scores.view(-1)
+ labels = labels.view(-1)
+ if ignore is None:
+ return scores, labels
+ valid = labels != ignore
+ vscores = scores[valid]
+ vlabels = labels[valid]
+ return vscores, vlabels
+
+
+class StableBCELoss(torch.nn.modules.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input, target):
+ neg_abs = -input.abs()
+ loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
+ return loss.mean()
+
+
+def binary_xloss(logits, labels, ignore=None):
+ r"""
+ Binary Cross entropy loss
+ logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+ labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+ ignore: void class id
+ """
+ logits, labels = flatten_binary_scores(logits, labels, ignore)
+ loss = StableBCELoss()(logits, Variable(labels.float()))
+ return loss
+
+
+# --------------------------- MULTICLASS LOSSES ---------------------------
+
+
+def lovasz_softmax(probas, labels, classes="present", per_image=False, ignore=None):
+ """
+ Multi-class Lovasz-Softmax loss
+ probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
+ Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
+ labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
+ classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ per_image: compute the loss per image instead of per batch
+ ignore: void class labels
+ """
+ if per_image:
+ loss = mean(
+ lovasz_softmax_flat(
+ *flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore),
+ classes=classes
+ )
+ for prob, lab in zip(probas, labels)
+ )
+ else:
+ loss = lovasz_softmax_flat(
+ *flatten_probas(probas, labels, ignore), classes=classes
+ )
+ return loss
+
+
+def lovasz_softmax_flat(probas, labels, classes="present"):
+ """
+ Multi-class Lovasz-Softmax loss
+ probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
+ labels: [P] Tensor, ground truth labels (between 0 and C - 1)
+ classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ """
+ if probas.numel() == 0:
+ # only void pixels, the gradients should be 0
+ return probas * 0.0
+ C = probas.size(1)
+ losses = []
+ class_to_sum = list(range(C)) if classes in ["all", "present"] else classes
+ for c in class_to_sum:
+ fg = (labels == c).float() # foreground for class c
+ if classes == "present" and fg.sum() == 0:
+ continue
+ if C == 1:
+ if len(classes) > 1:
+ raise ValueError("Sigmoid output possible only with 1 class")
+ class_pred = probas[:, 0]
+ else:
+ class_pred = probas[:, c]
+ errors = (Variable(fg) - class_pred).abs()
+ errors_sorted, perm = torch.sort(errors, 0, descending=True)
+ perm = perm.data
+ fg_sorted = fg[perm]
+ losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
+ return mean(losses)
+
+
+def flatten_probas(probas, labels, ignore=None):
+ """
+ Flattens predictions in the batch
+ """
+ if probas.dim() == 3:
+ # assumes output of a sigmoid layer
+ B, H, W = probas.size()
+ probas = probas.view(B, 1, H, W)
+ elif probas.dim() == 5:
+ # 3D segmentation
+ B, C, L, H, W = probas.size()
+ probas = probas.contiguous().view(B, C, L, H * W)
+ B, C, H, W = probas.size()
+ probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C
+ labels = labels.view(-1)
+ if ignore is None:
+ return probas, labels
+ valid = labels != ignore
+ vprobas = probas[valid.nonzero().squeeze()]
+ vlabels = labels[valid]
+ return vprobas, vlabels
+
+
+def xloss(logits, labels, ignore=None):
+    """
+    Cross entropy loss
+    """
+    return F.cross_entropy(
+        logits, Variable(labels), ignore_index=255 if ignore is None else ignore
+    )
+
+
+def jaccard_loss(probas, labels, ignore=None, smooth=100, bk_class=None):
+    """
+    Multi-class smoothed Jaccard loss
+    probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
+    labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
+    ignore: void class labels
+    smooth: smoothing constant added to intersection and union
+    bk_class: optional background class excluded from the one-hot encoding
+    """
+ vprobas, vlabels = flatten_probas(probas, labels, ignore)
+
+ true_1_hot = torch.eye(vprobas.shape[1])[vlabels]
+
+ if bk_class:
+ one_hot_assignment = torch.ones_like(vlabels)
+ one_hot_assignment[vlabels == bk_class] = 0
+ one_hot_assignment = one_hot_assignment.float().unsqueeze(1)
+ true_1_hot = true_1_hot * one_hot_assignment
+
+ true_1_hot = true_1_hot.to(vprobas.device)
+ intersection = torch.sum(vprobas * true_1_hot)
+ cardinality = torch.sum(vprobas + true_1_hot)
+    loss = ((intersection + smooth) / (cardinality - intersection + smooth)).mean()
+ return (1 - loss) * smooth
+
+
+def hinge_jaccard_loss(
+ probas, labels, ignore=None, classes="present", hinge=0.1, smooth=100
+):
+ """
+ Multi-class Hinge Jaccard loss
+ probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
+ Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
+ labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
+ classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ ignore: void class labels
+ """
+ vprobas, vlabels = flatten_probas(probas, labels, ignore)
+ C = vprobas.size(1)
+ losses = []
+ class_to_sum = list(range(C)) if classes in ["all", "present"] else classes
+ for c in class_to_sum:
+ if c in vlabels:
+ c_sample_ind = vlabels == c
+ cprobas = vprobas[c_sample_ind, :]
+ non_c_ind = np.array([a for a in class_to_sum if a != c])
+ class_pred = cprobas[:, c]
+ max_non_class_pred = torch.max(cprobas[:, non_c_ind], dim=1)[0]
+ TP = (
+ torch.sum(torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0)
+ + smooth
+ )
+ FN = torch.sum(
+ torch.clamp(max_non_class_pred - class_pred, min=-hinge) + hinge
+ )
+
+ if (~c_sample_ind).sum() == 0:
+ FP = 0
+ else:
+ nonc_probas = vprobas[~c_sample_ind, :]
+ class_pred = nonc_probas[:, c]
+ max_non_class_pred = torch.max(nonc_probas[:, non_c_ind], dim=1)[0]
+ FP = torch.sum(
+ torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0
+ )
+
+ losses.append(1 - TP / (TP + FP + FN))
+
+ if len(losses) == 0:
+ return 0
+ return mean(losses)
+
+
+# --------------------------- HELPER FUNCTIONS ---------------------------
+def isnan(x):
+ return x != x
+
+
+def mean(l, ignore_nan=False, empty=0):
+ """
+ nanmean compatible with generators.
+ """
+ l = iter(l)
+ if ignore_nan:
+ l = ifilterfalse(isnan, l)
+ try:
+ n = 1
+ acc = next(l)
+ except StopIteration:
+ if empty == "raise":
+ raise ValueError("Empty mean")
+ return empty
+ for n, v in enumerate(l, 2):
+ acc += v
+ if n == 1:
+ return acc
+ return acc / n
diff --git a/utils/metrics.py b/utils/metrics.py
new file mode 100644
index 0000000..7821e4f
--- /dev/null
+++ b/utils/metrics.py
@@ -0,0 +1,63 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import numpy as np
+from .lovasz import lovasz_softmax_flat
+from torch.nn.functional import softmax
+from torch.nn import Module, CrossEntropyLoss
+
+
+def fast_hist(pred, label, n):
+ assert torch.all(label > -1) & torch.all(pred > -1)
+ assert torch.all(label < n) & torch.all(pred < n)
+ return torch.bincount(n * label + pred, minlength=n**2).reshape(n, n)
+
+
+def per_class_iu(hist):
+ with np.errstate(divide="ignore", invalid="ignore"):
+ return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
+
+
+def overall_accuracy(hist):
+ with np.errstate(divide="ignore", invalid="ignore"):
+ return np.diag(hist).sum() / hist.sum()
+
+
+def per_class_accuracy(hist):
+ with np.errstate(divide="ignore", invalid="ignore"):
+ return np.diag(hist) / hist.sum(1)
+
+
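+# Hedged usage sketch (illustrative only; _demo_miou and its inputs are made
+# up): how fast_hist and per_class_iu combine into a mIoU score.
+def _demo_miou():
+    pred = torch.tensor([0, 1, 1, 2])
+    label = torch.tensor([0, 1, 2, 2])
+    hist = fast_hist(pred, label, n=3).cpu().numpy()
+    return 100 * np.nanmean(per_class_iu(hist))  # ~66.7 for this toy input
+
+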
+class SemSegLoss(Module):
+ def __init__(self, nb_class, lovasz_weight=1.0, ignore_index=255):
+ super().__init__()
+ self.nb_class = nb_class
+ self.ignore_index = ignore_index
+ self.lovasz_weight = lovasz_weight
+ self.ce = CrossEntropyLoss(ignore_index=ignore_index)
+
+ def __call__(self, pred, true):
+ loss = self.ce(pred, true)
+
+ if self.lovasz_weight > 0:
+ where = true != self.ignore_index
+ if where.sum() > 0:
+ loss += self.lovasz_weight * lovasz_softmax_flat(
+ softmax(pred[where], dim=1),
+ true[where],
+ )
+
+ return loss
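+
+
+# Hedged usage sketch (illustrative only; _demo_loss and its inputs are made
+# up): pred is [P, C] logits, true is [P] labels with 255 marking ignored points.
+def _demo_loss():
+    pred = torch.randn(8, 3)
+    true = torch.tensor([0, 1, 2, 255, 0, 1, 2, 255])
+    return SemSegLoss(nb_class=3)(pred, true)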
diff --git a/utils/scheduler.py b/utils/scheduler.py
new file mode 100644
index 0000000..4b50196
--- /dev/null
+++ b/utils/scheduler.py
@@ -0,0 +1,33 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+
+class WarmupCosine:
+ def __init__(self, warmup_end, max_iter, factor_min):
+ self.max_iter = max_iter
+ self.warmup_end = warmup_end
+ self.factor_min = factor_min
+
+ def __call__(self, iter):
+ if iter < self.warmup_end:
+ factor = iter / self.warmup_end
+ else:
+ iter = iter - self.warmup_end
+ max_iter = self.max_iter - self.warmup_end
+ iter = (iter / max_iter) * np.pi
+ factor = self.factor_min + 0.5 * (1 - self.factor_min) * (np.cos(iter) + 1)
+ return factor
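+
+
+# Hedged usage sketch (illustrative only; _demo_factor and its numbers are made
+# up): the multiplicative factor at a few milestones for a 1000-iteration
+# warmup inside a 10000-iteration schedule.
+def _demo_factor():
+    fn = WarmupCosine(warmup_end=1000, max_iter=10000, factor_min=0.01)
+    # 0.0 at start, 0.5 mid-warmup, 1.0 at the end of warmup, factor_min at the end
+    return [fn(it) for it in (0, 500, 1000, 10000)]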
diff --git a/utils/trainer.py b/utils/trainer.py
new file mode 100644
index 0000000..e73e61d
--- /dev/null
+++ b/utils/trainer.py
@@ -0,0 +1,306 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import warnings
+import numpy as np
+from tqdm import tqdm
+from torch.cuda.amp import GradScaler
+from torch.utils.tensorboard import SummaryWriter
+from utils.metrics import overall_accuracy, fast_hist, per_class_iu, per_class_accuracy
+
+
+class TrainingManager:
+ def __init__(
+ self,
+ net,
+ loss,
+ loader_train,
+ loader_val,
+ train_sampler, # If provided, we assume distributed training
+ optim,
+ scheduler,
+ max_epoch,
+ path,
+ rank,
+ world_size,
+ fp16=True,
+ class_names=None,
+ tensorboard=True,
+ ):
+
+ # Optim. methods
+ self.optim = optim
+ self.fp16 = fp16
+ self.scaler = GradScaler() if fp16 else None
+ self.scheduler = scheduler
+
+ # Dataloaders
+ self.max_epoch = max_epoch
+ self.loader_train = loader_train
+ self.loader_val = loader_val
+ self.train_sampler = train_sampler
+ self.class_names = class_names
+
+ # Network
+ self.net = net
+ self.rank = rank
+ self.world_size = world_size
+ print(f"Trainer on gpu: {self.rank}. World size:{self.world_size}.")
+
+ # Loss
+ self.loss = loss
+
+ # Checkpoints
+ self.best_miou = 0
+ self.current_epoch = 0
+ self.path_to_ckpt = path
+
+ # Monitoring
+ if tensorboard and (self.rank == 0 or self.rank is None):
+ self.writer_train = SummaryWriter(
+ path + "/tensorboard/train/",
+ purge_step=self.current_epoch * len(self.loader_train),
+ flush_secs=30,
+ )
+ self.writer_val = SummaryWriter(
+ path + "/tensorboard/val/",
+ purge_step=self.current_epoch,
+ flush_secs=30,
+ )
+ else:
+ self.writer_val = None
+ self.writer_train = None
+
+ def print_log(self, running_loss, oAcc, mAcc, mIoU, ious):
+ if self.rank == 0 or self.rank is None:
+ # Global score
+ log = (
+ f"\nEpoch: {self.current_epoch:d} :\n"
+ + f" Loss = {running_loss:.3f}"
+ + f" - oAcc = {oAcc:.1f}"
+ + f" - mAcc = {mAcc:.1f}"
+ + f" - mIoU = {mIoU:.1f}"
+ )
+ print(log)
+ # Per class score
+ log = ""
+ for i, s in enumerate(ious):
+ if self.class_names is None:
+ log += f"Class {i}: {100 * s:.1f} - "
+ else:
+ log += f"{self.class_names[i]}: {100 * s:.1f} - "
+ print(log[:-3])
+ # Recall best mIoU
+ print(f"Best mIoU was {self.best_miou:.1f}.")
+
+ def gather_scores(self, list_tensors):
+ if self.rank == 0:
+ tensor_reduced = [
+ [torch.empty_like(t) for _ in range(self.world_size)]
+ for t in list_tensors
+ ]
+ for t, t_reduced in zip(list_tensors, tensor_reduced):
+ torch.distributed.gather(t, t_reduced)
+ tensor_reduced = [sum(t).cpu() for t in tensor_reduced]
+ return tensor_reduced
+ else:
+ for t in list_tensors:
+ torch.distributed.gather(t)
+
+ def one_epoch(self, training=True):
+
+ # Train or eval mode
+ if training:
+ net = self.net.train()
+ loader = self.loader_train
+ if self.rank == 0 or self.rank is None:
+ print("\nTraining: %d/%d epochs" % (self.current_epoch, self.max_epoch))
+ writer = self.writer_train
+ if self.train_sampler is not None:
+ self.train_sampler.set_epoch(self.current_epoch)
+ else:
+ net = self.net.eval()
+ loader = self.loader_val
+ if self.rank == 0 or self.rank is None:
+ print(
+ "\nValidation: %d/%d epochs" % (self.current_epoch, self.max_epoch)
+ )
+ writer = self.writer_val
+ print_freq = np.max((len(loader) // 10, 1))
+
+ # Stat.
+ running_loss = 0.0
+ confusion_matrix = 0
+
+ # Loop over mini-batches
+ if self.rank == 0 or self.rank is None:
+ bar_format = "{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}"
+ loader = tqdm(loader, bar_format=bar_format)
+ for it, batch in enumerate(loader):
+
+ # Network inputs
+ feat = batch["feat"].cuda(self.rank, non_blocking=True)
+ labels = batch["labels_orig"].cuda(self.rank, non_blocking=True)
+ batch["upsample"] = [
+ up.cuda(self.rank, non_blocking=True) for up in batch["upsample"]
+ ]
+ cell_ind = batch["cell_ind"].cuda(self.rank, non_blocking=True)
+ occupied_cell = batch["occupied_cells"].cuda(self.rank, non_blocking=True)
+ neighbors_emb = batch["neighbors_emb"].cuda(self.rank, non_blocking=True)
+ net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+ # Get prediction and loss
+ with torch.autocast("cuda", enabled=self.fp16):
+ # Logits
+ if training:
+ out = net(*net_inputs)
+ else:
+ with torch.no_grad():
+ out = net(*net_inputs)
+ # Upsample to original resolution
+ out_upsample = []
+ for id_b, closest_point in enumerate(batch["upsample"]):
+ temp = out[id_b, :, closest_point]
+ out_upsample.append(temp.T)
+ out = torch.cat(out_upsample, dim=0)
+ # Loss
+ loss = self.loss(out, labels)
+ running_loss += loss.detach()
+
+ # Confusion matrix
+ with torch.no_grad():
+ nb_class = out.shape[1]
+ pred_label = out.max(1)[1]
+ where = labels != 255
+ confusion_matrix += fast_hist(
+ pred_label[where], labels[where], nb_class
+ )
+
+ # Logs
+ if it % print_freq == print_freq - 1 or it == len(loader) - 1:
+ # Gather scores
+ if self.train_sampler is not None:
+ out = self.gather_scores([running_loss, confusion_matrix])
+ else:
+ out = [running_loss.cpu(), confusion_matrix.cpu()]
+ if self.rank == 0 or self.rank is None:
+ # Compute scores
+ oAcc = 100 * overall_accuracy(out[1])
+ mAcc = 100 * np.nanmean(per_class_accuracy(out[1]))
+ ious = per_class_iu(out[1])
+ mIoU = 100 * np.nanmean(ious)
+ running_loss_reduced = out[0].item() / self.world_size / (it + 1)
+ # Print score
+ self.print_log(running_loss_reduced, oAcc, mAcc, mIoU, ious)
+ # Save in tensorboard
+ if (writer is not None) and (training or it == len(loader) - 1):
+ header = "Train" if training else "Test"
+ step = (
+ self.current_epoch * len(loader) + it
+ if training
+ else self.current_epoch
+ )
+ writer.add_scalar(header + "/loss", running_loss_reduced, step)
+ writer.add_scalar(header + "/oAcc", oAcc, step)
+ writer.add_scalar(header + "/mAcc", mAcc, step)
+ writer.add_scalar(header + "/mIoU", mIoU, step)
+ writer.add_scalar(
+ header + "/lr", self.optim.param_groups[0]["lr"], step
+ )
+
+ # Gradient step
+ if training:
+ self.optim.zero_grad(set_to_none=True)
+ if self.fp16:
+ self.scaler.scale(loss).backward()
+ self.scaler.step(self.optim)
+ self.scaler.update()
+ else:
+ loss.backward()
+ self.optim.step()
+ if self.scheduler is not None:
+ self.scheduler.step()
+
+ # Return score
+ if self.rank == 0 or self.rank is None:
+ return mIoU
+ else:
+ return None
+
+ def load_state(self, best=False):
+ filename = self.path_to_ckpt
+ filename += "/ckpt_best.pth" if best else "/ckpt_last.pth"
+ rank = 0 if self.rank is None else self.rank
+ ckpt = torch.load(
+ filename,
+ map_location=f"cuda:{rank}",
+ )
+ self.net.load_state_dict(ckpt["net"])
+ if ckpt.get("optim") is None:
+ warnings.warn("Optimizer state not available")
+ else:
+ self.optim.load_state_dict(ckpt["optim"])
+ if self.scheduler is not None:
+ if ckpt.get("scheduler") is None:
+ warnings.warn("Scheduler state not available")
+ else:
+ self.scheduler.load_state_dict(ckpt["scheduler"])
+ if self.fp16:
+ if ckpt.get("scaler") is None:
+ warnings.warn("Scaler state not available")
+ else:
+ self.scaler.load_state_dict(ckpt["scaler"])
+ if ckpt.get("best_miou") is not None:
+ self.best_miou = ckpt["best_miou"]
+ if ckpt.get("epoch") is not None:
+ self.current_epoch = ckpt["epoch"] + 1
+ print(
+ f"Checkpoint loaded on {torch.device(rank)} (cuda:{rank}): {self.path_to_ckpt}"
+ )
+
+ def save_state(self, best=False):
+ if self.rank == 0 or self.rank is None:
+ dict_to_save = {
+ "epoch": self.current_epoch,
+ "net": self.net.state_dict(),
+ "optim": self.optim.state_dict(),
+ "scheduler": self.scheduler.state_dict()
+ if self.scheduler is not None
+ else None,
+ "scaler": self.scaler.state_dict() if self.fp16 else None,
+ "best_miou": self.best_miou,
+ }
+ filename = self.path_to_ckpt
+ filename += "/ckpt_best.pth" if best else "/ckpt_last.pth"
+ torch.save(dict_to_save, filename)
+
+ def train(self):
+ for _ in range(self.current_epoch, self.max_epoch):
+ # Train
+ self.one_epoch(training=True)
+ # Val
+ miou = self.one_epoch(training=False)
+ # Save best checkpoint
+ if miou is not None and miou > self.best_miou:
+ self.best_miou = miou
+ self.save_state(best=True)
+ print(f"\n\n*** New best mIoU: {self.best_miou:.1f}.\n")
+ # Save last checkpoint
+ self.save_state()
+ # Increase epoch number
+ self.current_epoch += 1
+ if self.rank == 0 or self.rank is None:
+ print("Finished Training")
diff --git a/utils/transforms.py b/utils/transforms.py
new file mode 100644
index 0000000..f4a6625
--- /dev/null
+++ b/utils/transforms.py
@@ -0,0 +1,311 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import torch
+import numpy as np
+from glob import glob
+
+
+class Compose:
+ def __init__(self, transformations):
+ self.transformations = transformations
+
+ def __call__(self, pcloud, labels):
+ for t in self.transformations:
+ pcloud, labels = t(pcloud, labels)
+ return pcloud, labels
+
+
+class RandomApply:
+ def __init__(self, transformation, prob=0.5):
+ self.prob = prob
+ self.transformation = transformation
+
+ def __call__(self, pcloud, labels):
+ if torch.rand(1) < self.prob:
+ pcloud, labels = self.transformation(pcloud, labels)
+ return pcloud, labels
+
+
+class Transformation:
+ def __init__(self, inplace=False):
+ self.inplace = inplace
+
+    def __call__(self, pcloud, labels):
+        if labels is None:
+            # Return a pair so that subclasses can always unpack the result
+            return (pcloud if self.inplace else np.array(pcloud, copy=True)), None
+        out = (
+            (pcloud, labels)
+            if self.inplace
+            else (np.array(pcloud, copy=True), np.array(labels, copy=True))
+        )
+        return out
+
+
+class Identity(Transformation):
+ def __init__(self, inplace=False):
+ super().__init__(inplace)
+
+ def __call__(self, pcloud, labels):
+ return super().__call__(pcloud, labels)
+
+
+class Rotation(Transformation):
+ def __init__(self, dim=2, range=np.pi, inplace=False):
+ super().__init__(inplace)
+ self.range = range
+ self.inplace = inplace
+ if dim == 2:
+ self.dims = (0, 1)
+ elif dim == 1:
+ self.dims = (0, 2)
+ elif dim == 0:
+ self.dims = (1, 2)
+
+ def __call__(self, pcloud, labels):
+        # Draw a random angle
+        theta = (2 * torch.rand(1)[0] - 1) * self.range
+ # Build rotation matrix
+ rot = np.array(
+ [
+ [np.cos(theta), np.sin(theta)],
+ [-np.sin(theta), np.cos(theta)],
+ ]
+ )
+ # Apply rotation
+ pcloud, labels = super().__call__(pcloud, labels)
+ pcloud[:, self.dims] = pcloud[:, self.dims] @ rot
+ return pcloud, labels
+
+
+class Scale(Transformation):
+ def __init__(self, dims=(0, 1), range=0.05, inplace=False):
+ super().__init__(inplace)
+ self.dims = dims
+ self.range = range
+
+ def __call__(self, pcloud, labels):
+ pcloud, labels = super().__call__(pcloud, labels)
+ scale = 1 + (2 * torch.rand(1).item() - 1) * self.range
+ pcloud[:, self.dims] *= scale
+ return pcloud, labels
+
+
+class FlipXY(Transformation):
+ def __init__(self, inplace=False):
+ super().__init__(inplace=inplace)
+
+ def __call__(self, pcloud, labels):
+ pcloud, labels = super().__call__(pcloud, labels)
+        dim = torch.randint(2, (1,))[0]
+        pcloud[:, dim] *= -1.0
+ return pcloud, labels
+
+
+class LimitNumPoints(Transformation):
+ def __init__(self, dims=(0, 1, 2), max_point=30000, random=False):
+ super().__init__(inplace=True)
+ self.dims = dims
+ self.max_points = max_point
+ self.random = random
+ assert max_point > 0
+
+ def __call__(self, pcloud, labels):
+ pc, labels = super().__call__(pcloud, labels)
+ if pc.shape[0] > self.max_points:
+ if self.random:
+ center = torch.randint(pc.shape[0], (1,))[0]
+ center = pc[center : center + 1, self.dims]
+ else:
+ center = np.zeros((1, len(self.dims)))
+ idx = np.argsort(np.square(pc[:, self.dims] - center).sum(axis=1))[
+ : self.max_points
+ ]
+ pc, labels = pc[idx], labels[idx]
+ return pc, labels
+
+
+class Crop(Transformation):
+ def __init__(self, dims=(0, 1, 2), fov=((-5, -5, -5), (5, 5, 5)), eps=1e-4):
+ super().__init__(inplace=True)
+ self.dims = dims
+ self.fov = fov
+ self.eps = eps
+ assert len(fov[0]) == len(fov[1]), "Min and Max FOV must have the same length."
+ for i, (min, max) in enumerate(zip(*fov)):
+ assert (
+ min < max
+ ), f"Field of view: min ({min}) < max ({max}) is expected on dimension {i}."
+
+ def __call__(self, pcloud, labels):
+ pc, labels = super().__call__(pcloud, labels)
+
+ where = None
+        for i, d in enumerate(self.dims):
+ temp = (pc[:, d] > self.fov[0][i] + self.eps) & (
+ pc[:, d] < self.fov[1][i] - self.eps
+ )
+ where = temp if where is None else where & temp
+
+ return pc[where], labels[where]
+
+
+class Voxelize(Transformation):
+ def __init__(self, dims=(0, 1, 2), voxel_size=0.1, random=False):
+ super().__init__(inplace=True)
+ self.dims = dims
+ self.voxel_size = voxel_size
+ self.random = random
+ assert voxel_size >= 0
+
+ def __call__(self, pcloud, labels):
+ pc, labels = super().__call__(pcloud, labels)
+ if self.voxel_size <= 0:
+ return pc, labels
+
+ if self.random:
+ permute = torch.randperm(pc.shape[0])
+ pc, labels = pc[permute], labels[permute]
+
+ pc_shift = pc[:, self.dims] - pc[:, self.dims].min(0, keepdims=True)
+
+ _, ind = np.unique(
+ (pc_shift / self.voxel_size).astype("int"), return_index=True, axis=0
+ )
+
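+        # e.g. with voxel_size=0.1, points falling in the same 10 cm voxel along
+        # the kept dimensions collapse to a single representative (the first one,
+        # which is random after the optional shuffle above)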
+ return pc[ind, :], None if labels is None else labels[ind]
+
+
+class InstanceCutMix(Transformation):
+ def __init__(self, phase="train"):
+ """Instance cutmix coded only for SemanticKITTI"""
+ super().__init__(inplace=True)
+
+ raise ValueError("Include latest verion")
+
+ self.phase = phase
+ self.rootdir = "/root/local_storage/semantic_kitti_instance_" + self.phase
+ self.bank = {1: [], 2: [], 5: [], 6: [], 7: []}
+ for key in self.bank.keys():
+ self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+ self.loaded = self.test_loaded()
+ # v2
+ self.rot = Compose(
+ (
+ FlipXY(inplace=True),
+ Rotation(inplace=True),
+ Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+ )
+ )
+ self.nb_to_add = 40
+ self.vox = Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+ """ v1
+ self.rot = Rotation(inplace=False)
+ self.max_size = 100 # Unused
+ self.nb_to_add = 20
+ self.vox = Voxelize(dims=(0, 1, 2), voxel_size=.1, random=True)
+ """
+
+ def test_loaded(self):
+ if self.phase == "train":
+ if len(self.bank[1]) != 5083:
+ print(len(self.bank[1]), 5083)
+ return False
+ if len(self.bank[2]) != 3092:
+ print(len(self.bank[2]), 3092)
+ return False
+ if len(self.bank[5]) != 8084:
+ print(len(self.bank[5]), 8084)
+ return False
+ if len(self.bank[6]) != 1551:
+ print(len(self.bank[6]), 1551)
+ return False
+ if len(self.bank[7]) != 560:
+ print(len(self.bank[7]), 560)
+ return False
+ elif self.phase == "trainval":
+ if len(self.bank[1]) != 8213:
+ print(len(self.bank[1]), 8213)
+ return False
+ if len(self.bank[2]) != 4169:
+ print(len(self.bank[2]), 4169)
+ return False
+ if len(self.bank[5]) != 12190:
+ print(len(self.bank[5]), 12190)
+ return False
+ if len(self.bank[6]) != 2943:
+ print(len(self.bank[6]), 2943)
+ return False
+ if len(self.bank[7]) != 701:
+ print(len(self.bank[7]), 701)
+ return False
+ return True
+
+ def add_in_bank(self, pc, class_label, instance_label):
+ for id_class in self.bank.keys():
+ where_class = class_label == id_class
+ all_instances = np.unique(instance_label[where_class])
+ for id_instance in all_instances:
+ # Segment instance
+ where_ins = instance_label == id_instance
+ if where_ins.sum() <= 5:
+ continue
+ pc_to_add = pc[where_ins, :]
+ # Center instance
+ pc_to_add[:, :2] -= pc_to_add[:, :2].mean(0, keepdims=True)
+ pc_to_add[:, 2] -= pc_to_add[:, 2].min(0, keepdims=True)
+ #
+ pathfile = os.path.join(
+ self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+ )
+ os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+ pc_to_add.tofile(pathfile)
+ self.bank[id_class].append(pathfile)
+
+ def add_in_pc(self, pc, class_label):
+ new_pc = [pc]
+ new_label = [class_label]
+ # Find location where to add new object (on a surface)
+ pc_vox, class_label_vox = self.vox(pc, class_label)
+
+ # v2
+ where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+
+ """ v1
+ where_surface = np.where( ( (class_label_vox>=8) & (class_label_vox<=11) ) | (class_label_vox==16) )[0]
+ """
+
+ where_surface = where_surface[torch.randperm(len(where_surface))]
+ id_tot = 0
+ for id_class in self.bank.keys():
+ which_one = torch.randint(len(self.bank[id_class]), (self.nb_to_add,))
+ for ii in range(self.nb_to_add):
+ p = pc_vox[where_surface[id_tot]]
+ object = self.bank[id_class][which_one[ii]]
+ object = np.fromfile(object, dtype=np.float32).reshape((-1, 4))
+ object, _ = self.rot(object, 1)
+ object[:, :3] += p[:3][None]
+ new_pc.append(object)
+                new_label.append(np.ones((object.shape[0],), dtype=np.int64) * id_class)
+ id_tot += 1
+ return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+ def __call__(self, pc, class_label, instance_label):
+ if not self.loaded:
+ self.add_in_bank(pc, class_label, instance_label)
+ return np.zeros((2, 4)), None
+ return self.add_in_pc(pc, class_label)
diff --git a/waffleiron/__init__.py b/waffleiron/__init__.py
new file mode 100644
index 0000000..ee370e5
--- /dev/null
+++ b/waffleiron/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .backbone import WaffleIron
+from .segmenter import Segmenter
+
+__all__ = ["WaffleIron", "Segmenter"]
diff --git a/waffleiron/backbone.py b/waffleiron/backbone.py
new file mode 100644
index 0000000..a6f6abd
--- /dev/null
+++ b/waffleiron/backbone.py
@@ -0,0 +1,146 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import numpy as np
+import torch.nn as nn
+from torch import autocast
+
+
+def build_proj_matrix(indices_non_zeros, occupied_cell, batch_size, num_2d_cells):
+ num_points = indices_non_zeros.shape[1] // batch_size
+ matrix_shape = (batch_size, num_2d_cells, num_points)
+
+ # Sparse projection matrix for Inflate step
+ inflate = torch.sparse_coo_tensor(
+ indices_non_zeros, occupied_cell.reshape(-1), matrix_shape
+ ).transpose(1, 2)
+
+    # Count the number of points in each cell (used in the flatten step)
+ with autocast("cuda", enabled=False):
+ num_points_per_cells = torch.bmm(
+ inflate, torch.bmm(inflate.transpose(1, 2), occupied_cell.unsqueeze(-1))
+ )
+
+ # Sparse projection matrix for Flatten step (projection & average in each 2d cells)
+ weight_per_point = 1.0 / (num_points_per_cells.reshape(-1) + 1e-6)
+ weight_per_point *= occupied_cell.reshape(-1)
+ flatten = torch.sparse_coo_tensor(indices_non_zeros, weight_per_point, matrix_shape)
+
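+    # In words: "flatten" (B x num_cells x N) averages the point tokens falling
+    # in each 2D cell, and "inflate" (B x N x num_cells) copies each cell value
+    # back to the points it contains; both are sparse and built once per batch.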
+ return {"flatten": flatten, "inflate": inflate}
+
+
+class ChannelMix(nn.Module):
+ def __init__(self, channels):
+ super().__init__()
+ self.norm = nn.BatchNorm1d(channels)
+ self.mlp = nn.Sequential(
+ nn.Conv1d(channels, channels, 1),
+ nn.ReLU(inplace=True),
+ nn.Conv1d(channels, channels, 1),
+ )
+ self.scale = nn.Conv1d(
+ channels, channels, 1, bias=False, groups=channels
+ ) # Implement LayerScale
+
+ def forward(self, tokens):
+ """tokens <- tokens + LayerScale( MLP( BN(tokens) ) )"""
+ return tokens + self.scale(self.mlp(self.norm(tokens)))
+
+
+class SpatialMix(nn.Module):
+ def __init__(self, channels, grid_shape):
+ super().__init__()
+ self.H, self.W = grid_shape
+ self.norm = nn.BatchNorm1d(channels)
+ self.ffn = nn.Sequential(
+ nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+ )
+ self.scale = nn.Conv1d(
+ channels, channels, 1, bias=False, groups=channels
+ ) # Implement LayerScale
+ self.grid_shape = grid_shape
+
+ def extra_repr(self):
+ return f"(grid): [{self.grid_shape[0]}, {self.grid_shape[1]}]"
+
+ def forward(self, tokens, sp_mat):
+ """tokens <- tokens + LayerScale( Inflate( FFN( Flatten( BN(tokens) ) ) )"""
+ B, C, N = tokens.shape
+ residual = self.norm(tokens)
+ # Flatten
+ with autocast("cuda", enabled=False):
+ residual = torch.bmm(
+ sp_mat["flatten"], residual.transpose(1, 2).float()
+ ).transpose(1, 2)
+ residual = residual.reshape(B, C, self.H, self.W)
+ # FFN
+ residual = self.ffn(residual)
+ # Inflate
+ residual = residual.reshape(B, C, self.H * self.W)
+ with autocast("cuda", enabled=False):
+ residual = torch.bmm(
+ sp_mat["inflate"], residual.transpose(1, 2).float()
+ ).transpose(1, 2)
+ residual = residual.reshape(B, C, N)
+ return tokens + self.scale(residual)
+
+
+class WaffleIron(nn.Module):
+ def __init__(self, channels, depth, grids_shape):
+ super().__init__()
+ self.grids_shape = grids_shape
+ self.channel_mix = nn.ModuleList([ChannelMix(channels) for _ in range(depth)])
+ self.spatial_mix = nn.ModuleList(
+ [
+ SpatialMix(channels, grids_shape[d % len(grids_shape)])
+ for d in range(depth)
+ ]
+ )
+
+ def forward(self, tokens, cell_ind, occupied_cell):
+
+ # Build projection matrices
+ batch_size, num_points = tokens.shape[0], tokens.shape[-1]
+ point_ind = (
+ torch.arange(num_points, device=tokens.device)
+ .unsqueeze(0)
+ .expand(batch_size, -1)
+ .reshape(1, -1)
+ )
+ batch_ind = (
+ torch.arange(batch_size, device=tokens.device)
+ .unsqueeze(1)
+ .expand(-1, num_points)
+ .reshape(1, -1)
+ )
+ non_zeros_ind = []
+ for i in range(cell_ind.shape[1]):
+ non_zeros_ind.append(
+ torch.cat((batch_ind, cell_ind[:, i].reshape(1, -1), point_ind), axis=0)
+ )
+        sp_mat = [
+            build_proj_matrix(ind, occupied_cell, batch_size, np.prod(sh))
+            for ind, sh in zip(non_zeros_ind, self.grids_shape)
+        ]
+
+ # Actual backbone
+ for d, (smix, cmix) in enumerate(zip(self.spatial_mix, self.channel_mix)):
+ tokens = smix(tokens, sp_mat[d % len(sp_mat)])
+ tokens = cmix(tokens)
+
+ return tokens
diff --git a/waffleiron/embedding.py b/waffleiron/embedding.py
new file mode 100644
index 0000000..6bd837e
--- /dev/null
+++ b/waffleiron/embedding.py
@@ -0,0 +1,64 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import torch.nn as nn
+
+
+class Embedding(nn.Module):
+ def __init__(self, channels_in, channels_out):
+ super().__init__()
+
+ # Normalize inputs
+ self.norm = nn.BatchNorm1d(channels_in)
+
+ # Point Embedding
+ self.conv1 = nn.Conv1d(channels_in, channels_out, 1)
+
+ # Neighborhood embedding
+ self.conv2 = nn.Sequential(
+ nn.BatchNorm2d(channels_in),
+ nn.Conv2d(channels_in, channels_out, 1, bias=False),
+ nn.BatchNorm2d(channels_out),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(channels_out, channels_out, 1, bias=False),
+ )
+
+ # Merge point and neighborhood embeddings
+ self.final = nn.Conv1d(2 * channels_out, channels_out, 1, bias=True, padding=0)
+
+ def forward(self, x, neighbors):
+ """x: B x C_in x N. neighbors: B x K x N. Output: B x C_out x N"""
+ # Normalize input
+ x = self.norm(x)
+
+ # Point embedding
+ point_emb = self.conv1(x)
+
+ # Neighborhood embedding
+ gather = []
+ # Gather neighbors around each center point
+        for ind_nn in range(
+            1, neighbors.shape[1]
+        ):  # Skip the first neighbor, which is the center point itself
+ temp = neighbors[:, ind_nn : ind_nn + 1, :].expand(-1, x.shape[1], -1)
+ gather.append(torch.gather(x, 2, temp).unsqueeze(-1))
+ # Relative coordinates
+        neigh_emb = torch.cat(gather, -1) - x.unsqueeze(-1)  # Size: B x C x N x (K - 1)
+ # Embedding
+ neigh_emb = self.conv2(neigh_emb).max(-1)[0]
+
+ # Merge both embeddings
+ return self.final(torch.cat((point_emb, neigh_emb), dim=1))
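+
+
+# Hedged shape walk-through (illustrative only; sizes are made up): 2 clouds of
+# 100 points, 5 input features, 4 neighbors (the first being the point itself).
+def _demo_embedding():
+    emb = Embedding(channels_in=5, channels_out=32)
+    x = torch.randn(2, 5, 100)
+    neighbors = torch.randint(0, 100, (2, 4, 100))
+    return emb(x, neighbors)  # 2 x 32 x 100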
diff --git a/waffleiron/segmenter.py b/waffleiron/segmenter.py
new file mode 100644
index 0000000..c08baa2
--- /dev/null
+++ b/waffleiron/segmenter.py
@@ -0,0 +1,34 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch.nn as nn
+from .backbone import WaffleIron
+from .embedding import Embedding
+
+
+class Segmenter(nn.Module):
+ def __init__(self, input_channels, feat_channels, nb_class, depth, grid_shape):
+ super().__init__()
+ # Embedding layer
+ self.embed = Embedding(input_channels, feat_channels)
+ # WaffleIron backbone
+ self.waffleiron = WaffleIron(feat_channels, depth, grid_shape)
+ # Classification layer
+ self.classif = nn.Conv1d(feat_channels, nb_class, 1)
+
+ def forward(self, feats, cell_ind, occupied_cell, neighbors):
+ tokens = self.embed(feats, neighbors)
+ tokens = self.waffleiron(tokens, cell_ind, occupied_cell)
+ return self.classif(tokens)
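+
+
+# Hedged end-to-end sketch (illustrative only; every size below is made up):
+# one projection grid of 8 x 8 cells, 128 points per cloud, 4 neighbors.
+def _demo_segmenter():
+    import torch
+
+    B, N, K = 2, 128, 4
+    net = Segmenter(
+        input_channels=5, feat_channels=32, nb_class=3, depth=2, grid_shape=[(8, 8)]
+    )
+    feats = torch.randn(B, 5, N)
+    cell_ind = torch.randint(0, 64, (B, 1, N))  # flattened 2D cell index per point
+    occupied = torch.ones(B, N)  # 1 for real points, 0 for padding
+    neighbors = torch.randint(0, N, (B, K, N))
+    return net(feats, cell_ind, occupied, neighbors)  # B x nb_class x N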