diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..67a05c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Compiled python modules. +*.pyc + +# Python egg metadata, regenerated from source files by setuptools. +/*.egg-info +/*.egg + +# Data +*.npy +*.npz +*.txt diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..045e216 --- /dev/null +++ b/LICENSE @@ -0,0 +1,194 @@ +WaffleIron + +Copyright 2022 Valeo + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..d8329d4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,234 @@
+# WaffleIron
+
+![](./illustration.png)
+
+[**Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation**]()
+[*Gilles Puy*1](https://sites.google.com/site/puygilles/home),
+[*Alexandre Boulch*1](http://boulch.eu),
+[*Renaud Marlet*1,2](http://imagine.enpc.fr/~marletr/)
+1*valeo.ai, France* and 2*LIGM, Ecole des Ponts, Univ Gustave Eiffel, CNRS, France*.
+
+If you find this code or work useful, please cite the following [paper]():
+```
+@article{puy23waffleiron,
+  title={Using a Waffle Iron for Automotive Point Cloud Semantic Segmentation},
+  author={Puy, Gilles and Boulch, Alexandre and Marlet, Renaud},
+  journal={arxiv:2301.xxxx},
+  year={2023}
+}
+```
+
+## Installation
+
+```
+pip install pyaml==6.0 tqdm==4.63.0 scipy==1.8.0 torch==1.11.0 tensorboard==2.8.0
+git clone https://github.com/valeoai/WaffleIron
+cd WaffleIron
+pip install -e ./
+```
+
+Download the pretrained models:
+```
+wget [ADD LINK]
+tar -xvzf pretrained_models_and_data.tar.gz
+```
+
+Finally, indicate where the nuScenes and SemanticKITTI datasets are located on your system:
+```
+export PATH_NUSCENES="/PATH/TO/NUSCENES"
+export PATH_KITTI="/PATH/TO/KITTI/"
+```
+
+If you want to uninstall this package, type `pip uninstall waffleiron`.
+
+
+## Testing pretrained models
+
+### Option 1: Using this code
+
+To evaluate the pre-trained model on the val set of nuScenes used in Table 1 of our paper, type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--fp16 \
+--gpu 0 \
+--restart \
+--eval
+```
+
+To evaluate, on the val set of SemanticKITTI, the pre-trained model trained with instance cutmix augmentation, type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--restart \
+--eval
+```
+
+**Remark:** *On SemanticKITTI, the code above will extract object instances on the train set (even though this is
+not necessary for validation) because this augmentation is activated for training on this dataset (and this code
+re-uses the training script). This can be bypassed by editing the `yaml` config file and changing the entry
+`instance_cutmix` to `False`. The instances are saved automatically in `/tmp/semantic_kitti_instances/`.*
+
+### Option 2: Using the official APIs
+
+The second option writes the predictions on disk so that the results can be computed using the official
+nuScenes or SemanticKITTI APIs. This option also allows you to perform test time augmentations, which is not possible
+with Option 1 above. These scripts should be usable for submission to the official benchmarks.
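+
+For reference, the voting scheme used by `eval_nuscenes.py` and `eval_kitti.py` simply averages the class
+probabilities predicted over several augmented versions of the same point cloud. A minimal sketch of this idea
+(the `net` and `augment` callables below are placeholders, not functions of this repository):
+```python
+import torch
+
+def predict_with_votes(net, pc, augment, num_votes=10):
+    # Accumulate softmax probabilities over several augmented passes
+    # of the same point cloud, then pick the best class per point.
+    vote = None
+    with torch.inference_mode():
+        for _ in range(num_votes):
+            prob = torch.softmax(net(augment(pc)), dim=1)  # N x C probabilities
+            vote = prob if vote is None else vote + prob
+    return vote.argmax(dim=1)  # one class index per point
+```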
+
+#### nuScenes
+
+To extract the predictions with the pre-trained model on nuScenes, type
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12
+```
+or, if you want to use, e.g., 10 votes with test time augmentations,
+```
+python eval_nuscenes.py \
+--path_dataset $PATH_NUSCENES \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--ckpt ./pretrained_models/WaffleIron-48-256__60cm-baseline-nuscenes/ckpt_last.pth \
+--result_folder ./predictions_nuscenes \
+--phase val \
+--num_workers 12 \
+--num_votes 10 \
+--batch_size 5
+```
+You can reduce `batch_size` to 2 or 1 depending on the available memory.
+
+These predictions can be evaluated using the official nuScenes API as follows:
+```
+git clone https://github.com/nutonomy/nuscenes-devkit.git
+python nuscenes-devkit/python-sdk/nuscenes/eval/lidarseg/evaluate.py \
+--result_path ./predictions_nuscenes \
+--eval_set val \
+--version v1.0-trainval \
+--dataroot $PATH_NUSCENES \
+--verbose True
+```
+
+#### SemanticKITTI
+
+To evaluate the pre-trained model on SemanticKITTI, type
+```
+python eval_kitti.py \
+--path_dataset $PATH_KITTI \
+--ckpt ./pretrained_models/WaffleIron-48-256__40cm-BEV-cutmix-kitti/ckpt_last.pth \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--result_folder ./predictions_kitti \
+--phase val \
+--num_workers 12
+```
+
+The predictions can be evaluated using the official API by typing
+```
+git clone https://github.com/PRBonn/semantic-kitti-api.git
+cd semantic-kitti-api/
+python evaluate_semantics.py \
+--dataset $PATH_KITTI/dataset \
+--predictions ../predictions_kitti \
+--split valid
+```
+
+## Training
+
+### nuScenes
+
+To train a WaffleIron-48-256 backbone on nuScenes with
+- 2D cells of 60 cm,
+- the baseline sequence of projections along the z-axis, then the y-axis, then the x-axis, etc., until the last layer,
+
+type
+```
+python launch_train.py \
+--dataset nuscenes \
+--path_dataset $PATH_NUSCENES \
+--log_path ./logs/WaffleIron-48-256__60cm-baseline-nuscenes/ \
+--config ./configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml \
+--gpu 0 \
+--fp16
+```
+
+For example, with `--seed 1` as an additional argument to `launch_train.py`, we obtain 76.2% mIoU at the last
+training epoch (using one Nvidia Tesla V100S-PCIE-32GB for training).
+
+Note: for multi-GPU training, you can remove `--gpu 0` and the code will use all available GPUs via PyTorch
+DataParallel. You can add the argument `--multiprocessing-distributed` to use DistributedDataParallel instead.
+
+
+### SemanticKITTI
+
+To retrain a WaffleIron-48-256 backbone on SemanticKITTI with
+- 2D cells of 40 cm,
+- projection along the z-axis at all layers,
+- **instance cutmix augmentations**,
+
+type
+```
+python launch_train.py \
+--dataset semantic_kitti \
+--path_dataset $PATH_KITTI \
+--log_path ./logs/WaffleIron-48-256__40cm-BEV-cutmix-kitti \
+--config ./configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml \
+--fp16 \
+--multiprocessing-distributed
+```
+
+The instances for cutmix augmentation are saved in `/tmp/semantic_kitti_instances/`. You can disable the instance
+cutmix augmentations by editing the `yaml` config file to set `instance_cutmix` to `False`.
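+
+The 2D grid sizes in the config files follow from the field of view and the cell size: the values in the provided
+configs are consistent with flooring (FOV extent) / (cell size). A quick sanity check of the values used above
+(this helper is only an illustration, not part of the repository):
+```python
+def grid_shape(fov_min, fov_max, cell_size):
+    # Number of 2D cells covering the field of view at the given resolution
+    return int((fov_max - fov_min) / cell_size)
+
+# SemanticKITTI: x, y in [-50, 50] m with 40 cm cells -> grid [250, 250]
+assert grid_shape(-50, 50, 0.40) == 250
+# nuScenes: 60 cm cells -> [166, 166] on (x, y), and [166, 16] on (x, z) or (y, z)
+assert grid_shape(-50, 50, 0.60) == 166
+assert grid_shape(-5, 5, 0.60) == 16
+```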
+
+For submission to the official benchmark on the test set of SemanticKITTI, we also trained the network on both the
+train and val sets (argument `--trainval` in `launch_train.py`), used the checkpoint at the last epoch, and applied
+10 test time augmentations during inference.
+
+
+## Creating your own network
+
+### Config file
+
+You can refer to `./configs/WaffleIron-template-BEV-projection.yaml` and
+`./configs/WaffleIron-template-baseline-projection.yaml`, where we describe the role of each parameter.
+In particular, you can adjust `nb_channels` and `depth` to increase or decrease the capacity of WaffleIron.
+You can also adjust the memory required to train a network by adjusting `max_points` in `dataloader`, but
+too small a value might degrade the performance.
+
+### Models
+
+The WaffleIron backbone is defined in `waffleiron/backbone.py` and can be imported in your project by typing
+```python
+from waffleiron import WaffleIron
+```
+It needs to be combined with an embedding layer to provide point tokens and a pointwise classification layer, as we do
+in `waffleiron/segmenter.py`. You can define your own embedding and classification layers instead (see the sketch at
+the end of this README).
+
+
+## Acknowledgements
+We thank the authors of
+```
+@inproceedings{berman18lovasz,
+author = {Berman, Maxim and Triki, Amal Rannen and Blaschko, Matthew B.},
+title = {The Lovász-Softmax Loss: A Tractable Surrogate for the Optimization of the Intersection-Over-Union Measure
+in Neural Networks},
+booktitle = {CVPR},
+year = {2018}
+}
+```
+for making their [implementation](https://github.com/bermanmaxim/LovaszSoftmax) of the Lovász loss publicly available.
+
+
+## License
+WaffleIron is released under the [Apache 2.0 license](./LICENSE).
+
+The implementation of the Lovász loss in `utils/lovasz.py` is released under
+[MIT Licence](https://github.com/bermanmaxim/LovaszSoftmax/blob/master/LICENSE).
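+
+As a complement to the *Models* section above, the full segmenter (embedding layer, WaffleIron backbone, and
+pointwise classification layer) can be instantiated directly from one of the provided config files, as done in
+`eval_nuscenes.py` and `eval_kitti.py`. A minimal sketch:
+```python
+import yaml
+from waffleiron import Segmenter
+
+# Load one of the config files describing the architecture
+with open("configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml") as f:
+    config = yaml.safe_load(f)
+
+# Embedding layer + WaffleIron backbone + pointwise classification layer
+net = Segmenter(
+    input_channels=config["embedding"]["size_input"],
+    feat_channels=config["waffleiron"]["nb_channels"],
+    depth=config["waffleiron"]["depth"],
+    grid_shape=config["waffleiron"]["grids_size"],
+    nb_class=config["classif"]["nb_class"],
+)
+```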
diff --git a/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
new file mode 100644
index 0000000..b60fa67
--- /dev/null
+++ b/configs/WaffleIron-48-256__40cm-BEV-cutmix-kitti.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 3 # max value on z-axis: 3 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1 (and then the same at all layers)
+  grids_size: # Define here the size of the 2D grids
+    - [250, 250] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [250, 250] -> resolution 40cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 19 # Number of classes on SemanticKITTI (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: True
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.003
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
new file mode 100644
index 0000000..b631db1
--- /dev/null
+++ b/configs/WaffleIron-48-256__60cm-baseline-nuscenes.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+    - 1 # Project along the y axis at \ell = 2
+    - 0 # Project along the x axis at \ell = 3
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+    - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+    - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.003
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-template-BEV-projection.yaml b/configs/WaffleIron-template-BEV-projection.yaml
new file mode 100644
index 0000000..5ac6296
--- /dev/null
+++ b/configs/WaffleIron-template-BEV-projection.yaml
@@ -0,0 +1,51 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.001
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/configs/WaffleIron-template-baseline-projection.yaml b/configs/WaffleIron-template-baseline-projection.yaml
new file mode 100644
index 0000000..216b397
--- /dev/null
+++ b/configs/WaffleIron-template-baseline-projection.yaml
@@ -0,0 +1,55 @@
+waffleiron: # Architecture of the backbone
+  nb_channels: 256 # Define F = the feature size = width of the WaffleIron
+  depth: 48 # Define L = the depth of the network
+  fov_xyz: # Define the FOV in meters
+    - - -50 # min value on x-axis: -50 m
+      - -50 # min value on y-axis: -50 m
+      - -5 # min value on z-axis: -5 m
+    - - 50 # max value on x-axis: 50 m
+      - 50 # max value on y-axis: 50 m
+      - 5 # max value on z-axis: 5 m
+  dim_proj: # Define the sequence of projections (which is then repeated sequentially until \ell = L)
+    - 2 # Project along the z axis at \ell = 1
+    - 1 # Project along the y axis at \ell = 2
+    - 0 # Project along the x axis at \ell = 3
+  grids_size: # Define here the size of the 2D grids
+    - [166, 166] # At \ell = 1, project along z, ie on (x, y) with FOV [-50, 50] on both axes: size [166, 166] -> resolution 60cm
+    - [166, 16] # At \ell = 2, project along y, ie on (x, z) with FOV [-50, 50] on x and [-5, 5] on z: size [166, 16] -> resolution 60cm
+    - [166, 16] # At \ell = 3, project along x, ie on (y, z) with FOV [-50, 50] on y and [-5, 5] on z: size [166, 16] -> resolution 60cm
+
+classif: # Architecture of the classification layer, after WaffleIron
+  nb_class: 16 # Number of classes on nuScenes (after removing the ignore class)
+
+embedding: # Architecture of the embedding layer, before WaffleIron
+  input_feat: # List of features on each point
+    - "intensity"
+    - "height"
+    - "radius"
+  size_input: 3 # Input feature size on each point
+  neighbors: 16 # Neighborhood for embedding layer
+  voxel_size: 0.1 # Voxel size for downsampling point cloud in pre-processing
+
+dataloader:
+  batch_size: 4
+  num_workers: 12
+  max_points: 20000
+
+augmentations:
+  rotation_z: null
+  flip_xy: null
+  scale:
+    - [0, 1, 2]
+    - 0.1
+  instance_cutmix: False
+
+loss:
+  lovasz: 1.0
+
+optim:
+  lr: .001
+  weight_decay: 0.001
+
+scheduler:
+  min_lr: 0.00001
+  max_epoch: 45
+  epoch_warmup: 4
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000..2cf6775
--- /dev/null
+++ b/datasets/__init__.py
@@ -0,0 +1,21 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .pc_dataset import Collate
+from .nuscenes import NuScenesSemSeg
+from .semantic_kitti import SemanticKITTI
+
+__all__ = ["SemanticKITTI", "NuScenesSemSeg", "Collate"]
+LIST_DATASETS = {"nuscenes": NuScenesSemSeg, "semantic_kitti": SemanticKITTI}
diff --git a/datasets/nuscenes.py b/datasets/nuscenes.py
new file mode 100755
index 0000000..617d7db
--- /dev/null
+++ b/datasets/nuscenes.py
@@ -0,0 +1,97 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
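+# nuScenes semantic segmentation dataset: loads the keyframe point clouds and labels listed in
+# list_files_nuscenes.npz, remaps the raw label indices to the 16 training classes, and maps
+# the ignore class to 255.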
+ + +import os +import numpy as np +from .pc_dataset import PCDataset + + +class ClassMapper: + def __init__(self): + current_folder = os.path.dirname(os.path.realpath(__file__)) + self.mapping = np.load( + os.path.join(current_folder, "mapping_class_index_nuscenes.npy") + ) + + def get_index(self, x): + return self.mapping[x] + + +class NuScenesSemSeg(PCDataset): + + CLASS_NAME = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", + ] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + # Class mapping + current_folder = os.path.dirname(os.path.realpath(__file__)) + self.mapper = np.vectorize(ClassMapper().get_index) + + # List all keyframes + self.list_frames = np.load( + os.path.join(current_folder, "list_files_nuscenes.npz") + )[self.phase] + if self.phase == "train": + assert len(self) == 28130 + elif self.phase == "val": + assert len(self) == 6019 + elif self.phase == "test": + assert len(self) == 6008 + else: + raise ValueError(f"Unknown phase {self.phase}.") + + assert not self.instance_cutmix, "Instance CutMix not implemented on nuscenes" + + def __len__(self): + return len(self.list_frames) + + def load_pc(self, index): + # Load point cloud + pc = np.fromfile( + os.path.join(self.rootdir, self.list_frames[index][0]), + dtype=np.float32, + ) + pc = pc.reshape((-1, 5))[:, :4] + + # Load segmentation labels + labels = np.fromfile( + os.path.join(self.rootdir, self.list_frames[index][1]), + dtype=np.uint8, + ) + labels = self.mapper(labels) + + # Label 0 should be ignored + labels = labels - 1 + labels[labels == -1] = 255 + + return pc, labels, self.list_frames[index][2] diff --git a/datasets/pc_dataset.py b/datasets/pc_dataset.py new file mode 100644 index 0000000..99175c4 --- /dev/null +++ b/datasets/pc_dataset.py @@ -0,0 +1,276 @@ +# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
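+# Shared dataset machinery: PCDataset handles voxel downsampling, FOV cropping, computation of
+# the 2D cell index for each projection plane, and kNN neighborhoods for the point embedding
+# layer; Collate zero-pads the point clouds of a batch to a common size.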
+
+
+import torch
+import numpy as np
+import utils.transforms as tr
+from torch.utils.data import Dataset
+from scipy.spatial import cKDTree as KDTree
+
+
+class PCDataset(Dataset):
+    def __init__(
+        self,
+        rootdir=None,
+        phase="train",
+        input_feat="intensity",
+        voxel_size=0.1,
+        train_augmentations=None,
+        dim_proj=[
+            0,
+        ],
+        grids_shape=[(256, 256)],
+        fov_xyz=(
+            (
+                -1.0,
+                -1.0,
+                -1.0,
+            ),
+            (1.0, 1.0, 1.0),
+        ),
+        num_neighbors=16,
+        tta=False,
+        instance_cutmix=False,
+    ):
+        super().__init__()
+
+        # Dataset split
+        self.phase = phase
+        assert self.phase in ["train", "val", "trainval", "test"]
+
+        # Root directory of dataset
+        self.rootdir = rootdir
+
+        # Input features to compute for each point
+        self.input_feat = input_feat
+
+        # Downsample input point cloud by small voxelization
+        self.downsample = tr.Voxelize(
+            dims=(0, 1, 2),
+            voxel_size=voxel_size,
+            random=(self.phase == "train" or self.phase == "trainval"),
+        )
+
+        # Field of view
+        assert len(fov_xyz[0]) == len(
+            fov_xyz[1]
+        ), "Min and Max FOV must have the same length."
+        for i, (fov_min, fov_max) in enumerate(zip(*fov_xyz)):
+            assert (
+                fov_min < fov_max
+            ), f"Field of view: min ({fov_min}) < max ({fov_max}) is expected on dimension {i}."
+        self.fov_xyz = np.concatenate([np.array(f)[None] for f in fov_xyz], axis=0)
+        self.crop_to_fov = tr.Crop(dims=(0, 1, 2), fov=fov_xyz)
+
+        # Grid shape for projection in 2D
+        assert len(grids_shape) == len(dim_proj)
+        self.dim_proj = dim_proj
+        self.grids_shape = [np.array(g) for g in grids_shape]
+        self.lut_axis_plane = {0: (1, 2), 1: (0, 2), 2: (0, 1)}
+
+        # Number of neighbors for embedding layer
+        assert num_neighbors > 0
+        self.num_neighbors = num_neighbors
+
+        # Test time augmentation
+        if tta:
+            assert self.phase in ["test", "val"]
+            self.tta = tr.Compose(
+                (
+                    tr.Rotation(inplace=True, dim=2),
+                    tr.RandomApply(tr.FlipXY(inplace=True), prob=2.0 / 3.0),
+                    tr.Scale(inplace=True, dims=(0, 1, 2), range=0.1),
+                )
+            )
+        else:
+            self.tta = None
+
+        # Train time augmentations
+        if train_augmentations is not None:
+            assert self.phase in ["train", "trainval"]
+        self.train_augmentations = train_augmentations
+
+        # Flag for instance cutmix
+        self.instance_cutmix = instance_cutmix
+
+    def get_occupied_2d_cells(self, pc):
+        """Return mapping between 3D points and corresponding 2D cells"""
+        cell_ind = []
+        for dim, grid in zip(self.dim_proj, self.grids_shape):
+            # Get plane on which to project
+            dims = self.lut_axis_plane[dim]
+            # Compute grid resolution
+            res = (self.fov_xyz[1, dims] - self.fov_xyz[0, dims]) / grid[None]
+            # Shift and quantize point cloud
+            pc_quant = ((pc[:, dims] - self.fov_xyz[0, dims]) / res).astype("int")
+            # Check that the point cloud fits on the grid
+            mins, maxs = pc_quant.min(0), pc_quant.max(0)
+            assert mins[0] >= 0 and mins[1] >= 0, (
+                f"Some points are outside the FOV: {pc[:, :3].min(0)} vs. {self.fov_xyz}"
+            )
+            assert maxs[0] < grid[0] and maxs[1] < grid[1], (
+                f"Some points are outside the FOV: {pc[:, :3].max(0)} vs. {self.fov_xyz}"
+            )
+            # Transform quantized coordinates to cell indices for projection on 2D plane
+            temp = pc_quant[:, 0] * grid[1] + pc_quant[:, 1]
+            cell_ind.append(temp[None])
+        return np.vstack(cell_ind)
+
+    def prepare_input_features(self, pc_orig):
+        # Concatenate desired input features to coordinates
+        pc = [pc_orig[:, :3]]  # Initialize with coordinates
+        for feat_type in self.input_feat:
+            if feat_type == "intensity":
+                pc.append(pc_orig[:, 3:])
+            elif feat_type == "height":
+                pc.append(pc_orig[:, 2:3])
+            elif feat_type == "radius":
+                r_xyz = np.linalg.norm(pc_orig[:, :3], axis=1, keepdims=True)
+                pc.append(r_xyz)
+            else:
+                raise ValueError(f"Unknown feature: {feat_type}")
+        return np.concatenate(pc, 1)
+
+    def load_pc(self, index):
+        raise NotImplementedError()
+
+    def __len__(self):
+        raise NotImplementedError()
+
+    def __getitem__(self, index):
+        # Load original point cloud
+        pc_orig, labels_orig, filename = self.load_pc(index)
+
+        # Prepare input features
+        pc_orig = self.prepare_input_features(pc_orig)
+
+        # Test time augmentation
+        if self.tta is not None:
+            pc_orig, labels_orig = self.tta(pc_orig, labels_orig)
+
+        # Voxelization
+        pc, labels = self.downsample(pc_orig, labels_orig)
+
+        # Augment data
+        if self.train_augmentations is not None:
+            pc, labels = self.train_augmentations(pc, labels)
+
+        # Crop to fov
+        pc, labels = self.crop_to_fov(pc, labels)
+
+        # For each point, get index of corresponding 2D cells on projected grid
+        cell_ind = self.get_occupied_2d_cells(pc)
+
+        # Get neighbors for point embedding layer providing tokens to waffleiron backbone
+        kdtree = KDTree(pc[:, :3])
+        assert pc.shape[0] > self.num_neighbors
+        _, neighbors_emb = kdtree.query(pc[:, :3], k=self.num_neighbors + 1)
+
+        # Nearest neighbor interpolation to undo cropping & voxelization at validation time
+        if self.phase in ["train", "trainval"]:
+            upsample = np.arange(pc.shape[0])
+        else:
+            _, upsample = kdtree.query(pc_orig[:, :3], k=1)
+
+        # Output to return
+        out = (
+            # Point features
+            pc[:, 3:].T[None],
+            # Point labels of original entire point cloud
+            labels if self.phase in ["train", "trainval"] else labels_orig,
+            # Projection 2D -> 3D: index of 2D cells for each point
+            cell_ind[None],
+            # Neighborhood for point embedding layer, which provides tokens to waffleiron backbone
+            neighbors_emb.T[None],
+            # For interpolation from voxelized & cropped point cloud to original point cloud
+            upsample,
+            # Filename of original point cloud
+            filename,
+        )
+
+        return out
+
+
+def zero_pad(feat, neighbors_emb, cell_ind, Nmax):
+    N = feat.shape[-1]
+    assert N <= Nmax
+    occupied_cells = np.ones((1, Nmax))
+    if N < Nmax:
+        # Zero-pad with null features
+        feat = np.concatenate((feat, np.zeros((1, feat.shape[1], Nmax - N))), axis=2)
+        # For zero-padded points, use the last zero-padded point as neighbor
+        neighbors_emb = np.concatenate(
+            (
+                neighbors_emb,
+                (Nmax - 1) * np.ones((1, neighbors_emb.shape[1], Nmax - N)),
+            ),
+            axis=2,
+        )
+        # Associate zero-padded points to first 2D cell...
+        cell_ind = np.concatenate(
+            (cell_ind, np.zeros((1, cell_ind.shape[1], Nmax - N))), axis=2
+        )
+        # ... and at the same time mark zero-padded points as unoccupied
+        occupied_cells[:, N:] = 0
+    return feat, neighbors_emb, cell_ind, occupied_cells
+
+
+class Collate:
+    def __init__(self, num_points=None):
+        self.num_points = num_points
+        assert num_points is None or num_points > 0
+
+    def __call__(self, list_data):
+
+        # Extract all data
+        list_of_data = (list(data) for data in zip(*list_data))
+        feat, label_orig, cell_ind, neighbors_emb, upsample, filename = list_of_data
+
+        # Zero-pad point clouds
+        Nmax = np.max([f.shape[-1] for f in feat])
+        if self.num_points is not None:
+            assert Nmax <= self.num_points
+        occupied_cells = []
+        for i in range(len(feat)):
+            feat[i], neighbors_emb[i], cell_ind[i], temp = zero_pad(
+                feat[i],
+                neighbors_emb[i],
+                cell_ind[i],
+                Nmax if self.num_points is None else self.num_points,
+            )
+            occupied_cells.append(temp)
+
+        # Concatenate along batch dimension
+        feat = torch.from_numpy(np.vstack(feat)).float()  # B x C x Nmax
+        neighbors_emb = torch.from_numpy(np.vstack(neighbors_emb)).long()  # B x (num_neighbors + 1) x Nmax
+        cell_ind = torch.from_numpy(
+            np.vstack(cell_ind)
+        ).long()  # B x nb_projections x Nmax
+        occupied_cells = torch.from_numpy(np.vstack(occupied_cells)).float()  # B x Nmax
+        labels_orig = torch.from_numpy(np.hstack(label_orig)).long()
+        upsample = [torch.from_numpy(u) for u in upsample]
+
+        # Prepare output variables
+        out = {
+            "feat": feat,
+            "neighbors_emb": neighbors_emb,
+            "upsample": upsample,
+            "labels_orig": labels_orig,
+            "cell_ind": cell_ind,
+            "occupied_cells": occupied_cells,
+            "filename": filename,
+        }
+
+        return out
diff --git a/datasets/semantic-kitti.yaml b/datasets/semantic-kitti.yaml
new file mode 100755
index 0000000..6281065
--- /dev/null
+++ b/datasets/semantic-kitti.yaml
@@ -0,0 +1,211 @@
+# This file is covered by the LICENSE file in the root of this project.
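+# Class names, color map, class frequencies, label remapping and split as in the official
+# semantic-kitti-api repository (https://github.com/PRBonn/semantic-kitti-api).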
+labels: + 0 : "unlabeled" + 1 : "outlier" + 10: "car" + 11: "bicycle" + 13: "bus" + 15: "motorcycle" + 16: "on-rails" + 18: "truck" + 20: "other-vehicle" + 30: "person" + 31: "bicyclist" + 32: "motorcyclist" + 40: "road" + 44: "parking" + 48: "sidewalk" + 49: "other-ground" + 50: "building" + 51: "fence" + 52: "other-structure" + 60: "lane-marking" + 70: "vegetation" + 71: "trunk" + 72: "terrain" + 80: "pole" + 81: "traffic-sign" + 99: "other-object" + 252: "moving-car" + 253: "moving-bicyclist" + 254: "moving-person" + 255: "moving-motorcyclist" + 256: "moving-on-rails" + 257: "moving-bus" + 258: "moving-truck" + 259: "moving-other-vehicle" +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 10: [245, 150, 100] + 11: [245, 230, 100] + 13: [250, 80, 100] + 15: [150, 60, 30] + 16: [255, 0, 0] + 18: [180, 30, 80] + 20: [255, 0, 0] + 30: [30, 30, 255] + 31: [200, 40, 255] + 32: [90, 30, 150] + 40: [255, 0, 255] + 44: [255, 150, 255] + 48: [75, 0, 75] + 49: [75, 0, 175] + 50: [0, 200, 255] + 51: [50, 120, 255] + 52: [0, 150, 255] + 60: [170, 255, 150] + 70: [0, 175, 0] + 71: [0, 60, 135] + 72: [80, 240, 150] + 80: [150, 240, 255] + 81: [0, 0, 255] + 99: [255, 255, 50] + 252: [245, 150, 100] + 256: [255, 0, 0] + 253: [200, 40, 255] + 254: [30, 30, 255] + 255: [90, 30, 150] + 257: [250, 80, 100] + 258: [180, 30, 80] + 259: [255, 0, 0] +content: # as a ratio with the total number of points + 0: 0.018889854628292943 + 1: 0.0002937197336781505 + 10: 0.040818519255974316 + 11: 0.00016609538710764618 + 13: 2.7879693665067774e-05 + 15: 0.00039838616015114444 + 16: 0.0 + 18: 0.0020633612104619787 + 20: 0.0016218197275284021 + 30: 0.00017698551338515307 + 31: 1.1065903904919655e-08 + 32: 5.532951952459828e-09 + 40: 0.1987493871255525 + 44: 0.014717169549888214 + 48: 0.14392298360372 + 49: 0.0039048553037472045 + 50: 0.1326861944777486 + 51: 0.0723592229456223 + 52: 0.002395131480328884 + 60: 4.7084144280367186e-05 + 70: 0.26681502148037506 + 71: 0.006035012012626033 + 72: 0.07814222006271769 + 80: 0.002855498193863172 + 81: 0.0006155958086189918 + 99: 0.009923127583046915 + 252: 0.001789309418528068 + 253: 0.00012709999297008662 + 254: 0.00016059776092534436 + 255: 3.745553104802113e-05 + 256: 0.0 + 257: 0.00011351574470342043 + 258: 0.00010157861367183268 + 259: 4.3840131989471124e-05 +# classes that are indistinguishable from single scan or inconsistent in +# ground truth are mapped to their closest equivalent +learning_map: + 0 : 0 # "unlabeled" + 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 1 # "car" + 11: 2 # "bicycle" + 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 3 # "motorcycle" + 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 4 # "truck" + 20: 5 # "other-vehicle" + 30: 6 # "person" + 31: 7 # "bicyclist" + 32: 8 # "motorcyclist" + 40: 9 # "road" + 44: 10 # "parking" + 48: 11 # "sidewalk" + 49: 12 # "other-ground" + 50: 13 # "building" + 51: 14 # "fence" + 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 9 # "lane-marking" to "road" ---------------------------------mapped + 70: 15 # "vegetation" + 71: 16 # "trunk" + 72: 17 # "terrain" + 80: 18 # "pole" + 81: 19 # "traffic-sign" + 99: 0 # "other-object" to "unlabeled" ----------------------------mapped + 252: 1 # "moving-car" to "car" ------------------------------------mapped + 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped + 254: 6 # "moving-person" to "person" 
------------------------------mapped
+  255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped
+  256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped
+  257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped
+  258: 4 # "moving-truck" to "truck" --------------------------------mapped
+  259: 5 # "moving-other-vehicle" to "other-vehicle" ----------------mapped
+learning_map_inv: # inverse of previous map
+  0: 0 # "unlabeled", and others ignored
+  1: 10 # "car"
+  2: 11 # "bicycle"
+  3: 15 # "motorcycle"
+  4: 18 # "truck"
+  5: 20 # "other-vehicle"
+  6: 30 # "person"
+  7: 31 # "bicyclist"
+  8: 32 # "motorcyclist"
+  9: 40 # "road"
+  10: 44 # "parking"
+  11: 48 # "sidewalk"
+  12: 49 # "other-ground"
+  13: 50 # "building"
+  14: 51 # "fence"
+  15: 70 # "vegetation"
+  16: 71 # "trunk"
+  17: 72 # "terrain"
+  18: 80 # "pole"
+  19: 81 # "traffic-sign"
+learning_ignore: # Ignore classes
+  0: True # "unlabeled", and others ignored
+  1: False # "car"
+  2: False # "bicycle"
+  3: False # "motorcycle"
+  4: False # "truck"
+  5: False # "other-vehicle"
+  6: False # "person"
+  7: False # "bicyclist"
+  8: False # "motorcyclist"
+  9: False # "road"
+  10: False # "parking"
+  11: False # "sidewalk"
+  12: False # "other-ground"
+  13: False # "building"
+  14: False # "fence"
+  15: False # "vegetation"
+  16: False # "trunk"
+  17: False # "terrain"
+  18: False # "pole"
+  19: False # "traffic-sign"
+split: # sequence numbers
+  train:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 9
+    - 10
+  valid:
+    - 8
+  test:
+    - 11
+    - 12
+    - 13
+    - 14
+    - 15
+    - 16
+    - 17
+    - 18
+    - 19
+    - 20
+    - 21
diff --git a/datasets/semantic_kitti.py b/datasets/semantic_kitti.py
new file mode 100644
index 0000000..86436b6
--- /dev/null
+++ b/datasets/semantic_kitti.py
@@ -0,0 +1,260 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
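+# SemanticKITTI dataset with optional instance cutmix: instances of the rare classes bicycle,
+# motorcycle, person and bicyclist are extracted once, cached on disk, and pasted at random
+# locations on road, parking or sidewalk surfaces during training.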
+
+
+import os
+import yaml
+import torch
+import warnings
+import numpy as np
+from glob import glob
+from tqdm import tqdm
+import utils.transforms as tr
+from .pc_dataset import PCDataset
+
+
+class InstanceCutMix:
+    def __init__(self, phase="train", temp_dir="/tmp/semantic_kitti_instances/"):
+
+        # Train or Trainval
+        self.phase = phase
+        assert self.phase in ["train", "trainval"]
+
+        # List of files containing instances for bicycle, motorcycle, person, bicyclist
+        self.bank = {1: [], 2: [], 5: [], 6: []}
+
+        # Directory where to store instances
+        self.rootdir = os.path.join(temp_dir, self.phase)
+        for id_class in self.bank.keys():
+            os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+
+        # Load instances
+        for key in self.bank.keys():
+            self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+        self.__loaded__ = self.test_loaded()
+        if not self.__loaded__:
+            warnings.warn(
+                "Instances must be extracted and saved on disk before training"
+            )
+
+        # Augmentations applied to instances
+        self.rot = tr.Compose(
+            (
+                tr.FlipXY(inplace=True),
+                tr.Rotation(inplace=True),
+                tr.Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+            )
+        )
+
+        # For each class, maximum number of instances to add
+        self.num_to_add = 40
+
+        # Voxelization of 1m to downsample the point cloud and ensure that
+        # the centers of the instances are at least 1m away from each other
+        self.vox = tr.Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+
+    def test_loaded(self):
+        self.__loaded__ = False
+        if self.phase == "train":
+            if len(self.bank[1]) != 5083:
+                print(f"Expected 5083 instances but got {len(self.bank[1])}.")
+                return False
+            if len(self.bank[2]) != 3092:
+                print(f"Expected 3092 instances but got {len(self.bank[2])}.")
+                return False
+            if len(self.bank[5]) != 8084:
+                print(f"Expected 8084 instances but got {len(self.bank[5])}.")
+                return False
+            if len(self.bank[6]) != 1551:
+                print(f"Expected 1551 instances but got {len(self.bank[6])}.")
+                return False
+        elif self.phase == "trainval":
+            if len(self.bank[1]) != 8213:
+                print(f"Expected 8213 instances but got {len(self.bank[1])}.")
+                return False
+            if len(self.bank[2]) != 4169:
+                print(f"Expected 4169 instances but got {len(self.bank[2])}.")
+                return False
+            if len(self.bank[5]) != 12190:
+                print(f"Expected 12190 instances but got {len(self.bank[5])}.")
+                return False
+            if len(self.bank[6]) != 2943:
+                print(f"Expected 2943 instances but got {len(self.bank[6])}.")
+                return False
+        self.__loaded__ = True
+        return True
+
+    def cut(self, pc, class_label, instance_label):
+        for id_class in self.bank.keys():
+            where_class = class_label == id_class
+            all_instances = np.unique(instance_label[where_class])
+            for id_instance in all_instances:
+                # Segment instance
+                where_ins = instance_label == id_instance
+                if where_ins.sum() <= 5:
+                    continue
+                instance = pc[where_ins, :]
+                # Center instance
+                instance[:, :2] -= instance[:, :2].mean(0, keepdims=True)
+                instance[:, 2] -= instance[:, 2].min(0, keepdims=True)
+                # Save instance
+                pathfile = os.path.join(
+                    self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+                )
+                instance.tofile(pathfile)
+                self.bank[id_class].append(pathfile)
+
+    def mix(self, pc, class_label):
+
+        # Find potential locations where to add new objects (on road, parking or sidewalk surfaces)
+        pc_vox, class_label_vox = self.vox(pc, class_label)
+        where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+        where_surface = where_surface[torch.randperm(len(where_surface))]
+
+        # Add instances of each class in bank
+        id_tot = 0
+        new_pc, new_label = [pc], [class_label]
+        for id_class in self.bank.keys():
+            nb_to_add = torch.randint(self.num_to_add, (1,))[0]
+            which_one = torch.randint(len(self.bank[id_class]), (nb_to_add,))
+            for ii in range(nb_to_add):
+                # Point p where to add the instance
+                p = pc_vox[where_surface[id_tot]]
+                # Extract instance
+                obj = self.bank[id_class][which_one[ii]]
+                obj = np.fromfile(obj, dtype=np.float32).reshape((-1, 4))
+                # Augment instance
+                label = np.ones((obj.shape[0],), dtype=np.int64) * id_class
+                obj, label = self.rot(obj, label)
+                # Move instance to point p
+                obj[:, :3] += p[:3][None]
+                # Add instance to the point cloud
+                new_pc.append(obj)
+                # Add corresponding labels
+                new_label.append(label)
+                id_tot += 1
+
+        return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+    def __call__(self, pc, class_label, instance_label):
+        if not self.__loaded__:
+            self.cut(pc, class_label, instance_label)
+            return None, None
+
+        return self.mix(pc, class_label)
+
+
+class SemanticKITTI(PCDataset):
+
+    CLASS_NAME = [
+        "car",  # 0
+        "bicycle",  # 1
+        "motorcycle",  # 2
+        "truck",  # 3
+        "other-vehicle",  # 4
+        "person",  # 5
+        "bicyclist",  # 6
+        "motorcyclist",  # 7
+        "road",  # 8
+        "parking",  # 9
+        "sidewalk",  # 10
+        "other-ground",  # 11
+        "building",  # 12
+        "fence",  # 13
+        "vegetation",  # 14
+        "trunk",  # 15
+        "terrain",  # 16
+        "pole",  # 17
+        "traffic-sign",  # 18
+    ]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        # Config file and class mapping
+        current_folder = os.path.dirname(os.path.realpath(__file__))
+        with open(os.path.join(current_folder, "semantic-kitti.yaml")) as stream:
+            semkittiyaml = yaml.safe_load(stream)
+        self.learning_map = semkittiyaml["learning_map"]
+
+        # Split
+        if self.phase == "train":
+            split = semkittiyaml["split"]["train"]
+        elif self.phase == "val":
+            split = semkittiyaml["split"]["valid"]
+        elif self.phase == "test":
+            split = semkittiyaml["split"]["test"]
+        elif self.phase == "trainval":
+            split = semkittiyaml["split"]["train"] + semkittiyaml["split"]["valid"]
+        else:
+            raise ValueError(f"Unknown split {self.phase}")
+
+        # Find all files
+        self.im_idx = []
+        for i_folder in np.sort(split):
+            self.im_idx.extend(
+                glob(
+                    os.path.join(
+                        self.rootdir,
+                        "dataset",
+                        "sequences",
+                        str(i_folder).zfill(2),
+                        "velodyne",
+                        "*.bin",
+                    )
+                )
+            )
+        self.im_idx = np.sort(self.im_idx)
+
+        # Training with instance cutmix
+        if self.instance_cutmix:
+            assert (
+                self.phase != "test" and self.phase != "val"
+            ), "Instance cutmix should not be applied at test or val time"
+            self.cutmix = InstanceCutMix(phase=self.phase)
+            if not self.cutmix.test_loaded():
+                print("Extracting instances before training...")
+                for index in tqdm(range(len(self))):
+                    self.load_pc(index)
+                print("Done.")
+                assert self.cutmix.test_loaded(), "Instances not extracted correctly"
+
+    def __len__(self):
+        return len(self.im_idx)
+
+    def load_pc(self, index):
+        # Load point cloud
+        pc = np.fromfile(self.im_idx[index], dtype=np.float32).reshape((-1, 4))
+
+        # Extract labels
+        if self.phase == "test":
+            labels = np.zeros((pc.shape[0], 1), dtype=np.uint8)
+        else:
+            labels_inst = np.fromfile(
+                self.im_idx[index].replace("velodyne", "labels")[:-3] + "label",
+                dtype=np.uint32,
+            ).reshape((-1, 1))
+            labels = labels_inst & 0xFFFF  # lower 16 bits hold the semantic label
+            labels = np.vectorize(self.learning_map.__getitem__)(labels).astype(
+                np.int32
+            )
+
+        # Map ignore index (0) to 255
+        labels = labels[:, 0] - 1
+        labels[labels == -1] = 255
+
+        # Instance CutMix
+        if self.instance_cutmix:
+            pc, labels = self.cutmix(pc, labels, labels_inst[:, 0])
+
+        return pc, labels, self.im_idx[index]
diff --git a/eval_kitti.py b/eval_kitti.py
new file mode 100644
index 0000000..be172aa
--- /dev/null
+++ b/eval_kitti.py
@@ -0,0 +1,156 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import SemanticKITTI, Collate
+
+
+if __name__ == "__main__":
+
+    # --- Arguments
+    parser = argparse.ArgumentParser(description="Evaluation")
+    parser.add_argument("--config", type=str, help="Path to config file")
+    parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+    parser.add_argument("--path_dataset", type=str, help="Path to SemanticKITTI dataset")
+    parser.add_argument("--result_folder", type=str, help="Path to folder where predictions are saved")
+    parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+    parser.add_argument("--num_workers", type=int, default=6)
+    parser.add_argument("--phase", required=True, help="val or test")
+    args = parser.parse_args()
+    assert args.num_votes % args.batch_size == 0
+    os.makedirs(args.result_folder, exist_ok=True)
+
+    # --- Load config file
+    with open(args.config, "r") as f:
+        config = yaml.safe_load(f)
+
+    # --- SemanticKITTI (from https://github.com/PRBonn/semantic-kitti-api/blob/master/remap_semantic_labels.py)
+    with open("./datasets/semantic-kitti.yaml") as stream:
+        semkittiyaml = yaml.safe_load(stream)
+    remapdict = semkittiyaml["learning_map_inv"]
+    maxkey = max(remapdict.keys())
+    remap_lut = np.zeros((maxkey + 100), dtype=np.int32)
+    remap_lut[list(remapdict.keys())] = list(remapdict.values())
+
+    # --- Dataloader
+    dataset = SemanticKITTI(
+        rootdir=args.path_dataset,
+        input_feat=config["embedding"]["input_feat"],
+        voxel_size=config["embedding"]["voxel_size"],
+        num_neighbors=config["embedding"]["neighbors"],
+        dim_proj=config["waffleiron"]["dim_proj"],
+        grids_shape=config["waffleiron"]["grids_size"],
+        fov_xyz=config["waffleiron"]["fov_xyz"],
+        phase=args.phase,
+        tta=(args.num_votes > 1),
+    )
+    if args.num_votes > 1:
+        new_list = []
+        for f in dataset.im_idx:
+            for v in range(args.num_votes):
+                new_list.append(f)
+        dataset.im_idx = new_list
+    loader = DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.num_workers,
+        pin_memory=True,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+    args.num_votes = args.num_votes // args.batch_size
+
+    # --- Build network
+    net = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+    net = net.cuda()
+
+    # --- Load weights
+    ckpt = torch.load(args.ckpt, map_location="cuda:0")
+    try:
+        net.load_state_dict(ckpt["net"])
+    except RuntimeError:
+        # If the model was trained using DataParallel or DistributedDataParallel,
+        # strip the "module." prefix from the parameter names
+        state_dict = {}
+        for key in ckpt["net"].keys():
+            state_dict[key[len("module."):]] = ckpt["net"][key]
+        net.load_state_dict(state_dict)
+    net = net.eval()
+
+    # --- Evaluation
+    id_vote = 0
+    for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+        # Reset vote
+        if id_vote == 0:
+            vote = None
+
+        # Network inputs
+        feat = batch["feat"].cuda(non_blocking=True)
+        labels = batch["labels_orig"].cuda(non_blocking=True)
+        batch["upsample"] = [
+            up.cuda(non_blocking=True) for up in batch["upsample"]
+        ]
+        cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+        occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+        neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+        net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+        # Get prediction
+        with torch.autocast("cuda", enabled=True):
+            with torch.inference_mode():
+                out = net(*net_inputs)
+                for b in range(out.shape[0]):
+                    temp = out[b, :, batch["upsample"][b]].T
+                    if vote is None:
+                        vote = torch.softmax(temp, dim=1)
+                    else:
+                        vote += torch.softmax(temp, dim=1)
+        id_vote += 1
+
+        # Save prediction
+        if id_vote == args.num_votes:
+            # Convert label
+            pred_label = vote.max(1)[1] + 1  # Shift by 1 because of ignore_label at index 0
+            label = pred_label.cpu().numpy().reshape((-1)).astype(np.uint32)
+            upper_half = label >> 16  # get upper half for instances
+            lower_half = label & 0xFFFF  # get lower half for semantics
+            lower_half = remap_lut[lower_half]  # do the remapping of semantics
+            label = (upper_half << 16) + lower_half  # reconstruct full label
+            label = label.astype(np.uint32)
+            # Save result
+            assert batch["filename"][0] == batch["filename"][-1]
+            label_file = batch["filename"][0][len(dataset.rootdir) + len("/dataset"):]
+            label_file = label_file.replace("velodyne", "predictions")[:-3] + "label"
+            label_file = os.path.join(args.result_folder, label_file)
+            os.makedirs(os.path.split(label_file)[0], exist_ok=True)
+            label.tofile(label_file)
+            # Reset count of votes
+            id_vote = 0
\ No newline at end of file
diff --git a/eval_nuscenes.py b/eval_nuscenes.py
new file mode 100644
index 0000000..676b38b
--- /dev/null
+++ b/eval_nuscenes.py
@@ -0,0 +1,139 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
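+# Inference on nuScenes: accumulates softmax votes over test time augmentations and writes
+# one uint8 label per point in the format expected by the official lidarseg evaluation.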
+
+
+import os
+import yaml
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+from waffleiron import Segmenter
+from torch.utils.data import DataLoader
+from datasets import NuScenesSemSeg, Collate
+
+
+if __name__ == "__main__":
+
+    # --- Arguments
+    parser = argparse.ArgumentParser(description="Evaluation")
+    parser.add_argument("--config", type=str, help="Path to config file")
+    parser.add_argument("--ckpt", type=str, help="Path to checkpoint")
+    parser.add_argument("--path_dataset", type=str, help="Path to nuScenes dataset")
+    parser.add_argument("--result_folder", type=str, help="Path to result folder")
+    parser.add_argument("--num_votes", type=int, default=1, help="Number of test time augmentations")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
+    parser.add_argument("--num_workers", type=int, default=6)
+    parser.add_argument("--phase", required=True, help="val or test")
+    args = parser.parse_args()
+    assert args.num_votes % args.batch_size == 0
+    args.result_folder = os.path.join(args.result_folder, "lidarseg", args.phase)
+    os.makedirs(args.result_folder, exist_ok=True)
+
+    # --- Load config file
+    with open(args.config, "r") as f:
+        config = yaml.safe_load(f)
+
+    # --- Dataloader
+    dataset = NuScenesSemSeg(
+        rootdir=args.path_dataset,
+        input_feat=config["embedding"]["input_feat"],
+        voxel_size=config["embedding"]["voxel_size"],
+        num_neighbors=config["embedding"]["neighbors"],
+        dim_proj=config["waffleiron"]["dim_proj"],
+        grids_shape=config["waffleiron"]["grids_size"],
+        fov_xyz=config["waffleiron"]["fov_xyz"],
+        phase=args.phase,
+        tta=(args.num_votes > 1),
+    )
+    if args.num_votes > 1:
+        # Repeat each scan num_votes times for test-time augmentation
+        new_list = []
+        for f in dataset.list_frames:
+            for v in range(args.num_votes):
+                new_list.append(f)
+        dataset.list_frames = new_list
+    loader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.num_workers,
+        pin_memory=True,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+    args.num_votes = args.num_votes // args.batch_size
+
+    # --- Build network
+    net = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+    net = net.cuda()
+
+    # --- Load weights
+    ckpt = torch.load(args.ckpt, map_location="cuda:0")
+    try:
+        net.load_state_dict(ckpt["net"])
+    except RuntimeError:
+        # If the model was trained with DataParallel or DistributedDataParallel,
+        # strip the "module." prefix from the parameter names before loading
+        state_dict = {}
+        for key in ckpt["net"].keys():
+            state_dict[key[len("module."):]] = ckpt["net"][key]
+        net.load_state_dict(state_dict)
+    net = net.eval()
+
+    # --- Evaluation
+    id_vote = 0
+    for it, batch in enumerate(tqdm(loader, bar_format="{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}")):
+
+        # Reset vote
+        if id_vote == 0:
+            vote = None
+
+        # Network inputs
+        feat = batch["feat"].cuda(non_blocking=True)
+        labels = batch["labels_orig"].cuda(non_blocking=True)
+        batch["upsample"] = [
+            up.cuda(non_blocking=True) for up in batch["upsample"]
+        ]
+        cell_ind = batch["cell_ind"].cuda(non_blocking=True)
+        occupied_cell = batch["occupied_cells"].cuda(non_blocking=True)
+        neighbors_emb = batch["neighbors_emb"].cuda(non_blocking=True)
+        net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)
+
+        # Accumulate softmax predictions over test-time augmentations
+        with torch.autocast("cuda", enabled=True):
+            with torch.inference_mode():
+                out = net(*net_inputs)
+                for b in range(out.shape[0]):
+                    temp = out[b, :, batch["upsample"][b]].T
+                    if vote is None:
+                        vote = torch.softmax(temp, dim=1)
+                    else:
+                        vote += torch.softmax(temp, dim=1)
+        id_vote += 1
+
+        # Save prediction
+        if id_vote == args.num_votes:
+            # Get label
+            pred_label = vote.max(1)[1] + 1  # Shift by 1 because of ignore_label at index 0
+            # Save result
+            bin_file_path = os.path.join(args.result_folder, batch["filename"][0] + "_lidarseg.bin")
+            pred_label.cpu().numpy().astype(np.uint8).tofile(bin_file_path)
+            # Reset count of votes
+            id_vote = 0
\ No newline at end of file
diff --git a/illustration.png b/illustration.png
new file mode 100755
index 0000000..7c6ba7d
Binary files /dev/null and b/illustration.png differ
diff --git a/launch_train.py b/launch_train.py
new file mode 100644
index 0000000..935aeca
--- /dev/null
+++ b/launch_train.py
@@ -0,0 +1,374 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import yaml
+import torch
+import random
+import warnings
+import argparse
+import numpy as np
+import utils.transforms as tr
+from utils.metrics import SemSegLoss
+from utils.scheduler import WarmupCosine
+from utils.trainer import TrainingManager
+from waffleiron.segmenter import Segmenter
+from datasets import LIST_DATASETS, Collate
+
+
+def load_model_config(file):
+    with open(file, "r") as f:
+        config = yaml.safe_load(f)
+    return config
+
+
+def get_train_augmentations(config):
+
+    list_of_transf = []
+
+    # Transformation shared across all datasets
+    list_of_transf.append(
+        tr.LimitNumPoints(
+            dims=(0, 1, 2),
+            max_point=config["dataloader"]["max_points"],
+            random=True,
+        )
+    )
+
+    # Optional augmentations
+    for aug_name in config["augmentations"].keys():
+        if aug_name == "rotation_z":
+            list_of_transf.append(tr.Rotation(inplace=True, dim=2))
+        elif aug_name == "flip_xy":
+            list_of_transf.append(tr.RandomApply(tr.FlipXY(inplace=True), prob=2 / 3))
+        elif aug_name == "scale":
+            dims = config["augmentations"]["scale"][0]
+            scale = config["augmentations"]["scale"][1]
+            list_of_transf.append(tr.Scale(inplace=True, dims=dims, range=scale))
+        elif aug_name == "instance_cutmix":
+            # Do nothing here, directly handled in the SemanticKITTI dataset
+            continue
+        else:
+            raise ValueError("Unknown transformation")
+
+    print("List of transformations:", list_of_transf)
+
+    return tr.Compose(list_of_transf)
+
+
+def get_datasets(config, args):
+
+    # Shared parameters
+    kwargs = {
+        "rootdir": os.path.join("/datasets_local/", args.path_dataset),
+        "input_feat": config["embedding"]["input_feat"],
+        "voxel_size": config["embedding"]["voxel_size"],
+        "num_neighbors": config["embedding"]["neighbors"],
+        "dim_proj": config["waffleiron"]["dim_proj"],
+        "grids_shape": config["waffleiron"]["grids_size"],
+        "fov_xyz": config["waffleiron"]["fov_xyz"],
+    }
+
+    # Get dataset
+    DATASET = LIST_DATASETS.get(args.dataset.lower())
+    if DATASET is None:
+        raise ValueError(f"Dataset {args.dataset.lower()} not available.")
+
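+    # Note: kwargs above holds the constructor arguments shared by the train
+    # and validation splits; only the phase and the augmentations differ below.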
+    # Train dataset
+    train_dataset = DATASET(
+        phase="trainval" if args.trainval else "train",
+        train_augmentations=get_train_augmentations(config),
+        instance_cutmix=config["augmentations"]["instance_cutmix"],
+        **kwargs,
+    )
+
+    # Validation dataset
+    val_dataset = DATASET(
+        phase="val",
+        **kwargs,
+    )
+
+    return train_dataset, val_dataset
+
+
+def get_dataloader(train_dataset, val_dataset, args):
+
+    if args.distributed:
+        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
+    else:
+        train_sampler = None
+        val_sampler = None
+
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset,
+        batch_size=args.batch_size,
+        shuffle=(train_sampler is None),
+        num_workers=args.workers,
+        pin_memory=True,
+        sampler=train_sampler,
+        drop_last=True,
+        collate_fn=Collate(),
+    )
+    val_loader = torch.utils.data.DataLoader(
+        val_dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.workers,
+        pin_memory=True,
+        sampler=val_sampler,
+        drop_last=False,
+        collate_fn=Collate(),
+    )
+
+    return train_loader, val_loader, train_sampler
+
+
+def get_optimizer(parameters, config):
+    return torch.optim.AdamW(
+        parameters,
+        lr=config["optim"]["lr"],
+        weight_decay=config["optim"]["weight_decay"],
+    )
+
+
+def get_scheduler(optimizer, config, len_train_loader):
+    scheduler = torch.optim.lr_scheduler.LambdaLR(
+        optimizer,
+        WarmupCosine(
+            config["scheduler"]["epoch_warmup"] * len_train_loader,
+            config["scheduler"]["max_epoch"] * len_train_loader,
+            config["scheduler"]["min_lr"] / config["optim"]["lr"],
+        ),
+    )
+    return scheduler
+
+
+def distributed_training(gpu, ngpus_per_node, args, config):
+
+    # --- Init. distributed training
+    args.gpu = gpu
+    if args.gpu is not None:
+        print(f"Use GPU: {args.gpu} for training")
+    if args.distributed:
+        args.rank = args.rank * ngpus_per_node + gpu
+        torch.distributed.init_process_group(
+            backend=args.dist_backend,
+            init_method=args.dist_url,
+            world_size=args.world_size,
+            rank=args.rank,
+        )
+
+    # --- Build network
+    model = Segmenter(
+        input_channels=config["embedding"]["size_input"],
+        feat_channels=config["waffleiron"]["nb_channels"],
+        depth=config["waffleiron"]["depth"],
+        grid_shape=config["waffleiron"]["grids_size"],
+        nb_class=config["classif"]["nb_class"],
+    )
+
+    # --- Per-process batch size and number of workers
+    args.batch_size = config["dataloader"]["batch_size"]
+    args.workers = config["dataloader"]["num_workers"]
+    if args.distributed:
+        # For multiprocessing distributed, DistributedDataParallel constructor
+        # should always set the single device scope, otherwise,
+        # DistributedDataParallel will use all available devices.
+        torch.cuda.set_device(args.gpu)
+        model.cuda(args.gpu)
+        # When using a single GPU per process and per
+        # DistributedDataParallel, we need to divide the batch size
+        # ourselves based on the total number of GPUs of the current node.
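+        # e.g. a config batch size of 8 on a node with 4 GPUs gives each
+        # process a local batch size of 2.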
+        args.batch_size = int(config["dataloader"]["batch_size"] / ngpus_per_node)
+        args.workers = int(
+            (config["dataloader"]["num_workers"] + ngpus_per_node - 1) / ngpus_per_node
+        )
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
+    elif args.gpu is not None:
+        # Training on one GPU
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+    else:
+        # DataParallel will divide and allocate batch_size to all available GPUs
+        model = torch.nn.DataParallel(model).cuda()
+    if args.gpu == 0 or args.gpu is None:
+        print(f"Model:\n{model}")
+        nb_param = sum([p.numel() for p in model.parameters()]) / 1e6
+        print(f"{nb_param} x 10^6 trainable parameters")
+
+    # --- Optimizer
+    optim = get_optimizer(model.parameters(), config)
+
+    # --- Dataset
+    train_dataset, val_dataset = get_datasets(config, args)
+    train_loader, val_loader, train_sampler = get_dataloader(
+        train_dataset, val_dataset, args
+    )
+
+    # --- Loss function
+    loss = SemSegLoss(
+        config["classif"]["nb_class"],
+        lovasz_weight=config["loss"]["lovasz"],
+    ).cuda(args.gpu)
+
+    # --- Learning rate scheduler: linear warmup followed by cosine annealing
+    scheduler = get_scheduler(optim, config, len(train_loader))
+
+    # --- Training
+    mng = TrainingManager(
+        model,
+        loss,
+        train_loader,
+        val_loader,
+        train_sampler,
+        optim,
+        scheduler,
+        config["scheduler"]["max_epoch"],
+        args.log_path,
+        args.gpu,
+        args.world_size,
+        args.fp16,
+        LIST_DATASETS.get(args.dataset.lower()).CLASS_NAME,
+        tensorboard=(not args.eval),
+    )
+    if args.restart:
+        mng.load_state()
+    if args.eval:
+        mng.one_epoch(training=False)
+    else:
+        mng.train()
+
+
+def main(args, config):
+
+    # --- Fixed args
+    # Device
+    args.device = "cuda"
+    # Node rank for distributed training
+    args.rank = 0
+    # Number of nodes for distributed training
+    args.world_size = 1
+    # URL used to set up distributed training
+    args.dist_url = "tcp://127.0.0.1:4444"
+    # Distributed backend
+    args.dist_backend = "nccl"
+    # Distributed processing
+    args.distributed = args.multiprocessing_distributed
+
+    # Create log directory
+    os.makedirs(args.log_path, exist_ok=True)
+    if args.seed is not None:
+        random.seed(args.seed)
+        np.random.seed(args.seed)
+        torch.manual_seed(args.seed)
+        torch.cuda.manual_seed(args.seed)
+        os.environ["PYTHONHASHSEED"] = str(args.seed)
+
+    if args.gpu is not None:
+        args.gpu = 0
+        args.distributed = False
+        args.multiprocessing_distributed = False
+        warnings.warn(
+            "You have chosen a specific GPU. This will completely disable data parallelism."
+        )
+
+    # Extract instances for cutmix
+    if config["augmentations"]["instance_cutmix"]:
+        get_datasets(config, args)
+
+    ngpus_per_node = torch.cuda.device_count()
+    if args.multiprocessing_distributed:
+        # Since we have ngpus_per_node processes per node, the total world_size
+        # needs to be adjusted accordingly
+        args.world_size = ngpus_per_node * args.world_size
+        # Use torch.multiprocessing.spawn to launch distributed processes: the
+        # main_worker process function
+        torch.multiprocessing.spawn(
+            distributed_training,
+            nprocs=ngpus_per_node,
+            args=(ngpus_per_node, args, config),
+        )
+    else:
+        # Simply call main_worker function
+        distributed_training(args.gpu, ngpus_per_node, args, config)
+
+
+def get_default_parser():
+    parser = argparse.ArgumentParser(description="Training")
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        help="Name of the dataset",
+        default="nuscenes",
+    )
+    parser.add_argument(
+        "--path_dataset",
+        type=str,
+        help="Path to dataset",
+        default="/datasets_local/nuscenes/",
+    )
+    parser.add_argument(
+        "--log_path", type=str, required=True, help="Path to log folder"
+    )
+    parser.add_argument(
+        "-r", "--restart", action="store_true", default=False, help="Restart training"
+    )
+    parser.add_argument(
+        "--seed", default=None, type=int, help="Seed for initializing training"
+    )
+    parser.add_argument(
+        "--gpu", default=None, type=int, help="Set to any number to use gpu 0"
+    )
+    parser.add_argument(
+        "--multiprocessing-distributed",
+        action="store_true",
+        help="Use multi-processing distributed training to launch "
+        "N processes per node, which has N GPUs. This is the "
+        "fastest way to use PyTorch for either single node or "
+        "multi node data parallel training",
+    )
+    parser.add_argument(
+        "--fp16",
+        action="store_true",
+        default=False,
+        help="Enable autocast for mixed precision training",
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to model config"
+    )
+    parser.add_argument(
+        "--trainval",
+        action="store_true",
+        default=False,
+        help="Use train + val as train set",
+    )
+    parser.add_argument(
+        "--eval",
+        action="store_true",
+        default=False,
+        help="Run validation only",
+    )
+
+    return parser
+
+
+if __name__ == "__main__":
+
+    parser = get_default_parser()
+    args = parser.parse_args()
+    config = load_model_config(args.config)
+    main(args, config)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0fe28c7
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
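+#
+# Assumed workflow (not stated elsewhere in this diff): install the package in
+# editable mode with `pip install -e .` so that `from waffleiron import
+# Segmenter` resolves in the training and evaluation scripts.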
+
+
+from setuptools import setup
+from setuptools import find_packages
+
+setup(name="waffleiron", packages=find_packages())
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..f78a20f
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
diff --git a/utils/lovasz.py b/utils/lovasz.py
new file mode 100755
index 0000000..5adc74b
--- /dev/null
+++ b/utils/lovasz.py
@@ -0,0 +1,346 @@
+"""
+Lovasz-Softmax and Jaccard hinge loss in PyTorch
+Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
+Code downloaded from:
+https://github.com/edwardzhou130/PolarSeg/blob/master/network/lovasz_losses.py
+"""
+
+
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import numpy as np
+
+try:
+    from itertools import ifilterfalse
+except ImportError:  # py3k
+    from itertools import filterfalse as ifilterfalse
+
+
+def lovasz_grad(gt_sorted):
+    """
+    Computes gradient of the Lovasz extension w.r.t sorted errors
+    See Alg. 1 in paper
+    """
+    p = len(gt_sorted)
+    gts = gt_sorted.sum()
+    intersection = gts - gt_sorted.float().cumsum(0)
+    union = gts + (1 - gt_sorted).float().cumsum(0)
+    jaccard = 1.0 - intersection / union
+    if p > 1:  # cover 1-pixel case
+        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
+    return jaccard
+
+
+def iou_binary(preds, labels, EMPTY=1.0, ignore=None, per_image=True):
+    """
+    IoU for foreground class
+    binary: 1 foreground, 0 background
+    """
+    if not per_image:
+        preds, labels = (preds,), (labels,)
+    ious = []
+    for pred, label in zip(preds, labels):
+        intersection = ((label == 1) & (pred == 1)).sum()
+        union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()
+        if not union:
+            iou = EMPTY
+        else:
+            iou = float(intersection) / float(union)
+        ious.append(iou)
+    iou = mean(ious)  # mean across images if per_image
+    return 100 * iou
+
+
+def iou(preds, labels, C, EMPTY=1.0, ignore=None, per_image=False):
+    """
+    Array of IoU for each (non ignored) class
+    """
+    if not per_image:
+        preds, labels = (preds,), (labels,)
+    ious = []
+    for pred, label in zip(preds, labels):
+        iou = []
+        for i in range(C):
+            if (
+                i != ignore
+            ):  # The ignored label is sometimes among predicted classes (ENet - CityScapes)
+                intersection = ((label == i) & (pred == i)).sum()
+                union = ((label == i) | ((pred == i) & (label != ignore))).sum()
+                if not union:
+                    iou.append(EMPTY)
+                else:
+                    iou.append(float(intersection) / float(union))
+        ious.append(iou)
+    ious = [mean(iou) for iou in zip(*ious)]  # mean across images if per_image
+    return 100 * np.array(ious)
+
+
+# --------------------------- BINARY LOSSES ---------------------------
+
+
+def lovasz_hinge(logits, labels, per_image=True, ignore=None):
+    r"""
+    Binary Lovasz hinge loss
+    logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+    labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+    per_image: compute the loss per image instead of per batch
+    ignore: void class id
+    """
+    if per_image:
+        loss = mean(
+            lovasz_hinge_flat(
+                *flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)
+            )
+            for log, lab in zip(logits, labels)
+        )
+    else:
+        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
+    return loss
+
+
+def lovasz_hinge_flat(logits, labels):
+    r"""
+    Binary Lovasz hinge loss
+    logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
+    labels: [P] Tensor, binary ground truth labels (0 or 1)
+    """
+    if len(labels) == 0:
+        # only void pixels, the gradients should be 0
+        return logits.sum() * 0.0
+    signs = 2.0 * labels.float() - 1.0
+    errors = 1.0 - logits * Variable(signs)
+    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
+    perm = perm.data
+    gt_sorted = labels[perm]
+    grad = lovasz_grad(gt_sorted)
+    loss = torch.dot(F.relu(errors_sorted), Variable(grad))
+    return loss
+
+
+def flatten_binary_scores(scores, labels, ignore=None):
+    """
+    Flattens predictions in the batch (binary case)
+    Remove labels equal to 'ignore'
+    """
+    scores = scores.view(-1)
+    labels = labels.view(-1)
+    if ignore is None:
+        return scores, labels
+    valid = labels != ignore
+    vscores = scores[valid]
+    vlabels = labels[valid]
+    return vscores, vlabels
+
+
+class StableBCELoss(torch.nn.modules.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input, target):
+        neg_abs = -input.abs()
+        loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
+        return loss.mean()
+
+
+def binary_xloss(logits, labels, ignore=None):
+    r"""
+    Binary Cross entropy loss
+    logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
+    labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
+    ignore: void class id
+    """
+    logits, labels = flatten_binary_scores(logits, labels, ignore)
+    loss = StableBCELoss()(logits, Variable(labels.float()))
+    return loss
+
+
+# --------------------------- MULTICLASS LOSSES ---------------------------
+
+
+def lovasz_softmax(probas, labels, classes="present", per_image=False, ignore=None):
+    """
+    Multi-class Lovasz-Softmax loss
+    probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
+            Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
+    labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
+    classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+    per_image: compute the loss per image instead of per batch
+    ignore: void class labels
+    """
+    if per_image:
+        loss = mean(
+            lovasz_softmax_flat(
+                *flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore),
+                classes=classes
+            )
+            for prob, lab in zip(probas, labels)
+        )
+    else:
+        loss = lovasz_softmax_flat(
+            *flatten_probas(probas, labels, ignore), classes=classes
+        )
+    return loss
+
+
+def lovasz_softmax_flat(probas, labels, classes="present"):
+    """
+    Multi-class Lovasz-Softmax loss
+    probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
+    labels: [P] Tensor, ground truth labels (between 0 and C - 1)
+    classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + elif probas.dim() == 5: + # 3D segmentation + B, C, L, H, W = probas.size() + probas = probas.contiguous().view(B, C, L, H * W) + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = labels != ignore + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + +def xloss(logits, labels, ignore=None): + """ + Cross entropy loss + """ + return F.cross_entropy(logits, Variable(labels), ignore_index=255) + + +def jaccard_loss(probas, labels, ignore=None, smooth=100, bk_class=None): + """ + Something wrong with this loss + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + vprobas, vlabels = flatten_probas(probas, labels, ignore) + + true_1_hot = torch.eye(vprobas.shape[1])[vlabels] + + if bk_class: + one_hot_assignment = torch.ones_like(vlabels) + one_hot_assignment[vlabels == bk_class] = 0 + one_hot_assignment = one_hot_assignment.float().unsqueeze(1) + true_1_hot = true_1_hot * one_hot_assignment + + true_1_hot = true_1_hot.to(vprobas.device) + intersection = torch.sum(vprobas * true_1_hot) + cardinality = torch.sum(vprobas + true_1_hot) + loss = (intersection + smooth / (cardinality - intersection + smooth)).mean() + return (1 - loss) * smooth + + +def hinge_jaccard_loss( + probas, labels, ignore=None, classes="present", hinge=0.1, smooth=100 +): + """ + Multi-class Hinge Jaccard loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 
+ ignore: void class labels + """ + vprobas, vlabels = flatten_probas(probas, labels, ignore) + C = vprobas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + for c in class_to_sum: + if c in vlabels: + c_sample_ind = vlabels == c + cprobas = vprobas[c_sample_ind, :] + non_c_ind = np.array([a for a in class_to_sum if a != c]) + class_pred = cprobas[:, c] + max_non_class_pred = torch.max(cprobas[:, non_c_ind], dim=1)[0] + TP = ( + torch.sum(torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0) + + smooth + ) + FN = torch.sum( + torch.clamp(max_non_class_pred - class_pred, min=-hinge) + hinge + ) + + if (~c_sample_ind).sum() == 0: + FP = 0 + else: + nonc_probas = vprobas[~c_sample_ind, :] + class_pred = nonc_probas[:, c] + max_non_class_pred = torch.max(nonc_probas[:, non_c_ind], dim=1)[0] + FP = torch.sum( + torch.clamp(class_pred - max_non_class_pred, max=hinge) + 1.0 + ) + + losses.append(1 - TP / (TP + FP + FN)) + + if len(losses) == 0: + return 0 + return mean(losses) + + +# --------------------------- HELPER FUNCTIONS --------------------------- +def isnan(x): + return x != x + + +def mean(l, ignore_nan=False, empty=0): + """ + nanmean compatible with generators. + """ + l = iter(l) + if ignore_nan: + l = ifilterfalse(isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == "raise": + raise ValueError("Empty mean") + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000..7821e4f --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,63 @@ +# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
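+#
+# fast_hist builds a confusion matrix with rows indexed by the true label and
+# columns by the prediction. A quick sanity check (an illustration, not part
+# of the module):
+#   fast_hist(pred=torch.tensor([0, 1, 1]), label=torch.tensor([0, 1, 0]), n=2)
+#   -> tensor([[1, 1],
+#              [0, 1]])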
+
+
+import torch
+import numpy as np
+from .lovasz import lovasz_softmax_flat
+from torch.nn.functional import softmax
+from torch.nn import Module, CrossEntropyLoss
+
+
+def fast_hist(pred, label, n):
+    assert torch.all(label > -1) & torch.all(pred > -1)
+    assert torch.all(label < n) & torch.all(pred < n)
+    return torch.bincount(n * label + pred, minlength=n**2).reshape(n, n)
+
+
+def per_class_iu(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
+
+
+def overall_accuracy(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist).sum() / hist.sum()
+
+
+def per_class_accuracy(hist):
+    with np.errstate(divide="ignore", invalid="ignore"):
+        return np.diag(hist) / hist.sum(1)
+
+
+class SemSegLoss(Module):
+    def __init__(self, nb_class, lovasz_weight=1.0, ignore_index=255):
+        super().__init__()
+        self.nb_class = nb_class
+        self.ignore_index = ignore_index
+        self.lovasz_weight = lovasz_weight
+        self.ce = CrossEntropyLoss(ignore_index=ignore_index)
+
+    def forward(self, pred, true):
+        # Cross-entropy term
+        loss = self.ce(pred, true)
+
+        # Optional Lovasz-Softmax term, computed on non-ignored points only
+        if self.lovasz_weight > 0:
+            where = true != self.ignore_index
+            if where.sum() > 0:
+                loss += self.lovasz_weight * lovasz_softmax_flat(
+                    softmax(pred[where], dim=1),
+                    true[where],
+                )
+
+        return loss
diff --git a/utils/scheduler.py b/utils/scheduler.py
new file mode 100644
index 0000000..4b50196
--- /dev/null
+++ b/utils/scheduler.py
@@ -0,0 +1,33 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+
+class WarmupCosine:
+    def __init__(self, warmup_end, max_iter, factor_min):
+        self.max_iter = max_iter
+        self.warmup_end = warmup_end
+        self.factor_min = factor_min
+
+    def __call__(self, step):
+        if step < self.warmup_end:
+            # Linear warmup from 0 to 1
+            factor = step / self.warmup_end
+        else:
+            # Cosine annealing from 1 down to factor_min
+            step = step - self.warmup_end
+            max_iter = self.max_iter - self.warmup_end
+            step = (step / max_iter) * np.pi
+            factor = self.factor_min + 0.5 * (1 - self.factor_min) * (np.cos(step) + 1)
+        return factor
diff --git a/utils/trainer.py b/utils/trainer.py
new file mode 100644
index 0000000..e73e61d
--- /dev/null
+++ b/utils/trainer.py
@@ -0,0 +1,306 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
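+#
+# TrainingManager is driven from launch_train.py; a minimal single-GPU usage
+# sketch (hypothetical values, shown for orientation only):
+#   mng = TrainingManager(net, loss, train_loader, val_loader, None,
+#                         optim, scheduler, max_epoch=45, path="./logs",
+#                         rank=None, world_size=1, fp16=True)
+#   mng.train()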
+
+
+import torch
+import warnings
+import numpy as np
+from tqdm import tqdm
+from torch.cuda.amp import GradScaler
+from torch.utils.tensorboard import SummaryWriter
+from utils.metrics import overall_accuracy, fast_hist, per_class_iu, per_class_accuracy
+
+
+class TrainingManager:
+    def __init__(
+        self,
+        net,
+        loss,
+        loader_train,
+        loader_val,
+        train_sampler,  # If provided, we assume distributed training
+        optim,
+        scheduler,
+        max_epoch,
+        path,
+        rank,
+        world_size,
+        fp16=True,
+        class_names=None,
+        tensorboard=True,
+    ):
+
+        # Optim. methods
+        self.optim = optim
+        self.fp16 = fp16
+        self.scaler = GradScaler() if fp16 else None
+        self.scheduler = scheduler
+
+        # Dataloaders
+        self.max_epoch = max_epoch
+        self.loader_train = loader_train
+        self.loader_val = loader_val
+        self.train_sampler = train_sampler
+        self.class_names = class_names
+
+        # Network
+        self.net = net
+        self.rank = rank
+        self.world_size = world_size
+        print(f"Trainer on gpu: {self.rank}. World size: {self.world_size}.")
+
+        # Loss
+        self.loss = loss
+
+        # Checkpoints
+        self.best_miou = 0
+        self.current_epoch = 0
+        self.path_to_ckpt = path
+
+        # Monitoring
+        if tensorboard and (self.rank == 0 or self.rank is None):
+            self.writer_train = SummaryWriter(
+                path + "/tensorboard/train/",
+                purge_step=self.current_epoch * len(self.loader_train),
+                flush_secs=30,
+            )
+            self.writer_val = SummaryWriter(
+                path + "/tensorboard/val/",
+                purge_step=self.current_epoch,
+                flush_secs=30,
+            )
+        else:
+            self.writer_val = None
+            self.writer_train = None
+
+    def print_log(self, running_loss, oAcc, mAcc, mIoU, ious):
+        if self.rank == 0 or self.rank is None:
+            # Global score
+            log = (
+                f"\nEpoch: {self.current_epoch:d} :\n"
+                + f" Loss = {running_loss:.3f}"
+                + f" - oAcc = {oAcc:.1f}"
+                + f" - mAcc = {mAcc:.1f}"
+                + f" - mIoU = {mIoU:.1f}"
+            )
+            print(log)
+            # Per class score
+            log = ""
+            for i, s in enumerate(ious):
+                if self.class_names is None:
+                    log += f"Class {i}: {100 * s:.1f} - "
+                else:
+                    log += f"{self.class_names[i]}: {100 * s:.1f} - "
+            print(log[:-3])
+            # Recall best mIoU
+            print(f"Best mIoU was {self.best_miou:.1f}.")
+
+    def gather_scores(self, list_tensors):
+        if self.rank == 0:
+            tensor_reduced = [
+                [torch.empty_like(t) for _ in range(self.world_size)]
+                for t in list_tensors
+            ]
+            for t, t_reduced in zip(list_tensors, tensor_reduced):
+                torch.distributed.gather(t, t_reduced)
+            tensor_reduced = [sum(t).cpu() for t in tensor_reduced]
+            return tensor_reduced
+        else:
+            for t in list_tensors:
+                torch.distributed.gather(t)
+
+    def one_epoch(self, training=True):
+
+        # Train or eval mode
+        if training:
+            net = self.net.train()
+            loader = self.loader_train
+            if self.rank == 0 or self.rank is None:
+                print("\nTraining: %d/%d epochs" % (self.current_epoch, self.max_epoch))
+            writer = self.writer_train
+            if self.train_sampler is not None:
+                self.train_sampler.set_epoch(self.current_epoch)
+        else:
+            net = self.net.eval()
+            loader = self.loader_val
+            if self.rank == 0 or self.rank is None:
+                print(
+                    "\nValidation: %d/%d epochs" % (self.current_epoch, self.max_epoch)
+                )
+            writer = self.writer_val
+        print_freq = np.max((len(loader) // 10, 1))
+
+        # Stats
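+        # running_loss accumulates the (detached) loss over mini-batches;
+        # confusion_matrix accumulates class-vs-class counts and is reduced
+        # across processes before computing oAcc / mAcc / mIoU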
+ running_loss = 0.0 + confusion_matrix = 0 + + # Loop over mini-batches + if self.rank == 0 or self.rank is None: + bar_format = "{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}" + loader = tqdm(loader, bar_format=bar_format) + for it, batch in enumerate(loader): + + # Network inputs + feat = batch["feat"].cuda(self.rank, non_blocking=True) + labels = batch["labels_orig"].cuda(self.rank, non_blocking=True) + batch["upsample"] = [ + up.cuda(self.rank, non_blocking=True) for up in batch["upsample"] + ] + cell_ind = batch["cell_ind"].cuda(self.rank, non_blocking=True) + occupied_cell = batch["occupied_cells"].cuda(self.rank, non_blocking=True) + neighbors_emb = batch["neighbors_emb"].cuda(self.rank, non_blocking=True) + net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb) + + # Get prediction and loss + with torch.autocast("cuda", enabled=self.fp16): + # Logits + if training: + out = net(*net_inputs) + else: + with torch.no_grad(): + out = net(*net_inputs) + # Upsample to original resolution + out_upsample = [] + for id_b, closest_point in enumerate(batch["upsample"]): + temp = out[id_b, :, closest_point] + out_upsample.append(temp.T) + out = torch.cat(out_upsample, dim=0) + # Loss + loss = self.loss(out, labels) + running_loss += loss.detach() + + # Confusion matrix + with torch.no_grad(): + nb_class = out.shape[1] + pred_label = out.max(1)[1] + where = labels != 255 + confusion_matrix += fast_hist( + pred_label[where], labels[where], nb_class + ) + + # Logs + if it % print_freq == print_freq - 1 or it == len(loader) - 1: + # Gather scores + if self.train_sampler is not None: + out = self.gather_scores([running_loss, confusion_matrix]) + else: + out = [running_loss.cpu(), confusion_matrix.cpu()] + if self.rank == 0 or self.rank is None: + # Compute scores + oAcc = 100 * overall_accuracy(out[1]) + mAcc = 100 * np.nanmean(per_class_accuracy(out[1])) + ious = per_class_iu(out[1]) + mIoU = 100 * np.nanmean(ious) + running_loss_reduced = out[0].item() / self.world_size / (it + 1) + # Print score + self.print_log(running_loss_reduced, oAcc, mAcc, mIoU, ious) + # Save in tensorboard + if (writer is not None) and (training or it == len(loader) - 1): + header = "Train" if training else "Test" + step = ( + self.current_epoch * len(loader) + it + if training + else self.current_epoch + ) + writer.add_scalar(header + "/loss", running_loss_reduced, step) + writer.add_scalar(header + "/oAcc", oAcc, step) + writer.add_scalar(header + "/mAcc", mAcc, step) + writer.add_scalar(header + "/mIoU", mIoU, step) + writer.add_scalar( + header + "/lr", self.optim.param_groups[0]["lr"], step + ) + + # Gradient step + if training: + self.optim.zero_grad(set_to_none=True) + if self.fp16: + self.scaler.scale(loss).backward() + self.scaler.step(self.optim) + self.scaler.update() + else: + loss.backward() + self.optim.step() + if self.scheduler is not None: + self.scheduler.step() + + # Return score + if self.rank == 0 or self.rank is None: + return mIoU + else: + return None + + def load_state(self, best=False): + filename = self.path_to_ckpt + filename += "/ckpt_best.pth" if best else "/ckpt_last.pth" + rank = 0 if self.rank is None else self.rank + ckpt = torch.load( + filename, + map_location=f"cuda:{rank}", + ) + self.net.load_state_dict(ckpt["net"]) + if ckpt.get("optim") is None: + warnings.warn("Optimizer state not available") + else: + self.optim.load_state_dict(ckpt["optim"]) + if self.scheduler is not None: + if ckpt.get("scheduler") is None: + warnings.warn("Scheduler state not available") + else: + 
self.scheduler.load_state_dict(ckpt["scheduler"])
+        if self.fp16:
+            if ckpt.get("scaler") is None:
+                warnings.warn("Scaler state not available")
+            else:
+                self.scaler.load_state_dict(ckpt["scaler"])
+        if ckpt.get("best_miou") is not None:
+            self.best_miou = ckpt["best_miou"]
+        if ckpt.get("epoch") is not None:
+            self.current_epoch = ckpt["epoch"] + 1
+        print(f"Checkpoint loaded on cuda:{rank}: {self.path_to_ckpt}")
+
+    def save_state(self, best=False):
+        if self.rank == 0 or self.rank is None:
+            dict_to_save = {
+                "epoch": self.current_epoch,
+                "net": self.net.state_dict(),
+                "optim": self.optim.state_dict(),
+                "scheduler": self.scheduler.state_dict()
+                if self.scheduler is not None
+                else None,
+                "scaler": self.scaler.state_dict() if self.fp16 else None,
+                "best_miou": self.best_miou,
+            }
+            filename = self.path_to_ckpt
+            filename += "/ckpt_best.pth" if best else "/ckpt_last.pth"
+            torch.save(dict_to_save, filename)
+
+    def train(self):
+        for _ in range(self.current_epoch, self.max_epoch):
+            # Train
+            self.one_epoch(training=True)
+            # Val
+            miou = self.one_epoch(training=False)
+            # Save best checkpoint
+            if miou is not None and miou > self.best_miou:
+                self.best_miou = miou
+                self.save_state(best=True)
+                print(f"\n\n*** New best mIoU: {self.best_miou:.1f}.\n")
+            # Save last checkpoint
+            self.save_state()
+            # Increase epoch number
+            self.current_epoch += 1
+        if self.rank == 0 or self.rank is None:
+            print("Finished Training")
diff --git a/utils/transforms.py b/utils/transforms.py
new file mode 100644
index 0000000..f4a6625
--- /dev/null
+++ b/utils/transforms.py
@@ -0,0 +1,311 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
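+#
+# The transforms below follow a common (pcloud, labels) -> (pcloud, labels)
+# interface. A typical composition, mirroring get_train_augmentations in
+# launch_train.py, would be:
+#   augment = Compose((
+#       LimitNumPoints(dims=(0, 1, 2), max_point=30000, random=True),
+#       RandomApply(FlipXY(inplace=True), prob=2 / 3),
+#       Rotation(inplace=True, dim=2),
+#   ))
+#   pc, labels = augment(pc, labels)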
+
+
+import os
+import torch
+import numpy as np
+from glob import glob
+
+
+class Compose:
+    def __init__(self, transformations):
+        self.transformations = transformations
+
+    def __call__(self, pcloud, labels):
+        for t in self.transformations:
+            pcloud, labels = t(pcloud, labels)
+        return pcloud, labels
+
+
+class RandomApply:
+    def __init__(self, transformation, prob=0.5):
+        self.prob = prob
+        self.transformation = transformation
+
+    def __call__(self, pcloud, labels):
+        if torch.rand(1) < self.prob:
+            pcloud, labels = self.transformation(pcloud, labels)
+        return pcloud, labels
+
+
+class Transformation:
+    def __init__(self, inplace=False):
+        self.inplace = inplace
+
+    def __call__(self, pcloud, labels):
+        if labels is None:
+            return pcloud if self.inplace else np.array(pcloud, copy=True)
+
+        out = (
+            (pcloud, labels)
+            if self.inplace
+            else (np.array(pcloud, copy=True), np.array(labels, copy=True))
+        )
+        return out
+
+
+class Identity(Transformation):
+    def __init__(self, inplace=False):
+        super().__init__(inplace)
+
+    def __call__(self, pcloud, labels):
+        return super().__call__(pcloud, labels)
+
+
+class Rotation(Transformation):
+    def __init__(self, dim=2, range=np.pi, inplace=False):
+        super().__init__(inplace)
+        self.range = range
+        self.inplace = inplace
+        if dim == 2:
+            self.dims = (0, 1)
+        elif dim == 1:
+            self.dims = (0, 2)
+        elif dim == 0:
+            self.dims = (1, 2)
+
+    def __call__(self, pcloud, labels):
+        # Draw a random rotation angle
+        theta = (2 * torch.rand(1)[0] - 1) * self.range
+        # Build rotation matrix
+        rot = np.array(
+            [
+                [np.cos(theta), np.sin(theta)],
+                [-np.sin(theta), np.cos(theta)],
+            ]
+        )
+        # Apply rotation
+        pcloud, labels = super().__call__(pcloud, labels)
+        pcloud[:, self.dims] = pcloud[:, self.dims] @ rot
+        return pcloud, labels
+
+
+class Scale(Transformation):
+    def __init__(self, dims=(0, 1), range=0.05, inplace=False):
+        super().__init__(inplace)
+        self.dims = dims
+        self.range = range
+
+    def __call__(self, pcloud, labels):
+        pcloud, labels = super().__call__(pcloud, labels)
+        scale = 1 + (2 * torch.rand(1).item() - 1) * self.range
+        pcloud[:, self.dims] *= scale
+        return pcloud, labels
+
+
+class FlipXY(Transformation):
+    def __init__(self, inplace=False):
+        super().__init__(inplace=inplace)
+
+    def __call__(self, pcloud, labels):
+        pcloud, labels = super().__call__(pcloud, labels)
+        # Flip either the x or the y axis, picked at random
+        dim = torch.randint(2, (1,))[0]
+        pcloud[:, dim] *= -1.0
+        return pcloud, labels
+
+
+class LimitNumPoints(Transformation):
+    def __init__(self, dims=(0, 1, 2), max_point=30000, random=False):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.max_points = max_point
+        self.random = random
+        assert max_point > 0
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+        if pc.shape[0] > self.max_points:
+            if self.random:
+                center = torch.randint(pc.shape[0], (1,))[0]
+                center = pc[center : center + 1, self.dims]
+            else:
+                center = np.zeros((1, len(self.dims)))
+            idx = np.argsort(np.square(pc[:, self.dims] - center).sum(axis=1))[
+                : self.max_points
+            ]
+            pc, labels = pc[idx], labels[idx]
+        return pc, labels
+
+
+class Crop(Transformation):
+    def __init__(self, dims=(0, 1, 2), fov=((-5, -5, -5), (5, 5, 5)), eps=1e-4):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.fov = fov
+        self.eps = eps
+        assert len(fov[0]) == len(fov[1]), "Min and Max FOV must have the same length."
+        for i, (vmin, vmax) in enumerate(zip(*fov)):
+            assert (
+                vmin < vmax
+            ), f"Field of view: min ({vmin}) < max ({vmax}) is expected on dimension {i}."
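+    # A point is kept only if it lies strictly inside (min + eps, max - eps)
+    # on every cropped dimension.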
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+
+        where = None
+        for i, d in enumerate(self.dims):
+            # Keep points strictly inside the field of view on dimension d
+            temp = (pc[:, d] > self.fov[0][i] + self.eps) & (
+                pc[:, d] < self.fov[1][i] - self.eps
+            )
+            where = temp if where is None else where & temp
+
+        return pc[where], labels[where]
+
+
+class Voxelize(Transformation):
+    def __init__(self, dims=(0, 1, 2), voxel_size=0.1, random=False):
+        super().__init__(inplace=True)
+        self.dims = dims
+        self.voxel_size = voxel_size
+        self.random = random
+        assert voxel_size >= 0
+
+    def __call__(self, pcloud, labels):
+        pc, labels = super().__call__(pcloud, labels)
+        if self.voxel_size <= 0:
+            return pc, labels
+
+        if self.random:
+            permute = torch.randperm(pc.shape[0])
+            pc, labels = pc[permute], labels[permute]
+
+        pc_shift = pc[:, self.dims] - pc[:, self.dims].min(0, keepdims=True)
+
+        _, ind = np.unique(
+            (pc_shift / self.voxel_size).astype("int"), return_index=True, axis=0
+        )
+
+        return pc[ind, :], None if labels is None else labels[ind]
+
+
+class InstanceCutMix(Transformation):
+
+    # Expected number of instances per class id in the pre-extracted bank,
+    # used to check whether the bank has already been built
+    EXPECTED_BANK_SIZE = {
+        "train": {1: 5083, 2: 3092, 5: 8084, 6: 1551, 7: 560},
+        "trainval": {1: 8213, 2: 4169, 5: 12190, 6: 2943, 7: 701},
+    }
+
+    def __init__(self, phase="train"):
+        """Instance cutmix coded only for SemanticKITTI"""
+        super().__init__(inplace=True)
+
+        raise NotImplementedError("Include latest version")
+
+        self.phase = phase
+        self.rootdir = "/root/local_storage/semantic_kitti_instance_" + self.phase
+        self.bank = {1: [], 2: [], 5: [], 6: [], 7: []}
+        for key in self.bank.keys():
+            self.bank[key] = glob(os.path.join(self.rootdir, f"{key}", "*.bin"))
+        self.loaded = self.test_loaded()
+        # v2
+        self.rot = Compose(
+            (
+                FlipXY(inplace=True),
+                Rotation(inplace=True),
+                Scale(dims=(0, 1, 2), range=0.1, inplace=True),
+            )
+        )
+        self.nb_to_add = 40
+        self.vox = Voxelize(dims=(0, 1, 2), voxel_size=1.0, random=True)
+        """ v1
+        self.rot = Rotation(inplace=False)
+        self.max_size = 100  # Unused
+        self.nb_to_add = 20
+        self.vox = Voxelize(dims=(0, 1, 2), voxel_size=.1, random=True)
+        """
+
+    def test_loaded(self):
+        expected = self.EXPECTED_BANK_SIZE.get(self.phase, {})
+        for key, size in expected.items():
+            if len(self.bank[key]) != size:
+                print(len(self.bank[key]), size)
+                return False
+        return True
+
+    def add_in_bank(self, pc, class_label, instance_label):
+        for id_class in self.bank.keys():
+            where_class = class_label == id_class
+            all_instances = np.unique(instance_label[where_class])
+            for id_instance in all_instances:
+                # Segment instance
+                where_ins = instance_label == id_instance
+                if where_ins.sum() <= 5:
+                    continue
+                pc_to_add = pc[where_ins, :]
+                # Center instance
+                pc_to_add[:, :2] -= pc_to_add[:, :2].mean(0, keepdims=True)
+                pc_to_add[:, 2] -= pc_to_add[:, 2].min(0, keepdims=True)
+                # Store instance on disk
+                pathfile = os.path.join(
+                    self.rootdir, f"{id_class}", f"{len(self.bank[id_class]):07d}.bin"
+                )
+                os.makedirs(os.path.join(self.rootdir, f"{id_class}"), exist_ok=True)
+                pc_to_add.tofile(pathfile)
+                self.bank[id_class].append(pathfile)
+
+    def add_in_pc(self, pc, class_label):
+        new_pc = [pc]
+        new_label = [class_label]
+        # Find locations where to add new objects (on a surface)
+        pc_vox, class_label_vox = self.vox(pc, class_label)
+
+        # v2
+        where_surface = np.where((class_label_vox >= 8) & (class_label_vox <= 10))[0]
+
+        """ v1
+        where_surface = np.where( ( (class_label_vox>=8) & (class_label_vox<=11) ) | (class_label_vox==16) )[0]
+        """
+
+        where_surface = where_surface[torch.randperm(len(where_surface))]
+        id_tot = 0
+        for id_class in self.bank.keys():
+            which_one = torch.randint(len(self.bank[id_class]), (self.nb_to_add,))
+            for ii in range(self.nb_to_add):
+                p = pc_vox[where_surface[id_tot]]
+                obj = self.bank[id_class][which_one[ii]]
+                obj = np.fromfile(obj, dtype=np.float32).reshape((-1, 4))
+                obj, _ = self.rot(obj, 1)
+                obj[:, :3] += p[:3][None]
+                new_pc.append(obj)
+                new_label.append(np.ones((obj.shape[0],), dtype=np.int64) * id_class)
+                id_tot += 1
+        return np.concatenate(new_pc, 0), np.concatenate(new_label, 0)
+
+    def __call__(self, pc, class_label, instance_label):
+        if not self.loaded:
+            self.add_in_bank(pc, class_label, instance_label)
+            return np.zeros((2, 4)), None
+        return self.add_in_pc(pc, class_label)
diff --git a/waffleiron/__init__.py b/waffleiron/__init__.py
new file mode 100644
index 0000000..ee370e5
--- /dev/null
+++ b/waffleiron/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .backbone import WaffleIron
+from .segmenter import Segmenter
+
+__all__ = ["WaffleIron", "Segmenter"]
diff --git a/waffleiron/backbone.py b/waffleiron/backbone.py
new file mode 100644
index 0000000..a6f6abd
--- /dev/null
+++ b/waffleiron/backbone.py
@@ -0,0 +1,146 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
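+#
+# Shape conventions used in this file:
+#   tokens:    B x C x N (one feature vector per 3D point)
+#   "flatten": sparse matrix averaging the N point tokens into H*W 2D cells
+#   "inflate": sparse matrix copying each cell feature back onto its points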
+
+
+import torch
+import numpy as np
+import torch.nn as nn
+from torch import autocast
+
+
+def build_proj_matrix(indices_non_zeros, occupied_cell, batch_size, num_2d_cells):
+    num_points = indices_non_zeros.shape[1] // batch_size
+    matrix_shape = (batch_size, num_2d_cells, num_points)
+
+    # Sparse projection matrix for Inflate step
+    inflate = torch.sparse_coo_tensor(
+        indices_non_zeros, occupied_cell.reshape(-1), matrix_shape
+    ).transpose(1, 2)
+
+    # Count the number of points in each cell (used in the flatten step)
+    with autocast("cuda", enabled=False):
+        num_points_per_cells = torch.bmm(
+            inflate, torch.bmm(inflate.transpose(1, 2), occupied_cell.unsqueeze(-1))
+        )
+
+    # Sparse projection matrix for Flatten step (projection & average in each 2D cell)
+    weight_per_point = 1.0 / (num_points_per_cells.reshape(-1) + 1e-6)
+    weight_per_point *= occupied_cell.reshape(-1)
+    flatten = torch.sparse_coo_tensor(indices_non_zeros, weight_per_point, matrix_shape)
+
+    return {"flatten": flatten, "inflate": inflate}
+
+
+class ChannelMix(nn.Module):
+    def __init__(self, channels):
+        super().__init__()
+        self.norm = nn.BatchNorm1d(channels)
+        self.mlp = nn.Sequential(
+            nn.Conv1d(channels, channels, 1),
+            nn.ReLU(inplace=True),
+            nn.Conv1d(channels, channels, 1),
+        )
+        self.scale = nn.Conv1d(
+            channels, channels, 1, bias=False, groups=channels
+        )  # Implements LayerScale
+
+    def forward(self, tokens):
+        """tokens <- tokens + LayerScale( MLP( BN(tokens) ) )"""
+        return tokens + self.scale(self.mlp(self.norm(tokens)))
+
+
+class SpatialMix(nn.Module):
+    def __init__(self, channels, grid_shape):
+        super().__init__()
+        self.H, self.W = grid_shape
+        self.norm = nn.BatchNorm1d(channels)
+        self.ffn = nn.Sequential(
+            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
+        )
+        self.scale = nn.Conv1d(
+            channels, channels, 1, bias=False, groups=channels
+        )  # Implements LayerScale
+        self.grid_shape = grid_shape
+
+    def extra_repr(self):
+        return f"(grid): [{self.grid_shape[0]}, {self.grid_shape[1]}]"
+
+    def forward(self, tokens, sp_mat):
+        """tokens <- tokens + LayerScale( Inflate( FFN( Flatten( BN(tokens) ) ) ) )"""
+        B, C, N = tokens.shape
+        residual = self.norm(tokens)
+        # Flatten
+        with autocast("cuda", enabled=False):
+            residual = torch.bmm(
+                sp_mat["flatten"], residual.transpose(1, 2).float()
+            ).transpose(1, 2)
+        residual = residual.reshape(B, C, self.H, self.W)
+        # FFN
+        residual = self.ffn(residual)
+        # Inflate
+        residual = residual.reshape(B, C, self.H * self.W)
+        with autocast("cuda", enabled=False):
+            residual = torch.bmm(
+                sp_mat["inflate"], residual.transpose(1, 2).float()
+            ).transpose(1, 2)
+        residual = residual.reshape(B, C, N)
+        return tokens + self.scale(residual)
+
+
+class WaffleIron(nn.Module):
+    def __init__(self, channels, depth, grids_shape):
+        super().__init__()
+        self.grids_shape = grids_shape
+        self.channel_mix = nn.ModuleList([ChannelMix(channels) for _ in range(depth)])
+        self.spatial_mix = nn.ModuleList(
+            [
+                SpatialMix(channels, grids_shape[d % len(grids_shape)])
+                for d in range(depth)
+            ]
+        )
+
+    def forward(self, tokens, cell_ind, occupied_cell):
+
+        # Build projection matrices
+        batch_size, num_points = tokens.shape[0], tokens.shape[-1]
+        point_ind = (
+            torch.arange(num_points, device=tokens.device)
+            .unsqueeze(0)
+            .expand(batch_size, -1)
+            .reshape(1, -1)
+        )
+        batch_ind = (
+            torch.arange(batch_size, device=tokens.device)
+            .unsqueeze(1)
+            .expand(-1, num_points)
+            .reshape(1, -1)
+        )
+        non_zeros_ind = []
+        for i in range(cell_ind.shape[1]):
+            non_zeros_ind.append(
+                torch.cat((batch_ind, cell_ind[:, i].reshape(1, -1), point_ind), dim=0)
+            )
+        sp_mat = [
+            build_proj_matrix(ind, occupied_cell, batch_size, np.prod(sh))
+            for ind, sh in zip(non_zeros_ind, self.grids_shape)
+        ]
+
+        # Actual backbone
+        for d, (smix, cmix) in enumerate(zip(self.spatial_mix, self.channel_mix)):
+            tokens = smix(tokens, sp_mat[d % len(sp_mat)])
+            tokens = cmix(tokens)
+
+        return tokens
diff --git a/waffleiron/embedding.py b/waffleiron/embedding.py
new file mode 100644
index 0000000..6bd837e
--- /dev/null
+++ b/waffleiron/embedding.py
@@ -0,0 +1,64 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+import torch.nn as nn
+
+
+class Embedding(nn.Module):
+    def __init__(self, channels_in, channels_out):
+        super().__init__()
+
+        # Normalize inputs
+        self.norm = nn.BatchNorm1d(channels_in)
+
+        # Point Embedding
+        self.conv1 = nn.Conv1d(channels_in, channels_out, 1)
+
+        # Neighborhood embedding
+        self.conv2 = nn.Sequential(
+            nn.BatchNorm2d(channels_in),
+            nn.Conv2d(channels_in, channels_out, 1, bias=False),
+            nn.BatchNorm2d(channels_out),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(channels_out, channels_out, 1, bias=False),
+        )
+
+        # Merge point and neighborhood embeddings
+        self.final = nn.Conv1d(2 * channels_out, channels_out, 1, bias=True, padding=0)
+
+    def forward(self, x, neighbors):
+        """x: B x C_in x N. neighbors: B x K x N. Output: B x C_out x N"""
+        # Normalize input
+        x = self.norm(x)
+
+        # Point embedding
+        point_emb = self.conv1(x)
+
+        # Neighborhood embedding
+        gather = []
+        # Gather neighbors around each center point, skipping the first
+        # neighbor, which is the center point itself
+        for ind_nn in range(1, neighbors.shape[1]):
+            temp = neighbors[:, ind_nn : ind_nn + 1, :].expand(-1, x.shape[1], -1)
+            gather.append(torch.gather(x, 2, temp).unsqueeze(-1))
+        # Relative coordinates
+        neigh_emb = torch.cat(gather, -1) - x.unsqueeze(-1)  # Size: B x C_in x N x (K - 1)
+        # Embedding
+        neigh_emb = self.conv2(neigh_emb).max(-1)[0]
+
+        # Merge both embeddings
+        return self.final(torch.cat((point_emb, neigh_emb), dim=1))
diff --git a/waffleiron/segmenter.py b/waffleiron/segmenter.py
new file mode 100644
index 0000000..c08baa2
--- /dev/null
+++ b/waffleiron/segmenter.py
@@ -0,0 +1,34 @@
+# Copyright 2022 - Valeo Comfort and Driving Assistance - Gilles Puy @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch.nn as nn +from .backbone import WaffleIron +from .embedding import Embedding + + +class Segmenter(nn.Module): + def __init__(self, input_channels, feat_channels, nb_class, depth, grid_shape): + super().__init__() + # Embedding layer + self.embed = Embedding(input_channels, feat_channels) + # WaffleIron backbone + self.waffleiron = WaffleIron(feat_channels, depth, grid_shape) + # Classification layer + self.classif = nn.Conv1d(feat_channels, nb_class, 1) + + def forward(self, feats, cell_ind, occupied_cell, neighbors): + tokens = self.embed(feats, neighbors) + tokens = self.waffleiron(tokens, cell_ind, occupied_cell) + return self.classif(tokens)
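+
+
+if __name__ == "__main__":
+    # Minimal smoke test: a hypothetical sketch, not part of the training
+    # pipeline. All sizes below are made up; they only respect the shape
+    # contract of forward() documented above.
+    import torch
+
+    B, N, K = 2, 1024, 16  # batch size, number of points, neighbors per point
+    grids = [[32, 32], [32, 8]]  # two 2D projection grids
+    model = Segmenter(
+        input_channels=5, feat_channels=64, nb_class=19, depth=4, grid_shape=grids
+    )
+    feats = torch.randn(B, 5, N)
+    # One flat 2D-cell index per point and per grid, in [0, H * W)
+    cell_ind = torch.stack(
+        [torch.randint(g[0] * g[1], (B, N)) for g in grids], dim=1
+    )
+    occupied_cell = torch.ones(B, N)
+    neighbors = torch.randint(N, (B, K, N))
+    out = model(feats, cell_ind, occupied_cell, neighbors)
+    print(out.shape)  # expected: torch.Size([2, 19, 1024])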