diff --git a/README.md b/README.md
index da74599..cb7bc7b 100644
--- a/README.md
+++ b/README.md
@@ -130,19 +130,19 @@ python train_selfsupervised.py ... cfg.network.backbone=SPVCNN
 
 ```bash
 # 100%
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=30 cfg.val_interval=5 cfg.downstream.skip_ratio=1
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=30 cfg.downstream.val_interval=5 cfg.downstream.skip_ratio=1
 
 # 50%
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=50 cfg.val_interval=5 cfg.downstream.skip_ratio=2
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=50 cfg.downstream.val_interval=5 cfg.downstream.skip_ratio=2
 
 # 10%
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=100 cfg.val_interval=10 cfg.downstream.skip_ratio=10
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=100 cfg.downstream.val_interval=10 cfg.downstream.skip_ratio=10
 
 # 1%
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=500 cfg.val_interval=50 cfg.downstream.skip_ratio=100
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=500 cfg.downstream.val_interval=50 cfg.downstream.skip_ratio=100
 
 # 0.1%
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=1000 cfg.val_interval=100 cfg.downstream.skip_ratio=1000
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.max_epochs=1000 cfg.downstream.val_interval=100 cfg.downstream.skip_ratio=1000
 ```
 
 #### With pretrained models
@@ -154,7 +154,7 @@ python convert_models.py --ckpt path_to_pretraining_checkpoint
 
 Second, run downstream training
 ```bash
-python train_downstream_semseg.py cfg=nuscenes cfg.downstream.checkpoint_dir='path_to_checkpoint_directory' cfg.downstream.checkpoint_name='pretrained_backbone_XXX.ckpt'
+python also_selfsup/train_downstream_semseg.py cfg=nuscenes cfg.downstream.checkpoint_dir='path_to_checkpoint_directory' cfg.downstream.checkpoint_name='pretrained_backbone_XXX.ckpt'
 ```
 
 #### Evaluation of downstream models
diff --git a/also_selfsup/eval.py b/also_selfsup/eval.py
index 0441be8..2475f67 100644
--- a/also_selfsup/eval.py
+++ b/also_selfsup/eval.py
@@ -1,5 +1,3 @@
-import os
-import numpy as np
 import yaml
 import logging
 import argparse
@@ -14,7 +12,7 @@
 from torch_geometric.data import DataLoader
 
-from utils.utils import wblue, wgreen
+from utils.utils import wgreen
 from utils.confusion_matrix import ConfusionMatrix
 
 from transforms import get_transforms, get_input_channels
@@ -25,7 +23,7 @@
 if __name__ == "__main__":
 
-    warnings.filterwarnings("ignore", category=UserWarning) 
+    warnings.filterwarnings("ignore", category=UserWarning)
 
     logging.getLogger().setLevel("INFO")
@@ -41,7 +39,7 @@
     DatasetClass = eval("datasets."+config["dataset_name"])
     test_transforms = get_transforms(config, train=False, downstream=True, keep_orignal_data=True)
     test_dataset = DatasetClass(config["dataset_root"],
-                                split=opts.split, 
+                                split=opts.split,
                                 transform=test_transforms,
                                 )
@@ -54,12 +52,10 @@
         follow_batch=["voxel_coords"]
     )
-
     num_classes = config["downstream"]["num_classes"]
 
     device = torch.device("cuda")
-
-    logging.info("Network") 
+    logging.info("Network")
     if config["network"]["backbone_params"] is None:
         config["network"]["backbone_params"] = {}
     config["network"]["backbone_params"]["in_channels"] = get_input_channels(config["inputs"])
@@ -73,11 +69,10 @@
     net = eval(backbone_name)(**config["network"]["backbone_params"])
     net.to(device)
     net.eval()
-    
+
     logging.info("Loading the weights from pretrained network")
     net.load_state_dict(torch.load(opts.ckpt), strict=True)
-
     cm = ConfusionMatrix(num_classes, 0)
 
     with torch.no_grad():
         t = tqdm(test_loader, ncols=100)
@@ -113,4 +108,3 @@
     logging.info(f"MIoU: {miou}")
     logging.info(f"FIoU: {freqweighted_iou}")
     logging.info(f"IoU per class: {iou_per_class}")
-
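For reference, the MIoU and FIoU that `eval.py` logs at the end are plain confusion-matrix statistics. A minimal sketch of the math, assuming rows index targets and columns index predictions (the internals of `utils.confusion_matrix.ConfusionMatrix`, including its ignore-index handling for class 0, are not shown in this patch):

```python
import numpy as np

def iou_stats(cm: np.ndarray):
    """Per-class IoU, mean IoU and frequency-weighted IoU from a confusion matrix."""
    tp = np.diag(cm).astype(float)
    fp = cm.sum(axis=0) - tp            # predicted as class c, labeled as something else
    fn = cm.sum(axis=1) - tp            # labeled as class c, predicted as something else
    union = tp + fp + fn
    iou = np.where(union > 0, tp / np.maximum(union, 1), 0.0)
    freq = cm.sum(axis=1) / cm.sum()    # class frequency among the targets
    return iou, iou[union > 0].mean(), (freq * iou).sum()

iou_per_class, miou, fiou = iou_stats(np.array([[50, 2], [3, 45]]))
```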
diff --git a/also_selfsup/train_downstream_semseg.py b/also_selfsup/train_downstream_semseg.py
index 4ff9701..23a2d08 100644
--- a/also_selfsup/train_downstream_semseg.py
+++ b/also_selfsup/train_downstream_semseg.py
@@ -4,41 +4,26 @@
 import warnings
 import importlib
 
+import torch
+import datasets
+
 import hydra
-from hydra.core.hydra_config import HydraConfig
 from omegaconf import DictConfig, OmegaConf
 
-import torch
-import numpy as np
-
 from sklearn.metrics import confusion_matrix
-
 from torch_geometric.data import DataLoader
 
 import pytorch_lightning as pl
 from pytorch_lightning import loggers as pl_loggers
-from pytorch_lightning.callbacks import LearningRateMonitor
-
-import datasets
-import networks.decoder
-import utils.metrics as metrics
 from utils.utils import wblue, wgreen
 from utils.callbacks import CustomProgressBar
 from transforms import get_transforms, get_input_channels
 from utils.confusion_matrix import ConfusionMatrix
 
-def count_parameters(model):
-    return sum(p.numel() for p in model.parameters() if p.requires_grad)
-
-def save_config_file(config, filename):
-    with open(filename, 'w') as outfile:
-        yaml.dump(config, outfile, default_flow_style=False)
 
 def logs_file(filepath, epoch, log_data):
-
-
     if not os.path.exists(filepath):
         log_str = f"epoch"
         for key, value in log_data.items():
@@ -57,9 +42,11 @@ def logs_file(filepath, epoch, log_data):
         logs.write(log_str)
         logs.flush()
 
+
 def isnan(x):
     return x != x
 
+
 def mean(ls, ignore_nan=False, empty=0):
     """
     nanmean compatible with generators.
@@ -80,6 +67,7 @@ def mean(ls, ignore_nan=False, empty=0):
         return acc
     return acc / n
 
+
 def lovasz_grad(gt_sorted):
     """
     Computes gradient of the Lovasz extension w.r.t sorted errors
@@ -94,6 +82,7 @@ def lovasz_grad(gt_sorted):
         jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
     return jaccard
 
+
 def lovasz_softmax_flat(probas, labels, classes="present"):
     """
     Multi-class Lovasz-Softmax loss
@@ -134,7 +123,7 @@ def __init__(self, config):
         self.save_hyperparameters(config)
 
         self.config = config
-        
+
         if config["network"]["backbone_params"] is None:
             config["network"]["backbone_params"] = {}
         config["network"]["backbone_params"]["in_channels"] = get_input_channels(config["inputs"])
@@ -143,18 +132,18 @@ def __init__(self, config):
         backbone_name = "networks.backbone."
         if config["network"]["framework"] is not None:
             backbone_name += config["network"]["framework"]
-            importlib.import_module(backbone_name)
-            backbone_name += "."+config["network"]["backbone"]
-            self.net = eval(backbone_name)(**config["network"]["backbone_params"])
+            model_module = importlib.import_module(backbone_name)
+            model = getattr(model_module, config["network"]["backbone"])
+            self.net = model(**config["network"]["backbone_params"])
 
         if config["downstream"]["checkpoint_dir"] is not None:
             logging.info("Loading the weights from pretrained network")
             ckpt = torch.load(
-                    os.path.join(
-                        config["downstream"]["checkpoint_dir"],
-                        config["downstream"]["checkpoint_name"]
-                    )
-                )
+                os.path.join(
+                    config["downstream"]["checkpoint_dir"],
+                    config["downstream"]["checkpoint_name"]
+                )
+            )
             for k in self.net.state_dict():
                 if k not in ckpt.keys():
                     print("    key missing", k)
@@ -168,16 +157,17 @@ def forward(self, data):
 
     def configure_optimizers(self):
        optimizer = eval(self.config["optimizer"])(self.parameters(), **self.config["optimizer_params"])
-        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
-                                                               T_max=self.config["training"]["max_epochs"],
-                                                               eta_min=0,
-                                                               )
+        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+            optimizer=optimizer,
+            T_max=self.config["training"]["max_epochs"],
+            eta_min=0,
+        )
         return [optimizer], [scheduler]
 
     def compute_confusion_matrix(self, output_data):
         outputs = output_data["predictions"].squeeze(-1)
         occupancies = output_data["occupancies"].float()
-        
+
         output_np = (torch.sigmoid(outputs).cpu().detach().numpy() > 0.5).astype(int)
         target_np = occupancies.cpu().numpy().astype(int)
         cm = confusion_matrix(
@@ -185,16 +175,13 @@ def compute_confusion_matrix(self, output_data):
         )
         return cm
 
-
     def compute_loss(self, predictions, targets, prefix):
-
         loss = self.criterion(predictions, targets)
 
         if "lovasz" in self.config["training"] and self.config["training"]["lovasz"]:
-            classes = 'present'
             ignore = 0
-            
+
             valid = (targets != ignore)
             vpreds = predictions[valid]
             vtargets = targets[valid]
@@ -206,16 +193,15 @@ def compute_loss(self, predictions, targets, prefix):
             loss = loss + loss_lovasz
 
         # log also the total loss
-        self.log(prefix+"/loss", loss.item(), on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log(prefix + "/loss", loss.item(), on_step=False, on_epoch=True, prog_bar=True, logger=True)
 
         return loss
 
     def compute_log_data(self, outputs, cm, prefix):
-
         # iou_per_class = cm.get_per_class_iou()
         miou = cm.get_mean_iou()
         fiou = cm.get_freqweighted_iou()
-        self.log(prefix+"/miou", miou, on_step=False, on_epoch=True, prog_bar=True, logger=True)
-        self.log(prefix+"/fiou", fiou, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log(prefix + "/miou", miou, on_step=False, on_epoch=True, prog_bar=True, logger=True)
+        self.log(prefix + "/fiou", fiou, on_step=False, on_epoch=True, prog_bar=True, logger=True)
 
         log_data = {}
         log_data["miou"] = miou
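The Lovász helpers reformatted above follow the reference implementation of the Lovász-Softmax loss (Berman et al., CVPR 2018): per-class errors are sorted in decreasing order, and the loss is their dot product with the gradient of the Lovász extension of the Jaccard index. A self-contained toy run of that gradient, mirroring the `lovasz_grad` shown in the hunks above (a sketch for intuition, not part of the patch):

```python
import torch

def lovasz_grad(gt_sorted):
    """Gradient of the Lovasz extension w.r.t. sorted errors."""
    p = len(gt_sorted)
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.float().cumsum(0)
    union = gts + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1.0 - intersection / union
    if p > 1:
        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
    return jaccard

errors = torch.tensor([0.9, 0.6, 0.3, 0.1])       # errors sorted in decreasing order
gt_sorted = torch.tensor([1, 0, 1, 0])            # foreground indicators, same order
loss = torch.dot(errors, lovasz_grad(gt_sorted))  # Lovasz surrogate for this class
```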
@@ -240,13 +226,12 @@ def on_train_epoch_start(self) -> None:
         return super().on_train_epoch_start()
 
     def training_step(self, train_batch, batch_idx):
-
         torch.cuda.empty_cache()
 
         predictions = self.forward(train_batch)
         loss = self.compute_loss(predictions, train_batch["y"], prefix="train")
 
         targets = train_batch["y"].cpu().numpy()
-        predictions = torch.nn.functional.softmax(predictions[:,1:], dim=1).max(dim=1)[1].cpu().numpy() + 1
+        predictions = torch.nn.functional.softmax(predictions[:, 1:], dim=1).max(dim=1)[1].cpu().numpy() + 1
         self.train_cm.update(targets, predictions)
         miou = self.train_cm.get_mean_iou()
@@ -255,15 +240,13 @@
         return loss
 
     def training_epoch_end(self, outputs):
-
-
         log_data = self.compute_log_data(outputs, self.train_cm, prefix="train")
 
         os.makedirs(self.logger.log_dir, exist_ok=True)
         logs_file(os.path.join(self.logger.log_dir, "logs_train.csv"), self.current_epoch, log_data)
 
         if (self.global_step > 0) and (not self.config["interactive_log"]):
-            desc = "Train "+ self.get_description_string(log_data)
+            desc = "Train " + self.get_description_string(log_data)
             print(wblue(desc))
 
@@ -271,23 +254,20 @@ def on_validation_epoch_start(self) -> None:
         return super().on_validation_epoch_start()
 
     def validation_step(self, val_batch, batch_idx):
-
         torch.cuda.empty_cache()
 
         predictions = self.forward(val_batch)
-        loss = self.compute_loss(predictions, val_batch["y"], prefix="val") 
+        loss = self.compute_loss(predictions, val_batch["y"], prefix="val")
 
         targets = val_batch["y"].cpu().numpy()
-        predictions = torch.nn.functional.softmax(predictions[:,1:], dim=1).max(dim=1)[1].cpu().numpy() + 1
+        predictions = torch.nn.functional.softmax(predictions[:, 1:], dim=1).max(dim=1)[1].cpu().numpy() + 1
         self.val_cm.update(targets, predictions)
-
         miou = self.val_cm.get_mean_iou()
         self.log("iou", miou, on_step=True, on_epoch=False, prog_bar=True, logger=False)
 
         return loss
 
     def validation_epoch_end(self, outputs):
-
         if self.global_step > 0:
             log_data = self.compute_log_data(outputs, self.val_cm, prefix="val")
@@ -296,12 +276,11 @@ def validation_epoch_end(self, outputs):
             logs_file(os.path.join(self.logger.log_dir, "logs_val.csv"), self.current_epoch, log_data)
 
             if (not self.config["interactive_log"]):
-                desc = "Val "+ self.get_description_string(log_data)
+                desc = "Val " + self.get_description_string(log_data)
                 print(wgreen(desc))
 
 
 def get_savedir_name(config):
-
     savedir = f"{config['network']['backbone']}"
     savedir += f"_{config['dataset_name']}"
     savedir += f"_{config['manifold_points']}"
@@ -312,10 +291,11 @@ def get_savedir_name(config):
 
     return savedir
 
+
 @hydra.main(version_base=None, config_path="configs", config_name="config")
-def main(config : DictConfig):
+def main(config: DictConfig):
 
-    warnings.filterwarnings("ignore", category=UserWarning) 
+    warnings.filterwarnings("ignore", category=UserWarning)
 
     config = OmegaConf.to_container(config)["cfg"]
@@ -326,25 +306,27 @@ def main(config: DictConfig):
     if config["downstream"]["checkpoint_dir"] is not None:
         config["save_dir"] = config["downstream"]["checkpoint_dir"]
 
-
     logging.getLogger().setLevel(config["logging"])
 
     logging.info("Getting the dataset")
-    DatasetClass = eval("datasets."+config["dataset_name"])
+    DatasetClass = getattr(datasets, config["dataset_name"])
    train_transforms = get_transforms(config, train=True, downstream=True)
     test_transforms = get_transforms(config, train=False, downstream=True)
 
     # build the dataset
-    train_dataset = DatasetClass(config["dataset_root"],
-                                 split=config["train_split"],
-                                 transform=train_transforms,
-                                 skip_ratio=config['downstream']["skip_ratio"]
-                                 )
-    val_dataset = DatasetClass(config["dataset_root"],
-                               split=config["val_split"],
-                               transform=test_transforms,
-                               )
+    train_dataset = DatasetClass(
+        config["dataset_root"],
+        split=config["train_split"],
+        transform=train_transforms,
+        skip_ratio=config['downstream']["skip_ratio"],
+        seed_offset=config['downstream']["seed_offset"]
+    )
+    val_dataset = DatasetClass(
+        config["dataset_root"],
+        split=config["val_split"],
+        transform=test_transforms,
+    )
 
     # build the data loaders
     train_loader = DataLoader(
@@ -362,7 +344,6 @@ def main(config: DictConfig):
         follow_batch=["voxel_coords", "voxel_proj_yx"]
     )
 
-
     logging.info("Creating trainer")
 
     savedir_root = get_savedir_name(config)
@@ -373,27 +354,25 @@ def main(config: DictConfig):
     tb_logger = pl_loggers.TensorBoardLogger(save_dir=savedir_root)
 
     trainer = pl.Trainer(
-            # accelerator=config["device"],
-            # devices=config["num_device"],
-            gpus= config["num_device"],
-            check_val_every_n_epoch=config["training"]["val_interval"],
-            logger=tb_logger,
-            max_epochs=config["training"]["max_epochs"],
-            resume_from_checkpoint=config["resume"],
-            callbacks=[
-                CustomProgressBar(refresh_rate=int(config["interactive_log"]))
-            ]
-            )
+        # accelerator=config["device"],
+        # devices=config["num_device"],
+        gpus=config["num_device"],
+        check_val_every_n_epoch=config["training"]["val_interval"],
+        logger=tb_logger,
+        max_epochs=config["training"]["max_epochs"],
+        resume_from_checkpoint=config["resume"],
+        callbacks=[
+            CustomProgressBar(refresh_rate=int(config["interactive_log"]))
+        ]
+    )
 
     logging.info(f"Saving at {trainer.logger.log_dir}")
     os.makedirs(trainer.logger.log_dir, exist_ok=True)
     yaml.dump(config, open(os.path.join(trainer.logger.log_dir, "config.yaml"), "w"), default_flow_style=False)
 
-
     model = LightningDownstreamTrainer(config)
     trainer.fit(model, train_loader, val_loader, ckpt_path=config["resume"])
 
-
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
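A recurring edit in this file (and in `get_transforms.py` below) swaps `eval` on a config-built string for `importlib.import_module` plus `getattr`, so the class is resolved by attribute lookup instead of by evaluating a string taken from the config. The pattern in isolation; `resolve_class` and the names in the example comment are illustrative, not part of the codebase:

```python
import importlib

def resolve_class(module_name: str, class_name: str):
    """Import module_name and return its class_name attribute.

    Fails early with ImportError/AttributeError rather than eval()'s NameError,
    and avoids feeding configuration strings to eval().
    """
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

# illustrative: resolve_class("networks.backbone.some_framework", "SomeBackbone");
# a runnable stand-in using only the standard library:
OrderedDict = resolve_class("collections", "OrderedDict")
```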
diff --git a/also_selfsup/transforms/get_transforms.py b/also_selfsup/transforms/get_transforms.py
index 9114b2d..0fe9717 100644
--- a/also_selfsup/transforms/get_transforms.py
+++ b/also_selfsup/transforms/get_transforms.py
@@ -1,18 +1,16 @@
-import torch
 import torch_geometric.transforms as T
 import logging
+
 import importlib
 
-# from networks import backbone
+from transforms.create_inputs import CreateInputs
+from transforms.create_points import CreatePoints
+from transforms.dupplicate import Dupplicate
+from transforms.random_rotate import RandomRotate
+from transforms.random_flip import RandomFlip
+from transforms.scaling import Scaling
+from transforms.voxel_decimation import VoxelDecimation
 
-import also_selfsup
-from also_selfsup.transforms.create_inputs import CreateInputs
-from also_selfsup.transforms.create_points import CreatePoints
-from also_selfsup.transforms.dupplicate import Dupplicate
-from also_selfsup.transforms.random_rotate import RandomRotate
-from also_selfsup.transforms.random_flip import RandomFlip
-from also_selfsup.transforms.scaling import Scaling
-from also_selfsup.transforms.voxel_decimation import VoxelDecimation
 
 
 class CleanData(object):
@@ -98,15 +96,14 @@
         if augmentations["random_flip"]:
             logging.info("Transforms - Flip")
             transforms.append(RandomFlip(["pos", "pos_non_manifold"]))
-    
+
     transforms.append(CreateInputs(config["inputs"]))
 
     if config["network"]["framework"] is not None:
         logging.info(f"Transforms - Quantize - {config['network']['framework']}")
-        module_name = "also_selfsup.networks.backbone."+config["network"]["framework"]
-        importlib.import_module(module_name)
-        transforms.append(eval(module_name+".Quantize")(**config["network"]["backbone_params"]["quantization_params"]))
+        model_module = importlib.import_module("networks.backbone." + config["network"]["framework"])
+        transforms.append(model_module.Quantize(**config["network"]["backbone_params"]["quantization_params"]))
 
     transforms = T.Compose(transforms)
 
-    return transforms
\ No newline at end of file
+    return transforms
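Both the training and evaluation scripts build their loaders with `torch_geometric`'s `DataLoader` and a `follow_batch` list (`["voxel_coords"]` in `eval.py`, `["voxel_coords", "voxel_proj_yx"]` in training). For each followed attribute, the loader emits a companion `<attr>_batch` vector mapping each row to its sample index, which keeps variable-length voxel tensors attributable after batching. A minimal sketch with dummy tensors, not taken from the repository:

```python
import torch
from torch_geometric.data import Data, DataLoader

# two samples whose custom attribute has different lengths
d1 = Data(pos=torch.rand(5, 3), voxel_coords=torch.randint(0, 10, (4, 3)))
d2 = Data(pos=torch.rand(7, 3), voxel_coords=torch.randint(0, 10, (6, 3)))

loader = DataLoader([d1, d2], batch_size=2, follow_batch=["voxel_coords"])
batch = next(iter(loader))
print(batch.voxel_coords_batch)  # tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
```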