From 6300bd756e341ca180c60b8e07c218b9eb40664a Mon Sep 17 00:00:00 2001
From: liaoxingyu
Date: Mon, 31 May 2021 17:32:24 +0800
Subject: [PATCH] Bugfix for cls_layer

In `any_softmax`, all operations are in-place, so pass in `logits.clone()`
to prevent the outside logits from being changed.
---
 CHANGELOG.md                              |  8 +++++++-
 fastreid/modeling/heads/clas_head.py      |  2 +-
 fastreid/modeling/heads/embedding_head.py | 18 +++++++++++-------
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8f66e19..482c4088 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,9 +2,15 @@
 
 ### v1.3
 
-#### Improvements
+#### New Features
+- Vision Transformer backbone, see config in `configs/Market1501/bagtricks_vit.yml`
+- Self-Distillation with EMA update
+- Gradient Clip
+
+#### Improvements
 
 - Faster dataloader with pre-fetch thread and cuda stream
+- Optimize DDP training speed by removing `find_unused_parameters` in DDP
 
 ### v1.2 (06/04/2021)
 
diff --git a/fastreid/modeling/heads/clas_head.py b/fastreid/modeling/heads/clas_head.py
index 9660a38e..e154bfaa 100644
--- a/fastreid/modeling/heads/clas_head.py
+++ b/fastreid/modeling/heads/clas_head.py
@@ -27,7 +27,7 @@ def forward(self, features, targets=None):
         # Evaluation
         if not self.training: return logits.mul_(self.cls_layer.s)
 
-        cls_outputs = self.cls_layer(logits, targets)
+        cls_outputs = self.cls_layer(logits.clone(), targets)
 
         return {
             "cls_outputs": cls_outputs,
diff --git a/fastreid/modeling/heads/embedding_head.py b/fastreid/modeling/heads/embedding_head.py
index 1189b602..ef0d75e4 100644
--- a/fastreid/modeling/heads/embedding_head.py
+++ b/fastreid/modeling/heads/embedding_head.py
@@ -4,8 +4,6 @@
 @contact: sherlockliao01@gmail.com
 """
 
-import math
-
 import torch
 import torch.nn.functional as F
 from torch import nn
@@ -13,7 +11,7 @@
 from fastreid.config import configurable
 from fastreid.layers import *
 from fastreid.layers import pooling, any_softmax
-from fastreid.utils.weight_init import weights_init_kaiming
+from fastreid.layers.weight_init import weights_init_kaiming
 from .build import REID_HEADS_REGISTRY
 
 
@@ -78,14 +76,19 @@ def __init__(
             neck.append(get_norm(norm_type, feat_dim, bias_freeze=True))
 
         self.bottleneck = nn.Sequential(*neck)
-        self.bottleneck.apply(weights_init_kaiming)
 
-        # Linear layer
+        # Classification head
         assert hasattr(any_softmax, cls_type), "Expected cls types are {}, " \
             "but got {}".format(any_softmax.__all__, cls_type)
-        self.weight = nn.Parameter(torch.normal(0, 0.01, (num_classes, feat_dim)))
+        self.weight = nn.Parameter(torch.Tensor(num_classes, feat_dim))
         self.cls_layer = getattr(any_softmax, cls_type)(num_classes, scale, margin)
 
+        self.reset_parameters()
+
+    def reset_parameters(self) -> None:
+        self.bottleneck.apply(weights_init_kaiming)
+        nn.init.normal_(self.weight, std=0.01)
+
     @classmethod
     def from_config(cls, cfg):
         # fmt: off
@@ -132,7 +135,8 @@ def forward(self, features, targets=None):
         else:
             logits = F.linear(F.normalize(neck_feat), F.normalize(self.weight))
 
-        cls_outputs = self.cls_layer(logits, targets)
+        # Pass logits.clone() into cls_layer because there are in-place operations
+        cls_outputs = self.cls_layer(logits.clone(), targets)
 
         # fmt: off
         if self.neck_feat == 'before': feat = pool_feat[..., 0, 0]
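
Note (not part of the patch): the sketch below is a minimal, hypothetical illustration of the behaviour this commit guards against. `InPlaceScaledSoftmax` is an invented stand-in for an `any_softmax`-style layer whose forward mutates its input in place; the class name, the scale value, and the tensor shapes are illustrative assumptions, not the fastreid API.

import torch
from torch import nn


class InPlaceScaledSoftmax(nn.Module):
    """Toy stand-in for a cls_layer whose forward works in place (illustrative only)."""

    def __init__(self, scale: float = 16.0):
        super().__init__()
        self.s = scale

    def forward(self, logits, targets):
        # In-place scaling, analogous to the in-place ops inside an any_softmax layer.
        logits.mul_(self.s)
        return logits


cls_layer = InPlaceScaledSoftmax(scale=16.0)
logits = torch.randn(4, 10)
targets = torch.randint(0, 10, (4,))
raw = logits.clone()

# Without .clone(): the caller's logits are silently scaled as well.
_ = cls_layer(logits, targets)
print(torch.equal(logits, raw))   # False -> the outside logits were changed

# With .clone() (the pattern this commit adopts): the caller keeps the raw logits.
logits = raw.clone()
_ = cls_layer(logits.clone(), targets)
print(torch.equal(logits, raw))   # True -> the outside logits are untouched

The extra clone copies the class logits once per forward pass, a small cost for keeping the raw logits available for whatever else the head computes from them afterwards.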