This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

fix FP16 training for retinanet #1146

Open · wants to merge 1 commit into base: main
maskrcnn_benchmark/layers/sigmoid_focal_loss.py (16 changes: 13 additions & 3 deletions)

@@ -8,12 +8,16 @@
 # TODO: Use JIT to replace CUDA implementation in the future.
 class _SigmoidFocalLoss(Function):
     @staticmethod
-    def forward(ctx, logits, targets, gamma, alpha):
+    def forward(ctx, logits, targets, gamma, alpha, dtype):
+        if dtype == 'float16':
+            logits = logits.float()
         ctx.save_for_backward(logits, targets)
         num_classes = logits.shape[1]
         ctx.num_classes = num_classes
         ctx.gamma = gamma
         ctx.alpha = alpha
+        ctx.dtype = dtype
+

         losses = _C.sigmoid_focalloss_forward(
             logits, targets, num_classes, gamma, alpha
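
The forward-side cast is the core of the fix: with FP16 inputs, the logits are promoted to FP32 before they are saved and handed to the CUDA kernel, so the exp/log terms of the focal loss are evaluated in full precision (and the _C kernel, which presumably is not dispatched for half-precision types, always sees a dtype it supports).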
@@ -31,6 +35,9 @@ def backward(ctx, d_loss):
         d_logits = _C.sigmoid_focalloss_backward(
             logits, targets, d_loss, num_classes, gamma, alpha
         )
-        return d_logits, None, None, None
+        if ctx.dtype == 'float16':
+            d_logits = d_logits.half()
+
+        return d_logits, None, None, None, None
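
Taken together, the two casts implement a common mixed-precision pattern: compute the loss in FP32, then hand the gradient back to the FP16 network in FP16 (autograd requires the returned gradient to match the input's dtype). A minimal runnable sketch of the same pattern, using a pure-PyTorch stand-in for the _C kernels; the _FP32LossWrapper name and the sigmoid placeholder are illustrative, not part of the PR:

    import torch
    from torch.autograd import Function

    class _FP32LossWrapper(Function):
        @staticmethod
        def forward(ctx, logits, dtype):
            if dtype == 'float16':
                logits = logits.float()   # do the loss math in FP32
            ctx.dtype = dtype
            ctx.save_for_backward(logits)
            return torch.sigmoid(logits)  # placeholder for _C.sigmoid_focalloss_forward

        @staticmethod
        def backward(ctx, d_loss):
            logits, = ctx.saved_tensors
            p = torch.sigmoid(logits)
            d_logits = d_loss * p * (1 - p)  # placeholder for _C.sigmoid_focalloss_backward
            if ctx.dtype == 'float16':
                d_logits = d_logits.half()   # return the gradient in the network's dtype
            return d_logits, None            # one gradient slot per forward input

    logits = torch.randn(4, 80, dtype=torch.float16, requires_grad=True)
    _FP32LossWrapper.apply(logits, 'float16').sum().backward()
    assert logits.grad.dtype == torch.float16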


@@ -39,6 +46,8 @@ def backward(ctx, d_loss):

 def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
     num_classes = logits.shape[1]
+    gamma = gamma[0]
+    alpha = alpha[0]
     dtype = targets.dtype
     device = targets.device
     class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0)
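
One thing this hunk does not touch: sigmoid_focal_loss_cpu keeps its four-argument signature, yet the module's forward (see the last hunk of this file) now passes self.dtype as a fifth argument, so the CPU fallback would raise a TypeError as written; a trailing dtype parameter that the CPU path simply ignores (it already computes in the incoming precision) would keep the two code paths in sync. The added gamma[0]/alpha[0] indexing also appears to assume gamma and alpha arrive as one-element sequences, whereas the config wiring below passes plain floats.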
@@ -51,10 +60,11 @@ def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):


 class SigmoidFocalLoss(nn.Module):
-    def __init__(self, gamma, alpha):
+    def __init__(self, gamma, alpha, dtype):
         super(SigmoidFocalLoss, self).__init__()
         self.gamma = gamma
         self.alpha = alpha
+        self.dtype = dtype

     def forward(self, logits, targets):
         device = logits.device

@@ -63,7 +73,7 @@ def forward(self, logits, targets):
         else:
             loss_func = sigmoid_focal_loss_cpu

-        loss = loss_func(logits, targets, self.gamma, self.alpha)
+        loss = loss_func(logits, targets, self.gamma, self.alpha, self.dtype)
         return loss.sum()

     def __repr__(self):
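
With dtype threaded through the module, the patched loss can be exercised end to end. A usage sketch, assuming the compiled _C CUDA extension is available; the tensor shapes, the int32 class-id targets, and the keyword-argument call are illustrative rather than taken from the PR:

    import torch
    from maskrcnn_benchmark.layers import SigmoidFocalLoss

    # gamma/alpha are the usual focal-loss settings from the RetinaNet config
    loss_fn = SigmoidFocalLoss(gamma=2.0, alpha=0.25, dtype='float16')

    logits = torch.randn(8, 80, device='cuda', dtype=torch.float16, requires_grad=True)
    targets = torch.randint(1, 81, (8,), device='cuda', dtype=torch.int32)  # 0 would mean background
    loss = loss_fn(logits, targets)  # loss math runs in FP32 internally
    loss.backward()                  # logits.grad comes back as torch.float16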
maskrcnn_benchmark/modeling/rpn/retinanet/loss.py (3 changes: 2 additions & 1 deletion)

@@ -93,7 +93,8 @@ def make_retinanet_loss_evaluator(cfg, box_coder):
     )
     sigmoid_focal_loss = SigmoidFocalLoss(
         cfg.MODEL.RETINANET.LOSS_GAMMA,
-        cfg.MODEL.RETINANET.LOSS_ALPHA
+        cfg.MODEL.RETINANET.LOSS_ALPHA,
+        cfg.DTYPE
     )

     loss_evaluator = RetinaNetLossComputation(
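
cfg.DTYPE is the project-wide precision flag ('float32' by default, 'float16' when training with mixed precision), so the new casts are a no-op in ordinary FP32 runs and only engage when the whole model runs in half precision.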