# Source: https://github.com/ludvb/batchrenorm/blob/master/batchrenorm/batchrenorm.py
import torch

__all__ = ["BatchRenorm1d", "BatchRenorm2d", "BatchRenorm3d"]


class BatchRenorm(torch.jit.ScriptModule):
    def __init__(
        self,
        num_features: int,
        eps: float = 1e-3,
        momentum: float = 0.01,
        affine: bool = True,
    ):
        super().__init__()
        # EMA statistics used at inference time. Note that a running *std*
        # (not variance) is tracked, unlike torch.nn.BatchNorm*.
        self.register_buffer(
            "running_mean", torch.zeros(num_features, dtype=torch.float)
        )
        self.register_buffer(
            "running_std", torch.ones(num_features, dtype=torch.float)
        )
        # Counts training batches; drives the rmax/dmax warmup schedules below.
        self.register_buffer(
            "num_batches_tracked", torch.tensor(0, dtype=torch.long)
        )
        self.weight = torch.nn.Parameter(
            torch.ones(num_features, dtype=torch.float)
        )
        self.bias = torch.nn.Parameter(
            torch.zeros(num_features, dtype=torch.float)
        )
        self.affine = affine
        self.eps = eps
        self.step = 0  # unused; kept from the upstream source
        self.momentum = momentum

    def _check_input_dim(self, x: torch.Tensor) -> None:
        raise NotImplementedError()  # pragma: no cover

    @property
    def rmax(self) -> torch.Tensor:
        # Linear warmup of the scale clamp: rmax stays at 1.0 for the first
        # 5k batches (plain batch norm), then ramps to 3.0 by batch 40k.
        return (2 / 35000 * self.num_batches_tracked + 25 / 35).clamp_(
            1.0, 3.0
        )

    @property
    def dmax(self) -> torch.Tensor:
        # Likewise for the shift clamp: dmax stays at 0.0 for the first 5k
        # batches, then ramps to 5.0 by batch 25k, matching the schedule
        # suggested in the Batch Renormalization paper.
        return (5 / 20000 * self.num_batches_tracked - 25 / 20).clamp_(
            0.0, 5.0
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        self._check_input_dim(x)
        if x.dim() > 2:
            # Move the channel dimension to the end so the per-feature
            # statistics broadcast cleanly.
            x = x.transpose(1, -1)
        if self.training:
            dims = [i for i in range(x.dim() - 1)]
            batch_mean = x.mean(dims)
            batch_std = x.std(dims, unbiased=False) + self.eps
            # Renorm corrections r and d (Ioffe, 2017); both are treated as
            # constants in the backward pass, hence the detach() calls.
            r = (
                batch_std.detach() / self.running_std.view_as(batch_std)
            ).clamp_(1 / self.rmax, self.rmax)
            d = (
                (batch_mean.detach() - self.running_mean.view_as(batch_mean))
                / self.running_std.view_as(batch_std)
            ).clamp_(-self.dmax, self.dmax)
            # x_hat = ((x - mu_B) / sigma_B) * r + d
            x = (x - batch_mean) / batch_std * r + d
            # Exponential moving averages of the batch statistics.
            self.running_mean += self.momentum * (
                batch_mean.detach() - self.running_mean
            )
            self.running_std += self.momentum * (
                batch_std.detach() - self.running_std
            )
            self.num_batches_tracked += 1
        else:
            # Inference: normalize with the running statistics.
            x = (x - self.running_mean) / self.running_std
        if self.affine:
            x = self.weight * x + self.bias
        if x.dim() > 2:
            x = x.transpose(1, -1)
        return x


class BatchRenorm1d(BatchRenorm):
    def _check_input_dim(self, x: torch.Tensor) -> None:
        if x.dim() not in [2, 3]:
            raise ValueError(f"expected 2D or 3D input (got {x.dim()}D input)")


class BatchRenorm2d(BatchRenorm):
    def _check_input_dim(self, x: torch.Tensor) -> None:
        if x.dim() != 4:
            raise ValueError(f"expected 4D input (got {x.dim()}D input)")


class BatchRenorm3d(BatchRenorm):
    def _check_input_dim(self, x: torch.Tensor) -> None:
        if x.dim() != 5:
            raise ValueError(f"expected 5D input (got {x.dim()}D input)")
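

if __name__ == "__main__":
    # Minimal usage sketch, not part of the upstream file: run a few training
    # steps so the running statistics and batch counter update, then switch
    # to eval mode, which normalizes with the stored running mean/std instead
    # of the per-batch statistics. Shapes and step count are illustrative.
    torch.manual_seed(0)
    bn = BatchRenorm2d(num_features=8)

    bn.train()
    for _ in range(3):
        x = torch.randn(4, 8, 16, 16)  # (N, C, H, W); C matches num_features
        y = bn(x)
    print("batches tracked:", bn.num_batches_tracked.item())
    print("rmax:", bn.rmax.item(), "dmax:", bn.dmax.item())

    bn.eval()
    y = bn(torch.randn(4, 8, 16, 16))
    print("eval output shape:", tuple(y.shape))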