multiply by token embedding weight for logits by default
lucidrains committed Nov 3, 2020
1 parent 11a9058 commit 7484805
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'x-transformers',
   packages = find_packages(exclude=['examples']),
-  version = '0.0.6',
+  version = '0.0.7',
   license='MIT',
   description = 'X-Transformers - Pytorch',
   author = 'Phil Wang',
8 changes: 7 additions & 1 deletion x_transformers/x_transformers.py
@@ -164,7 +164,13 @@ def __init__(
         self.pos_emb = nn.Embedding(max_seq_len, dim)
         self.layer_blocks = layer_blocks
         self.norm = nn.LayerNorm(dim)
-        self.to_logits = nn.Linear(dim, num_tokens) if return_logits else nn.Identity()
+
+        self.init_()
+        self.to_logits = lambda t: t @ self.token_emb.weight.t() if return_logits else nn.Identity()
+
+    def init_(self):
+        nn.init.normal_(self.token_emb.weight, std = 0.02)
+        nn.init.normal_(self.pos_emb.weight, std = 0.02)
 
     def forward(self, x, **kwargs):
         _, n, device = *x.shape, x.device
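For context, a minimal, self-contained sketch of the weight-tying idea this commit introduces: the token embedding matrix is reused (transposed) as the output projection, so no separate nn.Linear(dim, num_tokens) head is needed. The module and names below (TiedEmbeddingSketch, num_tokens, dim) are illustrative only and not part of the x-transformers API.

import torch
import torch.nn as nn

class TiedEmbeddingSketch(nn.Module):
    # illustrative only: the embedding weight doubles as the output projection
    def __init__(self, num_tokens, dim):
        super().__init__()
        self.token_emb = nn.Embedding(num_tokens, dim)
        nn.init.normal_(self.token_emb.weight, std = 0.02)  # same small-std init as the commit's init_()

    def forward(self, token_ids):
        hidden = self.token_emb(token_ids)           # (batch, seq, dim); stands in for the transformer output
        logits = hidden @ self.token_emb.weight.t()  # (batch, seq, num_tokens) via the tied weight
        return logits

sketch = TiedEmbeddingSketch(num_tokens = 256, dim = 64)
tokens = torch.randint(0, 256, (2, 8))
print(sketch(tokens).shape)  # torch.Size([2, 8, 256])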
