diff --git a/backpack/utils/examples.py b/backpack/utils/examples.py
index b52b8a903..3798ebde9 100644
--- a/backpack/utils/examples.py
+++ b/backpack/utils/examples.py
@@ -40,13 +40,14 @@ def get_mnist_dataloader(batch_size: int = 64, shuffle: bool = True) -> DataLoad

 def load_one_batch_mnist(
-    batch_size: int = 64, shuffle: bool = True
+    batch_size: int = 64, shuffle: bool = True, flat: bool = False
 ) -> Tuple[Tensor, Tensor]:
     """Return a single mini-batch (inputs, labels) from MNIST.

     Args:
         batch_size: Mini-batch size. Default: ``64``.
         shuffle: Randomly shuffle the data. Default: ``True``.
+        flat: Flatten the channel dimension and return a matrix ``[batch_size x 784]``.

     Returns:
         A single batch (inputs, labels) from MNIST.
@@ -54,6 +55,9 @@ def load_one_batch_mnist(
     dataloader = get_mnist_dataloader(batch_size, shuffle)
     X, y = next(iter(dataloader))

+    if flat:
+        X = X.reshape(X.shape[0], -1)
+
     return X, y

diff --git a/docs/examples.html b/docs/examples.html
index c8d887d13..8280bff9e 100644
--- a/docs/examples.html
+++ b/docs/examples.html
@@ -60,8 +60,8 @@
For this example to run, you will need PyTorch and TorchVision (>= 1.0)
-To install BackPACK, either use pip or clone the repo.
-pip install backpack-for-pytorch
+To install BackPACK, either use pip or clone the repo.
+pip install backpack-for-pytorch
An example: Diagonal GGN Preconditioner
@@ -90,20 +90,20 @@ Step 1: Libraries, MNIST, and the
STEP_SIZE = 0.01
DAMPING = 1.0
MAX_ITER = 100
-torch.manual_seed(0)
+torch.manual_seed(0)
Now, let’s load MNIST
-mnist_loader = torch.utils.data.dataloader.DataLoader(
-    torchvision.datasets.MNIST(
+mnist_loader = torch.utils.data.dataloader.DataLoader(
+    torchvision.datasets.MNIST(
        './data',
        train=True,
        download=True,
-        transform=torchvision.transforms.Compose([
-            torchvision.transforms.ToTensor(),
-            torchvision.transforms.Normalize(
+        transform=torchvision.transforms.Compose([
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize(
                (0.1307,), (0.3081,)
            )
        ])),
@@ -113,32 +113,32 @@ Step 1: Libraries, MNIST, and the
-We’ll create a small CNN with MaxPooling and ReLU activations, using a Sequential layer as the main model.
+We’ll create a small CNN with MaxPooling and ReLU activations, using a Sequential layer as the main model.
-model = torch.nn.Sequential(
-    torch.nn.Conv2d(1, 20, 5, 1),
-    torch.nn.ReLU(),
-    torch.nn.MaxPool2d(2, 2),
-    torch.nn.Conv2d(20, 50, 5, 1),
-    torch.nn.ReLU(),
-    torch.nn.MaxPool2d(2, 2),
+model = torch.nn.Sequential(
+    torch.nn.Conv2d(1, 20, 5, 1),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(2, 2),
+    torch.nn.Conv2d(20, 50, 5, 1),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(2, 2),
    Flatten(),
    # Pytorch <1.2 doesn't have a Flatten layer
-    torch.nn.Linear(4*4*50, 500),
-    torch.nn.ReLU(),
-    torch.nn.Linear(500, 10),
+    torch.nn.Linear(4*4*50, 500),
+    torch.nn.ReLU(),
+    torch.nn.Linear(500, 10),
)
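(The Flatten used above is not in PyTorch below 1.2; a minimal stand-in module, sketched here for completeness and not part of the diff, could be:)

```python
import torch


class Flatten(torch.nn.Module):
    """Flatten everything except the batch dimension, e.g. [N, 50, 4, 4] -> [N, 800]."""

    def forward(self, x):
        return x.view(x.size(0), -1)
```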
We will also need a loss function and a way to measure accuracy
-loss_function = torch.nn.CrossEntropyLoss()
+loss_function = torch.nn.CrossEntropyLoss()

def get_accuracy(output, targets):
    """Helper function to print the accuracy"""
-    predictions = output.argmax(dim=1, keepdim=True).view_as(targets)
-    return predictions.eq(targets).float().mean().item()
+    predictions = output.argmax(dim=1, keepdim=True).view_as(targets)
+    return predictions.eq(targets).float().mean().item()
@@ -151,23 +151,23 @@ Step 2: The optimizer
-where 𝛼 is the step-size, 𝜆 is the damping parameter, g is the gradient and G is the diagonal of the generalized Gauss-Newton (GGN).
-The difficult part is computing G, but BackPACK will do this;
-just like PyTorch’s autograd computes the gradient for each parameter p and stores it in p.grad, BackPACK with the DiagGGNMC extension will compute (a Monte-Carlo estimate of) the diagonal of the GGN and store it in p.diag_ggn_mc.
+
+where 𝛼 is the step-size, 𝜆 is the damping parameter, g is the gradient and G is the diagonal of the generalized Gauss-Newton (GGN).
+The difficult part is computing G, but BackPACK will do this;
+just like PyTorch’s autograd computes the gradient for each parameter p and stores it in p.grad, BackPACK with the DiagGGNMC extension will compute (a Monte-Carlo estimate of) the diagonal of the GGN and store it in p.diag_ggn_mc.
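Written out, with the division applied element-wise to each parameter, the update described here is (reconstructed from these definitions and the optimizer code below, not the page's verbatim formula):

$$
\theta_{t+1} = \theta_t - \alpha \, \frac{g}{G + \lambda}
$$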
We can now simply focus on implementing the optimizer that uses this information:
-class DiagGGNOptimizer(torch.optim.Optimizer):
+class DiagGGNOptimizer(torch.optim.Optimizer):
    def __init__(self, parameters, step_size, damping):
-        super().__init__(
+        super().__init__(
            parameters,
            dict(step_size=step_size, damping=damping)
        )

    def step(self):
-        for group in self.param_groups:
+        for group in self.param_groups:
            for p in group["params"]:
-                step_direction = p.grad / (p.diag_ggn_mc + group["damping"])
-                p.data.add_(-group["step_size"], step_direction)
+                step_direction = p.grad / (p.diag_ggn_mc + group["damping"])
+                p.data.add_(-group["step_size"], step_direction)
        return loss
@@ -179,7 +179,7 @@ Step 3: Put on your BackPACK
extend(loss_function)
optimizer = DiagGGNOptimizer(
-    model.parameters(),
+    model.parameters(),
    step_size=STEP_SIZE,
    damping=DAMPING
)
@@ -191,7 +191,7 @@ The main loop
Traditional optimization loop: load each minibatch,
compute the minibatch loss, but now call BackPACK before doing the backward pass.
-The diag_ggn_mc fields of the parameters will get filled and the optimizer will run.
+The diag_ggn_mc fields of the parameters will get filled and the optimizer will run.
for batch_idx, (x, y) in enumerate(mnist_loader):
    output = model(x)
@@ -200,13 +200,13 @@ The main loop
    with backpack(DiagGGNMC()):
        loss = loss_function(output, y)
-        loss.backward()
-        optimizer.step()
+        loss.backward()
+        optimizer.step()

    print(
-        "Iteration %3.d/%d " % (batch_idx, MAX_ITER) +
-        "Minibatch Loss %.3f " % (loss) +
-        "Accuracy %.0f" % (accuracy * 100) + "%"
+        "Iteration %3.d/%d " % (batch_idx, MAX_ITER) +
+        "Minibatch Loss %.3f " % (loss) +
+        "Accuracy %.0f" % (accuracy * 100) + "%"
    )

    if batch_idx >= MAX_ITER:
@@ -215,7 +215,7 @@ The main loop
If everything went fine, the output should look like
-Iteration 0/100 Minibatch Loss 2.307 Accuracy 12%
+Iteration 0/100 Minibatch Loss 2.307 Accuracy 12%
Iteration 1/100 Minibatch Loss 2.318 Accuracy 8%
Iteration 2/100 Minibatch Loss 2.329 Accuracy 8%
Iteration 3/100 Minibatch Loss 2.281 Accuracy 19%
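For reference, a minimal sketch that assembles the snippets of this example into one training script; `model`, `loss_function`, `mnist_loader`, `get_accuracy`, the hyperparameters, and `DiagGGNOptimizer` are as defined above, and importing `DiagGGNMC` from `backpack.extensions` is assumed:

```python
from backpack import backpack, extend
from backpack.extensions import DiagGGNMC

# Let BackPACK hook into the backward pass of the model and the loss.
extend(model)
extend(loss_function)

optimizer = DiagGGNOptimizer(
    model.parameters(), step_size=STEP_SIZE, damping=DAMPING
)

for batch_idx, (x, y) in enumerate(mnist_loader):
    output = model(x)
    accuracy = get_accuracy(output, y)

    # Inside this context, backward() also fills p.diag_ggn_mc for each parameter.
    with backpack(DiagGGNMC()):
        loss = loss_function(output, y)
        loss.backward()
        optimizer.step()

    print(
        "Iteration %3.d/%d " % (batch_idx, MAX_ITER)
        + "Minibatch Loss %.3f " % (loss)
        + "Accuracy %.0f" % (accuracy * 100) + "%"
    )

    if batch_idx >= MAX_ITER:
        break
```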
diff --git a/docs/index.html b/docs/index.html
index a5eeb25d6..c086476b7 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -101,10 +101,10 @@
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
+from backpack.utils.examples import load_one_batch_mnist
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = Linear(784, 10)
lossfunc = CrossEntropyLoss()
loss = lossfunc(model(X), y)
@@ -127,10 +127,11 @@
and the variance with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, Variance
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import Variance
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -150,10 +151,11 @@
and the second moment with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, SumGradSquared
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import SumGradSquared
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -173,10 +175,11 @@
and the diagonal of the Gauss-Newton with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, DiagGGNExact
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import DiagGGNExact
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -196,10 +199,11 @@
and KFAC with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, KFAC
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import KFAC
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -292,14 +296,14 @@
Install with
-pip install backpack-for-pytorch
+pip install backpack-for-pytorch
If you use BackPACK in your research, please cite (download bibtex)
-@inproceedings{dangel2020backpack,
+@inproceedings{dangel2020backpack,
    title = {BackPACK: Packing more into Backprop},
    author = {Felix Dangel and Frederik Kunstner and Philipp Hennig},
    booktitle = {International Conference on Learning Representations},
diff --git a/docs_src/CNAME b/docs_src/CNAME
new file mode 100644
index 000000000..b172915ff
--- /dev/null
+++ b/docs_src/CNAME
@@ -0,0 +1 @@
+backpack.pt
\ No newline at end of file
diff --git a/docs_src/README.md b/docs_src/README.md
index fe8032e5b..ca7680652 100644
--- a/docs_src/README.md
+++ b/docs_src/README.md
@@ -1,25 +1,31 @@
**Building the web version**
-Requirements: [Jekyll](https://jekyllrb.com/docs/installation/) and [Sphinx](https://www.sphinx-doc.org/en/1.8/usage/installation.html)
+Requirements: [Jekyll](https://jekyllrb.com/docs/installation/) and [Sphinx](https://www.sphinx-doc.org/en/1.8/usage/installation.html)
+and the Jekyll dependencies (`bundle install` in `docs_src/splash`)
-Full build to output results in `../docs`
-```
-bash buildweb.sh
-```
+- Full build to output results in `../docs`
+ ```
+ bash buildweb.sh
+ ```
-Local build of the Jekyll splash page
-```
-cd splash
-bundle exec jekyll server
-```
-and go to `localhost:4000/backpack`
+- Local build of the Jekyll splash page
+ ```
+ cd splash
+ bundle exec jekyll server
+ ```
+ and go to `localhost:4000/backpack`
+
+ Note: The code examples on backpack.pt are defined with HTML tags in
+ `splash/_includes/code-samples.html`.
+ There is no Python source file to generate them.
+ Test manually by copy-pasting from the resulting page.
-Local build of the documentation
-```
-cd rtd
-make
-```
-and open `/docs_src/rtd_output/index.html`
+- Local build of the documentation
+ ```
+ cd rtd
+ make
+ ```
+ and open `/docs_src/rtd_output/index.html`
diff --git a/docs_src/buildweb.sh b/docs_src/buildweb.sh
index a9659a915..2f79ecfc2 100644
--- a/docs_src/buildweb.sh
+++ b/docs_src/buildweb.sh
@@ -2,3 +2,4 @@ cd splash
bundle exec jekyll build -d "../../docs"
cd ..
touch ../docs/.nojekyll
+cp CNAME ../docs/CNAME
\ No newline at end of file
diff --git a/docs_src/splash/_includes/code-samples.html b/docs_src/splash/_includes/code-samples.html
index 7a701bce8..62ef48348 100644
--- a/docs_src/splash/_includes/code-samples.html
+++ b/docs_src/splash/_includes/code-samples.html
@@ -44,10 +44,10 @@
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
+from backpack.utils.examples import load_one_batch_mnist
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = Linear(784, 10)
lossfunc = CrossEntropyLoss()
loss = lossfunc(model(X), y)
@@ -70,10 +70,11 @@
and the variance with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import Variance
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -94,10 +95,11 @@
and the second moment with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, SumGradSquared
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import SumGradSquared
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -117,10 +119,11 @@
and the diagonal of the Gauss-Newton with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import DiagGGNExact
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -140,10 +143,11 @@
and KFAC with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import KFAC
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
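Taken together, the updated samples follow the pattern below; a minimal sketch assuming `backpack-for-pytorch` is installed, and that the part after `loss`, which the hunks above cut off, continues with a `with backpack(...)` block as in the other BackPACK examples:

```python
from torch.nn import CrossEntropyLoss, Linear

from backpack import backpack, extend
from backpack.extensions import Variance
from backpack.utils.examples import load_one_batch_mnist

# flat=True returns the MNIST batch as a [batch_size x 784] matrix instead of
# [batch_size, 1, 28, 28] images, matching the Linear(784, 10) model below.
X, y = load_one_batch_mnist(flat=True)

model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)

# Assumed continuation: run backward under the Variance extension so that each
# parameter gains a .variance field with the gradient variance over the batch.
with backpack(Variance()):
    loss.backward()

for param in model.parameters():
    print(param.grad.shape, param.variance.shape)
```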