PearsonCorrcoef always zero when validation batch size is one. #286
-
🐛 Bug

Hi, rather than a bug this is a question about how the `PearsonCorrcoef` metric is meant to be used: with a validation batch size of one, the logged correlation is always zero.

To Reproduce

```python
import torch
from pytorch_lightning import LightningModule, LightningDataModule, Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import Dataset
from torchmetrics import PearsonCorrcoef


class RandomDataset(Dataset):
    def __init__(self, length: int):
        self.length = length
        self.data = torch.linspace(0, 1, length, dtype=torch.float)

    def __getitem__(self, index):
        value = self.data[index].reshape(-1)
        return value + torch.rand(1), value

    def __len__(self):
        return self.length


class RangeDataModule(LightningDataModule):
    def setup(self, stage=None):
        pass

    def train_dataloader(self):
        return torch.utils.data.DataLoader(RandomDataset(100), batch_size=10, shuffle=True)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(RandomDataset(50), batch_size=1)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(RandomDataset(100), batch_size=1)


class SuperBoringModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(1, 1, bias=False)
        self.layer.weight.data.fill_(0.5)
        self.train_correlation = PearsonCorrcoef()
        self.val_correlation = PearsonCorrcoef()

    def forward(self, x):
        return self.layer(x)

    @staticmethod
    def loss(y_pred, y_true):
        return torch.nn.functional.mse_loss(y_pred, y_true)

    def training_step(self, batch, batch_idx):
        y_pred, y_true, loss = self.common_step(batch, batch_idx)
        self.log_dict({"train_correlation": self.train_correlation(y_pred, y_true)})
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        y_pred, y_true, loss = self.common_step(batch, batch_idx)
        self.log_dict({"val_correlation": self.val_correlation(y_pred, y_true)})
        return {"x": loss}

    def common_step(self, batch, batch_idx):
        x, y_true = batch
        y_pred = self.layer(x)
        loss = self.loss(y_pred, y_true)
        return y_pred, y_true, loss

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.0001)
        return optimizer


if __name__ == '__main__':
    model = SuperBoringModel()
    datamodule = RangeDataModule()
    trainer = Trainer(logger=TensorBoardLogger("."))
    trainer.fit(model, datamodule=datamodule)
```

Steps to reproduce the behavior: run the script above and watch the logged `val_correlation`, which stays at zero for every validation batch.
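For reference, the degenerate case shows up even outside Lightning. A minimal sketch (the zero result is what this issue reports for single-element batches; the exact value may depend on the installed torchmetrics version):

```python
import torch
from torchmetrics import PearsonCorrcoef

pearson = PearsonCorrcoef()
# A single-element batch has no variance, so there is nothing to
# correlate; as reported in this issue, the result comes out as zero.
print(pearson(torch.tensor([0.7]), torch.tensor([0.5])))
```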
-
When you call

```python
self.val_correlation(y_pred, y_true)
```

you are calculating the correlation on that batch alone. If the batch size is only 1, the correlation will always be 0. What you of course want is to calculate the correlation over the whole validation set, since Pearson's is a global metric. You should therefore do something like this:

```python
def validation_step(self, batch, batch_idx):
    y_pred, y_true, loss = self.common_step(batch, batch_idx)
    # Only accumulate state here; the global value is computed once per epoch.
    self.val_correlation.update(y_pred, y_true)

def validation_epoch_end(self, outputs):
    corr = self.val_correlation.compute()
    self.log_dict({"val_correlation": corr})
```
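One caveat with the manual `compute()` above: the accumulated state is not cleared automatically, so the metric should also be `reset()` between epochs. An equivalent pattern (assuming a reasonably recent torchmetrics/Lightning combination) is to log the metric object itself and let Lightning handle `compute()` and `reset()` at epoch end:

```python
def validation_step(self, batch, batch_idx):
    y_pred, y_true, loss = self.common_step(batch, batch_idx)
    self.val_correlation.update(y_pred, y_true)
    # Logging the Metric object (rather than a tensor) tells Lightning
    # to compute() over the full epoch and reset() afterwards.
    self.log("val_correlation", self.val_correlation, on_step=False, on_epoch=True)
```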
-
Thanks @SkafteNicki! Could I help by contributing a running version of the algorithm?
-
Hi there, not sure what you mean? 🐰
-
Hi @Borda! Something like what is suggested here: we would keep track of certain variables per batch and output the correlation at the end of the loop, instead of appending everything to a list. A sketch follows below.
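For illustration, a running version could accumulate the five sufficient statistics and derive the correlation in `compute()`. A minimal sketch (the class name and details are illustrative, not the implementation that was eventually merged, and the summed-squares formula is less numerically stable than what a production metric would use):

```python
import torch
from torchmetrics import Metric


class RunningPearson(Metric):
    """Illustrative streaming Pearson correlation.

    Instead of storing every prediction/target pair, accumulate
    n, Σx, Σy, Σx², Σy², Σxy and derive r from them in compute().
    """

    def __init__(self):
        super().__init__()
        for name in ("n", "sum_x", "sum_y", "sum_xx", "sum_yy", "sum_xy"):
            self.add_state(name, default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        preds, target = preds.flatten().float(), target.flatten().float()
        self.n += preds.numel()
        self.sum_x += preds.sum()
        self.sum_y += target.sum()
        self.sum_xx += (preds * preds).sum()
        self.sum_yy += (target * target).sum()
        self.sum_xy += (preds * target).sum()

    def compute(self):
        # r = (n·Σxy − Σx·Σy) / sqrt((n·Σx² − (Σx)²) · (n·Σy² − (Σy)²))
        cov = self.n * self.sum_xy - self.sum_x * self.sum_y
        var_x = self.n * self.sum_xx - self.sum_x**2
        var_y = self.n * self.sum_yy - self.sum_y**2
        return cov / (var_x * var_y).sqrt()
```

An instance of this would then drop in wherever `PearsonCorrcoef` is used above, with the same `update()`/`compute()` calling pattern.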
-
I see, YES, that would be a very welcome contribution :]