I trained a model on scRNA-seq data in CPU mode (it took about 1 day). An error occurred when I tried the "Cell2location: spatial mapping" step, also in CPU mode.
mod.train(
    max_epochs=30000,
    # train using full data (batch_size=None)
    batch_size=None,
    # use all data points in training because
    # we need to estimate cell abundance at all locations
    train_size=1,
    use_gpu=False,
    num_particles=1
)
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
---------------------------------------------------------------------------
OutOfMemoryError                          Traceback (most recent call last)
Cell In[23], line 1
----> 1 mod.train(
      2     max_epochs=30000,
      3     # train using full data (batch_size=None)
      4     batch_size=None,
      5     # use all data points in training because
      6     # we need to estimate cell abundance at all locations
      7     train_size=1,
      8     use_gpu=False,
      9     num_particles=1
     10 )
     12 # plot ELBO loss history during training, removing first 100 epochs from the plot
     13 mod.plot_history(1000)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/cell2location/models/_cell2location_model.py:209, in Cell2location.train(self, max_epochs, batch_size, train_size, lr, num_particles, scale_elbo, **kwargs)
    206     scale_elbo = 1.0 / (self.summary_stats["n_cells"] * self.summary_stats["n_vars"])
    207 kwargs["plan_kwargs"]["scale_elbo"] = scale_elbo
--> 209 super().train(**kwargs)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/model/base/_pyromixin.py:184, in PyroSviTrainMixin.train(self, max_epochs, use_gpu, accelerator, device, train_size, validation_size, shuffle_set_split, batch_size, early_stopping, lr, training_plan, plan_kwargs, **trainer_kwargs)
    172     trainer_kwargs["callbacks"].append(PyroJitGuideWarmup())
    174 runner = self._train_runner_cls(
    175     self,
    176     training_plan=training_plan,
   (...)
    182     **trainer_kwargs,
    183 )
--> 184 return runner()

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/train/_trainrunner.py:99, in TrainRunner.__call__(self)
     96 if hasattr(self.data_splitter, "n_val"):
     97     self.training_plan.n_obs_validation = self.data_splitter.n_val
---> 99 self.trainer.fit(self.training_plan, self.data_splitter)
    100 self._update_history()
    102 # data splitter only gets these attrs after fit

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/train/_trainer.py:186, in Trainer.fit(self, *args, **kwargs)
    180 if isinstance(args[0], PyroTrainingPlan):
    181     warnings.filterwarnings(
    182         action="ignore",
    183         category=UserWarning,
    184         message="`LightningModule.configure_optimizers` returned `None`",
    185     )
--> 186 super().fit(*args, **kwargs)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/trainer.py:532, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
    530 self.strategy._lightning_module = model
    531 _verify_strategy_supports_compile(model, self.strategy)
--> 532 call._call_and_handle_interrupt(
    533     self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
    534 )

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/call.py:43, in _call_and_handle_interrupt(trainer, trainer_fn, *args, **kwargs)
     41     if trainer.strategy.launcher is not None:
     42         return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
---> 43     return trainer_fn(*args, **kwargs)
     45 except _TunerExitException:
     46     _call_teardown_hook(trainer)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/trainer.py:571, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
    561 self._data_connector.attach_data(
    562     model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, datamodule=datamodule
    563 )
    565 ckpt_path = self._checkpoint_connector._select_ckpt_path(
    566     self.state.fn,
    567     ckpt_path,
    568     model_provided=True,
    569     model_connected=self.lightning_module is not None,
    570 )
--> 571 self._run(model, ckpt_path=ckpt_path)
    573 assert self.state.stopped
    574 self.training = False

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/trainer.py:941, in Trainer._run(self, model, ckpt_path)
    938 self.strategy.setup_environment()
    939 self.__setup_profiler()
--> 941 call._call_setup_hook(self)  # allow user to setup lightning_module in accelerator environment
    943 # check if we should delay restoring checkpoint till later
    944 if not self.strategy.restore_checkpoint_after_setup:

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/call.py:85, in _call_setup_hook(trainer)
     82 trainer.strategy.barrier("pre_setup")
     84 if trainer.datamodule is not None:
---> 85     _call_lightning_datamodule_hook(trainer, "setup", stage=fn)
     86 _call_callback_hooks(trainer, "setup", stage=fn)
     87 _call_lightning_module_hook(trainer, "setup", stage=fn)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/lightning/pytorch/trainer/call.py:166, in _call_lightning_datamodule_hook(trainer, hook_name, *args, **kwargs)
    164 if callable(fn):
    165     with trainer.profiler.profile(f"[LightningDataModule]{trainer.datamodule.__class__.__name__}.{hook_name}"):
--> 166         return fn(*args, **kwargs)
    167 return None

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/dataloaders/_data_splitting.py:431, in DeviceBackedDataSplitter.setup(self, stage)
    424 self.val_idx = (
    425     np.sort(self.val_idx) if len(self.val_idx) > 0 else self.val_idx
    426 )
    427 self.test_idx = (
    428     np.sort(self.test_idx) if len(self.test_idx) > 0 else self.test_idx
    429 )
--> 431 self.train_tensor_dict = self._get_tensor_dict(
    432     self.train_idx, device=self.device
    433 )
    434 self.test_tensor_dict = self._get_tensor_dict(self.test_idx, device=self.device)
    435 self.val_tensor_dict = self._get_tensor_dict(self.val_idx, device=self.device)

File /mnt/data1/ll/software/miniconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/dataloaders/_data_splitting.py:453, in DeviceBackedDataSplitter._get_tensor_dict(self, indices, device)
    450     tensor_dict = batch
    452 for k, v in tensor_dict.items():
--> 453     tensor_dict[k] = v.to(device)
    455 return tensor_dict
    456 else:

OutOfMemoryError: CUDA out of memory. Tried to allocate 88.44 GiB. GPU
This likely means an incompatibility between your cell2location and scvi-tools versions. I recommend installing the GitHub version of cell2location, which uses the 'accelerator' and 'device' arguments instead of 'use_gpu'.
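For example, something like the following should install the development version (assuming the standard BayraktarLab repository):

pip install git+https://github.com/BayraktarLab/cell2location.git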
That said, cell2location is going to take a very long time on CPU. If the data doesn't fit into GPU memory, I recommend reading #356 and #358 for tips on working with large data.
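With the GitHub version installed, a CPU-only training call would look roughly like the sketch below. This is only an illustration of the 'accelerator' argument mentioned above; the exact supported values depend on your installed scvi-tools version:

mod.train(
    max_epochs=30000,
    # train using full data (batch_size=None)
    batch_size=None,
    # use all data points so cell abundance is estimated at all locations
    train_size=1,
    # replaces use_gpu=False in the newer API
    accelerator="cpu",
    num_particles=1,
)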