Skip to content

Commit

Permalink
pre-release arch updates (#933)
Browse files Browse the repository at this point in the history
* pre-release arch updates

* codestyle

* codestyle

* typos

* changelog
  • Loading branch information
Scitator authored Sep 7, 2020
1 parent 2b5c0e8 commit 2275ea9
Show file tree
Hide file tree
Showing 17 changed files with 286 additions and 164 deletions.
27 changes: 24 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).


## [20.09] - YYYY-MM-DD
## [20.09.1] - YYYY-MM-DD

### Added

-

### Changed

-

### Removed

-

### Fixed

-


## [20.09] - 2020-09-07

### Added

Expand All @@ -15,10 +34,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Multi-scheduler support for multi-optimizer case ([#923](https://github.com/catalyst-team/catalyst/pull/923))
- Native mixed-precision training support ([#740](https://github.com/catalyst-team/catalyst/issues/740))
- `OptimizerCallback` - flag `use_fast_zero_grad` for faster (and hacky) version of `optimizer.zero_grad()` ([#927](https://github.com/catalyst-team/catalyst/pull/927))
- `IOptimizerCallback`, `ISchedulerCallback`, `ICheckpointCallback`, `ILoggerCallback` as core abstractions for Callbacks ([#933](https://github.com/catalyst-team/catalyst/pull/933))
- flag `USE_AMP` for PyTorch AMP usage ([#933](https://github.com/catalyst-team/catalyst/pull/933))

### Changed

-
- Pruning moved to `catalyst.dl` ([#933](https://github.com/catalyst-team/catalyst/pull/933))
- default `USE_APEX` changed to 0 ([#933](https://github.com/catalyst-team/catalyst/pull/933))

### Removed

Expand All @@ -35,7 +57,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Catalyst dependency on system git ([#922](https://github.com/catalyst-team/catalyst/pull/922))



## [20.08] - 2020-08-09

### Added
Expand Down
2 changes: 1 addition & 1 deletion catalyst/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "20.08.3"
__version__ = "20.09"
15 changes: 9 additions & 6 deletions catalyst/core/callbacks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# flake8: noqa

from catalyst.tools.settings import IS_PRUNING_AVAILABLE

from catalyst.core.callbacks.batch_overfit import BatchOverfitCallback
from catalyst.core.callbacks.checkpoint import (
ICheckpointCallback,
BaseCheckpointCallback,
CheckpointCallback,
IterationCheckpointCallback,
)
Expand All @@ -15,6 +15,7 @@
)
from catalyst.core.callbacks.exception import ExceptionCallback
from catalyst.core.callbacks.logging import (
ILoggerCallback,
ConsoleLogger,
TensorboardLogger,
VerboseLogger,
Expand All @@ -30,13 +31,15 @@
MetricManagerCallback,
)
from catalyst.core.callbacks.optimizer import (
IOptimizerCallback,
OptimizerCallback,
AMPOptimizerCallback,
)
from catalyst.core.callbacks.periodic_loader import PeriodicLoaderCallback
from catalyst.core.callbacks.scheduler import LRUpdater, SchedulerCallback
from catalyst.core.callbacks.scheduler import (
ISchedulerCallback,
LRUpdater,
SchedulerCallback,
)
from catalyst.core.callbacks.timer import TimerCallback
from catalyst.core.callbacks.validation import ValidationManagerCallback

if IS_PRUNING_AVAILABLE:
from catalyst.core.callbacks.pruning import PruningCallback
50 changes: 36 additions & 14 deletions catalyst/core/callbacks/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,15 @@ def _load_states_from_file_map(
print(f" loaded: {', '.join(parts_to_load)}")


class BaseCheckpointCallback(Callback):
class ICheckpointCallback(Callback):
"""
Checkpoint callback interface, abstraction over model checkpointing step.
"""

pass


class BaseCheckpointCallback(ICheckpointCallback):
"""Base class for all checkpoint callbacks."""

def __init__(self, metrics_filename: str = "_metrics.json"):
Expand All @@ -182,19 +190,28 @@ def __init__(self, metrics_filename: str = "_metrics.json"):
metrics_filename (str): filename to save metrics
in checkpoint folder. Must end with ``.json`` or ``.yml``
"""
super().__init__(order=CallbackOrder.external, node=CallbackNode.all)
super().__init__(
order=CallbackOrder.external, node=CallbackNode.master
)
self.metrics_filename = metrics_filename
self.metrics: dict = {}

def get_checkpoint_suffix(self, checkpoint: dict) -> str:
def _get_checkpoint_suffix(self, checkpoint: dict) -> str:
return "checkpoint"

def save_metric(self, logdir: Union[str, Path], metrics: Dict) -> None:
def _save_metric(self, logdir: Union[str, Path], metrics: Dict) -> None:
utils.save_config(
metrics, f"{logdir}/checkpoints/{self.metrics_filename}"
)

def on_exception(self, runner: IRunner):
"""
Exception handler.
Args:
runner: current runner
"""
exception = runner.exception
if not utils.is_exception(exception):
return
Expand All @@ -206,7 +223,7 @@ def on_exception(self, runner: IRunner):

try:
checkpoint = _pack_runner(runner)
suffix = self.get_checkpoint_suffix(checkpoint)
suffix = self._get_checkpoint_suffix(checkpoint)
suffix = f"{suffix}.exception_{exception.__class__.__name__}"
utils.save_checkpoint(
logdir=Path(f"{runner.logdir}/checkpoints/"),
Expand All @@ -218,7 +235,7 @@ def on_exception(self, runner: IRunner):
)
metrics = self.metrics
metrics[suffix] = runner.valid_metrics
self.save_metric(runner.logdir, metrics)
self._save_metric(runner.logdir, metrics)
except Exception: # noqa: S110
pass

Expand Down Expand Up @@ -343,7 +360,7 @@ def __init__(
self._keys_from_state = ["resume", "resume_dir"]
self._save_fn: Callable = None

def get_checkpoint_suffix(self, checkpoint: dict) -> str:
def _get_checkpoint_suffix(self, checkpoint: dict) -> str:
"""
Create checkpoint filename suffix based on checkpoint data.
Expand Down Expand Up @@ -484,7 +501,7 @@ def process_checkpoint(
_, filepath = self._save_checkpoint(
logdir=logdir,
checkpoint=checkpoint,
suffix=self.get_checkpoint_suffix(checkpoint),
suffix=self._get_checkpoint_suffix(checkpoint),
is_best=is_best,
is_last=True,
)
Expand All @@ -495,7 +512,7 @@ def process_checkpoint(
self.metrics_history.append(metrics_record)
self.truncate_checkpoints(minimize_metric=minimize_metric)
metrics = self.process_metrics(valid_metrics)
self.save_metric(logdir, metrics)
self._save_metric(logdir, metrics)

@staticmethod
def _load_runner(
Expand Down Expand Up @@ -637,7 +654,7 @@ def on_stage_end(self, runner: IRunner) -> None:
is_last=False, # don't need that because current state is last
)
metrics = self.process_metrics(checkpoint["valid_metrics"])
self.save_metric(runner.logdir, metrics)
self._save_metric(runner.logdir, metrics)
main_metric_value = metrics["last"][runner.main_metric]
log_message += "{filepath}\t{metric:3.4f}".format(
filepath=filepath, metric=main_metric_value
Expand Down Expand Up @@ -711,7 +728,7 @@ def __init__(
self.load_on_stage_end = load_on_stage_end
self._save_fn = None

def get_checkpoint_suffix(self, checkpoint: dict) -> str:
def _get_checkpoint_suffix(self, checkpoint: dict) -> str:
"""
Create checkpoint filename suffix based on checkpoint data.
Expand Down Expand Up @@ -777,7 +794,7 @@ def process_checkpoint(
filepath = utils.save_checkpoint(
logdir=Path(f"{logdir}/checkpoints/"),
checkpoint=checkpoint,
suffix=self.get_checkpoint_suffix(checkpoint),
suffix=self._get_checkpoint_suffix(checkpoint),
is_best=False,
is_last=False,
saver_fn=self._save_fn,
Expand All @@ -789,7 +806,7 @@ def process_checkpoint(
self.metrics_history.append(batch_metrics)

metrics = self.process_metrics()
self.save_metric(logdir, metrics)
self._save_metric(logdir, metrics)
print(f"\nSaved checkpoint at {filepath}")

def on_stage_start(self, runner: IRunner):
Expand Down Expand Up @@ -843,4 +860,9 @@ def on_stage_end(self, runner: IRunner):
)


__all__ = ["CheckpointCallback", "IterationCheckpointCallback"]
__all__ = [
"CheckpointCallback",
"IterationCheckpointCallback",
"ICheckpointCallback",
"BaseCheckpointCallback",
]
2 changes: 1 addition & 1 deletion catalyst/core/callbacks/early_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class CheckRunCallback(Callback):
"""Execute exeperiment not fully."""
"""Executes only a pipeline part from the ``Experiment``."""

def __init__(self, num_batch_steps: int = 3, num_epoch_steps: int = 2):
"""
Expand Down
13 changes: 10 additions & 3 deletions catalyst/core/callbacks/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from catalyst.core.runner import IRunner


class VerboseLogger(Callback):
class ILoggerCallback(Callback):
"""Logger callback interface, abstraction over logging step"""

pass


class VerboseLogger(ILoggerCallback):
"""Logs the params into console."""

def __init__(
Expand Down Expand Up @@ -97,7 +103,7 @@ def on_exception(self, runner: IRunner):
runner.need_exception_reraise = False


class ConsoleLogger(Callback):
class ConsoleLogger(ILoggerCallback):
"""Logger callback,
translates ``runner.*_metrics`` to console and text file.
"""
Expand Down Expand Up @@ -153,7 +159,7 @@ def on_epoch_end(self, runner: IRunner):
self.logger.info("", extra={"runner": runner})


class TensorboardLogger(Callback):
class TensorboardLogger(ILoggerCallback):
"""Logger callback, translates ``runner.metric_manager`` to tensorboard."""

def __init__(
Expand Down Expand Up @@ -256,6 +262,7 @@ def on_stage_end(self, runner: IRunner):


__all__ = [
"ILoggerCallback",
"ConsoleLogger",
"TensorboardLogger",
"VerboseLogger",
Expand Down
Loading

0 comments on commit 2275ea9

Please sign in to comment.