Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timer for Paddle optimizer (#7563) (#9128) #9147

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,11 @@ Trainer 是一个简单,但功能完整的 Paddle 训练和评估模块,并
async_save, enable asynchronous saving checkpoints to disk.
enable_all_options, enable all unified checkpoint optimization configs.

--enable_optimizer_timer
是否开启Optimizer的timer统计。(可选,默认为False,不开启)
Whether to enable the Optimizer's timer profiler.
(optional, default is False, not enabled)

--skip_memory_metrics
是否跳过内存profiler检测。(可选,默认为True,跳过)
Whether or not to skip adding of memory profiler reports
Expand Down
19 changes: 15 additions & 4 deletions paddlenlp/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1291,11 +1291,9 @@
get_timers as paddle_get_timers,
)

paddle_pipeline_timers = paddle_get_timers()
for name, timer in paddle_pipeline_timers.timers.items():
elapsed_time = timer.elapsed(reset=False) * 1000.0
for name, timer in paddle_get_timers().timers.items():
elapsed_time = timer.elapsed(reset=True) * 1000.0

Check warning on line 1295 in paddlenlp/trainer/trainer.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trainer/trainer.py#L1295

Added line #L1295 was not covered by tests
paddle_timer_info += f" | {name}: {elapsed_time:.2f}"
paddle_pipeline_timers.log(paddle_pipeline_timers.timers.keys(), reset=True)
except ImportError: # paddle version too old, timer not support
warnings.warn(f"paddle version:{paddle.__git_commit__} does not support pipeline timer")
except AssertionError: # paddle timer not enabled
Expand Down Expand Up @@ -2153,16 +2151,29 @@

model.train()
inputs = self._prepare_inputs(inputs)

# obtain current acc step
if not hasattr(self, "_cur_acc_step"):
self._cur_acc_step = 0

if self._cur_acc_step == self.args.gradient_accumulation_steps:
self._cur_acc_step = 0

Check warning on line 2160 in paddlenlp/trainer/trainer.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trainer/trainer.py#L2160

Added line #L2160 was not covered by tests

self.timers and self.timers(f"forward-acc-{self._cur_acc_step}").start()
with self.autocast_smart_context_manager():
loss = self.compute_loss(model, inputs)

if self.args.gradient_accumulation_steps > 1 and not self._enable_delay_scale_loss():
loss = loss / self.args.gradient_accumulation_steps

self.timers and self.timers(f"forward-acc-{self._cur_acc_step}").stop()

self.timers and self.timers(f"backward-acc-{self._cur_acc_step}").start()
if self.do_grad_scaling:
self.scaler.scale(loss).backward()
else:
loss.backward()
self.timers and self.timers(f"backward-acc-{self._cur_acc_step}").stop()
return loss.detach()

def training_pipeline_step(self, model: nn.Layer, inputs: Dict[str, Union[paddle.Tensor, Any]]) -> paddle.Tensor:
Expand Down
13 changes: 13 additions & 0 deletions paddlenlp/trainer/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,10 @@
)
},
)
enable_optimizer_timer: Optional[bool] = field(
default=False,
metadata={"help": "是否开启Optimzier的timer统计"},
)
ignore_load_lr_and_optim: Optional[bool] = field(
default=False,
metadata={"help": "whether to ignore load optimizer and scheduler."},
Expand Down Expand Up @@ -1267,6 +1271,15 @@
"order": order,
}

try:
if self.enable_optimizer_timer:
hybrid_configs["enable_optimizer_timer"] = True
except (KeyError, AttributeError):
warnings.warn(

Check warning on line 1278 in paddlenlp/trainer/training_args.py

View check run for this annotation

Codecov / codecov/patch

paddlenlp/trainer/training_args.py#L1274-L1278

Added lines #L1274 - L1278 were not covered by tests
"The enable_optimizer_timer is not supported "
"by current version of Paddle. Please try latest develop Paddle."
)

if self.pipeline_parallel_degree > 1:
hybrid_configs["pp_configs"] = dygraph_pp_configs
logger.info(f"using pipeline configs:{dygraph_pp_configs}")
Expand Down
Loading