Skip to content

Commit

Permalink
frontend: minor logging improvements (#787)
Browse files — Browse the repository at this point in the history
* frontend: minor logging improvements

* formatting

* deduplicate all-reduce warning

* more allreduce logs
Loading branch information…
AlpinDale authored Nov 2, 2024
1 parent f98e7b2 commit 0f1af04
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 21 deletions.
3 changes: 2 additions & 1 deletion aphrodite/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,8 +1138,9 @@ def tensor_progress_bar(iterable:Iterable[Tuple[str, torch.Tensor]],
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
MofNCompleteColumn(),
# MofNCompleteColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
TextColumn("{task.completed:.2f}/{task.total:.2f} GiB"),
TimeElapsedColumn(),
) as progress:
task = progress.add_task(f"[cyan]{desc}", total=final_bytes/units)
Expand Down
33 changes: 19 additions & 14 deletions aphrodite/distributed/device_communicators/custom_all_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,13 @@ def __init__(self,
return

if world_size not in CustomAllreduce._SUPPORTED_WORLD_SIZES:
logger.warning(
"Custom allreduce is disabled due to an unsupported world"
f" size: {world_size}. Supported world sizes:"
f"{str(CustomAllreduce._SUPPORTED_WORLD_SIZES)}. To silence "
"this warning, specify disable_custom_all_reduce=True "
"explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled due to an unsupported world"
f" size: {world_size}. Supported world sizes:"
f"{str(CustomAllreduce._SUPPORTED_WORLD_SIZES)}. To "
"silence this warning, specify disable_custom_all_reduce="
"True explicitly.")
return

if isinstance(device, int):
Expand Down Expand Up @@ -119,19 +120,23 @@ def __init__(self,
cuda_platform: CudaPlatform = current_platform
full_nvlink = cuda_platform.is_full_nvlink(physical_device_ids)
if world_size > 2 and not full_nvlink:
logger.warning(
"Custom allreduce is disabled because it's not supported on"
" more than two PCIe-only GPUs. To silence this warning, "
"specify disable_custom_all_reduce=True explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled because it's not supported "
"on more than two PCIe-only GPUs. To silence this "
"warning, specify disable_custom_all_reduce=True "
"explicitly.")
return
# test P2P capability, this checks software/cudaruntime support
# this is expensive to compute at the first time
# then we cache the result
if not _can_p2p(rank, world_size):
logger.warning(
"Custom allreduce is disabled because your platform lacks "
"GPU P2P capability or P2P test failed. To silence this "
"warning, specify disable_custom_all_reduce=True explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled because your platform lacks "
"GPU P2P capability or P2P test failed. To silence this "
"warning, specify disable_custom_all_reduce=True "
"explicitly.")
return

self.disabled = False
Expand Down
2 changes: 1 addition & 1 deletion aphrodite/modeling/model_loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def load_model(self, *, model_config: ModelConfig,
"fall_back_to_pt_during_load",
True))
model.load_weights(tensor_progress_bar(weights, wgt_bytes,
"Loading modules..."))
"Loading model weights..."))

for _, module in model.named_modules():
quant_method = getattr(module, "quant_method", None)
Expand Down
10 changes: 5 additions & 5 deletions aphrodite/task_handler/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,13 +900,13 @@ def load_model(self) -> None:
if rank == 0:
logger.info(f"Model loaded in {total_time:.2f} seconds.")
logger.info(
"Weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB x {tp} ="
f" {self.model_memory_usage * tp / float(2**30):.2f} GiB")
"Total model weights memory usage: "
f"{self.model_memory_usage * tp / float(2**30):.2f} GiB")
else:
logger.info(f"Model weights loaded in {total_time:.2f} seconds.")
logger.info("Weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB")
logger.info(
"Total model weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB")

if self.lora_config:
assert supports_lora(self.model), "Model does not support LoRA"
Expand Down

0 comments on commit 0f1af04

Please sign in to comment.