Skip to content

Commit

Permalink
frontend: minor logging improvements (#787)
Browse files — Browse the repository at this point in the history
* frontend: minor logging improvements

* formatting

* deduplicate all-reduce warning

* more allreduce logs
Loading branch information…
AlpinDale authored Nov 2, 2024
1 parent f98e7b2 commit 0f1af04
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 21 deletions.
3 changes: 2 additions & 1 deletion aphrodite/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,8 +1138,9 @@ def tensor_progress_bar(iterable:Iterable[Tuple[str, torch.Tensor]],
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
MofNCompleteColumn(),
# MofNCompleteColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
TextColumn("{task.completed:.2f}/{task.total:.2f} GiB"),
TimeElapsedColumn(),
) as progress:
task = progress.add_task(f"[cyan]{desc}", total=final_bytes/units)
Expand Down
33 changes: 19 additions & 14 deletions aphrodite/distributed/device_communicators/custom_all_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,13 @@ def __init__(self,
return

if world_size not in CustomAllreduce._SUPPORTED_WORLD_SIZES:
logger.warning(
"Custom allreduce is disabled due to an unsupported world"
f" size: {world_size}. Supported world sizes:"
f"{str(CustomAllreduce._SUPPORTED_WORLD_SIZES)}. To silence "
"this warning, specify disable_custom_all_reduce=True "
"explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled due to an unsupported world"
f" size: {world_size}. Supported world sizes:"
f"{str(CustomAllreduce._SUPPORTED_WORLD_SIZES)}. To "
"silence this warning, specify disable_custom_all_reduce="
"True explicitly.")
return

if isinstance(device, int):
Expand Down Expand Up @@ -119,19 +120,23 @@ def __init__(self,
cuda_platform: CudaPlatform = current_platform
full_nvlink = cuda_platform.is_full_nvlink(physical_device_ids)
if world_size > 2 and not full_nvlink:
logger.warning(
"Custom allreduce is disabled because it's not supported on"
" more than two PCIe-only GPUs. To silence this warning, "
"specify disable_custom_all_reduce=True explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled because it's not supported "
"on more than two PCIe-only GPUs. To silence this "
"warning, specify disable_custom_all_reduce=True "
"explicitly.")
return
# test P2P capability, this checks software/cudaruntime support
# this is expensive to compute at the first time
# then we cache the result
if not _can_p2p(rank, world_size):
logger.warning(
"Custom allreduce is disabled because your platform lacks "
"GPU P2P capability or P2P test failed. To silence this "
"warning, specify disable_custom_all_reduce=True explicitly.")
if rank == 0:
logger.warning(
"Custom allreduce is disabled because your platform lacks "
"GPU P2P capability or P2P test failed. To silence this "
"warning, specify disable_custom_all_reduce=True "
"explicitly.")
return

self.disabled = False
Expand Down
2 changes: 1 addition & 1 deletion aphrodite/modeling/model_loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def load_model(self, *, model_config: ModelConfig,
"fall_back_to_pt_during_load",
True))
model.load_weights(tensor_progress_bar(weights, wgt_bytes,
"Loading modules..."))
"Loading model weights..."))

for _, module in model.named_modules():
quant_method = getattr(module, "quant_method", None)
Expand Down
10 changes: 5 additions & 5 deletions aphrodite/task_handler/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,13 +900,13 @@ def load_model(self) -> None:
if rank == 0:
logger.info(f"Model loaded in {total_time:.2f} seconds.")
logger.info(
"Weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB x {tp} ="
f" {self.model_memory_usage * tp / float(2**30):.2f} GiB")
"Total model weights memory usage: "
f"{self.model_memory_usage * tp / float(2**30):.2f} GiB")
else:
logger.info(f"Model weights loaded in {total_time:.2f} seconds.")
logger.info("Weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB")
logger.info(
"Total model weights memory usage: "
f"{self.model_memory_usage / float(2**30):.2f} GiB")

if self.lora_config:
assert supports_lora(self.model), "Model does not support LoRA"
Expand Down

0 comments on commit 0f1af04

Please sign in to comment.