Skip to content

Commit

Permalink
add missing deepspeed options
Browse files Browse the repository at this point in the history
Signed-off-by: Oleg S <[email protected]>
  • Loading branch information
RobotSail committed Jun 24, 2024
1 parent ee76c17 commit a9f97c8
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/instructlab/training/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ class DeepSpeedOptions(BaseModel):
# Fraction of optimizer state to offload to CPU when CPU offload is enabled
# (1 = offload everything; see the matching --cpu_offload_optimizer_ratio CLI flag).
cpu_offload_optimizer_ratio: float = 1
# Pin host memory for CPU-offloaded optimizer state (faster H2D/D2H transfers
# at the cost of extra CPU memory; see --cpu_offload_optimizer_pin_memory).
cpu_offload_optimizer_pin_memory: bool = False

# don't save in deepspeed format as a default
# Forwarded to the training subprocess as --save_samples_ds when set;
# presumably a save interval measured in samples — confirm against main_ds.py.
save_samples: int | None = None


class TrainingArgs(BaseModel):
"""
Expand Down
25 changes: 25 additions & 0 deletions src/instructlab/training/main_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,19 @@ def run_training(torch_args: TorchrunArgs, train_args: TrainingArgs):
if train_args.lora.quantize_data_type == config.QuantizeDataType.NF4:
command.append("--lora_quant_bits=4")

# deepspeed opts
# Translate DeepSpeedOptions into CLI flags for the torchrun child process.
# NOTE(review): a save_samples of 0 is falsy and therefore treated the same
# as None (flag omitted) — confirm that is the intended behavior.
if train_args.deepspeed_options.save_samples:
    command.append(f"--save_samples_ds={train_args.deepspeed_options.save_samples}")
if train_args.deepspeed_options.cpu_offload_optimizer:
    # The ratio flag is only meaningful when offload itself is enabled,
    # so both are emitted together.
    command.extend(
        [
            "--cpu_offload_optimizer",
            f"--cpu_offload_optimizer_ratio={train_args.deepspeed_options.cpu_offload_optimizer_ratio}",
        ]
    )
if train_args.deepspeed_options.cpu_offload_optimizer_pin_memory:
    command.append("--cpu_offload_optimizer_pin_memory")

print(f"\033[92mRunning command: {' '.join(command)}\033[0m")
process = None
try:
Expand Down Expand Up @@ -664,6 +677,18 @@ def run_training(torch_args: TorchrunArgs, train_args: TrainingArgs):
default=False,
help="Offload optimizer to CPU when using DeepSpeed. This configures it to use ZeRO stage 2.",
)
# CPU-offload tuning flags for DeepSpeed; these pair with the
# --cpu_offload_optimizer flag defined just above and mirror the
# fields of DeepSpeedOptions in config.py.
parser.add_argument(
    "--cpu_offload_optimizer_pin_memory",
    action="store_true",
    default=False,
    help="Pin memory when offloading optimizer to CPU. This allows for faster transfers between CPU and GPU. Comes at the cost of higher memory usage and CPU overhead.",
)
parser.add_argument(
    "--cpu_offload_optimizer_ratio",
    type=float,
    default=1.0,
    help="Ratio of the optimizer to be offloaded to CPU. The rest will be on GPU(s).",
)
parser.add_argument("--NEFTune_alpha", type=float, default=None)
parser.add_argument(
"--chat-tmpl-path",
Expand Down

0 comments on commit a9f97c8

Please sign in to comment.