diff --git a/requirements.txt b/requirements.txt
index 8bb8d804..530e1dfb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,8 @@ datasets>=2.15.0
 numba
 numpy
 rich
-dolomite-engine @ git+https://github.com/ibm-granite/dolomite-engine.git@main
+instructlab-dolomite @ git+https://github.com/instructlab/GPTDolomite.git@initial
+
 trl==0.9.4
 peft
 pydantic>=2.7.0
diff --git a/src/instructlab/training/main_ds.py b/src/instructlab/training/main_ds.py
index eeb0c077..4d18ba7d 100644
--- a/src/instructlab/training/main_ds.py
+++ b/src/instructlab/training/main_ds.py
@@ -43,6 +43,9 @@
     setup_logger,
 )
 import instructlab.training.data_process as dp
+from instructlab.dolomite.hf_models import GPTDolomiteForCausalLM
+from instructlab.dolomite.enums import GradientCheckpointingMethod
+from instructlab.dolomite.gradient_checkpointing import apply_gradient_checkpointing
 
 
 def get_ds_config(world_size, samples_per_gpu, grad_accum, opts: DeepSpeedOptions):
@@ -88,8 +91,6 @@ def setup_model(args, tokenizer, train_loader, grad_accum):
     )
 
     if args.is_granite:
-        # Third Party
-        from dolomite_engine.hf_models.models import GPTDolomiteForCausalLM
 
         model = GPTDolomiteForCausalLM.from_pretrained(
             args.model_name_or_path,
@@ -201,9 +202,6 @@ def setup_model(args, tokenizer, train_loader, grad_accum):
     # granite gradient checkpointing is handled uniformly
     # for both lora and full here
     if args.is_granite:
-        # Third Party
-        from dolomite_engine.enums import GradientCheckpointingMethod
-        from dolomite_engine.gradient_checkpointing import apply_gradient_checkpointing
 
         block_name = model._no_split_modules[0]
         apply_gradient_checkpointing(
diff --git a/src/instructlab/training/utils.py b/src/instructlab/training/utils.py
index 6feaa548..a52fe9a0 100644
--- a/src/instructlab/training/utils.py
+++ b/src/instructlab/training/utils.py
@@ -13,6 +13,7 @@
 import warnings
 
 # Third Party
+from instructlab.dolomite.hf_models import export_to_huggingface
 from rich.logging import RichHandler
 from torch import distributed as dist
 from torch.distributed import get_rank, is_initialized
@@ -539,7 +540,6 @@ def save_hf_format_ds(args, model, tokenizer, samples_seen, convert_granite=True
     from tempfile import TemporaryDirectory
 
     # Third Party
-    from dolomite_engine.hf_models import export_to_huggingface
     from safetensors.torch import save_file
 
     with TemporaryDirectory("w") as tmpdir: