Use QDoRA instead of just LoRA #128

Open: wants to merge 2 commits into main
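QDoRA here is peft's DoRA variant (weight-decomposed low-rank adaptation, enabled with use_dora=True) applied on top of a bitsandbytes-quantized base model, rather than a plain LoRA adapter. A minimal sketch of the combination, assuming an illustrative base model, rank, and target modules rather than the exact values this repo uses:

    import torch
    from peft import (
        LoraConfig,
        LoraRuntimeConfig,
        get_peft_model,
        prepare_model_for_kbit_training,
    )
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Quantize the frozen base weights with bitsandbytes (the "Q" in QDoRA).
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-v0.1",  # illustrative base model id
        quantization_config=bnb_config,
        device_map="auto",
    )
    model = prepare_model_for_kbit_training(model)

    # DoRA decomposes each adapted weight into a magnitude vector and a direction;
    # the low-rank update is applied to the direction, so only the LoRA matrices
    # and the magnitude vector are trained while the quantized base stays frozen.
    config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # illustrative
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        use_dora=True,  # DoRA instead of plain LoRA
        runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=True),
    )
    model = get_peft_model(model, config)
    model.print_trainable_parameters()

The runtime_config line mirrors the change to utils/loaders.py below; roughly, ephemeral_gpu_offload temporarily moves some of DoRA's extra initialization math onto the GPU instead of the CPU when working with quantized weights.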
20 changes: 14 additions & 6 deletions llm-lora-finetuning/pipelines/train.py
@@ -19,10 +19,11 @@
from steps import (
evaluate_model,
finetune,
log_metadata_from_step_artifact,
prepare_data,
promote,
log_metadata_from_step_artifact,
)

from zenml import pipeline


@@ -48,7 +49,9 @@ def llm_peft_full_finetune(
"At least one of `load_in_8bit` and `load_in_4bit` must be True."
)
if load_in_4bit and load_in_8bit:
raise ValueError("Only one of `load_in_8bit` and `load_in_4bit` can be True.")
raise ValueError(
"Only one of `load_in_8bit` and `load_in_4bit` can be True."
)

datasets_dir = prepare_data(
base_model_id=base_model_id,
@@ -70,7 +73,7 @@
"evaluate_base",
"base_model_rouge_metrics",
after=["evaluate_base"],
id="log_metadata_evaluation_base"
id="log_metadata_evaluation_base",
)

ft_model_dir = finetune(
@@ -79,7 +82,7 @@
use_fast=use_fast,
load_in_8bit=load_in_8bit,
load_in_4bit=load_in_4bit,
use_accelerate=False
use_accelerate=False,
)

evaluate_model(
@@ -96,7 +99,12 @@
"evaluate_finetuned",
"finetuned_model_rouge_metrics",
after=["evaluate_finetuned"],
id="log_metadata_evaluation_finetuned"
id="log_metadata_evaluation_finetuned",
)

promote(after=["log_metadata_evaluation_finetuned", "log_metadata_evaluation_base"])
promote(
after=[
"log_metadata_evaluation_finetuned",
"log_metadata_evaluation_base",
]
)
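As a usage note, a pipeline wired this way is typically launched by calling the decorated function (or a repo entry point that loads a YAML config). The sketch below is a hypothetical direct invocation that only uses parameters visible in this diff; whether they have defaults, or whether additional required parameters exist, is not shown here:

    from pipelines.train import llm_peft_full_finetune

    # Hypothetical direct run; the repo may instead expose a run.py / config file.
    llm_peft_full_finetune(
        base_model_id="mistralai/Mistral-7B-v0.1",  # illustrative
        use_fast=True,
        load_in_4bit=True,   # exactly one of load_in_4bit / load_in_8bit may be True
        load_in_8bit=False,
    )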
20 changes: 14 additions & 6 deletions llm-lora-finetuning/pipelines/train_accelerated.py
@@ -19,10 +19,11 @@
from steps import (
evaluate_model,
finetune,
log_metadata_from_step_artifact,
prepare_data,
promote,
log_metadata_from_step_artifact,
)

from zenml import pipeline
from zenml.integrations.huggingface.steps import run_with_accelerate

@@ -49,7 +50,9 @@ def llm_peft_full_finetune(
"At least one of `load_in_8bit` and `load_in_4bit` must be True."
)
if load_in_4bit and load_in_8bit:
raise ValueError("Only one of `load_in_8bit` and `load_in_4bit` can be True.")
raise ValueError(
"Only one of `load_in_8bit` and `load_in_4bit` can be True."
)

datasets_dir = prepare_data(
base_model_id=base_model_id,
@@ -71,7 +74,7 @@
"evaluate_base",
"base_model_rouge_metrics",
after=["evaluate_base"],
id="log_metadata_evaluation_base"
id="log_metadata_evaluation_base",
)

ft_model_dir = run_with_accelerate(finetune)(
@@ -80,7 +83,7 @@
use_fast=use_fast,
load_in_8bit=load_in_8bit,
load_in_4bit=load_in_4bit,
use_accelerate=True
use_accelerate=True,
)

evaluate_model(
@@ -97,7 +100,12 @@
"evaluate_finetuned",
"finetuned_model_rouge_metrics",
after=["evaluate_finetuned"],
id="log_metadata_evaluation_finetuned"
id="log_metadata_evaluation_finetuned",
)

promote(after=["log_metadata_evaluation_finetuned", "log_metadata_evaluation_base"])
promote(
after=[
"log_metadata_evaluation_finetuned",
"log_metadata_evaluation_base",
]
)
2 changes: 1 addition & 1 deletion llm-lora-finetuning/steps/__init__.py
@@ -17,6 +17,6 @@

from .evaluate_model import evaluate_model
from .finetune import finetune
from .log_metadata import log_metadata_from_step_artifact
from .prepare_datasets import prepare_data
from .promote import promote
from .log_metadata import log_metadata_from_step_artifact
11 changes: 8 additions & 3 deletions llm-lora-finetuning/steps/evaluate_model.py
@@ -28,6 +28,7 @@
load_pretrained_model,
)
from utils.tokenizer import load_tokenizer, tokenize_for_eval

from zenml import save_artifact, step
from zenml.client import Client
from zenml.logger import get_logger
@@ -65,7 +66,7 @@ def evaluate_model(

if not os.getenv("HF_TOKEN"):
try:
hf_token = client.get_secret("hf_token").secret_values['token']
hf_token = client.get_secret("hf_token").secret_values["token"]
huggingface_hub.login(token=hf_token)
except Exception as e:
logger.warning(f"Error authenticating with Hugging Face: {e}")
@@ -81,7 +82,9 @@
test_dataset = load_from_disk(str((datasets_dir / "test_raw").absolute()))
test_dataset = test_dataset[:50]
ground_truths = test_dataset["meaning_representation"]
tokenized_train_dataset = tokenize_for_eval(test_dataset, tokenizer, system_prompt)
tokenized_train_dataset = tokenize_for_eval(
test_dataset, tokenizer, system_prompt
)

if ft_model_dir is None:
logger.info("Generating using base model...")
@@ -115,7 +118,9 @@
logger.info("Computing ROUGE metrics...")
prefix = "base_model_" if ft_model_dir is None else "finetuned_model_"
rouge = evaluate.load("rouge")
rouge_metrics = rouge.compute(predictions=predictions, references=ground_truths)
rouge_metrics = rouge.compute(
predictions=predictions, references=ground_truths
)

logger.info("Computed metrics: " + str(rouge_metrics))

37 changes: 25 additions & 12 deletions llm-lora-finetuning/steps/finetune.py
@@ -18,20 +18,21 @@
import os
from pathlib import Path

import huggingface_hub
import transformers
from accelerate import Accelerator
from datasets import load_from_disk
import huggingface_hub
from materializers.directory_materializer import DirectoryMaterializer
import transformers
from typing_extensions import Annotated
from utils.callbacks import ZenMLCallback
from utils.loaders import load_base_model
from utils.tokenizer import load_tokenizer
from zenml import step, ArtifactConfig

from zenml import ArtifactConfig, step
from zenml.client import Client
from zenml.logger import get_logger
from zenml.materializers import BuiltInMaterializer
from zenml.utils.cuda_utils import cleanup_gpu_memory
from zenml.client import Client

logger = get_logger(__name__)

@@ -54,7 +55,9 @@ def finetune(
use_fast: bool = True,
load_in_4bit: bool = False,
load_in_8bit: bool = False,
) -> Annotated[Path, ArtifactConfig(name="ft_model_dir", is_model_artifact=True)]:
) -> Annotated[
Path, ArtifactConfig(name="ft_model_dir", is_model_artifact=True)
]:
"""Finetune the model using PEFT.

Base model will be derived from configure step and finetuned model will
@@ -84,13 +87,13 @@
The path to the finetuned model directory.
"""
cleanup_gpu_memory(force=True)

# authenticate with Hugging Face for gated repos
client = Client()

if not os.getenv("HF_TOKEN"):
try:
hf_token = client.get_secret("hf_token").secret_values['token']
hf_token = client.get_secret("hf_token").secret_values["token"]
huggingface_hub.login(token=hf_token)
except Exception as e:
logger.warning(f"Error authenticating with Hugging Face: {e}")
@@ -112,8 +115,12 @@
if should_print:
logger.info("Loading datasets...")
tokenizer = load_tokenizer(base_model_id, use_fast=use_fast)
tokenized_train_dataset = load_from_disk(str((dataset_dir / "train").absolute()))
tokenized_val_dataset = load_from_disk(str((dataset_dir / "val").absolute()))
tokenized_train_dataset = load_from_disk(
str((dataset_dir / "train").absolute())
)
tokenized_val_dataset = load_from_disk(
str((dataset_dir / "val").absolute())
)

if should_print:
logger.info("Loading base model...")
@@ -135,18 +142,24 @@
warmup_steps=warmup_steps,
per_device_train_batch_size=per_device_train_batch_size,
gradient_checkpointing=False,
gradient_checkpointing_kwargs={'use_reentrant':False} if use_accelerate else {},
gradient_checkpointing_kwargs={"use_reentrant": False}
if use_accelerate
else {},
gradient_accumulation_steps=gradient_accumulation_steps,
max_steps=max_steps,
learning_rate=lr,
logging_steps=(
min(logging_steps, max_steps) if max_steps >= 0 else logging_steps
min(logging_steps, max_steps)
if max_steps >= 0
else logging_steps
),
bf16=bf16,
optim=optimizer,
logging_dir="./logs",
save_strategy="steps",
save_steps=min(save_steps, max_steps) if max_steps >= 0 else save_steps,
save_steps=min(save_steps, max_steps)
if max_steps >= 0
else save_steps,
evaluation_strategy="steps",
eval_steps=eval_steps,
do_eval=True,
2 changes: 1 addition & 1 deletion llm-lora-finetuning/steps/log_metadata.py
@@ -17,7 +17,7 @@

from typing import Any, Dict

from zenml import log_model_metadata, step, get_step_context
from zenml import get_step_context, log_model_metadata, step


@step(enable_cache=False)
5 changes: 4 additions & 1 deletion llm-lora-finetuning/steps/prepare_datasets.py
@@ -21,6 +21,7 @@
from materializers.directory_materializer import DirectoryMaterializer
from typing_extensions import Annotated
from utils.tokenizer import generate_and_tokenize_prompt, load_tokenizer

from zenml import log_model_metadata, step
from zenml.materializers import BuiltInMaterializer
from zenml.utils.cuda_utils import cleanup_gpu_memory
@@ -81,7 +82,9 @@ def prepare_data(
)

datasets_path = Path("datasets")
tokenized_train_dataset.save_to_disk(str((datasets_path / "train").absolute()))
tokenized_train_dataset.save_to_disk(
str((datasets_path / "train").absolute())
)
tokenized_val_dataset.save_to_disk(str((datasets_path / "val").absolute()))
test_dataset.save_to_disk(str((datasets_path / "test_raw").absolute()))

1 change: 1 addition & 0 deletions llm-lora-finetuning/utils/callbacks.py
@@ -23,6 +23,7 @@
TrainerState,
TrainingArguments,
)

from zenml import get_step_context

if TYPE_CHECKING:
19 changes: 16 additions & 3 deletions llm-lora-finetuning/utils/loaders.py
@@ -20,7 +20,12 @@

import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from peft import (
LoraConfig,
LoraRuntimeConfig,
get_peft_model,
prepare_model_for_kbit_training,
)
Comment on lines +23 to +28 (Contributor Author): I changed this

from transformers import AutoModelForCausalLM

from utils.logging import print_trainable_parameters
@@ -67,7 +72,10 @@ def load_base_model(
)

model = AutoModelForCausalLM.from_pretrained(
base_model_id, quantization_config=bnb_config, device_map=device_map, trust_remote_code=True,
base_model_id,
quantization_config=bnb_config,
device_map=device_map,
trust_remote_code=True,
)

if is_training:
@@ -90,6 +98,8 @@
bias="none",
lora_dropout=0.05, # Conventional
task_type="CAUSAL_LM",
use_dora=True,
runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=True),
Comment on lines +101 to +102 (Contributor Author): I added these two lines

)

model = get_peft_model(model, config)
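A quick sanity check that the DoRA variant is actually active on the model returned by get_peft_model; a sketch, assuming the default adapter name used by peft:

    # Inspect the active adapter config after get_peft_model().
    peft_cfg = model.peft_config["default"]
    assert peft_cfg.use_dora, "expected a DoRA adapter, not plain LoRA"
    print(peft_cfg.runtime_config)      # LoraRuntimeConfig(ephemeral_gpu_offload=True)
    model.print_trainable_parameters()  # DoRA trains a magnitude vector per target module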
@@ -126,6 +136,9 @@ def load_pretrained_model(
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
ft_model_dir, quantization_config=bnb_config, device_map="auto", trust_remote_code=True,
ft_model_dir,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True,
)
return model