Commit

Log metadata separately
avishniakov committed Jun 7, 2024
1 parent d9172c7 commit 817a1b2
Showing 6 changed files with 110 additions and 32 deletions.
39 changes: 30 additions & 9 deletions llm-lora-finetuning/pipelines/train.py
@@ -16,7 +16,13 @@
 #
 
 
-from steps import evaluate_model, finetune, prepare_data, promote
+from steps import (
+    evaluate_model,
+    finetune,
+    prepare_data,
+    promote,
+    log_metadata_from_step_artifact,
+)
 from zenml import pipeline


@@ -49,32 +55,47 @@ def llm_peft_full_finetune(
         system_prompt=system_prompt,
         use_fast=use_fast,
     )
-    ft_model_dir = finetune(
+
+    evaluate_model(
+        base_model_id,
+        system_prompt,
+        datasets_dir,
+        None,
+        use_fast=use_fast,
+        load_in_8bit=load_in_8bit,
+        load_in_4bit=load_in_4bit,
+        id="evaluate_base",
+    )
+    log_metadata_from_step_artifact(
+        "evaluate_base",
+        "base_model_rouge_metrics",
+        after=["evaluate_base"],
+        id="log_metadata_evaluation_base"
+    )
+
-    evaluate_model(
+    ft_model_dir = finetune(
         base_model_id,
         system_prompt,
         datasets_dir,
-        ft_model_dir,
         use_fast=use_fast,
         load_in_8bit=load_in_8bit,
         load_in_4bit=load_in_4bit,
-        id="evaluate_finetuned",
     )
+
     evaluate_model(
         base_model_id,
         system_prompt,
         datasets_dir,
-        None,
+        ft_model_dir,
         use_fast=use_fast,
         load_in_8bit=load_in_8bit,
         load_in_4bit=load_in_4bit,
-        id="evaluate_base",
+        id="evaluate_finetuned",
     )
-    promote(after=["evaluate_finetuned", "evaluate_base"])
+    log_metadata_from_step_artifact(
+        "evaluate_finetuned",
+        "finetuned_model_rouge_metrics",
+        after=["evaluate_finetuned"],
+        id="log_metadata_evaluation_finetuned"
+    )
+
+    promote(after=["log_metadata_evaluation_finetuned", "log_metadata_evaluation_base"])
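The ordering above relies on two ZenML invocation arguments used throughout this diff: id= names a step invocation, and after= adds a control dependency on invocations whose output is not consumed. A minimal sketch of that mechanism under those assumptions (the step names here are hypothetical, not from this repo):

from zenml import pipeline, step


@step
def producer() -> int:
    # Produces an output; data dependencies order themselves automatically.
    return 42


@step(enable_cache=False)
def reporter() -> None:
    # Returns nothing, so ZenML cannot infer its place in the DAG.
    print("runs only after `producer`")


@pipeline
def ordering_demo():
    producer(id="producer")       # id= names this invocation
    reporter(after=["producer"])  # after= forces the control dependency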
42 changes: 31 additions & 11 deletions llm-lora-finetuning/pipelines/train_accelerated.py
@@ -16,7 +16,13 @@
 #
 
 
-from steps import evaluate_model, finetune, prepare_data, promote
+from steps import (
+    evaluate_model,
+    finetune,
+    prepare_data,
+    promote,
+    log_metadata_from_step_artifact,
+)
 from zenml import pipeline
 from zenml.integrations.huggingface.steps import run_with_accelerate

@@ -51,6 +57,23 @@ def llm_peft_full_finetune(
         use_fast=use_fast,
     )
 
+    evaluate_model(
+        base_model_id,
+        system_prompt,
+        datasets_dir,
+        None,
+        use_fast=use_fast,
+        load_in_8bit=load_in_8bit,
+        load_in_4bit=load_in_4bit,
+        id="evaluate_base",
+    )
+    log_metadata_from_step_artifact(
+        "evaluate_base",
+        "base_model_rouge_metrics",
+        after=["evaluate_base"],
+        id="log_metadata_evaluation_base"
+    )
+
     ft_model_dir = run_with_accelerate(finetune)(
         base_model_id=base_model_id,
         dataset_dir=datasets_dir,
@@ -69,14 +92,11 @@
         load_in_4bit=load_in_4bit,
         id="evaluate_finetuned",
     )
-    evaluate_model(
-        base_model_id,
-        system_prompt,
-        datasets_dir,
-        None,
-        use_fast=use_fast,
-        load_in_8bit=load_in_8bit,
-        load_in_4bit=load_in_4bit,
-        id="evaluate_base",
+    log_metadata_from_step_artifact(
+        "evaluate_finetuned",
+        "finetuned_model_rouge_metrics",
+        after=["evaluate_finetuned"],
+        id="log_metadata_evaluation_finetuned"
     )
-    promote(after=["evaluate_finetuned", "evaluate_base"])
+
+    promote(after=["log_metadata_evaluation_finetuned", "log_metadata_evaluation_base"])
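The accelerated variant differs only in how finetune is launched. A minimal sketch of the run_with_accelerate wrapping pattern used above, with a hypothetical step body; the wrapper import matches this diff, and the wrapped step is called with keyword arguments, as it is here:

from zenml import pipeline, step
from zenml.integrations.huggingface.steps import run_with_accelerate


@step
def train(dataset_dir: str, num_epochs: int = 1) -> None:
    # Training logic goes here; the wrapper launches it via Accelerate
    # across the devices available on the executing machine.
    ...


@pipeline
def accelerated_demo():
    # Wrapping happens at pipeline-composition time; arguments go by keyword.
    run_with_accelerate(train)(dataset_dir="data/", num_epochs=3)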
1 change: 1 addition & 0 deletions llm-lora-finetuning/steps/__init__.py
@@ -19,3 +19,4 @@
 from .finetune import finetune
 from .prepare_datasets import prepare_data
 from .promote import promote
+from .log_metadata import log_metadata_from_step_artifact
14 changes: 4 additions & 10 deletions llm-lora-finetuning/steps/evaluate_model.py
@@ -26,7 +26,7 @@
     load_pretrained_model,
 )
 from utils.tokenizer import load_tokenizer, tokenize_for_eval
-from zenml import log_model_metadata, save_artifact, step
+from zenml import save_artifact, step
 from zenml.logger import get_logger
 from zenml.utils.cuda_utils import cleanup_gpu_memory

@@ -67,9 +67,7 @@ def evaluate_model(
     test_dataset = load_from_disk(datasets_dir / "test_raw")
     test_dataset = test_dataset[:50]
     ground_truths = test_dataset["meaning_representation"]
-    tokenized_train_dataset = tokenize_for_eval(
-        test_dataset, tokenizer, system_prompt
-    )
+    tokenized_train_dataset = tokenize_for_eval(test_dataset, tokenizer, system_prompt)
 
     if ft_model_dir is None:
         logger.info("Generating using base model...")
@@ -103,12 +101,8 @@
     logger.info("Computing ROUGE metrics...")
     prefix = "base_model_" if ft_model_dir is None else "finetuned_model_"
     rouge = evaluate.load("rouge")
-    rouge_metrics = rouge.compute(
-        predictions=predictions, references=ground_truths
-    )
-    metadata = {prefix + k: float(v) for k, v in rouge_metrics.items()}
+    rouge_metrics = rouge.compute(predictions=predictions, references=ground_truths)
 
-    log_model_metadata(metadata)
-    logger.info("Computed metrics: " + str(metadata))
+    logger.info("Computed metrics: " + str(rouge_metrics))
 
     save_artifact(rouge_metrics, prefix + "rouge_metrics")
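The evaluation step now persists the raw ROUGE dictionary with save_artifact instead of logging it as model metadata inline; the new log_metadata_from_step_artifact step (below) reads it back through the step context. A minimal sketch of that round trip, assuming illustrative metric values and hypothetical step names:

from typing import Dict

from zenml import get_step_context, save_artifact, step


@step
def evaluate() -> None:
    # Stand-in for the real ROUGE computation; values are illustrative.
    rouge_metrics: Dict[str, float] = {"rouge1": 0.41, "rougeL": 0.38}
    save_artifact(rouge_metrics, "base_model_rouge_metrics")


@step(enable_cache=False)
def read_back() -> None:
    context = get_step_context()
    # Artifacts saved inside a step are attached to that step's outputs,
    # which is exactly how the new metadata step retrieves them.
    metrics = (
        context.pipeline_run.steps["evaluate"]
        .outputs["base_model_rouge_metrics"]
        .load()
    )
    print(metrics)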
4 changes: 2 additions & 2 deletions llm-lora-finetuning/steps/finetune.py
@@ -25,7 +25,7 @@
 from utils.callbacks import ZenMLCallback
 from utils.loaders import load_base_model
 from utils.tokenizer import load_tokenizer
-from zenml import step
+from zenml import step, ArtifactConfig
 from zenml.logger import get_logger
 from zenml.materializers import BuiltInMaterializer
 from zenml.utils.cuda_utils import cleanup_gpu_memory
@@ -51,7 +51,7 @@ def finetune(
     use_fast: bool = True,
     load_in_4bit: bool = False,
     load_in_8bit: bool = False,
-) -> Annotated[Path, "ft_model_dir"]:
+) -> Annotated[Path, ArtifactConfig(name="ft_model_dir", is_model_artifact=True)]:
     """Finetune the model using PEFT.
     Base model will be derived from configure step and finetuned model will
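The return annotation above swaps a bare artifact name for ArtifactConfig, which keeps the name and additionally links the output to the pipeline's model version via is_model_artifact=True. A minimal sketch of the pattern, with a hypothetical step:

from pathlib import Path

from typing_extensions import Annotated
from zenml import ArtifactConfig, step


@step
def export_model_dir() -> Annotated[
    Path, ArtifactConfig(name="ft_model_dir", is_model_artifact=True)
]:
    # The returned path is stored as artifact "ft_model_dir" and attached
    # to the active model version as a model artifact.
    return Path("model_dir")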
42 changes: 42 additions & 0 deletions llm-lora-finetuning/steps/log_metadata.py
@@ -0,0 +1,42 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Any, Dict
+
+from zenml import log_model_metadata, step, get_step_context
+
+
+@step(enable_cache=False)
+def log_metadata_from_step_artifact(
+    step_name: str,
+    artifact_name: str,
+) -> None:
+    """Log metadata to the model from a saved artifact.
+
+    Args:
+        step_name: The name of the step.
+        artifact_name: The name of the artifact.
+    """
+
+    context = get_step_context()
+    metadata_dict: Dict[str, Any] = (
+        context.pipeline_run.steps[step_name].outputs[artifact_name].load()
+    )
+
+    metadata = {artifact_name: metadata_dict}
+
+    log_model_metadata(metadata)

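For completeness, this is the invocation pattern both pipelines above use for the new step; after= and id= are ZenML invocation arguments rather than parameters of the step itself, and the values are taken directly from this diff:

log_metadata_from_step_artifact(
    "evaluate_base",                  # step whose saved artifact to read
    "base_model_rouge_metrics",       # artifact name written by that step
    after=["evaluate_base"],          # run only once evaluation has finished
    id="log_metadata_evaluation_base",
)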