From 46cb4b7e63500eb9c8b9ca12480355f40bd31efc Mon Sep 17 00:00:00 2001
From: wenhuach21
Date: Tue, 4 Jun 2024 12:14:15 +0800
Subject: [PATCH] upgrade lm-eval to 0.4.2

---
 examples/language-modeling/main.py          | 36 ++++++++++++---------
 examples/language-modeling/requirements.txt |  1 +
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/examples/language-modeling/main.py b/examples/language-modeling/main.py
index 104ae7c8..44553ed4 100644
--- a/examples/language-modeling/main.py
+++ b/examples/language-modeling/main.py
@@ -152,8 +152,8 @@ def get_library_version(library_name):
             return "Library not found"
 
-    res = get_library_version("lm-eval")
-    if res == "0.3.0":
+    lm_eval_version = get_library_version("lm-eval")
+    if lm_eval_version == "0.3.0":
         use_eval_legacy = True
 
     if isinstance(tasks, str):
@@ -340,17 +340,21 @@ def get_library_version(library_name):
         model.save_pretrained(output_dir)
         tokenizer.save_pretrained(output_dir)
 
-    # if not args.disable_eval and "fake" in deployment_device: ##support autogptq real eval later
-    #     excel_name = f"{output_dir}_result.xlsx"
-    #     output_dir += "/"
-    #     print(excel_name, flush=True)
-    #     eval_model(model_path=output_dir, tasks=tasks, dtype=dtype, limit=None,
-    #                eval_bs=args.eval_bs, use_accelerate=not args.disable_low_gpu_mem_usage,
-    #                device=torch_device, excel_file=excel_name)
-    from auto_round.auto_quantizer import AutoHfQuantizer
-    from eval_042.evaluation import simple_evaluate
-
-    model_args = f"pretrained={export_dir}-gpu"
-    simple_evaluate(model="hf", model_args=model_args,
-                    tasks="lambada_openai",
-                    batch_size=args.eval_bs)
+    if not args.disable_eval and "fake" in deployment_device and lm_eval_version == "0.4.2": ##support autogptq real eval later
+        excel_name = f"{output_dir}_result.xlsx"
+        output_dir += "/"
+        print(excel_name, flush=True)
+        eval_model(model_path=output_dir, tasks=tasks, dtype=dtype, limit=None,
+                   eval_bs=args.eval_bs, use_accelerate=not args.disable_low_gpu_mem_usage,
+                   device=torch_device, excel_file=excel_name)
+
+    if not args.disable_eval and lm_eval_version == "0.4.2":
+        from eval_042.evaluation import simple_evaluate
+
+        if 'gpu' in deployment_device or "auto_round" in gpu_format or "auto-round" in gpu_format:
+            model_args = f"pretrained={export_dir}-gpu"
+        else:
+            model_args = f"pretrained={output_dir}"
+        simple_evaluate(model="hf", model_args=model_args,
+                        tasks=tasks,
+                        batch_size=args.eval_bs)
diff --git a/examples/language-modeling/requirements.txt b/examples/language-modeling/requirements.txt
index 9b0df5e0..17c55baa 100644
--- a/examples/language-modeling/requirements.txt
+++ b/examples/language-modeling/requirements.txt
@@ -1,5 +1,6 @@
 transformers
 torch
+lm-eval==0.4.2
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@96d185fa6232a5ab685ba7c43e45d1dbb3bb906d
 # For the paper results use the old lm_eval (0.3.0)
 # git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
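
Note (not part of the patch): the patched script routes evaluation through the example's local eval_042.evaluation.simple_evaluate wrapper. For reference, a minimal sketch of calling the upstream lm-eval 0.4.2 Python API directly is given below; the checkpoint path, task list, and batch size are illustrative placeholders, not values taken from this patch.

# Illustrative sketch only: direct use of the lm-eval 0.4.2 Python API.
# The checkpoint path, tasks, and batch size below are placeholders.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                                   # HuggingFace transformers backend
    model_args="pretrained=./tmp_autoround-gpu",  # placeholder path to an exported model
    tasks=["lambada_openai"],                     # any lm-eval 0.4.2 task name(s)
    batch_size=16,
)
print(results["results"])                         # per-task metrics dictionary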