diff --git a/auto_round/autoround.py b/auto_round/autoround.py
index a9c25073..375da000 100644
--- a/auto_round/autoround.py
+++ b/auto_round/autoround.py
@@ -1671,7 +1671,7 @@ class AutoAdamRound(AutoOPTRound):
         dynamic_max_gap (int): The dynamic maximum gap (default is -1).
         data_type (str): The data type to be used (default is "int").
         optimizer: string or object
-        scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
+        scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                            have different choices.
 
     Returns:
@@ -1743,3 +1743,4 @@ def __init__(
             optimizer,
             **kwargs,
         )
+
diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md
index c7ab82d7..e10ce211 100644
--- a/examples/language-modeling/README.md
+++ b/examples/language-modeling/README.md
@@ -5,38 +5,8 @@ This document presents step-by-step instructions for auto-round.
 
 # Prerequisite
 
-## 1. Environment
-PyTorch 1.8 or higher version is needed
-The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
-
-| Model | Transformers version |
-| :----: | :----: |
-| EleutherAI/gpt-j-6b | 4.28/4.30/4.34/4.36 |
-| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
-| meta-llama/Llama-2-7b-hf | 4.30/4.34/4.36 |
-| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
-| tiiuae/falcon-7b | 4.28/4.30/4.34/4.36 |
-| mosaicml/mpt-7b | 4.28/4.30/4.34/4.36 |
-| mosaicml/mpt-7b-chat | 4.34 |
-| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
-| baichuan-inc/Baichuan2-7B-Chat | 4.36 |
-| Qwen/Qwen-7B | 4.28/4.30/4.34/4.36 |
-| Qwen/Qwen1.5-7B-Chat | 4.38/4.40 |
-| THUDM/chatglm3-6b | 4.34/4.36 |
-| mistralai/Mistral-7B-v0.1 | 4.34/4.36 |
-| MBZUAI/LaMini-GPT-124M | 4.34/4.36 |
-| EleutherAI/gpt-neo-125m | 4.34 |
-| databricks/dolly-v2-3b | 4.34 |
-| stabilityai/stablelm-base-alpha-3b | 4.34 |
-| Intel/neural-chat-7b-v3 | 4.34/4.36 |
-| rinna/bilingual-gpt-neox-4b | 4.36 |
-| microsoft/phi-2 | 4.36 |
-| google/gemma-7b | 4.38/4.40 |
-| Salesforce/codegen25-7b-multi | 4.33.2|
-
-
-## 2. Prepare Calibration Dataset
+## 1. Prepare Calibration Dataset
 
 ### Default Dataset
 The [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) in huggingface is adopted as the default calibration data and will be downloaded automatically from the datasets Hub. To customize a dataset, please kindly follow our dataset code.
@@ -44,7 +14,7 @@ See more about loading [huggingface dataset](https://huggingface.co/docs/dataset
 
 ### Customized Dataset
 - Option 1: Pass a local json file path to dataset argument
-- Option 2: Register your dataset following the [code](../../auto_round/calib_dataset.py) and pass the new dataset&split args to initialize AutoRound object,e.g. autoround=Autoround(dataset="NeelNanda/pile-10k:train",...)
+- Option 2: Register your dataset following the [code](../../auto_round/calib_dataset.py) and pass the new dataset and split args to initialize AutoRound object, e.g. autoround=Autoround(dataset="NeelNanda/pile-10k:train", ...)
 - Option 3: pass list of string or list of input_ids to dataset.
 ~~~python
 def customized_data():
@@ -65,11 +35,11 @@ def customized_data_with_tokenizer(tokenizer, seqlen=2048):
     return tokens
 ~~~
 
-Combination of different datasets has been supported, --dataset "./tmp.json,NeelNanda/pile-10k:train, mbpp:train+validation+test". Please note that samples with sequence length < args.seq will be dropped.
+Combination of different datasets has been supported, --dataset "./tmp.json,NeelNanda/pile-10k:train, mbpp:train+validation+test". Please note that samples with sequence length < args.seqlen will be dropped.
 
-## 3. Run Examples
+## 2. Run Examples
 Enter into the examples folder and install lm-eval to run the evaluation
 ```bash
 pip install -r requirements.txt
 ```
@@ -123,7 +93,7 @@
 bash run_autoround_on_gaudi.sh
 
 
-## 4. Evaluation
+## 3. Evaluation
 The example supports evaluation for various tasks in lm_eval. Moreover, it facilitates separate evaluation through the 'evaluation.py' script, which extends support to three additional tasks (ptb, c4, and wikitext2) beyond the capabilities of the official lm_eval. Additionally, evaluation results will be neatly organized into an Excel file for ease of demonstration.
 
 For large models, GPU memory may be insufficient. Enable multi-GPU evaluation by setting 'CUDA_VISIBLE_DEVICES'.
@@ -144,11 +114,42 @@ CUDA_VISIBLE_DEVICES=1,2 python3 eval/evaluation.py --model_name /save_model_pat
 
 You can also utilize the official lm_eval [link](https://github.com/EleutherAI/lm-evaluation-harness/tree/main?tab=readme-ov-file#basic-usage).
 
-## 5. Known Issues
+## 4. Known Issues
 * Random issues in tuning Qwen models
 * ChatGlm-V1 is not supported
 
+## 5. Environment
+
+PyTorch 1.8 or higher version is needed
+The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
+
+| Model | Transformers version |
+| :----: | :----: |
+| EleutherAI/gpt-j-6b | 4.28/4.30/4.34/4.36 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| tiiuae/falcon-7b | 4.28/4.30/4.34/4.36 |
+| mosaicml/mpt-7b | 4.28/4.30/4.34/4.36 |
+| mosaicml/mpt-7b-chat | 4.34 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
+| baichuan-inc/Baichuan2-7B-Chat | 4.36 |
+| Qwen/Qwen-7B | 4.28/4.30/4.34/4.36 |
+| Qwen/Qwen1.5-7B-Chat | 4.38/4.40 |
+| THUDM/chatglm3-6b | 4.34/4.36 |
+| mistralai/Mistral-7B-v0.1 | 4.34/4.36 |
+| MBZUAI/LaMini-GPT-124M | 4.34/4.36 |
+| EleutherAI/gpt-neo-125m | 4.34 |
+| databricks/dolly-v2-3b | 4.34 |
+| stabilityai/stablelm-base-alpha-3b | 4.34 |
+| Intel/neural-chat-7b-v3 | 4.34/4.36 |
+| rinna/bilingual-gpt-neox-4b | 4.36 |
+| microsoft/phi-2 | 4.36 |
+| google/gemma-7b | 4.38/4.40 |
+| Salesforce/codegen25-7b-multi | 4.33.2|
+
+
 ## Reference
 If you find SignRound useful for your research, please cite our paper:
 ```bash
@@ -165,3 +166,4 @@
 
 
 
+
diff --git a/examples/language-modeling/main.py b/examples/language-modeling/main.py
index f1c82cea..b0dce0e1 100644
--- a/examples/language-modeling/main.py
+++ b/examples/language-modeling/main.py
@@ -301,7 +301,7 @@ def get_library_version(library_name):
 deployment_device = args.deployment_device.split(',')
 gpu_format = "auto_gptq"
 if 'gpu' in deployment_device:
-    if lm_head_layer_name in weight_config.keys():
+    if lm_head_layer_name in weight_config.keys() and weight_config[lm_head_layer_name]["data_type"] == "int":
         gpu_format = "autoround"
 
 autoround = round(model, tokenizer, args.bits, args.group_size, sym=args.sym, batch_size=args.train_bs,
@@ -344,3 +344,4 @@ def get_library_version(library_name):
                   eval_bs=args.eval_bs, use_accelerate=not args.disable_low_gpu_mem_usage, device=torch_device, excel_file=excel_name)
 
 
+
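The autoround.py hunk above corrects the documented `scale_dtype` default to "float16". For context, here is a minimal usage sketch of passing `scale_dtype` explicitly when constructing an `AutoRound` object; the stand-in model name and the bits/group-size settings are illustrative assumptions, not taken from the patch:

```python
# Minimal sketch, assuming facebook/opt-125m as a small stand-in model and
# arbitrary 4-bit / group-size-128 settings chosen only for illustration.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Per the updated docstring, "float16" is the default quantization-scale
# dtype; different deployment kernels accept different scale dtypes.
autoround = AutoRound(model, tokenizer, bits=4, group_size=128, scale_dtype="float16")
autoround.quantize()
```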
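The README's "Customized Dataset" section references `customized_data` and `customized_data_with_tokenizer`, but their bodies fall between hunks and are elided. A self-contained sketch of what Option 3 (a list of strings, or pre-tokenized input_ids) could look like; the sample text and sample count are hypothetical:

```python
# Illustrative bodies only; the real implementations are elided in the patch.
def customized_data():
    # Option 3a: a plain list of strings. Samples with sequence length below
    # args.seqlen are dropped during calibration, so keep the texts long enough.
    return ["AutoRound is a weight-only quantization algorithm for LLM inference."] * 128


def customized_data_with_tokenizer(tokenizer, seqlen=2048):
    # Option 3b: pre-tokenized input_ids produced with the model's tokenizer.
    data = ["AutoRound is a weight-only quantization algorithm for LLM inference."] * 128
    tokens = tokenizer(data, truncation=True, return_tensors="pt", max_length=seqlen)
    return tokens
```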
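To make the tightened export-format condition in the main.py hunk concrete, here is a runnable sketch with placeholder state; `deployment_device`, `lm_head_layer_name`, and `weight_config` stand in for values the real script builds from its arguments:

```python
# Placeholder state (assumptions) mirroring what main.py derives at runtime.
deployment_device = "gpu,cpu".split(',')
lm_head_layer_name = "lm_head"
weight_config = {"lm_head": {"data_type": "int"}}

# The GPU export format switches to "autoround" only when the lm-head layer
# appears in weight_config AND is int-quantized; otherwise auto_gptq is kept.
gpu_format = "auto_gptq"
if 'gpu' in deployment_device:
    if lm_head_layer_name in weight_config.keys() \
            and weight_config[lm_head_layer_name]["data_type"] == "int":
        gpu_format = "autoround"

print(gpu_format)  # -> "autoround"; stays "auto_gptq" if data_type were e.g. "float"
```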