Commit 04625d0: simplify config and fix ut

Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 committed Aug 16, 2024
1 parent 13b69e3 commit 04625d0
Showing 10 changed files with 137 additions and 294 deletions.
@@ -74,7 +74,8 @@
     parser.add_argument(
         "--tasks",
         nargs="+",
-        default=[
+        default=["lambada_openai"],
+        choices=[
             "winogrande",
             "copa",
             "piqa",
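The new default keeps `--tasks` a one-element list (`lambada_openai`) while the full task set moves into `choices`, so every user-supplied name is still validated. A minimal sketch of the argparse behavior; the truncated task list here is illustrative:

```python
import argparse

parser = argparse.ArgumentParser()
# nargs="+" collects one or more task names into a list; "choices"
# validates each element, and the default stays a plain list.
parser.add_argument(
    "--tasks",
    nargs="+",
    default=["lambada_openai"],
    choices=["winogrande", "copa", "piqa", "lambada_openai"],  # truncated for illustration
)

print(parser.parse_args([]).tasks)                           # ['lambada_openai']
print(parser.parse_args(["--tasks", "copa", "piqa"]).tasks)  # ['copa', 'piqa']
```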
@@ -14,19 +14,19 @@ function init_params {
   do
     case $var in
       --input_model=*)
-          input_model=$(echo $var |cut -f2 -d=)
+          input_model=$(echo "$var" |cut -f2 -d=)
       ;;
       --batch_size=*)
-          batch_size=$(echo $var |cut -f2 -d=)
+          batch_size=$(echo "$var" |cut -f2 -d=)
       ;;
       --tokenizer=*)
-          tokenizer=$(echo $var |cut -f2 -d=)
+          tokenizer=$(echo "$var" |cut -f2 -d=)
       ;;
       --mode=*)
-          mode=$(echo $var |cut -f2 -d=)
+          mode=$(echo "$var" |cut -f2 -d=)
       ;;
       --intra_op_num_threads=*)
-          intra_op_num_threads=$(echo $var |cut -f2 -d=)
+          intra_op_num_threads=$(echo "$var" |cut -f2 -d=)
       ;;
     esac
   done
@@ -42,19 +42,27 @@ function run_benchmark {
         input_model=$(dirname "$input_model")
     fi
 
+    extra_cmd=""
+
     if [[ "${tokenizer}" =~ "Phi-3-mini" ]]; then
-        extra_cmd="--trust_remote_code True"
+        extra_cmd=$extra_cmd"--trust_remote_code True "
     fi
 
+    if [ "${batch_size}" ]; then
+        extra_cmd=$extra_cmd"--batch_size ${batch_size} "
+    fi
+    if [ "${tokenizer}" ]; then
+        extra_cmd=$extra_cmd"--tokenizer ${tokenizer} "
+    fi
+    if [ "${tasks}" ]; then
+        extra_cmd=$extra_cmd"--tasks ${tasks} "
+    fi
+    if [ "${intra_op_num_threads}" ]; then
+        extra_cmd=$extra_cmd"--intra_op_num_threads ${intra_op_num_threads} "
+    fi
 
-    python main.py \
-               --model_path="${input_model}" \
-               --batch_size="${batch_size-1}" \
-               --tokenizer="${tokenizer-meta-llama/Llama-2-7b-hf}" \
-               --tasks="${tasks-lambada_openai}" \
-               --mode="${mode}" \
-               --intra_op_num_threads="${intra_op_num_threads-24}" \
-               --benchmark \
-               ${extra_cmd}
+    extra_cmd=$extra_cmd"--benchmark"
+    eval "python main.py --model_path ${input_model} --mode ${mode} ${extra_cmd}"
 
 }
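The rewritten run_benchmark no longer bakes defaults such as `${tokenizer-meta-llama/Llama-2-7b-hf}` into the call; it appends a flag to `extra_cmd` only when the variable is set, letting main.py's own argparse defaults cover the rest. The same conditional-argument pattern, sketched in Python with subprocess (an argument list also sidesteps the quoting hazards of `eval`); the wrapper function is illustrative, not part of the repo:

```python
import subprocess

def run_benchmark(input_model, mode, batch_size=None, tokenizer=None,
                  tasks=None, intra_op_num_threads=None):
    # Mandatory arguments first, then optional flags only when provided,
    # mirroring the shell script's extra_cmd accumulation.
    cmd = ["python", "main.py", "--model_path", input_model, "--mode", mode]
    if batch_size is not None:
        cmd += ["--batch_size", str(batch_size)]
    if tokenizer is not None:
        cmd += ["--tokenizer", tokenizer]
    if tasks is not None:
        cmd += ["--tasks", *tasks]
    if intra_op_num_threads is not None:
        cmd += ["--intra_op_num_threads", str(intra_op_num_threads)]
    cmd.append("--benchmark")
    subprocess.run(cmd, check=True)
```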
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,25 @@ function init_params {
do
case $var in
--input_model=*)
input_model=$(echo $var |cut -f2 -d=)
input_model=$(echo "$var" |cut -f2 -d=)
;;
--output_model=*)
output_model=$(echo $var |cut -f2 -d=)
output_model=$(echo "$var" |cut -f2 -d=)
;;
--batch_size=*)
batch_size=$(echo $var |cut -f2 -d=)
batch_size=$(echo "$var" |cut -f2 -d=)
;;
--dataset=*)
dataset=$(echo $var |cut -f2 -d=)
dataset=$(echo "$var" |cut -f2 -d=)
;;
--tokenizer=*)
tokenizer=$(echo $var |cut -f2 -d=)
tokenizer=$(echo "$var" |cut -f2 -d=)
;;
--algorithm=*)
algorithm=$(echo $var |cut -f2 -d=)
algorithm=$(echo "$var" |cut -f2 -d=)
;;
--quant_format=*)
quant_format=$(echo $var |cut -f2 -d=)
quant_format=$(echo "$var" |cut -f2 -d=)
;;
esac
done
@@ -59,31 +59,42 @@ function run_tuning {
         echo "Created directory $output_model"
     fi
 
+    extra_cmd=""
+
     if [[ "${tokenizer}" =~ "Phi-3-mini" ]]; then
         nodes_to_exclude="/model/layers.*/self_attn/qkv_proj/MatMul /model/layers.*/mlp/down_proj/MatMul"
-        extra_cmd="--nodes_to_exclude ${nodes_to_exclude} --trust_remote_code True"
+        extra_cmd=$extra_cmd"--nodes_to_exclude ${nodes_to_exclude} --trust_remote_code True "
     fi
     if [[ "${tokenizer}" =~ "Llama-3-8B" ]]; then
         nodes_to_exclude="/model/layers.*/mlp/down_proj/MatMul"
-        extra_cmd="--nodes_to_exclude ${nodes_to_exclude}"
+        extra_cmd=$extra_cmd"--nodes_to_exclude ${nodes_to_exclude} "
     fi
     if [[ "${tokenizer}" =~ "Qwen2-7B" ]]; then
         nodes_to_exclude="/model/layers.*/mlp/down_proj/MatMul /model/layers.*/mlp/up_proj/MatMul"
-        extra_cmd="--nodes_to_exclude ${nodes_to_exclude}"
+        extra_cmd=$extra_cmd"--nodes_to_exclude ${nodes_to_exclude} "
     fi
 
+    if [ "${tokenizer}" ]; then
+        extra_cmd=$extra_cmd"--tokenizer ${tokenizer} "
+    fi
+    if [ "${batch_size}" ]; then
+        extra_cmd=$extra_cmd"--batch_size ${batch_size} "
+    fi
+    if [ "${dataset}" ]; then
+        extra_cmd=$extra_cmd"--dataset ${dataset} "
+    fi
+    if [ "${algorithm}" ]; then
+        extra_cmd=$extra_cmd"--algorithm ${algorithm} "
+    fi
+    if [ "${tasks}" ]; then
+        extra_cmd=$extra_cmd"--tasks ${tasks} "
+    fi
+    if [ "${quant_format}" ]; then
+        extra_cmd=$extra_cmd"--quant_format ${quant_format} "
+    fi
 
-    python main.py \
-               --model_path "${input_model}" \
-               --tokenizer "${tokenizer-meta-llama/Llama-2-7b-hf}" \
-               --output_model "${output_model}" \
-               --batch_size "${batch_size-1}" \
-               --dataset "${dataset-NeelNanda/pile-10k}" \
-               --algorithm "${algorithm-WOQ_TUNE}" \
-               --tasks "${tasks-lambada_openai}" \
-               --quant_format "${quant_format-QOperator}" \
-               --layer_wise \
-               --tune \
-               ${extra_cmd}
+    extra_cmd=$extra_cmd"--layer_wise --tune"
+    eval "python main.py --model_path ${input_model} --output_model ${output_model} ${extra_cmd}"
 }
 
 main "$@"
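The `nodes_to_exclude` values are regex patterns over ONNX node names, keeping sensitive MatMuls (e.g. `down_proj`) at full precision for these models. A sketch of how such patterns select nodes; the sample node names and the use of `re.fullmatch` are assumptions for illustration, not the matching logic in main.py:

```python
import re

# Patterns passed for Qwen2-7B in the script above.
patterns = [
    "/model/layers.*/mlp/down_proj/MatMul",
    "/model/layers.*/mlp/up_proj/MatMul",
]

node_names = [
    "/model/layers.0/mlp/down_proj/MatMul",
    "/model/layers.0/self_attn/qkv_proj/MatMul",
]

# A node is excluded when any pattern matches its full name.
excluded = [n for n in node_names if any(re.fullmatch(p, n) for p in patterns)]
print(excluded)  # ['/model/layers.0/mlp/down_proj/MatMul']
```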
13 changes: 5 additions & 8 deletions onnx_neural_compressor/algorithms/utility.py
@@ -222,14 +222,10 @@ def calculate_scale_zp(rmin, rmax, qType, sym, reduce_range=False):
     dtype = _qType_to_np_type(qType)
     if isinstance(rmax, np.ndarray):
         if sym:
-            mask = abs(rmin) > abs(rmax)
-            scale = np.ones(rmin.shape).astype(rmin.dtype)
-            scale[mask] = rmin[mask]
-            scale[~mask] = rmax[~mask]
-            abs_max = round((qmax - qmin) / 2)
-            scale /= abs_max
-        else:
-            scale = (rmax - rmin) / (qmax - qmin)
+            max_range = np.maximum(abs(rmin), abs(rmax))
+            rmin = -max_range
+            rmax = max_range
+        scale = (rmax - rmin) / (qmax - qmin)
         scale[abs(scale) < np.finfo(rmax.dtype).tiny] = 1
         zero_point = (
             np.multiply(np.ones(rmax.shape), np.round((qmax + qmin) / 2.0)).astype(dtype)
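The symmetric branch now mirrors the range to ±max(|rmin|, |rmax|) and reuses the shared scale formula, which keeps the scale non-negative (the old masked version could copy a negative rmin into it) and drops the intermediate round(). A standalone numpy sketch of the new computation, with int8 qmin/qmax assumed for illustration (the real function derives them from qType):

```python
import numpy as np

def sym_scale_zp(rmin, rmax, qmin=-128, qmax=127):
    # Mirror the range around zero: both bounds become +/- max(|rmin|, |rmax|).
    max_range = np.maximum(np.abs(rmin), np.abs(rmax))
    rmin, rmax = -max_range, max_range
    scale = (rmax - rmin) / (qmax - qmin)                  # always >= 0
    scale[np.abs(scale) < np.finfo(rmax.dtype).tiny] = 1   # guard all-zero channels
    zero_point = np.full(rmax.shape, round((qmax + qmin) / 2.0), dtype=np.int8)
    return scale, zero_point

rmin = np.array([-0.5, -0.1, 0.0], dtype=np.float32)
rmax = np.array([0.3, 0.4, 0.0], dtype=np.float32)
scale, zp = sym_scale_zp(rmin, rmax)
print(scale)  # per channel 2*max_range/255; the all-zero channel falls back to 1
```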
@@ -612,6 +608,7 @@ def dump_woq_stats(model, quantize_config):
 
         if optype not in res:
             res[optype] = {}
+
         if re.match("^.*_Q\d*G\d*", node.input[1]):
             Q_position = re.search("_Q\d*", node.input[1])
             full_position = re.search("_Q\d*G\d*", node.input[1])
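`dump_woq_stats` recognizes weight-only-quantized initializers by a `_Q<bits>G<group_size>` suffix on the tensor name (the second input of the node). A sketch of the parsing; the sample name and the slicing after the two searches are assumptions, since the diff is truncated here:

```python
import re

name = "model.layers.0.mlp.down_proj.weight_Q4G32"  # hypothetical initializer name

if re.match(r"^.*_Q\d*G\d*", name):
    Q_position = re.search(r"_Q\d*", name)
    full_position = re.search(r"_Q\d*G\d*", name)
    bits = name[Q_position.start() + 2 : Q_position.end()]         # "4"
    group_size = name[Q_position.end() + 1 : full_position.end()]  # "32"
    print(bits, group_size)
```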
2 changes: 1 addition & 1 deletion onnx_neural_compressor/quantization/algorithm_entry.py
@@ -192,7 +192,7 @@ def smooth_quant_entry(
         calibration_data_reader,
         execution_provider=getattr(quant_config, "execution_provider", "CPUExecutionProvider"),
     )
-    smoothed_model = smoother.transform(**quant_config.to_dict())
+    smoothed_model = smoother.transform(**quant_config.get_model_params_dict())
     with tempfile.TemporaryDirectory(prefix="ort.quant.") as tmp_dir:
         # ORT quant API requires str input
         onnx.save_model(
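`to_dict()` would forward every config field to `Smoother.transform`, including settings like `execution_provider` that are already consumed elsewhere; `get_model_params_dict()` narrows the kwargs to the transformation parameters. A minimal illustration of the distinction; `DemoConfig` and its fields are hypothetical, not the library's API:

```python
import dataclasses

@dataclasses.dataclass
class DemoConfig:
    alpha: float = 0.5                                  # a transform() parameter
    execution_provider: str = "CPUExecutionProvider"    # runtime-only setting

    def to_dict(self):
        return dataclasses.asdict(self)

    def get_model_params_dict(self):
        # Restrict kwargs to what transform() accepts.
        return {"alpha": self.alpha}

def transform(alpha):
    return f"smoothed with alpha={alpha}"

cfg = DemoConfig()
# transform(**cfg.to_dict())   # would raise: unexpected keyword 'execution_provider'
print(transform(**cfg.get_model_params_dict()))
```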