Added a script to run multiple experiments sequentially
nikita-savelyevv committed Sep 11, 2024
1 parent e04e7d1 commit 2a3a63c
Showing 3 changed files with 181 additions and 20 deletions.
8 changes: 4 additions & 4 deletions nncf/openvino/quantization/compression_primitives.py
@@ -256,8 +256,8 @@ def _get_compress_model(

compressed_w = opset.clamp(opset.round(compressed_w), level_low, level_high, name="compressed_weights")

INT8_OUTPUT = bool(int(os.environ.get("INT8_OUTPUT", "0")))
if INT8_OUTPUT:
FP32_OUTPUT = bool(int(os.environ.get("FP32_OUTPUT", "0")))
if not FP32_OUTPUT:
compressed_w = opset.convert(compressed_w, dtype)

results = [compressed_w]
@@ -272,8 +272,8 @@ def _get_compress_model(

compiled_model = ov.compile_model(model, device_name="CPU")

NOT_SHARED_OUTPUTS = bool(int(os.environ.get("NOT_SHARED_OUTPUTS", "0")))
return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=not NOT_SHARED_OUTPUTS)
SHARE_OUTPUTS = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))
return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=SHARE_OUTPUTS)

@staticmethod
def _get_compress_decompress_model(
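For context, both toggles in this hunk are plain environment variables read when the compression model is built, so they can be flipped per run without code changes. A minimal sketch (values shown are illustrative, not defaults):

import os

# Illustrative toggles for the flags read in _get_compress_model above:
os.environ["FP32_OUTPUT"] = "1"    # skip converting compressed weights back to (u)int8
os.environ["SHARE_OUTPUTS"] = "1"  # call the compiled model with share_outputs=True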
125 changes: 125 additions & 0 deletions run_weight_compression.py
@@ -0,0 +1,125 @@
import os
import shutil
import subprocess
import threading
import time
from pathlib import Path


def stream_handler(stream, target_file):
    for line in iter(stream.readline, ''):
        print(line, end='')
        target_file.write(line)


parent_model_dir = Path("/home/nsavel/workspace/openvino.genai/llm_bench/python/models")
parent_log_dir = Path("compression_logs")

experiment_params = [
(parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "tmp", "--numpy"),

# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "release_memory_att3/tiny-llama", "--numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "release_memory_att3/tiny-llama", "--end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "release_memory_att3/tiny-llama", "--numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "release_memory_att3/tiny-llama", "--end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "release_memory_att3/tiny-llama", "--numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "release_memory_att3/tiny-llama", "--end-to-end --dynamic --recompile --input-dtype fp32"),
#
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "release_memory_att3/phi3", "--numpy"),
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "release_memory_att3/phi3", "--end-to-end --release-memory"),
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "release_memory_att3/phi3", "--numpy"),
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "release_memory_att3/phi3", "--end-to-end --release-memory"),
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "release_memory_att3/phi3", "--numpy"),
# (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "release_memory_att3/phi3", "--end-to-end --dynamic --recompile --input-dtype fp32"),
#
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "release_memory_att3/llama3-8b", "--numpy"),
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "release_memory_att3/llama3-8b", "--end-to-end --release-memory"),
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "release_memory_att3/llama3-8b", "--numpy"),
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "release_memory_att3/llama3-8b", "--end-to-end --release-memory"),
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "release_memory_att3/llama3-8b", "--numpy"),
# (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "release_memory_att3/llama3-8b", "--end-to-end --dynamic --recompile --input-dtype fp32"),

# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --release-memory --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --release-memory --share-outputs"),
#
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--end-to-end --release-memory --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --end-to-end --release-memory --share-outputs"),
#
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --numpy"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--input-dtype fp32 --end-to-end --release-memory --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end --recompile"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end --release-memory"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end --recompile --share-outputs"),
# (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile-vs-release_tiny-llama_att3", "--dynamic --input-dtype fp32 --end-to-end --release-memory --share-outputs"),
]

for model_dir, log_dir, params in experiment_params:
    model_path = model_dir / "openvino_model.xml"
    cmd = f"/home/nsavel/venvs/nncf/bin/python weight_compression.py --model-path {model_path} --log-dir {log_dir} {params}"

    log_dir.mkdir(parents=True, exist_ok=True)
    with open(log_dir / "log.txt", "a") as log_file:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
            universal_newlines=True,
            preexec_fn=os.setsid,
        )

        stdout_thread = threading.Thread(target=stream_handler, args=(process.stdout, log_file))
        stderr_thread = threading.Thread(target=stream_handler, args=(process.stderr, log_file))

        stdout_thread.start()
        stderr_thread.start()

        stdout_thread.join()
        stderr_thread.join()

        process.wait()
    time.sleep(5)

evaluated_paths = set()
for _, log_dir, _ in experiment_params:
    for model_path in log_dir.rglob("**/*"):
        model_path: Path
        if model_path.suffix != ".xml":
            continue
        if model_path.absolute() in evaluated_paths:
            continue
        evaluated_paths.add(model_path.absolute())

        model_dir = model_path.parent.absolute()
        cmd = f"/home/nsavel/venvs/lm-evaluation-harness/bin/lm_eval --model openvino --model_args pretrained={model_dir},trust_remote_code=True --tasks wikitext --output_path {model_dir}"
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
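The runner shells out once per tuple; the command it builds can be previewed without launching anything. A small sketch using the first (uncommented) entry above, keeping the author's local interpreter path as-is:

from pathlib import Path

# Preview of the command constructed for the first experiment tuple (no process is spawned).
parent_model_dir = Path("/home/nsavel/workspace/openvino.genai/llm_bench/python/models")
model_dir = parent_model_dir / "tiny-llama/pytorch/dldt/FP32"
log_dir = Path("compression_logs") / "tmp"
params = "--numpy"

model_path = model_dir / "openvino_model.xml"
cmd = f"/home/nsavel/venvs/nncf/bin/python weight_compression.py --model-path {model_path} --log-dir {log_dir} {params}"
print(cmd)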
68 changes: 52 additions & 16 deletions weight_compression.py
@@ -38,13 +38,13 @@ def parse_arguments():

parser.add_argument("--end-to-end", action="store_true", help="Enable end-to-end OV compression")

parser.add_argument("--input-dtype", type=str, choices=["fp32", "fp16", "bf16"], default="fp32", help="OV model input dtype")
parser.add_argument("--input-dtype", type=str, choices=["fp32", "fp16", "bf16"], default=None, help="OV model input dtype")

parser.add_argument("--int8-output", action="store_true", help="Output in (u)int8")
parser.add_argument("--fp32-output", action="store_true", help="Output in fp32 instead of (u)int8")

parser.add_argument("--recompile", action="store_true", help="Recompile model every time")

parser.add_argument("--not-shared-outputs", action="store_true", help="Do not share outputs")
parser.add_argument("--share-outputs", action="store_true", help="Share OV model outputs")

parser.add_argument("--save-model", action="store_true", help="Save compressed model")

@@ -63,6 +63,19 @@ def log(mm, fz, log_dir):
)


def count_node_dtypes(model):
    # Get the main dtype of weight constants
    node_count_per_dtype = dict(f32=0, f16=0, bf16=0)
    for node in model.get_ordered_ops():
        friendly_name = node.get_friendly_name()
        if node.get_type_name() != "Constant" or ".weight" not in friendly_name:
            continue
        const_dtype = node.get_element_type().get_type_name()
        if const_dtype in node_count_per_dtype:
            node_count_per_dtype[const_dtype] = node_count_per_dtype[const_dtype] + 1
    return node_count_per_dtype


def main(args):
model_path = Path(args.model_path)
log_dir = Path(args.log_dir)
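The count_node_dtypes helper added above can be exercised on its own. A minimal sketch with a placeholder model path and an illustrative result:

import openvino as ov

core = ov.Core()
model = core.read_model("openvino_model.xml")  # placeholder path
# Tally of ".weight" Constant nodes per element type, e.g. {'f32': 155, 'f16': 0, 'bf16': 0}
print(count_node_dtypes(model))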
@@ -71,26 +84,32 @@ def main(args):
dynamic_compression = args.dynamic
end_to_end_compression = args.end_to_end
input_dtype = args.input_dtype
int8_output = args.int8_output
fp32_output = args.fp32_output
recompile = args.recompile
not_shared_outputs = args.not_shared_outputs
share_outputs = args.share_outputs
save_model = args.save_model
compare_with_numpy = args.compare_with_numpy
invert_numpy_division = args.invert_numpy_division
release_memory = args.release_memory

log_dir_suffix = f"{model_path.parent.name}_"
if numpy_compression:
log_dir_suffix = "numpy"
log_dir_suffix = f"{log_dir_suffix}numpy"
if invert_numpy_division:
log_dir_suffix += "_inverted"
else:
log_dir_suffix = "end-to-end_" if end_to_end_compression else ""
log_dir_suffix = f"{log_dir_suffix}end-to-end_" if end_to_end_compression else ""
log_dir_suffix = f"{log_dir_suffix}{'ov-dynamic' if dynamic_compression else 'ov-static'}"
log_dir_suffix = f"{log_dir_suffix}_{'output-int8' if int8_output else 'output-fp32'}"
log_dir_suffix = f"{log_dir_suffix}_{f'input-{input_dtype}'}"
log_dir_suffix = f"{log_dir_suffix}_{'output-fp32' if fp32_output else 'output-i8'}"
if input_dtype is not None:
log_dir_suffix = f"{log_dir_suffix}_{f'input-{input_dtype}'}"
if recompile:
log_dir_suffix = f"{log_dir_suffix}_recompile"
if not_shared_outputs:
log_dir_suffix = f"{log_dir_suffix}_not-shared-outputs"
if release_memory:
log_dir_suffix = f"{log_dir_suffix}_release-memory"
if share_outputs:
log_dir_suffix = f"{log_dir_suffix}_share-outputs"
print(f"Log dir suffix: {log_dir_suffix}")

memory_monitors = []
for memory_type, mem_from_zero in [(MemoryType.RSS, False), (MemoryType.SYSTEM, False), (MemoryType.SYSTEM, True)]:
@@ -102,13 +121,22 @@ def main(args):
# core.set_property({"ENABLE_MMAP": "NO"})
model = core.read_model(model_path)

node_count_per_dtype = count_node_dtypes(model)
assert max(node_count_per_dtype.values()) == sum(node_count_per_dtype.values()), "Not all consts have the same type"
node_count_per_dtype = sorted([(v, k) for k, v in node_count_per_dtype.items()], reverse=True)
model_dtype = dict(f32="fp32", f16="fp16", bf16="bf16")[node_count_per_dtype[0][1]]

# Update input dtype based on model
if input_dtype is None:
input_dtype = "fp32" if model_dtype == "bf16" else model_dtype

os.environ["NUMPY_COMPRESSION"] = f"{int(numpy_compression)}"
os.environ["DYNAMIC_COMPRESSION"] = f"{int(dynamic_compression)}"
os.environ["END_TO_END_COMPRESSION"] = f"{int(end_to_end_compression)}"
os.environ["INPUT_DTYPE"] = input_dtype
os.environ["INT8_OUTPUT"] = f"{int(int8_output)}"
os.environ["FP32_OUTPUT"] = f"{int(fp32_output)}"
os.environ["RECOMPILE"] = f"{int(recompile)}"
os.environ["NOT_SHARED_OUTPUTS"] = f"{int(not_shared_outputs)}"
os.environ["SHARE_OUTPUTS"] = f"{int(share_outputs)}"
os.environ["COMPARE_WITH_NUMPY"] = f"{int(compare_with_numpy)}"
os.environ["INVERT_NUMPY_DIVISION"] = f"{int(invert_numpy_division)}"
os.environ["RELEASE_MEMORY"] = f"{int(release_memory)}"
@@ -157,8 +185,12 @@ def main(args):
if not csv_exists:
f.write(
"Model Path,"
"Model dtype,"
"Backend,"
"End-to-end,"
"End to end,"
"Recompile,"
"Release memory,"
"Share outputs,"
"Input Shapes,"
"Input,"
"Output,"
@@ -170,11 +202,15 @@
)
f.write(
f"{model_path},"
f"{model_dtype.upper()},"
f"{'NumPy' if numpy_compression else 'OV'},"
f"{end_to_end_compression},"
f"{'-' if numpy_compression else end_to_end_compression},"
f"{'-' if numpy_compression else recompile},"
f"{'-' if numpy_compression else release_memory},"
f"{'-' if numpy_compression else share_outputs},"
f"{'-' if numpy_compression else 'Dynamic' if dynamic_compression else 'Static'},"
f"{'-' if numpy_compression else input_dtype.upper()},"
f"{'-' if numpy_compression else 'INT8' if int8_output else 'FP32'},"
f"{'-' if numpy_compression else 'FP32' if fp32_output else 'INT8'},"
f"{compression_time:.2f},"
f"{peak_memory:.2f},"
f"{cache_size:.2f},"
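As a reference for reading the logs, the hunks above map each CLI flag onto an environment variable consumed by the compression primitives. For a run such as weight_compression.py --end-to-end --share-outputs --fp32-output on an FP32 model, with the remaining flags at their defaults, the expected environment is sketched below (an illustration derived from the os.environ assignments above, not output of the script):

# Expected environment for: weight_compression.py --end-to-end --share-outputs --fp32-output
# (FP32 model, remaining flags at defaults) -- an illustration, not produced by the script.
expected_env = {
    "NUMPY_COMPRESSION": "0",
    "DYNAMIC_COMPRESSION": "0",
    "END_TO_END_COMPRESSION": "1",
    "INPUT_DTYPE": "fp32",  # resolved from the detected model dtype when --input-dtype is omitted
    "FP32_OUTPUT": "1",
    "RECOMPILE": "0",
    "SHARE_OUTPUTS": "1",
    "COMPARE_WITH_NUMPY": "0",
    "INVERT_NUMPY_DIVISION": "0",
    "RELEASE_MEMORY": "0",
}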
