-
Notifications
You must be signed in to change notification settings - Fork 240
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f059285
commit 1c85732
Showing
3 changed files
with
217 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
# Copyright (c) 2024 Intel Corporation | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import argparse | ||
import gc | ||
import os | ||
import shutil | ||
import time | ||
from functools import partial | ||
from pathlib import Path | ||
|
||
import openvino as ov | ||
|
||
import nncf | ||
from nncf.openvino.quantization.compression_primitives import OV_COMPRESSION_PRIMITIVE_CACHE | ||
from tools.memory_monitor import MemoryMonitor | ||
from tools.memory_monitor import MemoryType | ||
|
||
|
||
def parse_arguments():
    """Define the benchmark's command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace: holds ``model_path`` (required), ``log_dir``
        (defaults to "./compression_logs") and one boolean attribute for
        each on/off switch declared below.
    """
    cli = argparse.ArgumentParser()

    # Options that carry a value.
    cli.add_argument("--model-path", type=str, required=True, help="Path where the model is stored")
    cli.add_argument("--log-dir", default="./compression_logs", type=str, help="Directory where logs will be saved")

    # Everything else is a plain switch that is False unless given.
    switches = (
        ("--numpy-compression", "Enable numpy compression"),
        ("--dynamic-compression", "Enable dynamic compression"),
        ("--fp16-input", "Enable FP16 input mode"),
        ("--int8-output", "Output in int8"),
        ("--recompile", "Recompile model every time"),
        ("--share-outputs", "Share outputs"),
        ("--save-model", "Save compressed model"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    return cli.parse_args()
|
||
|
||
def log(mm, fz, log_dir):
    """Hand the data collected by a memory monitor to its own log writer.

    Args:
        mm: Monitor object exposing ``get_data`` and ``save_memory_logs``.
        fz: Forwarded to ``get_data`` as ``memory_from_zero``; when truthy
            the "_from-zero" filename suffix is used, otherwise an empty one.
        log_dir: Destination directory; converted to ``Path`` before use.
    """
    suffix = "_from-zero" if fz else ""
    collected = mm.get_data(memory_from_zero=fz)
    mm.save_memory_logs(*collected, save_dir=Path(log_dir), filename_suffix=suffix)
|
||
|
||
def _build_log_dir_suffix(
    numpy_compression, dynamic_compression, fp16_input, int8_output, recompile, share_outputs
):
    """Compose the per-configuration sub-directory name from the selected options."""
    if numpy_compression:
        return "numpy"

    parts = [
        "ov-dynamic" if dynamic_compression else "ov-static",
        "output-int8" if int8_output else "output-fp32",
        "input-fp16" if fp16_input else "input-fp32",
    ]
    # NOTE(review): the recompile / share-outputs tags are assumed to apply
    # only to the OpenVINO (non-numpy) configurations - confirm against the
    # intended directory layout.
    if recompile:
        parts.append("recompile")
    if share_outputs:
        parts.append("share-outputs")
    return "_".join(parts)


def _append_results_row(csv_path, row):
    """Append one result row to ``csv_path``, writing the header first if the file is new."""
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import csv

    header = (
        "Model Path",
        "Numpy",
        "Submodel Type",
        "Input",
        "Output",
        "Compression Time",
        "Peak Memory",
        "Cache Size",
        "Leftover Memory",
    )
    write_header = not csv_path.exists()
    csv_path.parent.mkdir(exist_ok=True, parents=True)
    with open(csv_path, "a") as f:
        # csv.writer quotes fields containing commas/quotes (e.g. an unusual
        # model path) so columns never shift; "\n" keeps the line endings of
        # previously written results files.
        writer = csv.writer(f, lineterminator="\n")
        if write_header:
            writer.writerow(header)
        writer.writerow(row)


def main(args):
    """Compress a model's weights with NNCF and record time and memory usage.

    Steps, in order:
      1. Start three memory monitors (RSS, SYSTEM, SYSTEM measured from zero)
         whose data is saved via ``log`` through their ``at_exit_fn`` hook.
      2. Read the model, export the selected options as environment
         variables, and time ``nncf.compress_weights``.
      3. Optionally save the compressed model next to the logs together with
         the ``*.json`` files found beside the source model.
      4. Release the model objects, clear the compression primitive cache and
         estimate its size from the drop in monitored memory.
      5. Print the summary and append it to ``<log_dir>/results.csv``.

    Args:
        args: Namespace as produced by ``parse_arguments``.
    """
    model_path = Path(args.model_path)
    log_dir = Path(args.log_dir)

    numpy_compression = args.numpy_compression
    dynamic_compression = args.dynamic_compression
    fp16_input = args.fp16_input
    int8_output = args.int8_output
    recompile = args.recompile
    share_outputs = args.share_outputs
    save_model = args.save_model

    output_dir = log_dir / _build_log_dir_suffix(
        numpy_compression, dynamic_compression, fp16_input, int8_output, recompile, share_outputs
    )

    memory_monitors = []
    for memory_type, mem_from_zero in [(MemoryType.RSS, False), (MemoryType.SYSTEM, False), (MemoryType.SYSTEM, True)]:
        memory_monitor = MemoryMonitor(interval=1e-2, memory_type=memory_type, include_child_processes=False)
        memory_monitor.start(at_exit_fn=partial(log, memory_monitor, mem_from_zero, output_dir))
        memory_monitors.append(memory_monitor)
    # All figures reported below are read from the SYSTEM monitor, queried
    # with memory_from_zero=True; element [1] of get_data() is the sequence
    # of readings that gets printed in MiB.
    system_monitor = memory_monitors[2]
    settle_time = memory_monitors[0].interval * 10

    core = ov.Core()
    # core.set_property({"ENABLE_MMAP": "NO"})
    model = core.read_model(model_path)

    # The options are published as environment variables before compression
    # starts - presumably the compression code under test reads them; verify
    # against that code.
    os.environ["NUMPY_COMPRESSION"] = f"{int(numpy_compression)}"
    os.environ["DYNAMIC_COMPRESSION"] = f"{int(dynamic_compression)}"
    os.environ["FP16_INPUT"] = f"{int(fp16_input)}"
    os.environ["INT8_OUTPUT"] = f"{int(int8_output)}"
    os.environ["RECOMPILE"] = f"{int(recompile)}"
    os.environ["SHARE_OUTPUTS"] = f"{int(share_outputs)}"

    start_time = time.perf_counter()
    compressed_model = nncf.compress_weights(model)
    compression_time = time.perf_counter() - start_time
    print(f"Compression Time: {compression_time:.2f} sec.")

    if save_model:
        # Make sure the destination exists before anything is written to it.
        output_dir.mkdir(parents=True, exist_ok=True)
        ov.save_model(compressed_model, output_dir / "openvino_model.xml")
        for filepath in model_path.parent.glob("*.json"):
            shutil.copy(str(filepath), str(output_dir / filepath.name))

    # Drop every reference to the models so that what remains afterwards can
    # be attributed to the cache and to leftovers.
    del core
    del model
    del compressed_model
    gc.collect()
    time.sleep(0.5)

    before_cache_deletion = system_monitor.get_data(True)[1][-1]
    if OV_COMPRESSION_PRIMITIVE_CACHE._compress_weight_model_cache:
        OV_COMPRESSION_PRIMITIVE_CACHE._compress_weight_model_cache.clear()
        gc.collect()
        # Give the monitor several sampling intervals to observe the release.
        time.sleep(settle_time)
        after_cache_deletion = system_monitor.get_data(True)[1][-1]
    else:
        after_cache_deletion = before_cache_deletion
    cache_size = before_cache_deletion - after_cache_deletion
    print(f"Cache size: {cache_size:.2f} MiB")

    time.sleep(settle_time)

    memory_values = system_monitor.get_data(True)[1]
    leftover_memory = memory_values[-1]
    peak_memory = max(memory_values)
    print(f"Peak memory: {peak_memory:.2f} MiB")
    print(f"Leftover memory: {leftover_memory:.2f} MiB")
    print("Done")

    # Columns that do not apply to the numpy configuration are written as "-".
    _append_results_row(
        log_dir / "results.csv",
        [
            str(model_path),
            str(numpy_compression),
            "-" if numpy_compression else "Dynamic" if dynamic_compression else "Static",
            "-" if numpy_compression else "FP16" if fp16_input else "FP32",
            "-" if numpy_compression else "INT8" if int8_output else "FP32",
            f"{compression_time:.2f}",
            f"{peak_memory:.2f}",
            f"{cache_size:.2f}",
            f"{leftover_memory:.2f}",
        ],
    )
|
||
|
||
if __name__ == "__main__":
    # Script entry point: parse the command line and run the benchmark once.
    main(parse_arguments())