diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 2c28214849e..13700bbac29 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -116,7 +116,8 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: :return: The constant value. """ INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32") - if const_node.get_element_type() == ov.Type.bf16 and INPUT_DTYPE != "bf16": + NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0"))) + if const_node.get_element_type() == ov.Type.bf16 and (INPUT_DTYPE != "bf16" or NUMPY_COMPRESSION): # Fixed FP32 data type as the result for BF16 constant return const_node.get_data(dtype=np.float32) return const_node.data diff --git a/nncf/openvino/quantization/compression_primitives.py b/nncf/openvino/quantization/compression_primitives.py index 8930b7de89b..dcdb8979eb4 100644 --- a/nncf/openvino/quantization/compression_primitives.py +++ b/nncf/openvino/quantization/compression_primitives.py @@ -263,12 +263,14 @@ def _get_compress_model( num_bits = config.num_bits if config.mode in [CompressWeightsMode.INT8_ASYM, config.mode.INT4_ASYM]: - dtype = ov.Type.u8 + # dtype = ov.Type.u8 + dtype = ov.Type.u8 if config.mode == CompressWeightsMode.INT8_ASYM else ov.Type.u4 level_low = 0 level_high = 2**num_bits - 1 compressed_w += opset.convert(zp, ov.Type.f32) elif config.mode in [CompressWeightsMode.INT8_SYM, config.mode.INT4_SYM]: - dtype = ov.Type.i8 + # dtype = ov.Type.i8 + dtype = ov.Type.i8 if config.mode == CompressWeightsMode.INT8_SYM else ov.Type.u4 level_low = -(2 ** (num_bits - 1)) level_high = 2 ** (num_bits - 1) - 1 else: @@ -282,9 +284,10 @@ def _get_compress_model( results = [compressed_w] if not output_only_weight: + s = opset.convert(s, ov.Type.f16) results.append(s) if zp is not None: - results.append(opset.convert(zp, ov.Type.i32)) + results.append(opset.convert(zp, compressed_w.get_element_type())) if return_nodes: return parameters, results @@ -293,7 +296,15 @@ def _get_compress_model( compiled_model = ov.compile_model(model, device_name="CPU") SHARE_OUTPUTS = bool(int(os.environ.get("SHARE_OUTPUTS", "0"))) - return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=SHARE_OUTPUTS) + + def infer(inputs): + infer_request = compiled_model.create_infer_request() + infer_request.infer(inputs, share_outputs=SHARE_OUTPUTS) + outputs = [infer_request.get_output_tensor(i) for i in range(len(infer_request.results))] + return outputs + + # return compiled_model, lambda parameters: compiled_model(parameters, share_outputs=SHARE_OUTPUTS) + return compiled_model, infer @staticmethod def _get_compress_decompress_model( diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py index e831cd27513..8d46610d33f 100644 --- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -226,21 +226,38 @@ def _create_compression_subgraph( original_shape = weight.shape compressed_weight = compress_weight(weight, reduction_axes, compression_config, layer_scales, layer_zero_points) - compressed_const = opset.constant(compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name) + compressed_weight_data = compressed_weight.tensor.data + if isinstance(compressed_weight_data, ov.Tensor): + compressed_const = opset.constant(compressed_weight_data, name=const_node_name) + else: + compressed_const = opset.constant(compressed_weight_data, dtype=compression_dtype, name=const_node_name) + if compressed_const.get_element_type() != compression_dtype: + compressed_const = opset.convert(compressed_const, compression_dtype) converted_const = opset.convert(compressed_const, ov.Type.f16) - if compressed_weight.zero_point is not None and compressed_weight.tensor.dtype == TensorDataType.uint8: - zero_point_const = opset.constant( - compressed_weight.zero_point.data, - dtype=compression_dtype, - name=f"{const_node_name}/zero_point", - ) - converted_zero_point = opset.convert(zero_point_const, ov.Type.f16) + if compressed_weight.zero_point is not None: + zero_point_data = compressed_weight.zero_point.data + if isinstance(zero_point_data, ov.Tensor): + zero_point_const = opset.constant( + compressed_weight.zero_point.data, + name=f"{const_node_name}/zero_point", + ) + else: + zero_point_const = opset.constant( + compressed_weight.zero_point.data, + dtype=compression_dtype, + name=f"{const_node_name}/zero_point", + ) + zero_point_const = opset.convert(zero_point_const, ov.Type.f16) converted_const = opset.subtract( - converted_const, converted_zero_point, name=f"{const_node_name}/zero_point/subtract" + converted_const, zero_point_const, name=f"{const_node_name}/zero_point/subtract" ) - scale_const = opset.constant(compressed_weight.scale.data, dtype=scale_dtype, name=f"{const_node_name}/scale") - if scale_dtype != ov.Type.f16: + scale_data = compressed_weight.scale.data + if isinstance(scale_data, ov.Tensor): + scale_const = opset.constant(scale_data, name=f"{const_node_name}/scale") + else: + scale_const = opset.constant(scale_data, dtype=scale_dtype, name=f"{const_node_name}/scale") + if scale_const.get_element_type() != ov.Type.f16: scale_const = opset.convert(scale_const, ov.Type.f16) mul = opset.multiply( diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py index f1711563076..ebc2e50045e 100644 --- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -421,7 +421,7 @@ def calculate_quantized_weight( compressed_weights = compressed_weights_ov dtype = TensorDataType.uint8 if asym_quant else TensorDataType.int8 - if compressed_weights.dtype != dtype: + if isinstance(compressed_weights.data, np.ndarray) and compressed_weights.dtype != dtype: compressed_weights = compressed_weights.astype(dtype) if scale_ov is not None: scale, zero_point = scale_ov, zero_point_ov diff --git a/run_weight_compression.py b/run_weight_compression.py index d365c8ea2fc..0413034449d 100644 --- a/run_weight_compression.py +++ b/run_weight_compression.py @@ -16,137 +16,137 @@ def stream_handler(stream, target_file): parent_log_dir = Path("compression_logs") experiment_params = [ - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), - - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), - - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), - - - - + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/tiny-llama", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/phi3", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --recompile --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int8/llama3-8b", "--end-to-end --dynamic --release-memory --share-outputs"), + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --numpy"), (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym "), (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--compression-mode int4_asym --recompile"), @@ -274,52 +274,60 @@ def stream_handler(stream, target_file): (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --share-outputs"), (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --recompile --share-outputs"), (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), - - - - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), - (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), - (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/tiny-llama", "--save-model --end-to-end --release-memory"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int8/phi3", "--end-to-end --release-memory"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int8/llama3-8b", "--end-to-end --release-memory"), + # + # + # + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP32", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/FP16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "phi3-mini-4k-instruct/pytorch/dldt/BF16", parent_log_dir / "optimal_configurations/int4/phi3", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp32", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-fp16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --numpy"), + # (parent_model_dir / "Meta-Llama-3-8B/pytorch/dldt/optimum-bf16", parent_log_dir / "optimal_configurations/int4/llama3-8b", "--compression-mode int4_asym --end-to-end --release-memory --share-outputs"), + + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "int4_acc/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "int4_acc/tiny-llama", "--save-model --compression-mode int4_asym"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP32", parent_log_dir / "int4_acc/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/FP16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --numpy"), + # (parent_model_dir / "tiny-llama/pytorch/dldt/BF16", parent_log_dir / "recompile_vs_release-memory/int4/tiny-llama", "--save-model --compression-mode int4_asym --end-to-end"), ] for model_dir, log_dir, params in experiment_params: