diff --git a/docs/coverage.md b/docs/coverage.md
index 9cf1fc3..3bf9b89 100644
--- a/docs/coverage.md
+++ b/docs/coverage.md
@@ -41,7 +41,6 @@ Name                                       Stmts   Miss Branch BrPart  Cover   Missing
 --------------------------------------------------------------------------------------------------------
 \turnkeyml\build\__init__.py                    0      0      0      0   100%
 \turnkeyml\build\onnx_helpers.py               70     34     28      2    45%   15-21, 28-87, 92, 95-100
-\turnkeyml\build\quantization_helpers.py       29     20     18      0    19%   13-30, 35, 50-78
 \turnkeyml\build\sequences.py                  15      1      8      2    87%   62->61, 65
 \turnkeyml\build\tensor_helpers.py             47     26     34      4    41%   17-44, 57, 61, 63-74, 78
 \turnkeyml\build_api.py                        31      9      8      3    64%   68-71, 120-125, 140-147
diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py
index 4a9f157..2f4bc88 100644
--- a/src/turnkeyml/build/export.py
+++ b/src/turnkeyml/build/export.py
@@ -15,7 +15,6 @@
 import turnkeyml.common.build as build
 import turnkeyml.build.tensor_helpers as tensor_helpers
 import turnkeyml.build.onnx_helpers as onnx_helpers
-import turnkeyml.build.quantization_helpers as quant_helpers
 import turnkeyml.common.filesystem as fs


@@ -77,13 +76,6 @@ def converted_onnx_file(state: build.State):
     )


-def quantized_onnx_file(state: build.State):
-    return os.path.join(
-        onnx_dir(state),
-        f"{state.config.build_name}-op{state.config.onnx_opset}-opt-quantized_int8.onnx",
-    )
-
-
 class ExportPlaceholder(stage.Stage):
     """
     Placeholder Stage that should be replaced by a framework-specific export stage,
@@ -571,9 +563,8 @@ def fire(self, state: build.State):
         inputs_file = state.original_inputs_file
         if os.path.isfile(inputs_file):
             inputs = np.load(inputs_file, allow_pickle=True)
-            to_downcast = False if state.quantization_samples else True
             inputs_converted = tensor_helpers.save_inputs(
-                inputs, inputs_file, downcast=to_downcast
+                inputs, inputs_file, downcast=True
             )
         else:
             raise exp.StageError(
@@ -621,58 +612,6 @@
         return state


-class QuantizeONNXModel(stage.Stage):
-    """
-    Stage that takes an ONNX model and a dataset of quantization samples as inputs,
-    and performs static post-training quantization to the model to int8 precision.
-
-    Expected inputs:
-     - state.model is a path to the ONNX model
-     - state.quantization_dataset is a dataset that is used for static quantization
-
-    Outputs:
-     - A *_quantized.onnx file => the quantized onnx model.
-    """
-
-    def __init__(self):
-        super().__init__(
-            unique_name="quantize_onnx",
-            monitor_message="Quantizing ONNX model",
-        )
-
-    def fire(self, state: build.State):
-        input_path = state.intermediate_results[0]
-        output_path = quantized_onnx_file(state)
-
-        quant_helpers.quantize(
-            input_file=input_path,
-            data=state.quantization_samples,
-            output_file=output_path,
-        )
-
-        # Check that the converted model is still valid
-        success_msg = "\tSuccess quantizing ONNX model to int8"
-        fail_msg = "\tFailed quantizing ONNX model to int8"
-
-        if check_model(output_path, success_msg, fail_msg):
-            state.intermediate_results = [output_path]
-
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
-                fs.Keys.ONNX_FILE,
-                output_path,
-            )
-        else:
-            msg = f"""
-            Attempted to use {state.quantization_dataset} to statically quantize
-            model to int8 datatype, however this operation was not successful.
-            More information may be available in the log file at **{self.logfile_path}**
-            """
-            raise exp.StageError(msg)
-
-        return state
-
-
 class SuccessStage(stage.Stage):
     """
     Stage that sets state.build_status = build.Status.SUCCESSFUL_BUILD,
diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py
index 18da5aa..abd39ba 100644
--- a/src/turnkeyml/build/ignition.py
+++ b/src/turnkeyml/build/ignition.py
@@ -258,7 +258,6 @@ def load_or_make_state(
     monitor: bool,
     model: build.UnionValidModelInstanceTypes = None,
     inputs: Optional[Dict[str, Any]] = None,
-    quantization_samples: Optional[Collection] = None,
     state_type: Type = build.State,
     cache_validation_func: Callable = validate_cached_model,
     extra_state_args: Optional[Dict] = None,
@@ -280,7 +279,6 @@
         "cache_dir": cache_dir,
         "config": config,
         "model_type": model_type,
-        "quantization_samples": quantization_samples,
     }

     # Ensure that `rebuild` has a valid value
@@ -306,50 +304,6 @@
            state_type=state_type,
        )

-        # if the previous build is using quantization while the current is not
-        # or vice versa
-        if state.quantization_samples and quantization_samples is None:
-            if rebuild == "never":
-                msg = (
-                    f"Model {config.build_name} was built in a previous call to "
-                    "build_model() with post-training quantization sample enabled."
-                    "However, post-training quantization is not enabled in the "
-                    "current build. Rebuild is necessary but currently the rebuild"
-                    "policy is set to 'never'. "
-                )
-                raise exp.CacheError(msg)
-
-            msg = (
-                f"Model {config.build_name} was built in a previous call to "
-                "build_model() with post-training quantization sample enabled."
-                "However, post-training quantization is not enabled in the "
-                "current build. Starting a fresh build."
-            )
-
-            printing.log_info(msg)
-            return _begin_fresh_build(state_args, state_type)
-
-        if not state.quantization_samples and quantization_samples is not None:
-            if rebuild == "never":
-                msg = (
-                    f"Model {config.build_name} was built in a previous call to "
-                    "build_model() with post-training quantization sample disabled."
-                    "However, post-training quantization is enabled in the "
-                    "current build. Rebuild is necessary but currently the rebuild"
-                    "policy is set to 'never'. "
-                )
-                raise exp.CacheError(msg)
-
-            msg = (
-                f"Model {config.build_name} was built in a previous call to "
-                "build_model() with post-training quantization sample disabled."
-                "However, post-training quantization is enabled in the "
-                "current build. Starting a fresh build."
-            )
-
-            printing.log_info(msg)
-            return _begin_fresh_build(state_args, state_type)
-
     except exp.StateError as e:
         problem = (
             "- build_model() failed to load "
@@ -500,7 +454,6 @@ def model_intake(
     user_model,
     user_inputs,
     user_sequence: Optional[stage.Sequence],
-    user_quantization_samples: Optional[Collection] = None,
 ) -> Tuple[Any, Any, stage.Sequence, build.ModelType, str]:
     # Model intake structure options:
     #   user_model
@@ -550,18 +503,11 @@
     sequence = copy.deepcopy(user_sequence)

     if sequence is None:
-        if user_quantization_samples:
-            if model_type != build.ModelType.PYTORCH:
-                raise exp.IntakeError(
-                    "Currently, post training quantization only supports Pytorch models."
-                )
-            sequence = sequences.pytorch_with_quantization
-        else:
-            sequence = stage.Sequence(
-                "top_level_sequence",
-                "Top Level Sequence",
-                [sequences.onnx_fp32],
-            )
+        sequence = stage.Sequence(
+            "top_level_sequence",
+            "Top Level Sequence",
+            [sequences.onnx_fp32],
+        )

     # If there is an ExportPlaceholder Stage in the sequence, replace it with
     # a framework-specific export Stage.
diff --git a/src/turnkeyml/build/quantization_helpers.py b/src/turnkeyml/build/quantization_helpers.py
deleted file mode 100644
index e945bb2..0000000
--- a/src/turnkeyml/build/quantization_helpers.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import os
-import numpy as np
-
-import onnx
-import onnxruntime
-from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType
-
-
-class DataReader(CalibrationDataReader):
-    """Wrapper class around calibration data, which is used to quantize an onnx model."""
-
-    def __init__(self, input_file, samples, input_shapes=None, pack_inputs=False):
-        session = onnxruntime.InferenceSession(input_file, None)
-        input_names = [inp.name for inp in session.get_inputs()]
-
-        if pack_inputs:
-            expand_each = lambda data: [np.expand_dims(d, axis=0) for d in data]
-            self.enum_data_dicts = iter(
-                [
-                    dict(zip(input_names, expand_each(sample_inputs)))
-                    for sample_inputs in zip(*samples)
-                ]
-            )
-        else:
-            if input_shapes:
-                self.samples = samples.reshape(-1, len(input_shapes), *input_shapes[0])
-            else:
-                self.samples = samples
-
-            self.enum_data_dicts = iter(
-                [dict(zip(input_names, sample)) for sample in self.samples]
-            )
-
-    def get_next(self):
-        return next(self.enum_data_dicts, None)
-
-
-def quantize(
-    input_file,
-    data,
-    input_shapes=None,
-    pack_inputs=False,
-    verbose=False,
-    output_file=None,
-):
-    """
-    Given an onnx file and calibration data on which to quantize,
-    computes and saves quantized onnx model to a local file.
-    """
-    data_reader = DataReader(
-        input_file,
-        samples=data,
-        input_shapes=input_shapes,
-        pack_inputs=pack_inputs,
-    )
-
-    if not output_file:
-        output_file = input_file[:-5] + "_quantized.onnx"
-
-    quantize_static(
-        model_input=input_file,
-        model_output=output_file,
-        calibration_data_reader=data_reader,
-        activation_type=QuantType.QUInt8,
-        weight_type=QuantType.QInt8,
-        op_types_to_quantize=["Conv", "MatMul", "Relu"],
-        extra_options={"ActivationSymmetric": False, "WeightSymmetric": True},
-    )
-
-    onnx.save(onnx.shape_inference.infer_shapes(onnx.load(output_file)), output_file)
-
-    if os.path.isfile("augmented_model.onnx"):
-        os.remove("augmented_model.onnx")
-
-    if verbose:
-        print("Calibrated and quantized model saved.")
-
-    return output_file
diff --git a/src/turnkeyml/build/sequences.py b/src/turnkeyml/build/sequences.py
index bc39a0d..7e90ead 100644
--- a/src/turnkeyml/build/sequences.py
+++ b/src/turnkeyml/build/sequences.py
@@ -35,18 +35,6 @@
     enable_model_validation=True,
 )

-pytorch_with_quantization = stage.Sequence(
-    "pytorch_export_sequence_with_quantization",
-    "Exporting PyTorch Model and Quantizing Exported ONNX",
-    [
-        export.ExportPytorchModel(),
-        export.OptimizeOnnxModel(),
-        export.QuantizeONNXModel(),
-        export.SuccessStage(),
-    ],
-    enable_model_validation=True,
-)
-
 # Plugin interface for sequences
 discovered_plugins = plugins.discover()
diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py
index 5cf681b..44c805c 100644
--- a/src/turnkeyml/build_api.py
+++ b/src/turnkeyml/build_api.py
@@ -17,7 +17,6 @@ def build_model(
     monitor: Optional[bool] = None,
     rebuild: Optional[str] = None,
     sequence: Optional[List[stage.Stage]] = None,
-    quantization_samples: Collection = None,
     onnx_opset: Optional[int] = None,
     device: Optional[str] = None,
 ) -> build.State:
@@ -48,11 +47,6 @@
             - None: Falls back to default
         sequence: Override the default sequence of build stages. Power
             users only.
-        quantization_samples: If set, performs post-training quantization
-            on the ONNX model using the provided samplesIf the previous build used samples
-            that are different to the samples used in current build, the "rebuild"
-            argument needs to be manually set to "always" in the current build
-            in order to create a new ONNX file.
         onnx_opset: ONNX opset to use during ONNX export.
         device: Specific device target to take into account during the build sequence.
             Use the format "device_family", "device_family::part", or
@@ -96,7 +90,6 @@
         model,
         inputs,
         sequence,
-        user_quantization_samples=quantization_samples,
     )

     # Get the state of the model from the cache if a valid build is available
@@ -109,7 +102,6 @@
         monitor=monitor_setting,
         model=model_locked,
         inputs=inputs_locked,
-        quantization_samples=quantization_samples,
     )

     # Return a cached build if possible, otherwise prepare the model State for
@@ -124,8 +116,6 @@

             return state

-    state.quantization_samples = quantization_samples
-
     sequence_locked.show_monitor(config, state.monitor)

     state = sequence_locked.launch(state)
diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py
index a224d1a..b55ed8b 100644
--- a/src/turnkeyml/common/build.py
+++ b/src/turnkeyml/common/build.py
@@ -259,8 +259,6 @@ class State:
     # Results of a successful build
     results: Any = None

-    quantization_samples: Optional[Collection] = None
-
     def __post_init__(self):
         if self.uid is None:
             self.uid = unique_id()
@@ -309,16 +307,6 @@ def prepare_state_dict(self) -> Dict:
         state_dict["model_type"] = self.model_type.value
         state_dict["build_status"] = self.build_status.value

-        # During actual execution, quantization_samples in the state
-        # stores the actual quantization samples.
-        # However, we do not save quantization samples
-        # Instead, we save a boolean to indicate whether the model
-        # stored has been quantized by some samples.
-        if self.quantization_samples:
-            state_dict["quantization_samples"] = True
-        else:
-            state_dict["quantization_samples"] = False
-
         return state_dict

     def save_yaml(self, state_dict: Dict):
@@ -524,7 +512,7 @@ def get_system_info():
     # Get OS Version
     try:
         info_dict["OS Version"] = platform.platform()
-    except Exception as e: # pylint: disable=broad-except
+    except Exception as e:  # pylint: disable=broad-except
         info_dict["Error OS Version"] = str(e)

     if os_type == "Windows":
@@ -537,7 +525,7 @@
                 .strip()
             )
             info_dict["Processor"] = proc_info
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Processor"] = str(e)

         # Get OEM System Information
@@ -549,7 +537,7 @@
                 .strip()
             )
             info_dict["OEM System"] = oem_info
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error OEM System"] = str(e)

         # Get Physical Memory in GB
@@ -564,7 +552,7 @@
             )
             mem_info_gb = round(int(mem_info_bytes) / (1024**3), 2)
             info_dict["Physical Memory"] = f"{mem_info_gb} GB"
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Physical Memory"] = str(e)

     elif os_type == "Linux":
@@ -586,7 +574,7 @@
                     .strip()
                 )
                 info_dict["OEM System"] = oem_info
-            except Exception as e: # pylint: disable=broad-except
+            except Exception as e:  # pylint: disable=broad-except
                 info_dict["Error OEM System (WSL)"] = str(e)

         else:
@@ -602,7 +590,7 @@
                     .replace("\n", " ")
                 )
                 info_dict["OEM System"] = oem_info
-            except Exception as e: # pylint: disable=broad-except
+            except Exception as e:  # pylint: disable=broad-except
                 info_dict["Error OEM System"] = str(e)

         # Get CPU Information
@@ -612,7 +600,7 @@
                 if "Model name:" in line:
                     info_dict["Processor"] = line.split(":")[1].strip()
                     break
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Processor"] = str(e)

         # Get Memory Information
@@ -625,7 +613,7 @@
             )
             mem_info_gb = round(int(mem_info) / 1024, 2)
             info_dict["Memory Info"] = f"{mem_info_gb} GB"
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Memory Info"] = str(e)

     else:
@@ -635,9 +623,10 @@
     try:
         installed_packages = pkg_resources.working_set
         info_dict["Python Packages"] = [
-            f"{i.key}=={i.version}" for i in installed_packages # pylint: disable=not-an-iterable
+            f"{i.key}=={i.version}"
+            for i in installed_packages  # pylint: disable=not-an-iterable
         ]
-    except Exception as e: # pylint: disable=broad-except
+    except Exception as e:  # pylint: disable=broad-except
         info_dict["Error Python Packages"] = str(e)

     return info_dict
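
Note: with this patch, post-training quantization is no longer part of any build sequence. Callers that still need an int8 model can quantize the ONNX file that build_model() produces as a separate, standalone step with onnxruntime. The sketch below mirrors the settings of the deleted quantization_helpers.quantize() (QUInt8 activations, QInt8 weights, quantizing Conv/MatMul/Relu, asymmetric activations, symmetric weights); it is a minimal sketch, not part of this patch, and the model path, input name, calibration shape, and SampleReader helper are hypothetical placeholders.

    # Standalone static quantization, mirroring the deleted helper's settings.
    import numpy as np
    from onnxruntime.quantization import (
        CalibrationDataReader,
        QuantType,
        quantize_static,
    )


    class SampleReader(CalibrationDataReader):
        """Feeds one {input_name: array} dict to quantize_static per get_next() call."""

        def __init__(self, samples):
            self._iter = iter(samples)

        def get_next(self):
            # quantize_static calls this until it returns None
            return next(self._iter, None)


    # Hypothetical calibration data: replace "input" and the shape with the
    # model's real input name(s) and shape(s).
    samples = [
        {"input": np.random.rand(1, 3, 224, 224).astype(np.float32)}
        for _ in range(16)
    ]

    quantize_static(
        model_input="model.onnx",  # placeholder: ONNX file produced by build_model()
        model_output="model_quantized_int8.onnx",
        calibration_data_reader=SampleReader(samples),
        activation_type=QuantType.QUInt8,
        weight_type=QuantType.QInt8,
        op_types_to_quantize=["Conv", "MatMul", "Relu"],
        extra_options={"ActivationSymmetric": False, "WeightSymmetric": True},
    )

Because this runs entirely outside the build cache, it also sidesteps the rebuild-policy checks that the deleted load_or_make_state() logic needed in order to detect a quantization mismatch between builds.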