diff --git a/constraints.txt b/constraints.txt
index 0cc8f482d53..6b05879d340 100644
--- a/constraints.txt
+++ b/constraints.txt
@@ -20,3 +20,4 @@ pytest-mock==3.12.0
 pytest-dependency==0.6.0
 pytest-ordering==0.6
 pytest-xdist==3.5.0
+pytest-forked==1.6.0
diff --git a/tests/post_training/README.md b/tests/post_training/README.md
index 975637fc79d..49a16febd5f 100644
--- a/tests/post_training/README.md
+++ b/tests/post_training/README.md
@@ -48,6 +48,9 @@ It's possible to run a suite of tests for the specific compression algorithm onl
 For that append `::test_weight_compression` or `::test_ptq_quantization` to the `tests/post_training/test_quantize_conformance.py`.
 For instance:

+> [!WARNING]
+> It is recommended to run the tests with a specific test function specified; running all test functions at once is not a validated scenario.
+
 ```bash
 NUM_VAL_THREADS=8 pytest --data=<path_to_datasets> --output=./tmp tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```
@@ -62,65 +65,69 @@ Additional arguments:
 - `--subset-size=N` to force subset_size of calibration dataset
 - `--batch-size=N` to use batch_size for calibration. Some of the models do not support --batch-size > 1. For such models, please, use --batch-size=1.
 - `--benchmark` to collect throughput statistics, add `FPS` column to result.csv
-- `--extra-columns` to add additional columns to reports.csv:
-  - `Stat. collection time` - time of statistic collection
-  - `Bias correction time` - time of bias correction
-  - `Validation time` - time of validation
+- `--extra-columns` to add additional columns to reports.csv, e.g. the time spent by each algorithm
+- `--memory-monitor` to measure memory usage with MemoryMonitor from tools/memory_monitor.py

 ### Examples

 Run for only OV backend:

 ```bash
-pytest --data=<path_to_datasets> -k backend_OV tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> -k backend_OV tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Run for only one model:

 ```bash
-pytest --data=<path_to_datasets> -k timm/crossvit_9_240 tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> -k timm/crossvit_9_240 tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Run for only one model for OV backend:

 ```bash
-pytest --data=<path_to_datasets> -k timm/crossvit_9_240_backend_OV tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> -k timm/crossvit_9_240_backend_OV tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Only dump models:

 ```bash
-pytest --data=<path_to_datasets> --no-eval tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --no-eval tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Fast dump models with `subset_size=1` for all models:

 ```bash
-pytest --data=<path_to_datasets> --no-eval --subset-size 1 tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --no-eval --subset-size 1 tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Run test with collection of throughput statistics:

 ```bash
-pytest --data=<path_to_datasets> --benchmark tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --benchmark tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Fast collection of throughput statistics:

 ```bash
-pytest --data=<path_to_datasets> --benchmark --no-eval --subset-size 1 tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --benchmark --no-eval --subset-size 1 tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Run test with additional columns:

 ```bash
-pytest --data=<path_to_datasets> --extra-columns tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --extra-columns tests/post_training/test_quantize_conformance.py::test_weight_compression
 ```

 Run test with calibration dataset having batch-size=10 for all models:

 ```bash
-pytest --data=<path_to_datasets> --batch-size 10 tests/post_training/test_quantize_conformance.py
+pytest --data=<path_to_datasets> --batch-size 10 tests/post_training/test_quantize_conformance.py::test_weight_compression
+```
+
+Run the tests as in the benchmark jobs:
+
+```bash
+pytest --data=<path_to_datasets> --forked --no-eval --subset-size 300 --batch-size 1 --benchmark --extra-columns --memory-monitor tests/post_training/test_quantize_conformance.py::test_ptq_quantization
 ```

 ## Reference data
diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py
index 426870c9e97..3657848063c 100644
--- a/tests/post_training/pipelines/base.py
+++ b/tests/post_training/pipelines/base.py
@@ -166,6 +166,14 @@ def format_memory_usage(memory):
         return int(memory)

     def get_result_dict(self):
+        ram_data = {}
+        if self.compression_memory_usage_rss is None and self.compression_memory_usage_system is None:
+            ram_data["RAM MiB"] = self.format_memory_usage(self.compression_memory_usage)
+        if self.compression_memory_usage_rss is not None:
+            ram_data["RAM MiB"] = self.format_memory_usage(self.compression_memory_usage_rss)
+        if self.compression_memory_usage_system is not None:
+            ram_data["RAM MiB System"] = self.format_memory_usage(self.compression_memory_usage_system)
+
         result = {
             "Model": self.model,
             "Backend": self.backend.value if self.backend else None,
@@ -179,16 +187,11 @@ def get_result_dict(self):
             **self.stats_from_output.get_stats(),
             "Total time": self.format_time(self.time_total),
             "FPS": self.fps,
+            **ram_data,
             "Status": self.status[:LIMIT_LENGTH_OF_STATUS] if self.status is not None else None,
+            "Build url": os.environ.get("BUILD_URL", ""),
         }

-        if self.compression_memory_usage_rss is None and self.compression_memory_usage_system is None:
-            result["RAM MiB"] = self.format_memory_usage(self.compression_memory_usage)
-        if self.compression_memory_usage_rss is not None:
-            result["RAM MiB RSS"] = self.format_memory_usage(self.compression_memory_usage_rss)
-        if self.compression_memory_usage_system is not None:
-            result["RAM MiB System"] = self.format_memory_usage(self.compression_memory_usage_system)
-
         return result

@@ -449,14 +452,18 @@ def run_bench(self) -> None:
         """
         if not self.run_benchmark_app:
             return
-        runner = Command(f"benchmark_app -m {self.path_compressed_ir}")
-        runner.run(stdout=False)
-        cmd_output = " ".join(runner.output)
-
-        match = re.search(r"Throughput\: (.+?) FPS", cmd_output)
-        if match is not None:
-            fps = match.group(1)
-            self.run_info.fps = float(fps)
+
+        try:
+            runner = Command(f"benchmark_app -m {self.path_compressed_ir}")
+            runner.run(stdout=False)
+            cmd_output = " ".join(runner.output)
+
+            match = re.search(r"Throughput\: (.+?) FPS", cmd_output)
+            if match is not None:
+                fps = match.group(1)
+                self.run_info.fps = float(fps)
+        except Exception as e:
+            print(e)

     def cleanup_cache(self):
         """
diff --git a/tests/post_training/pipelines/gpt.py b/tests/post_training/pipelines/gpt.py
index 4ce16b1e723..1d850605eb4 100644
--- a/tests/post_training/pipelines/gpt.py
+++ b/tests/post_training/pipelines/gpt.py
@@ -64,10 +64,11 @@ def transform_func(data):
         else:

             def transform_func(data):
+                ids = np.expand_dims(data["input_ids"], axis=0)
                 inputs = {
-                    "input_ids": np.expand_dims(data["input_ids"], axis=0),
+                    "input_ids": ids,
                     "attention_mask": np.expand_dims(data["attention_mask"], axis=0),
-                    "position_ids": np.ones((1, 128), dtype=np.int64),
+                    "position_ids": np.ones(ids.shape, dtype=np.int64),
                     "beam_idx": np.zeros((1,), dtype=np.int64),
                 }
                 return inputs
diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt
index cd636a9a8dc..9a972694614 100644
--- a/tests/post_training/requirements.txt
+++ b/tests/post_training/requirements.txt
@@ -5,6 +5,7 @@ onnx
 onnxruntime
 openvino
 pytest
+pytest-forked

 librosa==0.10.0
 memory-profiler==0.61.0
diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py
index 34addb8a2b5..d5217fc0762 100644
--- a/tests/post_training/test_quantize_conformance.py
+++ b/tests/post_training/test_quantize_conformance.py
@@ -139,7 +139,7 @@ def fixture_wc_reference_data():


 @pytest.fixture(scope="session", name="ptq_result_data")
-def fixture_ptq_report_data(output_dir, run_benchmark_app):
+def fixture_ptq_report_data(output_dir, run_benchmark_app, pytestconfig):
     data: Dict[str, RunInfo] = {}

     yield data
@@ -151,11 +151,18 @@ def fixture_ptq_report_data(output_dir, run_benchmark_app):
             df = df.drop(columns=["FPS"])

         output_dir.mkdir(parents=True, exist_ok=True)
-        df.to_csv(output_dir / "results.csv", index=False)
+        output_file = output_dir / "results.csv"
+
+        if pytestconfig.getoption("forked") and output_file.exists():
+            # With --forked each test runs in a separate process (used in the
+            # post_training_performance jobs), so append to the existing results file
+            df.to_csv(output_file, index=False, mode="a", header=False)
+        else:
+            df.to_csv(output_file, index=False)


 @pytest.fixture(scope="session", name="wc_result_data")
-def fixture_wc_report_data(output_dir):
+def fixture_wc_report_data(output_dir, run_benchmark_app, pytestconfig):
     data: Dict[str, RunInfo] = {}

     yield data
@@ -163,10 +170,20 @@ def fixture_wc_report_data(output_dir):
     if data:
         test_results = OrderedDict(sorted(data.items()))
         df = pd.DataFrame(v.get_result_dict() for v in test_results.values())
-        df = df.drop(columns=["FPS", "Num FQ"])
+        if not run_benchmark_app:
+            df = df.drop(columns=["FPS"])
+
+        df = df.drop(columns=["Num FQ"])

         output_dir.mkdir(parents=True, exist_ok=True)
-        df.to_csv(output_dir / "results.csv", index=False)
+        output_file = output_dir / "results.csv"
+
+        if pytestconfig.getoption("forked") and output_file.exists():
+            # With --forked each test runs in a separate process (used in the
+            # post_training_performance jobs), so append to the existing results file
+            df.to_csv(output_file, index=False, mode="a", header=False)
+        else:
+            df.to_csv(output_file, index=False)


 def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size):
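A note on the `--forked` branches added to the report fixtures above: with `pytest-forked` each test runs in its own process, so the session-level report fixture can flush results more than once per run, and later writes have to append to `results.csv` rather than overwrite it. The sketch below is a minimal, self-contained illustration of that append-or-create pattern; the `dump_results` helper and its `forked` argument are illustrative only and not part of the patch.

```python
from pathlib import Path

import pandas as pd


def dump_results(df: pd.DataFrame, output_file: Path, forked: bool) -> None:
    """Illustrative helper mirroring the fixture logic: create or append a results CSV."""
    output_file.parent.mkdir(parents=True, exist_ok=True)
    if forked and output_file.exists():
        # Another process already created the file: append rows without repeating the header.
        df.to_csv(output_file, index=False, mode="a", header=False)
    else:
        # First writer (or a non-forked run): create the file with a header row.
        df.to_csv(output_file, index=False)
```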