From 03d1f17eb12e19e253b52aa0a6769d14480b7360 Mon Sep 17 00:00:00 2001 From: bruntib Date: Tue, 24 Oct 2023 01:05:25 +0200 Subject: [PATCH] [feat] Introduce review status config file The review statuses can be provided by a yaml config file. This file can be used for setting the review status of reports in a given directory or a checker name. --- analyzer/codechecker_analyzer/cmd/analyze.py | 21 ++++ analyzer/codechecker_analyzer/cmd/check.py | 10 ++ analyzer/codechecker_analyzer/cmd/parse.py | 4 + codechecker_common/review_status_handler.py | 96 ++++++++++++++- docs/analyzer/user_guide.md | 115 ++++++++++++------ web/client/codechecker_client/cmd/store.py | 4 + .../codechecker_client/cmd_line_client.py | 10 +- .../codechecker_server/api/mass_store_run.py | 27 ++-- 8 files changed, 233 insertions(+), 54 deletions(-) diff --git a/analyzer/codechecker_analyzer/cmd/analyze.py b/analyzer/codechecker_analyzer/cmd/analyze.py index 16303980c7..7c2ae3e8e5 100644 --- a/analyzer/codechecker_analyzer/cmd/analyze.py +++ b/analyzer/codechecker_analyzer/cmd/analyze.py @@ -193,6 +193,14 @@ def add_arguments_to_parser(parser): "Example: '/path/to/main.cpp', 'lib/*.cpp', " "*/test*'.") + parser.add_argument('--review-status-config', + dest="review_status_config", + required=False, + default=argparse.SUPPRESS, + help="Path of review_status.yaml config file which " + "contains review status settings assigned to " + "specific directories, checkers, bug hashes.") + parser.add_argument('-o', '--output', dest="output_path", required=True, @@ -846,6 +854,18 @@ def __update_skip_file(args): shutil.copyfile(args.skipfile, skip_file_to_send) +def __update_review_status_config(args): + rs_config_to_send = os.path.join(args.output_path, 'review_status.yaml') + + if os.path.exists(rs_config_to_send): + os.remove(rs_config_to_send) + + if 'review_status_config' in args: + LOG.debug("Copying review status config file %s to %s", + args.review_status_config, rs_config_to_send) + shutil.copyfile(args.review_status_config, rs_config_to_send) + + def __cleanup_metadata(metadata_prev, metadata): """ Cleanup metadata. @@ -1091,6 +1111,7 @@ def main(args): compile_cmd_count) __update_skip_file(args) + __update_review_status_config(args) LOG.debug("Cleanup metadata file started.") __cleanup_metadata(metadata_prev, metadata) diff --git a/analyzer/codechecker_analyzer/cmd/check.py b/analyzer/codechecker_analyzer/cmd/check.py index e116e041e0..3c70c212ee 100644 --- a/analyzer/codechecker_analyzer/cmd/check.py +++ b/analyzer/codechecker_analyzer/cmd/check.py @@ -400,6 +400,15 @@ def add_arguments_to_parser(parser): "the analysis is considered as a failed " "one.") + analyzer_opts.add_argument('--review-status-config', + dest="review_status_config", + required=False, + default=argparse.SUPPRESS, + help="Path of review_status.yaml config file " + "which contains review status settings " + "assigned to specific directories, " + "checkers, bug hashes.") + clang_has_z3 = analyzer_types.is_z3_capable() if clang_has_z3: @@ -872,6 +881,7 @@ def __update_if_key_exists(source, target, key): 'disable_all', 'ordered_checkers', # --enable and --disable. 'timeout', + 'review_status_config', 'compile_uniqueing', 'report_hash', 'enable_z3', diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py index 458be06e2b..abc11b3972 100644 --- a/analyzer/codechecker_analyzer/cmd/parse.py +++ b/analyzer/codechecker_analyzer/cmd/parse.py @@ -394,6 +394,10 @@ def get_output_file_path(default_file_name: str) -> Optional[str]: context.checker_labels) for dir_path, file_paths in report_file.analyzer_result_files(args.input): + review_status_cfg = os.path.join(dir_path, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + review_status_handler.set_review_status_config(review_status_cfg) + metadata = get_metadata(dir_path) if metadata and 'files' in args: diff --git a/codechecker_common/review_status_handler.py b/codechecker_common/review_status_handler.py index df77476c46..5a52ecbcb1 100644 --- a/codechecker_common/review_status_handler.py +++ b/codechecker_common/review_status_handler.py @@ -7,8 +7,10 @@ # ------------------------------------------------------------------------- +import fnmatch import os from typing import List, Optional +import yaml from codechecker_report_converter.report import Report, SourceReviewStatus from codechecker_common.logger import get_logger @@ -33,13 +35,16 @@ class ReviewStatusHandler: 3. Review status rule: A mapping between bug hashes and review statuses set in the GUI. - TODO: Option 2. will be handled in a future commit. TODO: Option 3. should also be covered by this handler class. """ def __init__(self, source_root=''): + """ + TODO: What happens if multiple report directories are stored? + """ self.__source_root = source_root self.__source_comment_warnings = [] self.__source_commets = {} + self.__data = None def __parse_codechecker_review_comment( self, @@ -63,6 +68,28 @@ def __parse_codechecker_review_comment( return src_comment_data + def __validate_review_status_yaml_data(self): + """ + This function validates the data read from review_status.yaml file and + raises ValueError with the description of the error if the format is + invalid. + """ + if not isinstance(self.__data, list): + raise ValueError( + f"{self.__review_status_yaml} should be a list of review " + "status descriptor objects.") + + for item in self.__data: + if not isinstance(item, dict): + raise ValueError( + f"Format error in {self.__review_status_yaml}: {item} " + "should be a review status descriptor object.") + + if 'review_status' not in item: + raise ValueError( + f"Format error in {self.__review_status_yaml}: " + f"'review_status' field is missing from {item}.") + def get_review_status(self, report: Report) -> SourceReviewStatus: """ Return the review status of the report based on source code comments. @@ -71,13 +98,70 @@ def get_review_status(self, report: Report) -> SourceReviewStatus: review status returns. This function throws ValueError if the review status is ambiguous (see get_review_status_from_source()). """ - rs_from_source = self.get_review_status_from_source(report) + # 1. Check if the report has in-source review status setting. + review_status = self.get_review_status_from_source(report) - if rs_from_source: - return rs_from_source + if review_status: + return review_status + # 2. Check if the report has review status setting in the yaml config. + if self.__data: + review_status = self.get_review_status_from_config(report) + if review_status: + return review_status + + # 3. Return "unreviewed" otherwise. return SourceReviewStatus(bug_hash=report.report_hash) + def set_review_status_config(self, config_file): + """ + Set the location of the review_status.yaml config file. + + When the content of multiple report directories are parsed then they + may contain separate config files. These need to be given before + parsing each report folders. + """ + self.__review_status_yaml = config_file + + with open(self.__review_status_yaml, + encoding='utf-8', errors='ignore') as f: + # TODO: Validate format. + # - Can filepath be a list? + # TODO: May throw yaml.scanner.ScannerError. + self.__data = yaml.safe_load(f) + + self.__validate_review_status_yaml_data() + + def get_review_status_from_config( + self, + report: Report + ) -> Optional[SourceReviewStatus]: + """ + Return the review status of the given report based on the config file + set by set_review_status_config(). If not config file set, or no + setting matches the report then None returns. + """ + assert self.__data is not None, \ + "Review status config file has to be set with " \ + "set_review_status_config()." + + # TODO: Document "in_source". + for item in self.__data: + if 'filepath' in item and not fnmatch.fnmatch( + report.file.original_path, item['filepath']): + continue + if 'checker' in item and \ + report.checker_name != item['checker']: + continue + + return SourceReviewStatus( + status=item['review_status'], + message=item['message'] \ + .encode(encoding='utf-8', errors='ignore') \ + if 'message' in item else b'', + bug_hash=report.report_hash, + in_source=True) + def get_review_status_from_source( self, report: Report @@ -92,6 +176,8 @@ def get_review_status_from_source( contains information about the problem in a string. - If the soure file changed (which means that the source comments may have replaced, changed or removed) then None returns. + - If no review status belongs to the report in the source code, then + return None. TODO: This function either returns a SourceReviewStatus, or raises an exception or returns None. This is too many things that a caller needs to handle. The reason is that according to the current behaviors, @@ -145,4 +231,4 @@ def source_comment(self, report: Report) -> Optional[SourceCodeComment]: This ReviewStatusHandler class is caching source comments so they are read and parsed only once for each report. """ - return self.__source_commets.get(report) + return self.__source_commets.get(report) \ No newline at end of file diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md index 96c814995a..a2b477bb64 100644 --- a/docs/analyzer/user_guide.md +++ b/docs/analyzer/user_guide.md @@ -47,15 +47,17 @@ - [`checkers`](#checkers) - [`analyzers`](#analyzers) - [Configuring Clang version](#configuring-clang-version) - - [Suppressing False positives (source code comments for review status)](#suppressing-false-positives-source-code-comments-for-review-status) - - [Supported formats](#supported-formats) - - [Change review status of a specific checker result](#change-review-status-of-a-specific-checker-result) - - [Change review status of a specific checker result by using a substring of the checker name](#change-review-status-of-a-specific-checker-result-by-using-a-substring-of-the-checker-name) - - [Change review status of all checker result](#change-review-status-of-all-checker-result) - - [Change review status of all checker result with C style comment](#change-review-status-of-all-checker-result-with-c-style-comment) - - [Multi line comments](#multi-line-comments) - - [Multi line C style comments](#multi-line-c-style-comments) - - [Change review status for multiple checker results in the same line](#change-review-status-for-multiple-checker-results-in-the-same-line) + - [Review status handling](#review-status-handling) + - [Setting with source code comments](#setting-with-source-code-comments) + - [Supported formats](#supported-formats) + - [Change review status of a specific checker result](#change-review-status-of-a-specific-checker-result) + - [Change review status of a specific checker result by using a substring of the checker name](#change-review-status-of-a-specific-checker-result-by-using-a-substring-of-the-checker-name) + - [Change review status of all checker results](#change-review-status-of-all-checker-results) + - [Change review status of all checker results with C style comment](#change-review-status-of-all-checker-results-with-c-style-comment) + - [Multi line comments](#multi-line-comments) + - [Multi line C style comments](#multi-line-c-style-comments) + - [Change review status for multiple checker results in the same line](#change-review-status-for-multiple-checker-results-in-the-same-line) + - [Setting with config file](#setting-with-config-file) ## Overview CodeChecker command line tooling provides sub-commands to perform **static code analysis** and to store reports into a **web-based storage**. @@ -68,7 +70,7 @@ The analysis related use-cases can be fully performed without a web-server. * [Execuing Static Analysis](#analyze) * [Showing analysis results in the command line](#parse) * [Applying code fixes](#fixit) -* [Suppressing false positives with source-code comments](#suppressing-false-positives-source-code-comments-for-review-status) +* [Suppressing false positives with source-code comments](#setting-with-source-code-comments) ## Quick Analysis @@ -2417,41 +2419,53 @@ Clang based tools search by default for in a path relative to the tool binary. `$(dirname /path/to/tool)/../lib/clang/8.0.0/include` -## Suppressing False positives (source code comments for review status) +## Review status handling + +Users can categorize CodeChecker reports by setting their _review status_. A +report can be _false positive_ or it can also indicate a _confirmed_ bug. +Sometimes a bug is _intentional_ in a test code. Developers can review the +reports and assign such a status accordingly with a short comment message. A +report is _unreviewed_ by default. + +The review status is not only a stateless indication of a report, but takes +effect in situations when CodeChecker needs to determine the set of outstanding +bugs. For example, a report with _intentional_ or _false positive_ detection +status is not considered outstanding, because the analyzed project's developers +don't need to worry about them. + +### Setting with source code comments Source code comments can be used in the source files to change the review -status of a specific or all checker results found in a particular line of code. -Source code comment should be above the line where the defect was found, and +status of a specific report found in a particular line of code. +Source code comments should be above the line where the defect was found, and __no__ empty lines are allowed between the line with the bug and the source code comment. -Comment lines staring with `//` or C style `/**/` comments are supported. +Comment lines staring with `//` or C style `/**/` comments are both supported. Watch out for the comment format! -### Supported formats +#### Supported formats The source code comment has the following format: -```sh -// codechecker comment type [checker name] comment +```c +// codechecker_ [] comment ``` Multiple source code comment types are allowed: - * `codechecker_suppress` - * `codechecker_false_positive` - * `codechecker_intentional` - * `codechecker_confirmed` - -Source code comment change the `review status` of a bug in the following form: - - * `codechecker_suppress` and `codechecker_false_positive` to `False positive` - * `codechecker_intentional` to `Intentional` - * `codechecker_confirmed` to `Confirmed`. - -Note: `codechecker_suppress` does the same as `codechecker_false_positive`. + * `codechecker_suppress`: Sets the review status to _false positive_. These + reports are not considered outstanding. The limitations of analyzers may + cause false positive reports, these can be categorized by this status value. + * `codechecker_false_positive`: Same as `codechecker_suppress`. + * `codechecker_intentional`: Sets the review status to _intentional_. These + reports are not considered outstanding. For example a bug may be intentional + in a test code. + * `codechecker_confirmed`: Sets the review status to _confirmed_. This is an + indication for developers to deal with this report. Such a report is + considered outstanding. You can read more about review status [here](https://github.com/Ericsson/codechecker/blob/master/www/userguide/userguide.md#userguide-review-status) -### Change review status of a specific checker result +#### Change review status of a specific checker result ```cpp void test() { int x; @@ -2460,7 +2474,7 @@ void test() { } ``` -### Change review status of a specific checker result by using a substring of the checker name +#### Change review status of a specific checker result by using a substring of the checker name There is no need to specify the whole checker name in the source code comment like `deadcode.DeadStores`, because it will not be resilient to package name changes. You are able to specify only a substring of the checker name for the @@ -2474,7 +2488,7 @@ void test() { ``` -### Change review status of all checker result +#### Change review status of all checker results ```cpp void test() { int x; @@ -2483,7 +2497,7 @@ void test() { } ``` -### Change review status of all checker result with C style comment +#### Change review status of all checker results with C style comment ```cpp void test() { int x; @@ -2492,7 +2506,7 @@ void test() { } ``` -### Multi line comments +#### Multi line comments ```cpp void test() { int x; @@ -2505,7 +2519,7 @@ void test() { } ``` -### Multi line C style comments +#### Multi line C style comments ```cpp void test() { int x; @@ -2532,7 +2546,7 @@ void test() { } ``` -### Change review status for multiple checker results in the same line +#### Change review status for multiple checker results in the same line You can change multiple checker reports with a single source code comment: ```cpp @@ -2543,7 +2557,7 @@ void test() { ``` The limitation of this format is that you can't use separate status or message -for checkers. To solve this problem you can use one of the following format: +for checkers. To solve this problem you can use one of the following formats: ```cpp void test_simple() { @@ -2577,3 +2591,30 @@ void testError2() { int x = 1 / 0; } ``` + +### Setting with config file + +Review status can be configured by a config file in YAML format. This config +file has to represent a list of review status settings: + +```yaml +- filepath: /path/to/project/test/** + checker: core.DivideZero + message: Division by zero in test files is automatically intentional. + review_status: intentional +- filepath: /path/to/project/important/module/** + message: All reports in this module should be investigated. + review_status: confirmed +``` + +The fields of a review status settings are: + +- `filepath` (optional): A glob to a path where the given review status is + applied. +- `checker` (optional): Set the review status for only these checkers' reports. +- `message` (optional): A comment message that describes the reason of the + setting. +- `review_status`: The review status to set. + +If neither `filepath` nor `checker` is provided, then that setting is not +applied on any report. diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py index ee6ecd92c2..5ec6671f89 100644 --- a/web/client/codechecker_client/cmd/store.py +++ b/web/client/codechecker_client/cmd/store.py @@ -439,6 +439,10 @@ def assemble_zip(inputs, zip_file, client, checker_labels: CheckerLabels): files_to_compress.add(skip_file_path) + review_status_file_path = os.path.join(dir_path, 'review_status.yaml') + if os.path.exists(review_status_file_path): + files_to_compress.add(review_status_file_path) + LOG.debug("Processing report files ...") # Currently ProcessPoolExecutor fails completely in windows. diff --git a/web/client/codechecker_client/cmd_line_client.py b/web/client/codechecker_client/cmd_line_client.py index e18f42d90f..5efd072b07 100644 --- a/web/client/codechecker_client/cmd_line_client.py +++ b/web/client/codechecker_client/cmd_line_client.py @@ -201,10 +201,16 @@ def get_report_dir_results( Absolute paths are expected to the given report directories. """ all_reports = [] - review_status_handler = ReviewStatusHandler() processed_path_hashes = set() - for _, file_paths in report_file.analyzer_result_files(report_dirs): + for report_dir, file_paths in \ + report_file.analyzer_result_files(report_dirs): + + review_status_handler = ReviewStatusHandler() + review_status_cfg = os.path.join(report_dir, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + review_status_handler.set_review_status_config(review_status_cfg) + for file_path in file_paths: # Get reports. reports = report_file.get_reports(file_path, checker_labels) diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 9c846bf0de..0766f58828 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -892,19 +892,20 @@ def get_missing_file_ids(report: Report) -> List[str]: analyzer_name = mip.checker_to_analyzer.get( report.checker_name, report.analyzer_name) - rs_from_source = SourceReviewStatus() + review_status = SourceReviewStatus() + try: - rs_from_source = \ - review_status_handler.get_review_status(report) - rs_from_source.author = self.user_name - rs_from_source.date = run_history_time + review_status = review_status_handler.get_review_status(report) except ValueError as err: self.__wrong_src_code_comments.append(str(err)) + review_status.author = self.user_name + review_status.date = run_history_time + # False positive and intentional reports are considered as closed # reports which is indicated with non-null "fixed_at" date. fixed_at = None - if rs_from_source.status in ['false_positive', 'intentional']: + if review_status.status in ['false_positive', 'intentional']: # Keep in mind that now this is not handling review status # rules, only review status source code comments if old_report and old_report.review_status in \ @@ -915,11 +916,11 @@ def get_missing_file_ids(report: Report) -> List[str]: report_id = self.__add_report( session, run_id, report, file_path_to_id, - rs_from_source, detection_status, detected_at, + review_status, detection_status, detected_at, run_history_time, analysis_info, analyzer_name, fixed_at) self.__new_report_hashes[report.report_hash] = \ - rs_from_source.status + review_status.status self.__already_added_report_hashes.add(report_path_hash) LOG.debug("Storing report done. ID=%d", report_id) @@ -1022,13 +1023,19 @@ def get_skip_handler( # Processing analyzer result files. processed_result_file_count = 0 - review_status_handler = ReviewStatusHandler(source_root) - for root_dir_path, _, report_file_paths in os.walk(report_dir): LOG.debug("Get reports from '%s' directory", root_dir_path) skip_handler = get_skip_handler(root_dir_path) + review_status_handler = ReviewStatusHandler(source_root) + + review_status_cfg = \ + os.path.join(root_dir_path, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + review_status_handler.set_review_status_config( + review_status_cfg) + mip = self.__mips[root_dir_path] enabled_checkers.update(mip.enabled_checkers) disabled_checkers.update(mip.disabled_checkers)