From 40f35fd78f0b1f58ab8028c23d0c3cc06ffaa3db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Alix?= Date: Mon, 6 Jan 2025 08:46:48 +0100 Subject: [PATCH] PortAddonPullRequest: reduce noise by filtering PRs to port PRs that are updating both the analyzed module + other ones (here called 'satellite') were listed by oca-port, even if the commits related to the analyzed module were already ported, because contributions on these satellite modules were not (yet) ported. In the context of the analyzed module, this brings noise and such PR should not be proposed to be ported, but only mentioned. There are now two kinds of detected PRs: - the ones that are updating the current analyzed module: they are proposed to the user as "to be ported" as usual - the ones that are updating only satellite modules/files: - the ones where only satellite modules/files remain to be ported: such modules are now listed (in verbose mode), and the user invited to run oca-port on them. As such PRs with commits updating only satellite modules are not proposed anymore to the user, some modules will now be considered as "Fully ported" instead of still having some "Commits to port". --- oca_port/port_addon_pr.py | 189 +++++++++++++++++++++++++++++-------- oca_port/tests/test_app.py | 2 +- oca_port/utils/git.py | 15 ++- 3 files changed, 165 insertions(+), 41 deletions(-) diff --git a/oca_port/port_addon_pr.py b/oca_port/port_addon_pr.py index ccf0e0b..e4e7917 100644 --- a/oca_port/port_addon_pr.py +++ b/oca_port/port_addon_pr.py @@ -54,9 +54,6 @@ "{to_branch}...{to_org}:{pr_branch}?expand=1&title={title}" ) -# Fake PR for commits w/o any PR (used as fallback) -FAKE_PR = g.PullRequest(*[""] * 6) - def path_to_skip(commit_path): """Return True if the commit path should not be ported.""" @@ -88,9 +85,12 @@ def run(self): "checking PRs to port..." ) branches_diff = BranchesDiff(self.app) - branches_diff.print_diff(self.app.verbose) + if branches_diff.commits_diff["addon"]: + branches_diff.print_diff(verbose=self.app.verbose) + if branches_diff.commits_diff["satellite"]: + branches_diff.print_satellite_diff(verbose=self.app.verbose) if self.app.non_interactive: - if branches_diff.commits_diff: + if branches_diff.commits_diff["addon"]: # If an output is defined we return the result in the expected format if self.app.output: self._results["results"] = branches_diff.serialized_diff @@ -119,11 +119,11 @@ def run(self): def _get_dest_branch_name(self, branches_diff): dest_branch_name = self.app.destination.branch # Define a destination branch if not set - if branches_diff.commits_diff and not dest_branch_name: + if branches_diff.commits_diff["addon"] and not dest_branch_name: commits_to_port = [ commit.hexsha for commit in itertools.chain.from_iterable( - branches_diff.commits_diff.values() + branches_diff.commits_diff["addon"].values() ) ] h = hashlib.shake_256("-".join(commits_to_port).encode()) @@ -142,7 +142,7 @@ def _port_pull_requests(self, branches_diff): wip = self._print_wip_session() dest_branch_name = self.app.destination.branch # Nothing to port - if not branches_diff.commits_diff or not dest_branch_name: + if not branches_diff.commits_diff["addon"] or not dest_branch_name: # Nothing to port while having WIP means the porting is done return wip # Check if destination branch exists, and create it if not @@ -194,11 +194,11 @@ def _port_pull_requests(self, branches_diff): dest_branch = g.Branch(self.app.repo, dest_branch_name) self.app.repo.heads[dest_branch.name].checkout() last_pr = ( - list(branches_diff.commits_diff.keys())[-1] - if branches_diff.commits_diff + list(branches_diff.commits_diff["addon"].keys())[-1] + if branches_diff.commits_diff["addon"] else None ) - for pr, commits in branches_diff.commits_diff.items(): + for pr, commits in branches_diff.commits_diff["addon"].items(): # Check if PR has been blacklisted in user's session if self._is_pr_blacklisted(pr): if self._confirm_pr_blacklisted(pr): @@ -221,7 +221,7 @@ def _port_pull_requests(self, branches_diff): ) msg = ( f"\t{bc.DIM}PR #{pr.number} has been" - if pr.number + if pr.number != "_" else "Orphaned commits have been" ) + f" automatically blacklisted{bc.ENDD}" self._print(msg) @@ -382,7 +382,7 @@ def _print_tips(self, pr_data): def _port_pull_request_commits(self, pr, commits, base_ref, branch): """Port commits of a Pull Request in a new branch.""" - if pr.number: + if pr.number != "_": self._print( f"- {bc.BOLD}{bc.OKCYAN}Port PR {pr.ref}{bc.END} " f"{bc.OKCYAN}{pr.title}{bc.ENDC}..." @@ -391,7 +391,7 @@ def _port_pull_request_commits(self, pr, commits, base_ref, branch): else: self._print(f"- {bc.BOLD}{bc.OKCYAN}Port commits w/o PR{bc.END}...") # Ask the user if he wants to port the PR (or orphaned commits) - if not click.confirm("\tPort it?" if pr.number else "\tPort them?"): + if not click.confirm("\tPort it?" if pr.number != "_" else "\tPort them?"): self._handle_pr_blacklist(pr) return False @@ -632,7 +632,7 @@ def __init__(self, app): def _serialize_diff(self, commits_diff): data = {} - for pr, commits in commits_diff.items(): + for pr, commits in commits_diff["addon"].items(): data[pr.number] = pr.to_dict() data[pr.number]["missing_commits"] = [commit.hexsha for commit in commits] return data @@ -686,8 +686,9 @@ def print_diff(self, verbose=False): lines_to_print = [] fake_pr = None i = 0 - for i, pr in enumerate(self.commits_diff, 1): - if pr.number: + key = "addon" + for i, pr in enumerate(self.commits_diff[key], 1): + if pr.number != "_": lines_to_print.append( f"{i}) {bc.BOLD}{bc.OKBLUE}{pr.ref}{bc.END} " f"{bc.OKBLUE}{pr.title}{bc.ENDC}:" @@ -699,26 +700,26 @@ def print_diff(self, verbose=False): if verbose: pr_paths = ", ".join([f"{bc.DIM}{path}{bc.ENDD}" for path in pr.paths]) lines_to_print.append(f"\t=> Updates: {pr_paths}") - if pr.number: + if pr.number != "_": pr_paths_not_ported = ", ".join( [f"{bc.OKBLUE}{path}{bc.ENDC}" for path in pr.paths_not_ported] ) lines_to_print.append(f"\t=> Not ported: {pr_paths_not_ported}") lines_to_print.append( - f"\t=> {bc.BOLD}{bc.OKBLUE}{len(self.commits_diff[pr])} " + f"\t=> {bc.BOLD}{bc.OKBLUE}{len(self.commits_diff[key][pr])} " f"commit(s){bc.END} not (fully) ported" ) - if pr.number: + if pr.number != "_": lines_to_print.append(f"\t=> {pr.url}") - if verbose or not pr.number: - for commit in self.commits_diff[pr]: + if verbose or pr.number == "_": + for commit in self.commits_diff[key][pr]: lines_to_print.append( f"\t\t{bc.DIM}{commit.hexsha[:8]} " f"{commit.summary}{bc.ENDD}" ) if fake_pr: # We have commits without PR, adapt the message i -= 1 - nb_commits = len(self.commits_diff[fake_pr]) + nb_commits = len(self.commits_diff[key][fake_pr]) message = ( f"{bc.BOLD}{bc.OKBLUE}{i} pull request(s){bc.END} " f"and {bc.BOLD}{bc.OKBLUE}{nb_commits} commit(s) w/o " @@ -733,23 +734,100 @@ def print_diff(self, verbose=False): f"{self.app.from_branch.ref()} to {self.app.to_branch.ref()}" ) lines_to_print.insert(0, message) - if self.commits_diff: + if self.commits_diff[key]: lines_to_print.insert(1, "") self._print("\n".join(lines_to_print)) + def print_satellite_diff(self, verbose=False): + nb_prs = len(self.commits_diff["satellite"]) + if not nb_prs: + return + self._print() + lines_to_print = [] + msg = ( + f"ℹ️ {nb_prs} other PRs related to {bc.OKBLUE}{self.app.addon}{bc.ENDC} " + "are also updating satellite modules/root files" + ) + if verbose: + msg += ":" + lines_to_print.append(msg) + paths_ported = [] + paths_not_ported = [] + pr_paths_not_ported = sorted( + set( + itertools.chain.from_iterable( + [pr.paths_not_ported for pr in self.commits_diff["satellite"]] + ) + ) + ) + for path in pr_paths_not_ported: + path_exists = g.check_path_exists( + self.app.repo, + self.app.to_branch.ref(), + path, + rootdir=self.app.addons_rootdir and self.app.addons_rootdir.name, + ) + if path_exists: + if verbose: + lines_to_print.append(f"\t{bc.OKGREEN}- {path}{bc.END}") + paths_ported.append(path) + else: + paths_not_ported.append(path) + # Print the list of related modules/root files + if verbose: + if paths_not_ported: + # Two cases: + # - if we have PRs that could update already migrated modules + # we list them (see above) while displaying only a counter for + # not yet migrated modules. + if paths_ported: + lines_to_print.append( + f"\t{bc.DIM}- +{len(paths_not_ported)} modules/root files " + f"not ported{bc.END}" + ) + # - if we get only PRs that could update non-migrated modules we + # do not display a counter but an exaustive list of these modules. + else: + for path in paths_not_ported: + lines_to_print.append(f"\t{bc.DIM}- {path}{bc.END}") + if paths_ported: + lines_to_print.append("Think about running oca-port on these modules.") + self._print("\n".join(lines_to_print)) + def get_commits_diff(self): """Returns the commits which do not exist in `to_branch`, grouped by their related Pull Request. - :return: a dict {PullRequest: {Commit: data, ...}, ...} + These PRs are then in turn grouped based on their impacted addons: + - if a PR is updating the analyzed module, it'll be put in 'addon' key + - if a PR is updating satellite module(s), it'll be put in 'satellite' key + + :return: a dict { + 'addon': {PullRequest: {Commit: data, ...}, ...}, + 'satellite': {PullRequest: {Commit: data, ...}, ...}, + } """ commits_by_pr = defaultdict(list) + fake_pr = g.PullRequest("_", *[""] * 5) + # 1st loop to collect original PRs and stack orphaned commits in a fake PR for commit in self.from_branch_path_commits: if commit in self.to_branch_all_commits: self.app.cache.mark_commit_as_ported(commit.hexsha) continue - # Get related Pull Request if any - pr = self._get_original_pr(commit) + # Get related Pull Request if any, + # or fallback on a fake PR to host orphaned commits + # This call has two effects: + # - put in cache original PRs (so the 2nd loop is faster) + # - stack orphaned commits in one fake PR + self._get_original_pr(commit, fallback_pr=fake_pr) + # 2nd loop to actually analyze the content of commits/PRs + for commit in self.from_branch_path_commits: + if commit in self.to_branch_all_commits: + self.app.cache.mark_commit_as_ported(commit.hexsha) + continue + # Get related Pull Request if any, + # or fallback on a fake PR to host orphaned commits + pr = self._get_original_pr(commit, fallback_pr=fake_pr) if pr: for pr_commit_sha in pr.commits: try: @@ -821,35 +899,53 @@ def get_commits_diff(self): break else: commits_by_pr[pr].append(pr_commit) - # No related PR: add the commit to the fake PR - else: - commits_by_pr[FAKE_PR].append(commit) # Sort PRs on the merge date (better to port them in the right order). # Do not return blacklisted PR. - sorted_commits_by_pr = {} + sorted_commits_by_pr = { + "addon": defaultdict(list), + "satellite": defaultdict(list), + } for pr in sorted(commits_by_pr, key=lambda pr: pr.merged_at or ""): + if self._is_pr_updating_addon(pr): + key = "addon" + else: + key = "satellite" blacklisted = self.app.storage.is_pr_blacklisted(pr.ref) if not blacklisted: # TODO: Backward compat for old tracking only by number blacklisted = self.app.storage.is_pr_blacklisted(pr.number) if blacklisted: msg = ( - f"{bc.DIM}PR #{pr.number}" if pr.number else "Orphaned commits" + f"{bc.DIM}PR #{pr.number}" + if pr.number != "_" + else "Orphaned commits" ) + f" blacklisted ({blacklisted}){bc.ENDD}" self._print(msg) continue - sorted_commits_by_pr[pr] = commits_by_pr[pr] + sorted_commits_by_pr[key][pr] = commits_by_pr[pr] return sorted_commits_by_pr - def _get_original_pr(self, commit: g.Commit): - """Return the original PR of a given commit.""" + def _is_pr_updating_addon(self, pr): + """Check if a PR still needs to update the analyzed addon.""" + for path in pr.paths_not_ported: + if path.endswith(self.app.addon): + return True + return False + + def _get_original_pr(self, commit: g.Commit, fallback_pr=None): + """Return the original PR of a given commit. + + If `fallback_pr` is provided, it'll be returned with the commit stacked in it. + + This method is taking care of storing in cache the original PR of a commit. + """ # Try to get the data from the user's cache first data = self.app.cache.get_pr_from_commit(commit.hexsha) if data: return g.PullRequest(**data) # Request GitHub to get them if not any("github.com" in remote.url for remote in self.app.repo.remotes): - return + return self._handle_fallback_pr(fallback_pr, commit, cache=False) src_repo_name = self.app.source.repo or self.app.repo_name try: raw_data = self.app.github.get_original_pr( @@ -860,7 +956,7 @@ def _get_original_pr(self, commit: g.Commit): ) except requests.exceptions.ConnectionError: self._print("⚠️ Unable to detect original PR (connection error)") - return + return self._handle_fallback_pr(fallback_pr, commit, cache=False) if raw_data: # Get all commits of the PR as they could update others addons # than the one the user is interested in. @@ -882,3 +978,22 @@ def _get_original_pr(self, commit: g.Commit): } self.app.cache.store_commit_pr(commit.hexsha, data) return g.PullRequest(**data) + return self._handle_fallback_pr(fallback_pr, commit) + + def _handle_fallback_pr(self, fallback_pr, commit, cache=True): + # Fallback PR hosting orphaned commits + data = { + "number": "_", + "url": "", + "author": "", + "title": "", + "body": "", + "merged_at": "", + "commits": [], + } + if fallback_pr: + fallback_pr.commits.append(commit.hexsha) + data = fallback_pr.to_dict(ref=False, number=True, body=True, commits=True) + if cache: + self.app.cache.store_commit_pr(commit.hexsha, data) + return fallback_pr diff --git a/oca_port/tests/test_app.py b/oca_port/tests/test_app.py index 1703406..6de36e1 100644 --- a/oca_port/tests/test_app.py +++ b/oca_port/tests/test_app.py @@ -166,7 +166,7 @@ def test_app_commit_to_port_output_json(self): # A commit could be ported and is put in a "fake PR" without number self.assertEqual(len(output["results"]), 1) self.assertDictEqual( - output["results"][""], + output["results"]["_"], { "url": "", "ref": "", diff --git a/oca_port/utils/git.py b/oca_port/utils/git.py index 9611bc5..126a541 100644 --- a/oca_port/utils/git.py +++ b/oca_port/utils/git.py @@ -275,20 +275,21 @@ def __hash__(self): def paths_not_ported(self): return list(self.paths - self.ported_paths) - def to_dict(self, number=False, body=False, commits=False): + def to_dict(self, ref=True, number=False, body=False, commits=False): data = { "url": self.url, - "ref": self.ref, "author": self.author, "title": self.title, "merged_at": str(self.merged_at or ""), } + if ref: + data["ref"] = self.ref if number: data["number"] = self.number if body: data["body"] = self.body if commits: - data["commits"] = [c.hexsha for c in self.commits] + data["commits"] = self.commits return data @@ -323,3 +324,11 @@ def get_changed_paths(repo, modified=True, staged=True): if staged: changed_diff.extend(repo.index.diff("HEAD")) return [diff.a_path or diff.b_path for diff in changed_diff] + + +def check_path_exists(repo, ref, path, rootdir=None): + root_tree = repo.commit(ref).tree + if rootdir: + root_tree /= str(rootdir) + paths = [t.path for t in root_tree.trees] + return path in paths