Skip to content

Commit

Permalink
PortAddonPullRequest: reduce noise by filtering PRs to port
Browse files Browse the repository at this point in the history
PRs that are updating both the analyzed module + other ones (here called
'satellite') were listed by oca-port, even if the commits related to the
analyzed module were already ported, because contributions on these
satellite modules were not (yet) ported.

In the context of the analyzed module, this brings noise and such PR
should not be proposed to be ported, but only mentioned.

There are now two kinds of detected PRs:

- the ones that are updating the current analyzed module: they are
  proposed to the user as "to be ported" as usual
- the ones that are updating only satellite modules/files:
- the ones where only satellite modules/files remain to be ported: such
  modules are now listed (in verbose mode), and the user invited to run
  oca-port on them.

As such PRs with commits updating only satellite modules are not
proposed anymore to the user, some modules will now be considered as
"Fully ported" instead of still having some "Commits to port".
  • Loading branch information
sebalix committed Jan 6, 2025
1 parent 539417e commit 40f35fd
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 41 deletions.
189 changes: 152 additions & 37 deletions oca_port/port_addon_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@
"{to_branch}...{to_org}:{pr_branch}?expand=1&title={title}"
)

# Fake PR for commits w/o any PR (used as fallback)
FAKE_PR = g.PullRequest(*[""] * 6)


def path_to_skip(commit_path):
"""Return True if the commit path should not be ported."""
Expand Down Expand Up @@ -88,9 +85,12 @@ def run(self):
"checking PRs to port..."
)
branches_diff = BranchesDiff(self.app)
branches_diff.print_diff(self.app.verbose)
if branches_diff.commits_diff["addon"]:
branches_diff.print_diff(verbose=self.app.verbose)
if branches_diff.commits_diff["satellite"]:
branches_diff.print_satellite_diff(verbose=self.app.verbose)
if self.app.non_interactive:
if branches_diff.commits_diff:
if branches_diff.commits_diff["addon"]:
# If an output is defined we return the result in the expected format
if self.app.output:
self._results["results"] = branches_diff.serialized_diff
Expand Down Expand Up @@ -119,11 +119,11 @@ def run(self):
def _get_dest_branch_name(self, branches_diff):
dest_branch_name = self.app.destination.branch
# Define a destination branch if not set
if branches_diff.commits_diff and not dest_branch_name:
if branches_diff.commits_diff["addon"] and not dest_branch_name:
commits_to_port = [
commit.hexsha
for commit in itertools.chain.from_iterable(
branches_diff.commits_diff.values()
branches_diff.commits_diff["addon"].values()
)
]
h = hashlib.shake_256("-".join(commits_to_port).encode())
Expand All @@ -142,7 +142,7 @@ def _port_pull_requests(self, branches_diff):
wip = self._print_wip_session()
dest_branch_name = self.app.destination.branch
# Nothing to port
if not branches_diff.commits_diff or not dest_branch_name:
if not branches_diff.commits_diff["addon"] or not dest_branch_name:
# Nothing to port while having WIP means the porting is done
return wip
# Check if destination branch exists, and create it if not
Expand Down Expand Up @@ -194,11 +194,11 @@ def _port_pull_requests(self, branches_diff):
dest_branch = g.Branch(self.app.repo, dest_branch_name)
self.app.repo.heads[dest_branch.name].checkout()
last_pr = (
list(branches_diff.commits_diff.keys())[-1]
if branches_diff.commits_diff
list(branches_diff.commits_diff["addon"].keys())[-1]
if branches_diff.commits_diff["addon"]
else None
)
for pr, commits in branches_diff.commits_diff.items():
for pr, commits in branches_diff.commits_diff["addon"].items():
# Check if PR has been blacklisted in user's session
if self._is_pr_blacklisted(pr):
if self._confirm_pr_blacklisted(pr):
Expand All @@ -221,7 +221,7 @@ def _port_pull_requests(self, branches_diff):
)
msg = (
f"\t{bc.DIM}PR #{pr.number} has been"
if pr.number
if pr.number != "_"
else "Orphaned commits have been"
) + f" automatically blacklisted{bc.ENDD}"
self._print(msg)
Expand Down Expand Up @@ -382,7 +382,7 @@ def _print_tips(self, pr_data):

def _port_pull_request_commits(self, pr, commits, base_ref, branch):
"""Port commits of a Pull Request in a new branch."""
if pr.number:
if pr.number != "_":
self._print(
f"- {bc.BOLD}{bc.OKCYAN}Port PR {pr.ref}{bc.END} "
f"{bc.OKCYAN}{pr.title}{bc.ENDC}..."
Expand All @@ -391,7 +391,7 @@ def _port_pull_request_commits(self, pr, commits, base_ref, branch):
else:
self._print(f"- {bc.BOLD}{bc.OKCYAN}Port commits w/o PR{bc.END}...")
# Ask the user if he wants to port the PR (or orphaned commits)
if not click.confirm("\tPort it?" if pr.number else "\tPort them?"):
if not click.confirm("\tPort it?" if pr.number != "_" else "\tPort them?"):
self._handle_pr_blacklist(pr)
return False

Expand Down Expand Up @@ -632,7 +632,7 @@ def __init__(self, app):

def _serialize_diff(self, commits_diff):
data = {}
for pr, commits in commits_diff.items():
for pr, commits in commits_diff["addon"].items():
data[pr.number] = pr.to_dict()
data[pr.number]["missing_commits"] = [commit.hexsha for commit in commits]
return data
Expand Down Expand Up @@ -686,8 +686,9 @@ def print_diff(self, verbose=False):
lines_to_print = []
fake_pr = None
i = 0
for i, pr in enumerate(self.commits_diff, 1):
if pr.number:
key = "addon"
for i, pr in enumerate(self.commits_diff[key], 1):
if pr.number != "_":
lines_to_print.append(
f"{i}) {bc.BOLD}{bc.OKBLUE}{pr.ref}{bc.END} "
f"{bc.OKBLUE}{pr.title}{bc.ENDC}:"
Expand All @@ -699,26 +700,26 @@ def print_diff(self, verbose=False):
if verbose:
pr_paths = ", ".join([f"{bc.DIM}{path}{bc.ENDD}" for path in pr.paths])
lines_to_print.append(f"\t=> Updates: {pr_paths}")
if pr.number:
if pr.number != "_":
pr_paths_not_ported = ", ".join(
[f"{bc.OKBLUE}{path}{bc.ENDC}" for path in pr.paths_not_ported]
)
lines_to_print.append(f"\t=> Not ported: {pr_paths_not_ported}")
lines_to_print.append(
f"\t=> {bc.BOLD}{bc.OKBLUE}{len(self.commits_diff[pr])} "
f"\t=> {bc.BOLD}{bc.OKBLUE}{len(self.commits_diff[key][pr])} "
f"commit(s){bc.END} not (fully) ported"
)
if pr.number:
if pr.number != "_":
lines_to_print.append(f"\t=> {pr.url}")
if verbose or not pr.number:
for commit in self.commits_diff[pr]:
if verbose or pr.number == "_":
for commit in self.commits_diff[key][pr]:
lines_to_print.append(
f"\t\t{bc.DIM}{commit.hexsha[:8]} " f"{commit.summary}{bc.ENDD}"
)
if fake_pr:
# We have commits without PR, adapt the message
i -= 1
nb_commits = len(self.commits_diff[fake_pr])
nb_commits = len(self.commits_diff[key][fake_pr])
message = (
f"{bc.BOLD}{bc.OKBLUE}{i} pull request(s){bc.END} "
f"and {bc.BOLD}{bc.OKBLUE}{nb_commits} commit(s) w/o "
Expand All @@ -733,23 +734,100 @@ def print_diff(self, verbose=False):
f"{self.app.from_branch.ref()} to {self.app.to_branch.ref()}"
)
lines_to_print.insert(0, message)
if self.commits_diff:
if self.commits_diff[key]:
lines_to_print.insert(1, "")
self._print("\n".join(lines_to_print))

def print_satellite_diff(self, verbose=False):
nb_prs = len(self.commits_diff["satellite"])
if not nb_prs:
return
self._print()
lines_to_print = []
msg = (
f"ℹ️ {nb_prs} other PRs related to {bc.OKBLUE}{self.app.addon}{bc.ENDC} "
"are also updating satellite modules/root files"
)
if verbose:
msg += ":"
lines_to_print.append(msg)
paths_ported = []
paths_not_ported = []
pr_paths_not_ported = sorted(
set(
itertools.chain.from_iterable(
[pr.paths_not_ported for pr in self.commits_diff["satellite"]]
)
)
)
for path in pr_paths_not_ported:
path_exists = g.check_path_exists(
self.app.repo,
self.app.to_branch.ref(),
path,
rootdir=self.app.addons_rootdir and self.app.addons_rootdir.name,
)
if path_exists:
if verbose:
lines_to_print.append(f"\t{bc.OKGREEN}- {path}{bc.END}")
paths_ported.append(path)
else:
paths_not_ported.append(path)
# Print the list of related modules/root files
if verbose:
if paths_not_ported:
# Two cases:
# - if we have PRs that could update already migrated modules
# we list them (see above) while displaying only a counter for
# not yet migrated modules.
if paths_ported:
lines_to_print.append(
f"\t{bc.DIM}- +{len(paths_not_ported)} modules/root files "
f"not ported{bc.END}"
)
# - if we get only PRs that could update non-migrated modules we
# do not display a counter but an exaustive list of these modules.
else:
for path in paths_not_ported:
lines_to_print.append(f"\t{bc.DIM}- {path}{bc.END}")
if paths_ported:
lines_to_print.append("Think about running oca-port on these modules.")
self._print("\n".join(lines_to_print))

def get_commits_diff(self):
"""Returns the commits which do not exist in `to_branch`, grouped by
their related Pull Request.
:return: a dict {PullRequest: {Commit: data, ...}, ...}
These PRs are then in turn grouped based on their impacted addons:
- if a PR is updating the analyzed module, it'll be put in 'addon' key
- if a PR is updating satellite module(s), it'll be put in 'satellite' key
:return: a dict {
'addon': {PullRequest: {Commit: data, ...}, ...},
'satellite': {PullRequest: {Commit: data, ...}, ...},
}
"""
commits_by_pr = defaultdict(list)
fake_pr = g.PullRequest("_", *[""] * 5)
# 1st loop to collect original PRs and stack orphaned commits in a fake PR
for commit in self.from_branch_path_commits:
if commit in self.to_branch_all_commits:
self.app.cache.mark_commit_as_ported(commit.hexsha)
continue
# Get related Pull Request if any
pr = self._get_original_pr(commit)
# Get related Pull Request if any,
# or fallback on a fake PR to host orphaned commits
# This call has two effects:
# - put in cache original PRs (so the 2nd loop is faster)
# - stack orphaned commits in one fake PR
self._get_original_pr(commit, fallback_pr=fake_pr)
# 2nd loop to actually analyze the content of commits/PRs
for commit in self.from_branch_path_commits:
if commit in self.to_branch_all_commits:
self.app.cache.mark_commit_as_ported(commit.hexsha)
continue
# Get related Pull Request if any,
# or fallback on a fake PR to host orphaned commits
pr = self._get_original_pr(commit, fallback_pr=fake_pr)
if pr:
for pr_commit_sha in pr.commits:
try:
Expand Down Expand Up @@ -821,35 +899,53 @@ def get_commits_diff(self):
break
else:
commits_by_pr[pr].append(pr_commit)
# No related PR: add the commit to the fake PR
else:
commits_by_pr[FAKE_PR].append(commit)
# Sort PRs on the merge date (better to port them in the right order).
# Do not return blacklisted PR.
sorted_commits_by_pr = {}
sorted_commits_by_pr = {
"addon": defaultdict(list),
"satellite": defaultdict(list),
}
for pr in sorted(commits_by_pr, key=lambda pr: pr.merged_at or ""):
if self._is_pr_updating_addon(pr):
key = "addon"
else:
key = "satellite"
blacklisted = self.app.storage.is_pr_blacklisted(pr.ref)
if not blacklisted:
# TODO: Backward compat for old tracking only by number
blacklisted = self.app.storage.is_pr_blacklisted(pr.number)
if blacklisted:
msg = (
f"{bc.DIM}PR #{pr.number}" if pr.number else "Orphaned commits"
f"{bc.DIM}PR #{pr.number}"
if pr.number != "_"
else "Orphaned commits"
) + f" blacklisted ({blacklisted}){bc.ENDD}"
self._print(msg)
continue
sorted_commits_by_pr[pr] = commits_by_pr[pr]
sorted_commits_by_pr[key][pr] = commits_by_pr[pr]
return sorted_commits_by_pr

def _get_original_pr(self, commit: g.Commit):
"""Return the original PR of a given commit."""
def _is_pr_updating_addon(self, pr):
"""Check if a PR still needs to update the analyzed addon."""
for path in pr.paths_not_ported:
if path.endswith(self.app.addon):
return True
return False

def _get_original_pr(self, commit: g.Commit, fallback_pr=None):
"""Return the original PR of a given commit.
If `fallback_pr` is provided, it'll be returned with the commit stacked in it.
This method is taking care of storing in cache the original PR of a commit.
"""
# Try to get the data from the user's cache first
data = self.app.cache.get_pr_from_commit(commit.hexsha)
if data:
return g.PullRequest(**data)
# Request GitHub to get them
if not any("github.com" in remote.url for remote in self.app.repo.remotes):
return
return self._handle_fallback_pr(fallback_pr, commit, cache=False)
src_repo_name = self.app.source.repo or self.app.repo_name
try:
raw_data = self.app.github.get_original_pr(
Expand All @@ -860,7 +956,7 @@ def _get_original_pr(self, commit: g.Commit):
)
except requests.exceptions.ConnectionError:
self._print("⚠️ Unable to detect original PR (connection error)")
return
return self._handle_fallback_pr(fallback_pr, commit, cache=False)
if raw_data:
# Get all commits of the PR as they could update others addons
# than the one the user is interested in.
Expand All @@ -882,3 +978,22 @@ def _get_original_pr(self, commit: g.Commit):
}
self.app.cache.store_commit_pr(commit.hexsha, data)
return g.PullRequest(**data)
return self._handle_fallback_pr(fallback_pr, commit)

def _handle_fallback_pr(self, fallback_pr, commit, cache=True):
# Fallback PR hosting orphaned commits
data = {
"number": "_",
"url": "",
"author": "",
"title": "",
"body": "",
"merged_at": "",
"commits": [],
}
if fallback_pr:
fallback_pr.commits.append(commit.hexsha)
data = fallback_pr.to_dict(ref=False, number=True, body=True, commits=True)
if cache:
self.app.cache.store_commit_pr(commit.hexsha, data)
return fallback_pr
2 changes: 1 addition & 1 deletion oca_port/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def test_app_commit_to_port_output_json(self):
# A commit could be ported and is put in a "fake PR" without number
self.assertEqual(len(output["results"]), 1)
self.assertDictEqual(
output["results"][""],
output["results"]["_"],
{
"url": "",
"ref": "",
Expand Down
15 changes: 12 additions & 3 deletions oca_port/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,20 +275,21 @@ def __hash__(self):
def paths_not_ported(self):
return list(self.paths - self.ported_paths)

def to_dict(self, number=False, body=False, commits=False):
def to_dict(self, ref=True, number=False, body=False, commits=False):
data = {
"url": self.url,
"ref": self.ref,
"author": self.author,
"title": self.title,
"merged_at": str(self.merged_at or ""),
}
if ref:
data["ref"] = self.ref
if number:
data["number"] = self.number
if body:
data["body"] = self.body
if commits:
data["commits"] = [c.hexsha for c in self.commits]
data["commits"] = self.commits
return data


Expand Down Expand Up @@ -323,3 +324,11 @@ def get_changed_paths(repo, modified=True, staged=True):
if staged:
changed_diff.extend(repo.index.diff("HEAD"))
return [diff.a_path or diff.b_path for diff in changed_diff]


def check_path_exists(repo, ref, path, rootdir=None):
root_tree = repo.commit(ref).tree
if rootdir:
root_tree /= str(rootdir)
paths = [t.path for t in root_tree.trees]
return path in paths

0 comments on commit 40f35fd

Please sign in to comment.