-
Notifications
You must be signed in to change notification settings - Fork 846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GitHub data access pr migration #2864
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
import logging | ||
Check warning on line 1 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
from datetime import datetime, timedelta, timezone | ||
|
||
from augur.tasks.github.pull_requests.core import extract_data_from_pr_list | ||
from augur.tasks.init.celery_app import celery_app as celery | ||
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask, AugurSecondaryRepoCollectionTask | ||
from augur.application.db.data_parse import * | ||
Check warning on line 7 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
from augur.tasks.github.util.github_data_access import GithubDataAccess | ||
from augur.tasks.github.util.github_paginator import GithubPaginator | ||
from augur.tasks.util.worker_util import remove_duplicate_dicts | ||
|
@@ -28,7 +28,6 @@ | |
logger = logging.getLogger(collect_pull_requests.__name__) | ||
|
||
with GithubTaskManifest(logger) as manifest: | ||
#with GithubTaskManifest() as manifest: | ||
|
||
augur_db = manifest.augur_db | ||
|
||
|
@@ -50,37 +49,37 @@ | |
all_data.append(pr) | ||
|
||
if len(all_data) >= 1000: | ||
process_pull_requests(all_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db) | ||
process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) | ||
total_count += len(all_data) | ||
all_data.clear() | ||
|
||
if len(all_data): | ||
process_pull_requests(all_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db) | ||
process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) | ||
total_count += len(all_data) | ||
|
||
if total_count > 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
return total_count | ||
else: | ||
logger.info(f"{owner}/{repo} has no pull requests") | ||
logger.debug(f"{owner}/{repo} has no pull requests") | ||
return 0 | ||
|
||
|
||
|
||
# TODO: Rename pull_request_reviewers table to pull_request_requested_reviewers | ||
Check warning on line 68 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
# TODO: Fix column names in pull request labels table | ||
Check warning on line 69 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
def retrieve_all_pr_data(repo_git: str, logger, key_auth, since): #-> Generator[List[Dict]]: | ||
|
||
owner, repo = get_owner_repo(repo_git) | ||
|
||
logger.info(f"Collecting pull requests for {owner}/{repo}") | ||
logger.debug(f"Collecting pull requests for {owner}/{repo}") | ||
|
||
url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&direction=desc&sort=updated" | ||
|
||
github_data_access = GithubDataAccess(key_auth, logger) | ||
|
||
num_pages = github_data_access.get_resource_page_count(url) | ||
|
||
logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests") | ||
logger.debug(f"{owner}/{repo}: Retrieving {num_pages} pages of pull requests") | ||
|
||
# returns a generator so this method can be used by doing for x in retrieve_all_pr_data() | ||
|
||
|
@@ -92,7 +91,7 @@ | |
if since and datetime.fromisoformat(pr["updated_at"].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) < since: | ||
return | ||
|
||
def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): | ||
Check warning on line 94 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
""" | ||
Parse and insert all retrieved PR data. | ||
|
||
|
@@ -107,7 +106,7 @@ | |
tool_version = "2.0" | ||
data_source = "Github API" | ||
|
||
pr_dicts, pr_mapping_data, pr_numbers, contributors = extract_data_from_pr_list(pull_requests, repo_id, tool_source, tool_version, data_source) | ||
Check warning on line 109 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
|
||
# remove duplicate contributors before inserting | ||
contributors = remove_duplicate_dicts(contributors) | ||
|
@@ -221,14 +220,14 @@ | |
return pr_review_cntrb | ||
|
||
@celery.task(base=AugurSecondaryRepoCollectionTask) | ||
def collect_pull_request_review_comments(repo_git: str) -> None: | ||
Check warning on line 223 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
Check warning on line 223 in augur/tasks/github/pull_requests/tasks.py GitHub Actions / runner / pylint
|
||
|
||
owner, repo = get_owner_repo(repo_git) | ||
|
||
review_msg_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/comments" | ||
|
||
logger = logging.getLogger(collect_pull_request_review_comments.__name__) | ||
logger.info(f"Collecting pull request review comments for {owner}/{repo}") | ||
logger.debug(f"Collecting pull request review comments for {owner}/{repo}") | ||
|
||
repo_id = get_repo_by_repo_git(repo_git).repo_id | ||
|
||
|
@@ -245,23 +244,9 @@ | |
data_source = "Github API" | ||
|
||
key_auth = GithubRandomKeyAuth(logger) | ||
pr_review_messages = GithubPaginator(review_msg_url, key_auth, logger) | ||
num_pages = pr_review_messages.get_num_pages() | ||
|
||
all_raw_pr_review_messages = [] | ||
for page_data, page in pr_review_messages.iter_pages(): | ||
|
||
if page_data is None: | ||
break | ||
|
||
if len(page_data) == 0: | ||
logger.debug(f"{owner}/{repo} Pr Review Messages Page {page} contains no data...returning") | ||
logger.info(f"{owner}/{repo} Pr Review Messages Page {page} of {num_pages}") | ||
break | ||
|
||
logger.info(f"{owner}/{repo} Pr Review Messages Page {page} of {num_pages}") | ||
github_data_access = GithubDataAccess(key_auth, logger) | ||
|
||
all_raw_pr_review_messages += page_data | ||
all_raw_pr_review_messages = list(github_data_access.paginate_resource(review_msg_url)) | ||
|
||
contributors = [] | ||
for comment in all_raw_pr_review_messages: | ||
|
@@ -278,8 +263,7 @@ | |
pr_review_msg_mapping_data = {} | ||
|
||
pr_review_comments_len = len(all_raw_pr_review_messages) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
logger.info(f"{owner}/{repo}: Pr review comments len: {pr_review_comments_len}") | ||
for index, comment in enumerate(all_raw_pr_review_messages): | ||
for comment in all_raw_pr_review_messages: | ||
|
||
# pull_request_review_id is required to map it to the correct pr review | ||
if not comment["pull_request_review_id"]: | ||
|
@@ -319,9 +303,7 @@ | |
try: | ||
augur_pr_review_id = pr_review_id_mapping[github_pr_review_id] | ||
except KeyError: | ||
logger.info(f"{owner}/{repo}: Could not find related pr review") | ||
logger.info(f"{owner}/{repo}: We were searching for pr review with id: {github_pr_review_id}") | ||
logger.info("Skipping") | ||
logger.warning(f"{owner}/{repo}: Could not find related pr review. We were searching for pr review with id: {github_pr_review_id}") | ||
continue | ||
|
||
pr_review_message_ref = extract_pr_review_message_ref_data(comment, augur_pr_review_id, github_pr_review_id, repo_id, tool_version, data_source) | ||
|
@@ -364,50 +346,30 @@ | |
|
||
pr_count = len(prs) | ||
|
||
github_data_access = GithubDataAccess(manifest.key_auth, logger) | ||
|
||
all_pr_reviews = {} | ||
for index, pr in enumerate(prs): | ||
|
||
pr_number = pr.pr_src_number | ||
pull_request_id = pr.pull_request_id | ||
|
||
logger.info(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") | ||
logger.debug(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") | ||
|
||
pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" | ||
|
||
pr_reviews = [] | ||
pr_reviews_generator = GithubPaginator(pr_review_url, manifest.key_auth, logger) | ||
for page_data, page in pr_reviews_generator.iter_pages(): | ||
|
||
if page_data is None: | ||
break | ||
|
||
if len(page_data) == 0: | ||
break | ||
|
||
if isinstance(page_data, list): | ||
page_data = [ | ||
element.decode('utf-8').replace('\x00', ' ') if isinstance(element, bytes) else element | ||
for element in page_data | ||
] | ||
logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") | ||
elif isinstance(page_data, bytes): | ||
page_data = page_data.decode('utf-8').replace('\x00', ' ') | ||
logger.info(f"NUL characters were found in PR Reviews and replaced with spaces.") | ||
|
||
|
||
pr_reviews.extend(page_data) | ||
pr_reviews = list(github_data_access.paginate_resource(pr_review_url)) | ||
|
||
if pr_reviews: | ||
all_pr_reviews[pull_request_id] = pr_reviews | ||
|
||
if not list(all_pr_reviews.keys()): | ||
logger.info(f"{owner}/{repo} No pr reviews for repo") | ||
logger.debug(f"{owner}/{repo} No pr reviews for repo") | ||
return | ||
|
||
contributors = [] | ||
for pull_request_id in all_pr_reviews.keys(): | ||
for pull_request_id, reviews in all_pr_reviews.items(): | ||
|
||
reviews = all_pr_reviews[pull_request_id] | ||
for review in reviews: | ||
contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) | ||
if contributor: | ||
|
@@ -418,9 +380,8 @@ | |
|
||
|
||
pr_reviews = [] | ||
for pull_request_id in all_pr_reviews.keys(): | ||
for pull_request_id, reviews in all_pr_reviews.items(): | ||
|
||
reviews = all_pr_reviews[pull_request_id] | ||
for review in reviews: | ||
|
||
if "cntrb_id" in review: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pylint] reported by reviewdog 🐶
C1802: Do not use
len(SEQUENCE)
without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)