From 1ee561bd54de3c9061d3b5f7acbab54cec18bb05 Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Thu, 18 Feb 2021 15:39:17 +0100 Subject: [PATCH 1/6] Add kebechet metrics class and cli option --- srcopsmetrics/cli.py | 18 ++- srcopsmetrics/kebechet_metrics.py | 227 ++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 srcopsmetrics/kebechet_metrics.py diff --git a/srcopsmetrics/cli.py b/srcopsmetrics/cli.py index 1a9bf4a4..01a4a2a1 100755 --- a/srcopsmetrics/cli.py +++ b/srcopsmetrics/cli.py @@ -27,6 +27,7 @@ from srcopsmetrics.enums import EntityTypeEnum, StoragePath from srcopsmetrics.evaluate_scores import ReviewerAssigner from srcopsmetrics.github_knowledge import GitHubKnowledge +from srcopsmetrics.kebechet_metrics import KebechetMetrics _LOGGER = logging.getLogger("aicoe-src-ops-metrics") logging.basicConfig(level=logging.INFO) @@ -98,6 +99,12 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]: are stored. Default knowledge path is {StoragePath.DEFAULT.value} """, ) +@click.option( + "--metrics", "-m", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""", +) +@click.option( + "--kebechet", "-K", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""", +) def cli( repository: Optional[str], organization: Optional[str], @@ -108,6 +115,8 @@ def cli( visualize_statistics: bool, reviewer_reccomender: bool, knowledge_path: str, + metrics: bool, + kebechet: bool, ): """Command Line Interface for SrcOpsMetrics.""" os.environ["IS_LOCAL"] = "True" if is_local else "False" @@ -124,16 +133,13 @@ def cli( for project in repos: os.environ["PROJECT"] = project - if visualize_statistics: - raise NotImplementedError if reviewer_reccomender: reviewer_assigner = ReviewerAssigner() reviewer_assigner.evaluate_reviewers_scores(project=project, is_local=is_local) - if visualize_statistics and repository is not None: - raise 
NotImplementedError - elif visualize_statistics and organization is not None: - raise NotImplementedError + if kebechet: + kebechet_metrics = KebechetMetrics(repository=repos[0], today=True) + kebechet_metrics.evaluate_and_store_kebechet_metrics(is_local=is_local) if __name__ == "__main__": diff --git a/srcopsmetrics/kebechet_metrics.py b/srcopsmetrics/kebechet_metrics.py new file mode 100644 index 00000000..b27ca313 --- /dev/null +++ b/srcopsmetrics/kebechet_metrics.py @@ -0,0 +1,227 @@ +# Copyright (C) 2021 Dominik Tuchyna +# +# This file is part of thoth-station/mi - Meta-information Indicators. +# +# thoth-station/mi - Meta-information Indicators is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# thoth-station/mi - Meta-information Indicators is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with thoth-station/mi - Meta-information Indicators. If not, see . 
+ +"""Kebechet repository metrics evaluation.""" + +import logging +import os +import time +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Optional + +import numpy as np +import pandas as pd +from github import Github + +from srcopsmetrics.entities.issue import Issue +from srcopsmetrics.entities.pull_request import PullRequest +from srcopsmetrics.storage import KnowledgeStorage + +BOT_NAMES = {"sesheta"} + +UPDATE_TYPES_AND_KEYWORDS = { + "automatic": "Automatic update of dependency", + "failure_notification": "Failed to update dependencies to their latest version", + "initial_lock": "Initial dependency lock", +} + +_LOGGER = logging.getLogger(__name__) +_GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN") + + +class KebechetMetrics: + """Kebechet Metrics inspected by MI.""" + + def __init__(self, repository: str, today: bool = False): + """Initialize with collected knowledge.""" + gh_repo = Github(login_or_token=_GITHUB_ACCESS_TOKEN, timeout=50).get_repo(repository) + + self.repo_name = repository + self.prs = PullRequest(gh_repo).load_previous_knowledge(is_local=True) + self.issues = Issue(gh_repo).load_previous_knowledge(is_local=True) + self.today = today + + def _get_least_square_polynomial_fit(self, x_series: pd.Series, y_series: pd.Series, degree: int = 3): + """Apply least square polynomial fit on time metrics data.""" + return np.poly1d(np.polyfit(x_series, y_series, degree)) + + def _compute_predictions(self, x_series: pd.Series, y_series: pd.Series, days_ahead: int = 7) -> np.array: + """Compute estimation of the mean metrics in time for future score. 
+ + Return numpy.array with prediction for all the available dates + in self.pr_metrics plus specified days_ahead + """ + score = self._get_least_square_polynomial_fit(x_series, y_series) + return score(x_series.append(pd.Series([int(time.time()) * 3600 * 24 for i in range(1, days_ahead + 1)]))) + + @staticmethod + def _get_responded_time(issue) -> Optional[int]: + for comment in issue["comments"]: + if comment["author"] in BOT_NAMES: + return int(comment["created_at"]) + return None + + @staticmethod + def _get_update_manager_request_type(issue) -> Optional[str]: + """Get the type of the update request.""" + if issue["title"] == "Kebechet update": + return "manual" + + for request_type, keyword in UPDATE_TYPES_AND_KEYWORDS.items(): + if keyword in issue["title"]: + return request_type + + return None + + def _get_update_manager_issues(self): + data = [] + for issue in self.issues.values(): + issue_type = KebechetMetrics._get_update_manager_request_type(issue) + if not issue_type: + continue + + created_at = int(issue["created_at"]) + response = self._get_responded_time(issue) + ttre = response - created_at if response else None + + closed_at = int(issue["closed_at"]) if issue["closed_at"] else None + closed_by = issue["closed_by"] if issue["closed_by"] else None + closed_by_bot = closed_by in BOT_NAMES if closed_by else False + ttci = closed_at - created_at if closed_at else None + + data.append([created_at, issue_type, ttre, ttci, closed_by_bot]) + + df = pd.DataFrame(data) + df.columns = ["date", "type", "ttre", "ttci", "closed_by_bot"] + + return df.sort_values(by=["date"]).reset_index(drop=True) + + def _get_update_manager_pull_requests(self): + data = [] + for pr in self.prs.values(): + pr_type = KebechetMetrics._get_update_manager_request_type(pr) + if not pr_type: + continue + + created_at = int(pr["created_at"]) + + ttm = int(pr["merged_at"]) - created_at if pr["merged_at"] else None + + # TODO: include stats of reviewers? 
+ # reviewers = [pr["reviews"][r]["author"] for r in pr["reviews"]] + review_times = [int(pr["reviews"][r]["submitted_at"]) for r in pr["reviews"]] + ttfr = min(review_times) - created_at if review_times else None + + reviews = [r for r in pr["reviews"].values()] + approvals = [r["submitted_at"] for r in reviews if r["state"] == "APPROVED"] + tta = min(approvals) - created_at if approvals else None + + rejected = 1 if ttm is None and pr["closed_at"] is not None else 0 + closed_by_bot = 1 if rejected is not None and pr["closed_by"] in BOT_NAMES else 0 + merged_by_kebechet_bot = 1 if closed_by_bot and not rejected else 0 + rejected_by_kebechet_bot = 1 if closed_by_bot and rejected else 0 + + data.append([created_at, pr_type, ttm, ttfr, tta, merged_by_kebechet_bot, rejected_by_kebechet_bot]) + + df = pd.DataFrame(data) + df.columns = ["date", "type", "ttm", "ttfr", "tta", "merged_by_kebechet_bot", "rejected_by_kebechet_bot"] + + return df.sort_values(by=["date"]).reset_index(drop=True) + + def get_overall_stats_update_manager(self): + """Return stats over whole repository age.""" + prs = self._get_update_manager_pull_requests() + + stats: Dict[str, Any] = {} + stats["created_pull_requests"] = len(prs) + + stats["rejected"] = len(prs[np.isnan(prs["ttm"])]) + stats["rejected_by_kebechet_bot"] = len(prs[prs["rejected_by_kebechet_bot"] == 1]) + stats["rejected_by_other"] = stats["rejected"] - stats["rejected_by_kebechet_bot"] + + stats["merged"] = len(prs) - stats["rejected"] + stats["merged_by_kebechet_bot"] = len(prs[prs["merged_by_kebechet_bot"] == 1]) + stats["merged_by_other"] = stats["merged"] - stats["merged_by_kebechet_bot"] + + return stats + + def get_daily_stats_update_manager(self): + """Get daily stats. + + If self.today set to true, return only stats for current day. 
+ """ + prs = self._get_update_manager_pull_requests() + prs["days"] = prs.apply(lambda x: datetime.fromtimestamp(x["date"]).date(), axis=1) + + stats: Dict[datetime, Any] = {} + day_range = [datetime.now().date()] if self.today else prs["days"].unique() + for date in day_range: + prs_day = prs[prs["days"] == date] + + day = {} + day["created_pull_requests"] = len(prs_day) + + day["rejected"] = len(prs_day[np.isnan(prs_day["ttm"])]) + day["rejected_by_kebechet_bot"] = len(prs_day[prs_day["rejected_by_kebechet_bot"] == 1]) + day["rejected_by_other"] = day["rejected"] - day["rejected_by_kebechet_bot"] + + day["merged"] = len(prs_day) - day["rejected"] + day["merged_by_kebechet_bot"] = len(prs_day[prs_day["merged_by_kebechet_bot"] == 1]) + day["merged_by_other"] = day["merged"] - day["merged_by_kebechet_bot"] + + if self.today: + return day + + stats[str(date)] = day + + return stats + + def evaluate_and_store_kebechet_metrics(self, is_local: bool): + """Calculate and store metrics for every kebechet manager in repository.""" + for get_stats in [self.update_manager]: + stats = get_stats() + + path = f"./srcopsmetrics/metrics/{self.repo_name}/kebechet_{get_stats.__name__}" + if self.today: + curr_day = datetime.now().date() + path += f"_{str(curr_day)}" + path += ".json" + + KnowledgeStorage(is_local=is_local).save_knowledge(file_path=Path(path), data=stats) + + def update_manager(self): + """Calculate and store update manager metrics.""" + overall_stats = self.get_overall_stats_update_manager() + daily_stats = self.get_daily_stats_update_manager() + return {"overall": overall_stats, "daily": daily_stats} + + def label_bot_manager(self): + """Calculate and store label bot manager metrics.""" + raise NotImplementedError + + def thoth_advise(self): + """Calculate and store thoth advise manager metrics.""" + raise NotImplementedError + + def thoth_promenance(self): + """Calculate and store promenance manager metrics.""" + raise NotImplementedError + + def 
pipfile_requirements(self): + """Calculate and store pipfile requirements manager metrics.""" + raise NotImplementedError From 90704d5fead7a17b7d556de9b44cc189d85a8203 Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Thu, 18 Feb 2021 15:49:19 +0100 Subject: [PATCH 2/6] Exclude implementation from feature/metrics --- srcopsmetrics/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/srcopsmetrics/cli.py b/srcopsmetrics/cli.py index 01a4a2a1..65da1196 100755 --- a/srcopsmetrics/cli.py +++ b/srcopsmetrics/cli.py @@ -100,10 +100,11 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]: """, ) @click.option( - "--metrics", "-m", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""", -) -@click.option( - "--kebechet", "-K", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""", + "--thoth", + "-T", + is_flag=True, + required=False, + help=f"""Launch performance analysis of Thoth Kebechet managers for specified repository.""", ) def cli( repository: Optional[str], @@ -115,8 +116,7 @@ def cli( visualize_statistics: bool, reviewer_reccomender: bool, knowledge_path: str, - metrics: bool, - kebechet: bool, + thoth: bool, ): """Command Line Interface for SrcOpsMetrics.""" os.environ["IS_LOCAL"] = "True" if is_local else "False" @@ -137,7 +137,7 @@ def cli( reviewer_assigner = ReviewerAssigner() reviewer_assigner.evaluate_reviewers_scores(project=project, is_local=is_local) - if kebechet: + if thoth: kebechet_metrics = KebechetMetrics(repository=repos[0], today=True) kebechet_metrics.evaluate_and_store_kebechet_metrics(is_local=is_local) From 517f4ea439c2830884b96281e7ccfb0bfec53a02 Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Thu, 18 Feb 2021 16:19:31 +0100 Subject: [PATCH 3/6] Fix provenance typo --- srcopsmetrics/kebechet_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/srcopsmetrics/kebechet_metrics.py 
b/srcopsmetrics/kebechet_metrics.py index b27ca313..5d974308 100644 --- a/srcopsmetrics/kebechet_metrics.py +++ b/srcopsmetrics/kebechet_metrics.py @@ -218,7 +218,7 @@ def thoth_advise(self): """Calculate and store thoth advise manager metrics.""" raise NotImplementedError - def thoth_promenance(self): + def thoth_provenance(self): """Calculate and store promenance manager metrics.""" raise NotImplementedError From 4d4c4b4dcc7caca4a00ba20088cce066bd149890 Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Fri, 19 Feb 2021 11:57:41 +0100 Subject: [PATCH 4/6] Fix formatting --- srcopsmetrics/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/srcopsmetrics/cli.py b/srcopsmetrics/cli.py index 12716818..c57cf5a4 100755 --- a/srcopsmetrics/cli.py +++ b/srcopsmetrics/cli.py @@ -163,5 +163,6 @@ def cli( path = Path(f"./srcopsmetrics/metrics/{repos[0]}/issue_scores.json") KnowledgeStorage(is_local=is_local).save_knowledge(file_path=path, data=scores_issues) + if __name__ == "__main__": cli() From 577ba9774114fb11f9e62141adb430e72d7d763d Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Fri, 19 Feb 2021 12:16:24 +0100 Subject: [PATCH 5/6] Add median metric --- srcopsmetrics/kebechet_metrics.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/srcopsmetrics/kebechet_metrics.py b/srcopsmetrics/kebechet_metrics.py index 5d974308..4dd634ec 100644 --- a/srcopsmetrics/kebechet_metrics.py +++ b/srcopsmetrics/kebechet_metrics.py @@ -158,6 +158,9 @@ def get_overall_stats_update_manager(self): stats["merged_by_kebechet_bot"] = len(prs[prs["merged_by_kebechet_bot"] == 1]) stats["merged_by_other"] = stats["merged"] - stats["merged_by_kebechet_bot"] + median_time = prs["ttm"].median() + stats["median_ttm"] = median_time if not np.isnan(median_time) else 0 + return stats def get_daily_stats_update_manager(self): @@ -167,9 +170,10 @@ def get_daily_stats_update_manager(self): """ prs = self._get_update_manager_pull_requests() prs["days"] = 
prs.apply(lambda x: datetime.fromtimestamp(x["date"]).date(), axis=1) + today = datetime.now().date() stats: Dict[datetime, Any] = {} - day_range = [datetime.now().date()] if self.today else prs["days"].unique() + day_range = [today] if self.today else prs["days"].unique() for date in day_range: prs_day = prs[prs["days"] == date] @@ -184,7 +188,11 @@ def get_daily_stats_update_manager(self): day["merged_by_kebechet_bot"] = len(prs_day[prs_day["merged_by_kebechet_bot"] == 1]) day["merged_by_other"] = day["merged"] - day["merged_by_kebechet_bot"] + # TODO consider adding median_time to every day statistics (rolling window maybe?) + if self.today: + median_time = prs[prs["days"] == today]["ttm"].median() + day["median_ttm"] = median_time if not np.isnan(median_time) else 0 return day stats[str(date)] = day From 10075a5a27cc5d13be01dd03737ccebcb90817f5 Mon Sep 17 00:00:00 2001 From: Dominik Tuchyna Date: Fri, 19 Feb 2021 12:27:32 +0100 Subject: [PATCH 6/6] Add path under kebechet-update-manager --- srcopsmetrics/kebechet_metrics.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/srcopsmetrics/kebechet_metrics.py b/srcopsmetrics/kebechet_metrics.py index 4dd634ec..c92b833a 100644 --- a/srcopsmetrics/kebechet_metrics.py +++ b/srcopsmetrics/kebechet_metrics.py @@ -28,6 +28,7 @@ import pandas as pd from github import Github +from srcopsmetrics import utils from srcopsmetrics.entities.issue import Issue from srcopsmetrics.entities.pull_request import PullRequest from srcopsmetrics.storage import KnowledgeStorage @@ -42,6 +43,7 @@ _LOGGER = logging.getLogger(__name__) _GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN") +_ROOT_DIR = "kebechet-update-manager" class KebechetMetrics: @@ -204,13 +206,16 @@ def evaluate_and_store_kebechet_metrics(self, is_local: bool): for get_stats in [self.update_manager]: stats = get_stats() - path = f"./srcopsmetrics/metrics/{self.repo_name}/kebechet_{get_stats.__name__}" + path = 
Path(f"./{_ROOT_DIR}/{self.repo_name}/") + utils.check_directory(path) + + file_name = f"kebechet_{get_stats.__name__}" if self.today: curr_day = datetime.now().date() - path += f"_{str(curr_day)}" - path += ".json" + file_name += f"_{str(curr_day)}" + file_name += ".json" - KnowledgeStorage(is_local=is_local).save_knowledge(file_path=Path(path), data=stats) + KnowledgeStorage(is_local=is_local).save_knowledge(file_path=path.joinpath(file_name), data=stats) def update_manager(self): """Calculate and store update manager metrics."""