diff --git a/srcopsmetrics/cli.py b/srcopsmetrics/cli.py index d703f580..1c9aa316 100755 --- a/srcopsmetrics/cli.py +++ b/srcopsmetrics/cli.py @@ -18,12 +18,13 @@ """This is the CLI for SrcOpsMetrics to create, visualize, use bot knowledge.""" import logging -from tqdm.contrib.logging import logging_redirect_tqdm import os +from datetime import date, timedelta from pathlib import Path from typing import List, Optional import click +from tqdm.contrib.logging import logging_redirect_tqdm from srcopsmetrics.bot_knowledge import analyse_projects from srcopsmetrics.enums import EntityTypeEnum, StoragePath @@ -113,7 +114,7 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]: "-t", is_flag=True, required=False, - help=f"""Launch performance analysis of Thoth Kebechet managers for specified repository.""", + help=f"""Launch performance analysis of Thoth Kebechet managers for specified repository for yesterday.""", ) @click.option( "--metrics", "-m", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""", @@ -160,9 +161,12 @@ def cli( for project in repos: os.environ["PROJECT"] = project + today = date.today() + yesterday = today - timedelta(days=1) + if thoth: if repository and not merge: - kebechet_metrics = KebechetMetrics(repository=repos[0], today=True, is_local=is_local) + kebechet_metrics = KebechetMetrics(repository=repos[0], day=yesterday, is_local=is_local) kebechet_metrics.evaluate_and_store_kebechet_metrics() if metrics: @@ -182,7 +186,7 @@ def cli( if merge: if thoth: - KebechetMetrics.merge_kebechet_metrics_today(is_local=is_local) + KebechetMetrics.merge_kebechet_metrics_per_day(day=yesterday, is_local=is_local) else: raise NotImplementedError diff --git a/srcopsmetrics/entities/tools/storage.py b/srcopsmetrics/entities/tools/storage.py index ced4d920..65edfa38 100644 --- a/srcopsmetrics/entities/tools/storage.py +++ b/srcopsmetrics/entities/tools/storage.py @@ -34,7 +34,7 @@ _LOGGER = 
logging.getLogger(__name__) -def load_data_frame(path_or_buf: Union[Path, str]) -> pd.DataFrame: +def load_data_frame(path_or_buf: Union[Path, Any]) -> pd.DataFrame: """Load DataFrame from either string data or path.""" df = pd.read_json(path_or_buf, orient="records", lines=True) if not df.empty: @@ -42,6 +42,15 @@ def load_data_frame(path_or_buf: Union[Path, str]) -> pd.DataFrame: return df +def load_json(path_or_buf: Union[Path, str]) -> Any: + """Load JSON from a file (when given a Path) or parse it from a JSON-encoded string.""" + if isinstance(path_or_buf, Path): + with open(path_or_buf, "r", encoding="utf-8") as f: + return json.load(f) + + return json.loads(path_or_buf) + + class KnowledgeStorage: """Class for knowledge loading and saving.""" @@ -95,7 +104,7 @@ def save_data(self, file_path: Path, data: Dict[str, Any]): json.dump(data, f) _LOGGER.info("Saved locally at %s" % file_path) - def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]: + def load_data(self, file_path: Optional[Path] = None, as_json: bool = False) -> Dict[str, Any]: """Load previously collected repo knowledge. If a repo was not inspected before, create its directory. Arguments: @@ -103,6 +112,8 @@ def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]: inspected github repository. If None is passed, the used path will be :value:`~enums.StoragePath.DEFAULT` + as_json {bool} -- load data as a plain json file + Returns: Dict[str, Any] -- previusly collected knowledge. Empty dict if the knowledge does not exist. 
@@ -111,7 +122,11 @@ def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]: if file_path is None: raise ValueError("Filepath has to be specified.") - results = self.load_locally(file_path) if self.is_local else self.load_remotely(file_path) + results = ( + self.load_locally(file_path, as_json=as_json) + if self.is_local + else self.load_remotely(file_path, as_json=as_json) + ) if results is None: _LOGGER.info("File does not exist.") @@ -121,7 +136,7 @@ def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]: return results @staticmethod - def load_locally(file_path: Path, as_csv: bool = True) -> pd.DataFrame: + def load_locally(file_path: Path, as_json: bool = False) -> pd.DataFrame: """Load knowledge file from local storage.""" _LOGGER.info("Loading knowledge locally") @@ -129,16 +144,20 @@ def load_locally(file_path: Path, as_csv: bool = True) -> pd.DataFrame: _LOGGER.debug("Knowledge %s not found locally" % file_path) return pd.DataFrame() + if as_json: + return load_json(file_path) return load_data_frame(file_path) - def load_remotely(self, file_path: Path, as_csv: bool = True) -> pd.DataFrame: + def load_remotely(self, file_path: Path, as_json: bool = False) -> pd.DataFrame: """Load knowledge file from Ceph storage.""" _LOGGER.info("Loading knowledge from Ceph") ceph_filename = os.path.relpath(file_path).replace("./", "") try: data = self.get_ceph_store().retrieve_document(ceph_filename) - return load_data_frame(data) + if not as_json: + data = load_data_frame(data) + return data except NotFoundError: _LOGGER.debug("Knowledge %s not found on Ceph" % ceph_filename) diff --git a/srcopsmetrics/kebechet_metrics.py b/srcopsmetrics/kebechet_metrics.py index 0f7c26ad..95968da2 100644 --- a/srcopsmetrics/kebechet_metrics.py +++ b/srcopsmetrics/kebechet_metrics.py @@ -20,7 +20,7 @@ import logging import os import time -from datetime import datetime +from datetime import date from pathlib import Path from typing import Any, Dict, 
Optional @@ -62,14 +62,14 @@ def get_update_manager_request_type(title: str) -> Optional[str]: class KebechetMetrics: """Kebechet Metrics inspected by MI.""" - def __init__(self, repository: str, today: bool = False, is_local: bool = False): + def __init__(self, repository: str, is_local: bool = False, day: Optional[date] = None): """Initialize with collected knowledge.""" gh_repo = Github(login_or_token=_GITHUB_ACCESS_TOKEN, timeout=50).get_repo(repository) self.repo_name = repository self.prs = PullRequest(gh_repo).load_previous_knowledge(is_local=is_local) self.issues = Issue(gh_repo).load_previous_knowledge(is_local=is_local) - self.today = today + self.day = day self.is_local = is_local def _get_least_square_polynomial_fit(self, x_series: pd.Series, y_series: pd.Series, degree: int = 3): @@ -159,7 +159,7 @@ def get_overall_stats_update_manager(self) -> Dict[str, Any]: def get_daily_stats_update_manager(self) -> Dict[str, Any]: """Get daily stats. - If self.today set to true, return only stats for current day. + If self.day is set, return only stats for that day. """ prs = self._get_update_manager_pull_requests() @@ -170,12 +170,11 @@ def get_daily_stats_update_manager(self) -> Dict[str, Any]: stats: Dict[str, Any] = {} - today = datetime.now().date() - if self.today: - prs = prs[prs.date == today] + if self.day: + prs = prs[prs.date == self.day] - for date in prs.date.unique(): - prs_day = prs[prs["days"] == date] + for specific_date in prs.date.unique(): + prs_day = prs[prs["days"] == specific_date] day = {} day["created_pull_requests"] = len(prs_day) @@ -190,12 +189,12 @@ def get_daily_stats_update_manager(self) -> Dict[str, Any]: # TODO consider adding median_time to every day statistics (rolling windown maybe?) 
- if self.today: - median_time = prs[prs["days"] == today]["ttm"].median() + if self.day: + median_time = prs[prs["days"] == self.day]["ttm"].median() day["median_ttm"] = median_time if not np.isnan(median_time) else 0 return day - stats[str(date)] = day + stats[str(specific_date)] = day return stats @@ -208,18 +207,15 @@ def evaluate_and_store_kebechet_metrics(self): utils.check_directory(path) file_name = f"kebechet_{get_stats.__name__}" - if self.today: - curr_day = datetime.now().date() - file_name += f"_{str(curr_day)}" + if self.day: + file_name += f"_{str(self.day)}" file_name += ".json" KnowledgeStorage(is_local=self.is_local).save_data(file_path=path.joinpath(file_name), data=stats) @staticmethod - def merge_kebechet_metrics_today(is_local: bool = False): + def merge_kebechet_metrics_per_day(day: date, is_local: bool = False): """Merge all the collected metrics under given parent directory.""" - today = str(datetime.now().date()) - overall_today = { "created_pull_requests": 0, "rejected": 0, @@ -234,12 +230,12 @@ def merge_kebechet_metrics_today(is_local: bool = False): ks = KnowledgeStorage(is_local=is_local) for manager_name in ["update_manager"]: - file_name = f"kebechet_{manager_name}_{today}.json" + file_name = f"kebechet_{manager_name}_{str(day)}.json" for path in Path(Path(f"./{_ROOT_DIR}/")).rglob(f"*{file_name}"): if path.name == f"overall_{file_name}": continue - data = ks.load_data(file_path=path) + data = ks.load_data(file_path=path, as_json=True) for k in data["daily"]: if k == "median_ttm": ttms.append(data["daily"][k])