Merge pull request #411 from xtuchyna/feature/kebechet-analysis-specific-day

Feature/kebechet analysis specific day
sesheta authored Jun 1, 2021
2 parents 08f59b4 + ebf1d12 commit abece86
Showing 3 changed files with 49 additions and 30 deletions.
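
In short, this PR replaces the boolean today switch on KebechetMetrics with an explicit day argument, so Kebechet metrics can be computed for any date; the CLI now passes yesterday. A minimal before/after sketch (the repository name is a placeholder; the keyword arguments come from the diffs below):

    from datetime import date, timedelta

    from srcopsmetrics.kebechet_metrics import KebechetMetrics

    # Old API, removed by this PR: analysis pinned to the current day.
    # KebechetMetrics(repository="org/repo", today=True, is_local=True)

    # New API: request metrics for an explicit day (the CLI uses yesterday).
    yesterday = date.today() - timedelta(days=1)
    metrics = KebechetMetrics(repository="org/repo", day=yesterday, is_local=True)
    metrics.evaluate_and_store_kebechet_metrics()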
12 changes: 8 additions & 4 deletions srcopsmetrics/cli.py
@@ -18,12 +18,13 @@
 """This is the CLI for SrcOpsMetrics to create, visualize, use bot knowledge."""

 import logging
-from tqdm.contrib.logging import logging_redirect_tqdm
 import os
+from datetime import date, timedelta
 from pathlib import Path
 from typing import List, Optional

 import click
+from tqdm.contrib.logging import logging_redirect_tqdm

 from srcopsmetrics.bot_knowledge import analyse_projects
 from srcopsmetrics.enums import EntityTypeEnum, StoragePath
@@ -113,7 +114,7 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
"-t",
is_flag=True,
required=False,
help=f"""Launch performance analysis of Thoth Kebechet managers for specified repository.""",
help=f"""Launch performance analysis of Thoth Kebechet managers for specified repository for yesterday.""",
)
@click.option(
"--metrics", "-m", is_flag=True, required=False, help=f"""Launch Metrics Calculation for specified repository.""",
@@ -160,9 +161,12 @@ def cli(
     for project in repos:
         os.environ["PROJECT"] = project

+        today = date.today()
+        yesterday = today - timedelta(days=1)
+
         if thoth:
             if repository and not merge:
-                kebechet_metrics = KebechetMetrics(repository=repos[0], today=True, is_local=is_local)
+                kebechet_metrics = KebechetMetrics(repository=repos[0], day=yesterday, is_local=is_local)
                 kebechet_metrics.evaluate_and_store_kebechet_metrics()

         if metrics:
@@ -182,7 +186,7 @@

         if merge:
             if thoth:
-                KebechetMetrics.merge_kebechet_metrics_today(is_local=is_local)
+                KebechetMetrics.merge_kebechet_metrics_per_day(day=yesterday, is_local=is_local)
             else:
                 raise NotImplementedError

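For orientation, here is a condensed sketch of the CLI flow after this change. It is simplified from the hunks above: the real cli() also gates on the repository argument and analyses repos[0] rather than the loop variable.

    from datetime import date, timedelta

    from srcopsmetrics.kebechet_metrics import KebechetMetrics

    def analyse_kebechet(repos, thoth, merge, is_local):
        # Condensed from cli(): every run targets yesterday's metrics.
        for project in repos:
            yesterday = date.today() - timedelta(days=1)
            if thoth and not merge:
                metrics = KebechetMetrics(repository=project, day=yesterday, is_local=is_local)
                metrics.evaluate_and_store_kebechet_metrics()
            if thoth and merge:
                KebechetMetrics.merge_kebechet_metrics_per_day(day=yesterday, is_local=is_local)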
31 changes: 25 additions & 6 deletions srcopsmetrics/entities/tools/storage.py
@@ -34,14 +34,23 @@
 _LOGGER = logging.getLogger(__name__)


-def load_data_frame(path_or_buf: Union[Path, str]) -> pd.DataFrame:
+def load_data_frame(path_or_buf: Union[Path, Any]) -> pd.DataFrame:
     """Load DataFrame from either string data or path."""
     df = pd.read_json(path_or_buf, orient="records", lines=True)
     if not df.empty:
         df = df.set_index("id")
     return df


+def load_json(path_or_buf: Union[Path, str]) -> Any:
+    """Load json data from string or filepath."""
+    if isinstance(path_or_buf, Path):
+        with open(path_or_buf, "r") as f:
+            return json.loads(f.read())
+
+    return json.loads(path_or_buf)
+
+
 class KnowledgeStorage:
     """Class for knowledge loading and saving."""

@@ -95,14 +104,16 @@ def save_data(self, file_path: Path, data: Dict[str, Any]):
             json.dump(data, f)
         _LOGGER.info("Saved locally at %s" % file_path)

-    def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]:
+    def load_data(self, file_path: Optional[Path] = None, as_json: bool = False) -> Dict[str, Any]:
         """Load previously collected repo knowledge. If a repo was not inspected before, create its directory.

         Arguments:
             file_path {Optional[Path]} -- path to previously stored knowledge from
                                 inspected github repository. If None is passed, the used path will
                                 be :value:`~enums.StoragePath.DEFAULT`

+            as_json {bool} -- load data as a plain json file
+
         Returns:
             Dict[str, Any] -- previusly collected knowledge.
                 Empty dict if the knowledge does not exist.
@@ -111,7 +122,11 @@ def load_data(self, file_path: Optional[Path] = None) -> Dict[str, Any]:
         if file_path is None:
             raise ValueError("Filepath has to be specified.")

-        results = self.load_locally(file_path) if self.is_local else self.load_remotely(file_path)
+        results = (
+            self.load_locally(file_path, as_json=as_json)
+            if self.is_local
+            else self.load_remotely(file_path, as_json=as_json)
+        )

         if results is None:
             _LOGGER.info("File does not exist.")
@@ -121,24 +136,28 @@
         return results

     @staticmethod
-    def load_locally(file_path: Path, as_csv: bool = True) -> pd.DataFrame:
+    def load_locally(file_path: Path, as_json: bool = False) -> pd.DataFrame:
         """Load knowledge file from local storage."""
         _LOGGER.info("Loading knowledge locally")

         if not file_path.exists():
             _LOGGER.debug("Knowledge %s not found locally" % file_path)
             return pd.DataFrame()

+        if as_json:
+            return load_json(file_path)
         return load_data_frame(file_path)

-    def load_remotely(self, file_path: Path, as_csv: bool = True) -> pd.DataFrame:
+    def load_remotely(self, file_path: Path, as_json: bool = False) -> pd.DataFrame:
         """Load knowledge file from Ceph storage."""
         _LOGGER.info("Loading knowledge from Ceph")

         ceph_filename = os.path.relpath(file_path).replace("./", "")
         try:
             data = self.get_ceph_store().retrieve_document(ceph_filename)
-            return load_data_frame(data)
+            if not as_json:
+                data = load_data_frame(data)
+            return data

         except NotFoundError:
             _LOGGER.debug("Knowledge %s not found on Ceph" % ceph_filename)
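The effect of the storage change: save_data still serializes a plain dict with json.dump, and the new as_json flag lets load_data hand that dict back through load_json instead of coercing it into a pandas DataFrame. A small local round-trip sketch (the file name is illustrative):

    from pathlib import Path

    from srcopsmetrics.entities.tools.storage import KnowledgeStorage

    ks = KnowledgeStorage(is_local=True)
    path = Path("kebechet_update_manager_2021-05-31.json")  # illustrative name

    # save_data() writes the dict as plain JSON.
    ks.save_data(file_path=path, data={"daily": {"median_ttm": 42}})

    # New in this PR: load the raw dict back via load_json() instead of
    # parsing it into a DataFrame with load_data_frame().
    raw = ks.load_data(file_path=path, as_json=True)
    assert raw["daily"]["median_ttm"] == 42

This is what the merge step in kebechet_metrics.py below relies on: the per-day metrics files are plain JSON documents rather than records-oriented DataFrames.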
36 changes: 16 additions & 20 deletions srcopsmetrics/kebechet_metrics.py
@@ -20,7 +20,7 @@
 import logging
 import os
 import time
-from datetime import datetime
+from datetime import date
 from pathlib import Path
 from typing import Any, Dict, Optional

@@ -62,14 +62,14 @@ def get_update_manager_request_type(title: str) -> Optional[str]:
 class KebechetMetrics:
     """Kebechet Metrics inspected by MI."""

-    def __init__(self, repository: str, today: bool = False, is_local: bool = False):
+    def __init__(self, repository: str, is_local: bool = False, day: Optional[date] = None):
         """Initialize with collected knowledge."""
         gh_repo = Github(login_or_token=_GITHUB_ACCESS_TOKEN, timeout=50).get_repo(repository)

         self.repo_name = repository
         self.prs = PullRequest(gh_repo).load_previous_knowledge(is_local=is_local)
         self.issues = Issue(gh_repo).load_previous_knowledge(is_local=is_local)
-        self.today = today
+        self.day = day
         self.is_local = is_local

     def _get_least_square_polynomial_fit(self, x_series: pd.Series, y_series: pd.Series, degree: int = 3):
@@ -159,7 +159,7 @@ def get_overall_stats_update_manager(self) -> Dict[str, Any]:
     def get_daily_stats_update_manager(self) -> Dict[str, Any]:
         """Get daily stats.

-        If self.today set to true, return only stats for current day.
+        If self.day is set, return only stats for that day.
         """
         prs = self._get_update_manager_pull_requests()

@@ -170,12 +170,11 @@

         stats: Dict[str, Any] = {}

-        today = datetime.now().date()
-        if self.today:
-            prs = prs[prs.date == today]
+        if self.day:
+            prs = prs[prs.date == self.day]

-        for date in prs.date.unique():
-            prs_day = prs[prs["days"] == date]
+        for specific_date in prs.date.unique():
+            prs_day = prs[prs["days"] == specific_date]

             day = {}
             day["created_pull_requests"] = len(prs_day)
@@ -190,12 +189,12 @@

             # TODO consider adding median_time to every day statistics (rolling windown maybe?)

-            if self.today:
-                median_time = prs[prs["days"] == today]["ttm"].median()
+            if self.day:
+                median_time = prs[prs["days"] == self.day]["ttm"].median()
                 day["median_ttm"] = median_time if not np.isnan(median_time) else 0
                 return day

-            stats[str(date)] = day
+            stats[str(specific_date)] = day

         return stats

@@ -208,18 +207,15 @@ def evaluate_and_store_kebechet_metrics(self):
         utils.check_directory(path)

         file_name = f"kebechet_{get_stats.__name__}"
-        if self.today:
-            curr_day = datetime.now().date()
-            file_name += f"_{str(curr_day)}"
+        if self.day:
+            file_name += f"_{str(self.day)}"
         file_name += ".json"

         KnowledgeStorage(is_local=self.is_local).save_data(file_path=path.joinpath(file_name), data=stats)

     @staticmethod
-    def merge_kebechet_metrics_today(is_local: bool = False):
+    def merge_kebechet_metrics_per_day(day: date, is_local: bool = False):
         """Merge all the collected metrics under given parent directory."""
-        today = str(datetime.now().date())
-
         overall_today = {
             "created_pull_requests": 0,
             "rejected": 0,
@@ -234,12 +230,12 @@
         ks = KnowledgeStorage(is_local=is_local)
         for manager_name in ["update_manager"]:

-            file_name = f"kebechet_{manager_name}_{today}.json"
+            file_name = f"kebechet_{manager_name}_{str(day)}.json"

             for path in Path(Path(f"./{_ROOT_DIR}/")).rglob(f"*{file_name}"):
                 if path.name == f"overall_{file_name}":
                     continue
-                data = ks.load_data(file_path=path)
+                data = ks.load_data(file_path=path, as_json=True)
                 for k in data["daily"]:
                     if k == "median_ttm":
                         ttms.append(data["daily"][k])
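With day threaded through, the merge can be replayed for any past date rather than only today. A hedged usage sketch (the date is arbitrary; per the code above, it scans the root directory for matching kebechet_update_manager_<day>.json files):

    from datetime import date

    from srcopsmetrics.kebechet_metrics import KebechetMetrics

    # Merge every repository's kebechet_update_manager_2021-05-31.json found
    # under the root directory into one overall document for that day.
    KebechetMetrics.merge_kebechet_metrics_per_day(day=date(2021, 5, 31), is_local=True)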
