This repository has been archived by the owner on Jun 9, 2023. It is now read-only.

Mi fixes #609

Merged · 8 commits · Oct 12, 2022
78 changes: 43 additions & 35 deletions srcopsmetrics/cli.py
@@ -35,14 +35,50 @@
logging.basicConfig(level=logging.INFO)


def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
def _parse_entities(entities_raw: Optional[str]) -> List[str]:
"""Get passed entities as list."""
if entities_raw and entities_raw != "":
return [e.strip() for e in entities_raw.split(",")]

return []


def _parse_repos(repository: Optional[str], organization: Optional[str]):
repos = []

if repository:
for rep in repository.split(","):
repos.extend(GitHubKnowledge().get_repositories(repository=rep.strip()))
if organization:
repos.extend(GitHubKnowledge().get_repositories(organization=organization))

return repos


def _check_env_vars(is_local: bool):
if not is_local:
ceph_needed_vars = ["CEPH_KEY_ID", "CEPH_SECRET_KEY", "CEPH_BUCKET_PREFIX", "S3_ENDPOINT_URL", "CEPH_BUCKET"]
missing = []
for env in ceph_needed_vars:
if os.getenv(env) is None:
missing.append(env)

if len(missing) > 0:
_LOGGER.warning("--is_local option is not set but Ceph environment variables are missing.")
_LOGGER.warning("Missing: " + ",".join(env))

if os.getenv("GITHUB_ACCESS_TOKEN") is None:
_LOGGER.warning(
"Missing GITHUB_ACCESS_TOKEN environment variable; The rate limit of GitHub API request will be limited"
)


def _set_env_vars(is_local: bool, knowledge_path: Optional[str], merge_path: Optional[str]):
os.environ["IS_LOCAL"] = "True" if is_local else "False"
os.environ[StoragePath.LOCATION_VAR.value] = knowledge_path
os.environ[StoragePath.MERGE_LOCATION_ENVVAR_NAME.value] = merge_path


@click.command()
@click.option(
"--repository",
@@ -68,13 +104,6 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
Storage location is {StoragePath.KNOWLEDGE.value}
Removes all previously processed storage""",
)
@click.option(
"--process-knowledge",
"-p",
is_flag=True,
help=f"""Process knowledge into more explicit information from collected knowledge.
Storage location is {StoragePath.PROCESSED.value}""",
)
@click.option(
"--is-local",
"-l",
@@ -94,16 +123,6 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
"""
+ "\n".join([entity.value for entity in EntityTypeEnum]),
)
@click.option(
"--visualize-statistics",
"-v",
is_flag=True,
help="""Visualize statistics on the project repository knowledge collected.
Dash application is launched and can be accessed at http://127.0.0.1:8050/""",
)
@click.option(
"--reviewer-reccomender", "-R", is_flag=True, help="Assign reviewers based on previous knowledge collected."
)
@click.option(
"--knowledge-path",
"-k",
@@ -146,17 +165,14 @@ def get_entities_as_list(entities_raw: Optional[str]) -> List[str]:
"--sli-slo",
is_flag=True,
required=False,
help="""Launch sli-slo metrics calculation given repositories.""",
help="""Launch sli-slo metrics calculation given repositories. Must be used in conjunction with -t""",
)
def cli(
repository: Optional[str],
organization: Optional[str],
create_knowledge: bool,
process_knowledge: bool,
is_local: bool,
entities: Optional[str],
visualize_statistics: bool,
reviewer_reccomender: bool,
knowledge_path: str,
thoth: bool,
metrics: bool,
@@ -165,19 +181,11 @@ def cli(
sli_slo: bool,
):
"""Command Line Interface for SrcOpsMetrics."""
os.environ["IS_LOCAL"] = "True" if is_local else "False"
os.environ[StoragePath.LOCATION_VAR.value] = knowledge_path
os.environ[StoragePath.MERGE_LOCATION_ENVVAR_NAME.value] = merge_path

repos = []

if repository:
for rep in repository.split(","):
repos.extend(GitHubKnowledge().get_repositories(repository=rep.strip()))
if organization:
repos.extend(GitHubKnowledge().get_repositories(organization=organization))
_check_env_vars(is_local=is_local)
_set_env_vars(is_local=is_local, knowledge_path=knowledge_path, merge_path=merge_path)

entities_args = get_entities_as_list(entities)
repos = _parse_repos(repository=repository, organization=organization)
entities_args = _parse_entities(entities)

if create_knowledge:
analyse_projects(repositories=repos, is_local=is_local, entities=entities_args)
@@ -191,7 +199,7 @@ def cli(
if thoth:
_LOGGER.info("#### Launching thoth data analysis ####")

if repository and not merge and not sli_slo:
if repos and not merge and not sli_slo:
for repo in repos:
_LOGGER.info("Creating metrics for repository %s" % repo)
kebechet_metrics = KebechetMetrics(repository=repo, day=yesterday, is_local=is_local)
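The refactor above extracts `_parse_entities`, `_parse_repos`, and the environment-variable checks out of `cli()`. A minimal standalone sketch of the parsing behaviour (reimplemented here for illustration; the real helper lives in srcopsmetrics/cli.py):

from typing import List, Optional


def _parse_entities(entities_raw: Optional[str]) -> List[str]:
    """Split a comma-separated --entities value into a stripped list."""
    if entities_raw and entities_raw != "":
        return [e.strip() for e in entities_raw.split(",")]
    return []


# Both None and the empty string yield an empty list.
assert _parse_entities("PullRequest, Issue") == ["PullRequest", "Issue"]
assert _parse_entities("") == []
assert _parse_entities(None) == []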
10 changes: 8 additions & 2 deletions srcopsmetrics/entities/interface.py
@@ -142,8 +142,13 @@ def save_knowledge(
else:
raise NotImplementedError
else:
new_data = pd.DataFrame.from_dict(self.stored_entities).T
to_save = pd.concat([new_data, self.previous_knowledge])
try:
new_data = pd.DataFrame.from_dict(self.stored_entities).T
to_save = pd.concat([new_data, self.previous_knowledge])
except Exception as e:
_LOGGER.warning("There was an error converting the stored entity to a DataFrame.")
_LOGGER.warning(str(e))
return

_LOGGER.info("Knowledge file %s", (os.path.basename(file_path)))
_LOGGER.info("new %d entities", len(self.stored_entities))
@@ -168,6 +173,7 @@ def save_knowledge(

_LOGGER.info("Saved on CEPH at %s/%s%s" % (s3.bucket, s3.prefix, ceph_filename))
else:
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w") as f:
f.write(str(to_save))
_LOGGER.info("Saved locally at %s" % file_path)
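The local-save branch now creates the target directory before writing. A short sketch of why that matters (the path here is hypothetical; save_knowledge receives file_path from its caller):

import os

file_path = "/tmp/srcopsmetrics/knowledge/PullRequest.json"  # hypothetical location

# Without makedirs, open() raises FileNotFoundError when the parent directory
# does not exist yet; exist_ok=True keeps repeated saves idempotent.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w") as f:
    f.write("{}")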
7 changes: 7 additions & 0 deletions srcopsmetrics/entities/pull_request.py
@@ -67,6 +67,7 @@ class PullRequest(Entity):
"commits_number": int,
"changed_files": [str],
"changed_files_number": int,
"changed_files_changes": {str: int},
"interactions": {str: int},
"reviews": PullRequestReviews,
"commits": [str],
@@ -97,6 +98,11 @@ def store(self, pull_request: GithubPullRequest):

labels = [label.name for label in pull_request.get_labels()]

# get changed files changes
changes: Dict[str, int] = {}
for f in pull_request.get_files():
changes[f.filename] = (changes[f.filename] + f.changes) if f.filename in changes else f.changes

# Evaluate size of PR
pull_request_size = None
if labels:
@@ -125,6 +131,7 @@ def store(self, pull_request: GithubPullRequest):
"labels": labels,
"commits": [c.sha for c in pull_request.get_commits()],
"changed_files": [f.filename for f in pull_request.get_files()],
"changed_files_changes": changes,
"first_review_at": get_first_review_time(reviews),
"first_approve_at": get_approve_time(reviews),
}
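The new changed_files_changes field accumulates f.changes (additions plus deletions, as reported on PyGithub's File objects) per filename, guarding against a filename appearing more than once in get_files(). The accumulation is equivalent to this sketch with made-up data:

from typing import Dict

# Stand-ins for pull_request.get_files(): (filename, changes) pairs.
files = [("Pipfile.lock", 120), ("app.py", 10), ("Pipfile.lock", 5)]

changes: Dict[str, int] = {}
for filename, n_changes in files:
    changes[filename] = changes.get(filename, 0) + n_changes

assert changes == {"Pipfile.lock": 125, "app.py": 10}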
3 changes: 3 additions & 0 deletions srcopsmetrics/iterator.py
@@ -101,6 +101,9 @@ def run(self):
except (GithubException, KeyboardInterrupt) as e:
_LOGGER.warning(str(e))
_LOGGER.warning("Problem occured, cached data will be saved")
except (NotImplementedError) as e:
_LOGGER.warning(str(e))
_LOGGER.warning("Entity '" + self.entity.name() + "' has not implemented Entity.analyse. Skipping.")

def save_analysed_knowledge(self):
"""Save analysed knowledge if new information was extracted."""
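The added except branch lets the iterator skip entities whose analyse is still a stub instead of aborting the whole run. A toy reproduction (the entity class here is hypothetical):

import logging

logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)


class StubEntity:
    """Hypothetical entity whose analyse() is not implemented yet."""

    def name(self) -> str:
        return "StubEntity"

    def analyse(self) -> None:
        raise NotImplementedError


entity = StubEntity()
try:
    entity.analyse()
except NotImplementedError as e:
    _LOGGER.warning(str(e))
    _LOGGER.warning("Entity '%s' has not implemented Entity.analyse. Skipping.", entity.name())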
15 changes: 15 additions & 0 deletions srcopsmetrics/kebechet_metrics.py
@@ -160,6 +160,9 @@ def _get_responded_time(issue) -> Optional[int]:
def _get_update_manager_issues(self):
update_issues = get_annotated_requests(self.issues, UPDATE_TYPES_AND_KEYWORDS)

if update_issues.empty:
return pd.DataFrame()

update_issues["time_to_respond"] = update_issues.first_response_at - update_issues.created_at

update_issues["closed_by_bot"] = update_issues.closed_by.isin(BOT_NAMES)
@@ -168,6 +171,18 @@ def _get_update_manager_issues(self):

return update_issues.sort_values(by=["created_at"])

def get_human_pull_request(self, filter_file=None) -> pd.DataFrame:
"""Get pull requests made by a human."""
if self.pull_requests.empty:
return pd.DataFrame()

requests = self.pull_requests[self.pull_requests["labels"].apply(lambda x: "bot" not in x)]

if filter_file:
requests = requests[requests["changed_files"].apply(lambda x: filter_file in x)]

return requests.sort_values(by=["created_at"]).reset_index(drop=True)

def _get_update_manager_pull_requests(self) -> pd.DataFrame:

if self.pull_requests.empty:
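get_human_pull_request filters on two list-valued columns, which is why it uses apply rather than plain boolean indexing. A sketch on a toy frame (column names match the method; the rows are invented):

import pandas as pd

pull_requests = pd.DataFrame(
    {
        "labels": [["bot"], [], ["enhancement"]],
        "changed_files": [["Pipfile.lock"], ["Pipfile.lock", "app.py"], ["app.py"]],
        "created_at": [3, 1, 2],
    }
)

# Drop bot-labelled PRs, then keep only PRs that touched the given file.
human = pull_requests[pull_requests["labels"].apply(lambda x: "bot" not in x)]
human = human[human["changed_files"].apply(lambda x: "Pipfile.lock" in x)]
human = human.sort_values(by=["created_at"]).reset_index(drop=True)

assert len(human) == 1  # only the unlabelled PR touching Pipfile.lock survives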
59 changes: 53 additions & 6 deletions srcopsmetrics/kebechet_sli_slo_metrics.py
@@ -60,6 +60,22 @@ def _get_usage_sli_advise_manager(self, kebechet_metrics):
"is_used": is_used,
}

def _get_source_code_changes_sli(self, kebechet_metrics, bot_prs):
human_prs = kebechet_metrics.get_human_pull_request(filter_file="Pipfile.lock")
total_lines_changed_by_bot = 0
total_lines_changed_by_human = 0
for _, pr in bot_prs.iterrows():
if "Pipfile.lock" in pr["changed_files_changes"]:
total_lines_changed_by_bot += pr["changed_files_changes"]["Pipfile.lock"]

for _, pr in human_prs.iterrows():
if "Pipfile.lock" in pr["changed_files_changes"]:
total_lines_changed_by_human += pr["changed_files_changes"]["Pipfile.lock"]
total_lines_changed = total_lines_changed_by_bot + total_lines_changed_by_human
return {
"total_lines_changed_by_bot": total_lines_changed_by_bot,
"percent_by_bot": total_lines_changed_by_bot / total_lines_changed if total_lines_changed else 0.0,
}

def _evaluate_sli_slo(self, repository):

kebechet_metrics = KebechetMetrics(repository, is_local=self.is_local)
@@ -69,22 +85,41 @@ def _evaluate_sli_slo(self, repository):
usage_sli_version = self._get_usage_sli_version_manager(kebechet_metrics)
usage_sli_advise = self._get_usage_sli_advise_manager(kebechet_metrics)

# merge data into one dataframe with unique indeces
source_code_changes_sli_update = self._get_source_code_changes_sli(
kebechet_metrics, kebechet_metrics._get_update_manager_pull_requests()
)
source_code_changes_sli_advise = self._get_source_code_changes_sli(
kebechet_metrics, kebechet_metrics._get_advise_manager_pull_requests()
)

# merge data into one dataframe with unique indices
data = {
"advise": usage_sli_advise,
"version": usage_sli_version,
"update": usage_sli_update,
"advise": {**usage_sli_advise, **source_code_changes_sli_advise},
"version": {**usage_sli_version},
"update": {**usage_sli_update, **source_code_changes_sli_update},
"missing_issue_metrics": kebechet_metrics.issues.empty,
"missing_pull_request_metrics": kebechet_metrics.pull_requests.empty,
}

return data

def _get_sli_slo_for_all_managers(self) -> Tuple[Any, Any]:
"""Return a tuple of overall aggregated metrics and overall sli metrics for each repository."""
overall_sli_slo_data: Dict[str, Any] = {
"advise": {"repository_usage_count": 0},
"advise": {
"repository_usage_count": 0,
"total_source_code_lines_changed_by_bot": 0,
"percent_source_code_changes_by_bot": 0,
},
"version": {"repository_usage_count": 0},
"update": {"repository_usage_count": 0},
"update": {
"repository_usage_count": 0,
"total_source_code_lines_changed_by_bot": 0,
"percent_source_code_changes_by_bot": 0,
},
"overall_repositories": len(self.repositories),
"repositories_missing_issue_metric": 0,
"repositories_missing_pull_request_metric": 0,
}

# raw data per repository
@@ -101,6 +136,18 @@
# add data to overall manager metrics count
overall_sli_slo_data["advise"]["repository_usage_count"] += data["advise"]["is_used"]
overall_sli_slo_data["version"]["repository_usage_count"] += data["version"]["is_used"]
overall_sli_slo_data["repositories_missing_issue_metric"] += data["missing_issue_metrics"]
overall_sli_slo_data["repositories_missing_pull_request_metric"] += data["missing_pull_request_metrics"]

# add data to overall code changes count
overall_sli_slo_data["advise"]["total_source_code_lines_changed_by_bot"] += data["advise"][
"total_lines_changed_by_bot"
]
overall_sli_slo_data["advise"]["percent_source_code_changes_by_bot"] += data["advise"]["percent_by_bot"]
overall_sli_slo_data["update"]["total_source_code_lines_changed_by_bot"] += data["update"][
"total_lines_changed_by_bot"
]
overall_sli_slo_data["update"]["percent_source_code_changes_by_bot"] += data["update"]["percent_by_bot"]

# TODO: update manager & other
overall_sli_slo_data["update"]["repository_usage_count"] += (
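At its core the new source-code-changes SLI is a ratio of line counts; guarding the denominator avoids a ZeroDivisionError when neither bots nor humans touched the file. Toy numbers:

total_lines_changed_by_bot = 125
total_lines_changed_by_human = 375

total_lines_changed = total_lines_changed_by_bot + total_lines_changed_by_human
percent_by_bot = total_lines_changed_by_bot / total_lines_changed if total_lines_changed else 0.0

assert percent_by_bot == 0.25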