Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plotting script and plot #247

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added devtools/mdakits-registry-growth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
112 changes: 112 additions & 0 deletions devtools/plot-over-time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
A script to plot the number of packages registered over time.

You will need the following packages installed:
- PyGithub
- numpy
- pandas
- seaborn
- matplotlib

You will need a GitHub token with access to the repository to run this script.
The token should be stored in the environment variable `GH_TOKEN`.
"""
import argparse
import datetime
import re
import os

import numpy as np
import pandas as pd
from github import Github

import seaborn as sns
import matplotlib.dates as mdates
from matplotlib import pyplot as plt

REPO_NAME = "mdanalysis/MDAKits"
GITHUB_TOKEN = os.environ["GH_TOKEN"]

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--verbose",
action="store_true",
help="Print the DataFrame of results",
)


def find_prs_with_file(repo_name, github_token):
"""
Find all pull requests that added a `metadata.yaml` file to the repository.
"""
pattern = re.compile(r"metadata\.yaml$")

g = Github(github_token)
repo = g.get_repo(repo_name)

prs_with_file = []

# Iterate through all pull requests in the repository
for pr in repo.get_pulls(state='closed'):
# Check if the PR was merged
if pr.merged_at is None:
continue

# Check files changed in the PR
# look for addition of a `metadata.yaml` file
# so we ignore updates and changes to the metadata
files = pr.get_files()
for file in files:
if file.status == 'added' and re.search(pattern, file.filename):
# mostly these attributes are for debugging
prs_with_file.append({
'pr_number': pr.number,
'pr_title': pr.title,
'merged_at': pr.merged_at,
'html_url': pr.html_url
})
break

return prs_with_file


def main(
verbose: bool = False,
):
"""
Main function to plot the number of packages registered over time.
"""
results = find_prs_with_file(REPO_NAME, GITHUB_TOKEN)
df = pd.DataFrame(results)
df_cumsum = df.sort_values("merged_at")
df_cumsum["Number of MDAKits"] = np.arange(len(df)) + 1

if verbose:
print(df_cumsum)

_, ax = plt.subplots(figsize=(4, 3))
ax = sns.lineplot(
ax=ax,
data=df_cumsum,
x="merged_at",
y="Number of MDAKits"
)

ax.set_xlim((
datetime.date(2023, 1, 1),
datetime.date(2024, 11, 30)
))
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
ax.xaxis.set_minor_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
ax.set_xlabel("Added to registry")
ax.set_ylabel("Number of MDAKits")
ax.set_title("MDAKits Registry Growth")
plt.tight_layout()
plt.savefig("mdakits-registry-growth.png", dpi=300)
print("Saved plot to mdakits-registry-growth.png")


if __name__ == "__main__":
args = parser.parse_args()
main(verbose=args.verbose)