Skip to content

Commit

Permalink
Add plotting script and plot (#247)
Browse files Browse the repository at this point in the history
* add plotting script and plot
* fix pep8
  • Loading branch information
lilyminium authored Dec 19, 2024
1 parent 856f521 commit 971ff95
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 0 deletions.
Binary file added devtools/mdakits-registry-growth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
112 changes: 112 additions & 0 deletions devtools/plot-over-time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
A script to plot the number of packages registered over time.
You will need the following packages installed:
- PyGithub
- numpy
- pandas
- seaborn
- matplotlib
You will need a GitHub token with access to the repository to run this script.
The token should be stored in the environment variable `GH_TOKEN`.
"""
import argparse
import datetime
import re
import os

import numpy as np
import pandas as pd
from github import Github

import seaborn as sns
import matplotlib.dates as mdates
from matplotlib import pyplot as plt

REPO_NAME = "mdanalysis/MDAKits"
GITHUB_TOKEN = os.environ["GH_TOKEN"]

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--verbose",
action="store_true",
help="Print the DataFrame of results",
)


def find_prs_with_file(repo_name, github_token):
"""
Find all pull requests that added a `metadata.yaml` file to the repository.
"""
pattern = re.compile(r"metadata\.yaml$")

g = Github(github_token)
repo = g.get_repo(repo_name)

prs_with_file = []

# Iterate through all pull requests in the repository
for pr in repo.get_pulls(state='closed'):
# Check if the PR was merged
if pr.merged_at is None:
continue

# Check files changed in the PR
# look for addition of a `metadata.yaml` file
# so we ignore updates and changes to the metadata
files = pr.get_files()
for file in files:
if file.status == 'added' and re.search(pattern, file.filename):
# mostly these attributes are for debugging
prs_with_file.append({
'pr_number': pr.number,
'pr_title': pr.title,
'merged_at': pr.merged_at,
'html_url': pr.html_url
})
break

return prs_with_file


def main(
verbose: bool = False,
):
"""
Main function to plot the number of packages registered over time.
"""
results = find_prs_with_file(REPO_NAME, GITHUB_TOKEN)
df = pd.DataFrame(results)
df_cumsum = df.sort_values("merged_at")
df_cumsum["Number of MDAKits"] = np.arange(len(df)) + 1

if verbose:
print(df_cumsum)

_, ax = plt.subplots(figsize=(4, 3))
ax = sns.lineplot(
ax=ax,
data=df_cumsum,
x="merged_at",
y="Number of MDAKits"
)

ax.set_xlim((
datetime.date(2023, 1, 1),
datetime.date(2024, 11, 30)
))
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
ax.xaxis.set_minor_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
ax.set_xlabel("Added to registry")
ax.set_ylabel("Number of MDAKits")
ax.set_title("MDAKits Registry Growth")
plt.tight_layout()
plt.savefig("mdakits-registry-growth.png", dpi=300)
print("Saved plot to mdakits-registry-growth.png")


if __name__ == "__main__":
args = parser.parse_args()
main(verbose=args.verbose)

0 comments on commit 971ff95

Please sign in to comment.