diff --git a/dev/.gitignore b/dev/.gitignore
new file mode 100644
index 00000000..927f8cce
--- /dev/null
+++ b/dev/.gitignore
@@ -0,0 +1,5 @@
+# Ignore Python virtual env.
+venv/
+
+# Ignore GitHub personal access token.
+github.secret
diff --git a/dev/requirements.frozen.txt b/dev/requirements.frozen.txt
new file mode 100644
index 00000000..5757d7bd
--- /dev/null
+++ b/dev/requirements.frozen.txt
@@ -0,0 +1,10 @@
+certifi==2018.11.29
+chardet==3.0.4
+Deprecated==1.2.4
+idna==2.7
+pkg-resources==0.0.0
+PyGithub==1.43.3
+PyJWT==1.7.1
+requests==2.20.1
+urllib3==1.24.1
+wrapt==1.10.11
diff --git a/dev/requirements.txt b/dev/requirements.txt
new file mode 100644
index 00000000..945b116a
--- /dev/null
+++ b/dev/requirements.txt
@@ -0,0 +1 @@
+PyGithub
diff --git a/dev/work-log-parser.py b/dev/work-log-parser.py
index 57a82280..0392c33a 100755
--- a/dev/work-log-parser.py
+++ b/dev/work-log-parser.py
@@ -1,21 +1,32 @@
 #!/usr/bin/env python3
 
 # This script parses the semi-formatted work-log.md file, and writes some of
-# the data out to a spreadsheet.
+# the data out to a SQLite DB.
 #
-# The resulting spreadsheet will be structured as follows:
-# - Sheet: Issues by Days
-#   - Columns: GitHub Issues
-#   - Rows: Days
+# The resulting SQLite DB will contain all of the time log entries with
+# durations. Where noted, it will also tie issue numbers to those time log
+# entries.
+#
+# Configuration:
+# * The `GITHUB_REPO` constant in this file specifies the repo that issues
+#   will be pulled from.
+# * The directory this script is run from should have a `github.secret`
+#   file, containing a GitHub personal access token with `repo:public_repo`
+#   scope.
 #
 # Usage:
-# $ ./work-log-parser.py work-log.md work-log.csv
+# $ ./work-log-parser.py work-log.md work-log.sqlite3
 
 import sys
 import os
 import collections
 import re
 import datetime
+import sqlite3
+import contextlib
+
+# The GitHub repo that issues will be pulled from.
+GITHUB_REPO = 'karlmdavis/rps-tourney'
 
 def main():
     """
@@ -30,11 +41,30 @@ def main():
         raise ValueError("File to parse not found: " + work_log_path)
     output_path = sys.argv[2]
 
+    # Read in the GitHub access token.
+    github_token_path = 'github.secret'
+    if not os.path.exists(github_token_path):
+        raise ValueError("GitHub token file not found: " + github_token_path)
+    with open(github_token_path, 'r') as github_token_file:
+        github_token = github_token_file.read().strip()
+
     # Parse the file.
     work_log_entries = parse_work_log(work_log_path)
 
-    # Write out the file as a spreadsheet.
-    write_data(work_log_entries, output_path)
+    # Create the output DB.
+    connection = create_db(output_path)
+
+    try:
+        # Insert all of the project's GitHub issues to the DB.
+        insert_github_issues(connection, github_token)
+
+        # Insert all of the time log entries to the DB.
+        insert_work_log_entries(connection, work_log_entries)
+
+        # Create some analysis utilities in the DB.
+        create_analysis_utils(connection)
+    finally:
+        connection.close()
 
 def parse_work_log(work_log_path):
     """
@@ -92,29 +122,195 @@ def parse_work_log(work_log_path):
             log_entries.append(LogEntry(None, None, None, None, line_number, line))
 
     return log_entries
 
-def write_data(work_log_entries, output_path):
+def create_db(output_path):
+    """
+    Creates/recreates a blank SQLite DB at the specified path.
+
+    Args:
+        output_path (str): The path to create the DB at.
+
+    Returns:
+        A Connection handle for the new SQLite DB.
+    """
+
+    # Create/recreate the output DB.
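+    # Note: contextlib.suppress() swallows the FileNotFoundError that
+    # os.remove() raises when no previous DB file exists, so a first run
+    # needs no existence check.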
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(output_path)
+    conn = sqlite3.connect(output_path)
+
+    return conn
+
+def create_cursor(connection):
+    """
+    Creates a Cursor handle to the specified SQLite DB.
+
+    Args:
+        connection: The SQLite DB Connection to get a Cursor for.
+
+    Returns:
+        A Cursor handle to the specified SQLite DB.
+    """
+
+    # Create the Cursor.
+    cursor = connection.cursor()
+
+    # Enable FKs, because, horrifyingly, they aren't by default.
+    cursor.execute('PRAGMA foreign_keys = ON')
+
+    return cursor
+
+def insert_github_issues(connection, github_token):
+    """
+    Inserts the project's GitHub issues to the specified DB.
+
+    Args:
+        connection: The SQLite DB Connection to save data to.
+        github_token (str): The GitHub personal access token to use.
+
+    Returns:
+        (nothing)
+    """
+
+    with contextlib.closing(create_cursor(connection)) as cursor:
+        # Create DB schema.
+        cursor.execute('''CREATE TABLE github_issues (
+            id INTEGER PRIMARY KEY,
+            title TEXT NOT NULL,
+            created_at TEXT NOT NULL,
+            closed_at TEXT,
+            is_bug BOOLEAN NOT NULL
+        )''')
+
+        # Retrieve all of the issues from GitHub.
+        from github import Github
+        github_client = Github(github_token)
+        repo = github_client.get_repo(GITHUB_REPO)
+        issues = repo.get_issues(state='all')
+
+        # INSERT all of the entries and issue refs.
+        for issue in issues:
+            cursor.execute('INSERT INTO github_issues VALUES (?,?,?,?,?)',
+                (issue.number, issue.title, issue.created_at,
+                issue.closed_at,
+                any("bug" == label.name for label in issue.labels)))
+
+        # Commit all of that.
+        connection.commit()
+
+def insert_work_log_entries(connection, work_log_entries):
     """
-    Outputs the specified work log data to the specified file.
+    Inserts the specified work log data to the specified DB.
 
     Args:
-        work_log_entries (str): The list of LogEntry tuples to write out.
-        output_path (str): The file to write out to.
+        connection: The SQLite DB Connection to save data to.
+        work_log_entries (list): The list of LogEntry tuples to insert.
 
     Returns:
         (nothing)
     """
 
-    with open(output_path, 'w') as output_file:
-        output_file.write("date,issue_number,duration_minutes,times,line_number,text\n")
+    with contextlib.closing(create_cursor(connection)) as cursor:
+        # Create DB schema.
+        cursor.execute('''CREATE TABLE work_log_entries (
+            id INTEGER PRIMARY KEY,
+            date TEXT NOT NULL,
+            duration_minutes INTEGER NOT NULL,
+            issue_id INTEGER,
+            FOREIGN KEY(issue_id) REFERENCES github_issues(id)
+        )''')
+
+        # INSERT all of the entries and issue refs.
         for log_entry in work_log_entries:
-            output_file.write("{},{},{},\"{}\",{},\"{}\"\n".format(
-                log_entry.date or "", log_entry.issue_number or "",
-                log_entry.duration or "", log_entry.times or "",
-                log_entry.line_number, log_entry.text))
+            if log_entry.date and log_entry.duration:
+                cursor.execute('INSERT INTO work_log_entries VALUES (?,?,?,?)',
+                    (None, log_entry.date, log_entry.duration,
+                    log_entry.issue_number))
+
+        # Commit all of that.
+        connection.commit()
+
+def create_analysis_utils(connection):
+    """
+    Creates some analysis utilities (e.g. views) in the specified DB.
+
+    Args:
+        connection: The SQLite DB Connection to use.
+
+    Returns:
+        (nothing)
+    """
+
+    with contextlib.closing(create_cursor(connection)) as cursor:
+        # Create view for issue summaries.
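+        # The view below is two SELECTs glued together with UNION ALL: the
+        # first aggregates logged time per issue (plus a NULL-id row for
+        # entries not tied to any issue), and the second adds issues that
+        # have no logged time at all.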
+        cursor.execute('''CREATE VIEW issue_analysis
+            (id, title, is_closed, is_bug, duration_minutes,
+            date_worked_first, date_worked_last,
+            dates_worked_count, dates_worked_elapsed)
+            AS
+            SELECT
+                github_issues.id,
+                github_issues.title,
+                CASE WHEN github_issues.closed_at IS NULL
+                    THEN 0
+                    ELSE 1
+                END,
+                github_issues.is_bug,
+                SUM(work_log_entries.duration_minutes),
+                MIN(work_log_entries.date),
+                MAX(work_log_entries.date),
+                COUNT(DISTINCT work_log_entries.date),
+                CAST((julianday(MAX(work_log_entries.date))
+                    - julianday(MIN(work_log_entries.date)))
+                    AS INT) + 1
+            FROM work_log_entries
+            LEFT JOIN github_issues
+                ON work_log_entries.issue_id = github_issues.id
+            GROUP BY github_issues.id
+            UNION ALL
+            SELECT
+                github_issues.id,
+                github_issues.title,
+                CASE WHEN github_issues.closed_at IS NULL
+                    THEN 0
+                    ELSE 1
+                END,
+                github_issues.is_bug,
+                SUM(work_log_entries.duration_minutes),
+                MIN(work_log_entries.date),
+                MAX(work_log_entries.date),
+                COUNT(DISTINCT work_log_entries.date),
+                CAST((julianday(MAX(work_log_entries.date))
+                    - julianday(MIN(work_log_entries.date)))
+                    AS INT) + 1
+            FROM github_issues
+            LEFT JOIN work_log_entries
+                ON work_log_entries.issue_id = github_issues.id
+            WHERE work_log_entries.issue_id IS NULL
+            GROUP BY github_issues.id
+            ORDER BY github_issues.id ASC
+            ''')
+
+        # Create view for overall summary.
+        cursor.execute('''CREATE VIEW overall_analysis
+            (date_worked_first, date_worked_last,
+            dates_worked_count, dates_worked_elapsed,
+            total_duration_minutes, total_duration_hours,
+            average_worked_minutes_per_day_worked)
+            AS
+            SELECT
+                MIN(date),
+                MAX(date),
+                COUNT(DISTINCT date),
+                CAST((julianday(MAX(date))
+                    - julianday(MIN(date))) AS INT) + 1,
+                SUM(duration_minutes),
+                (SUM(duration_minutes) / 60.0),
+                (SUM(duration_minutes) * 1.0 / COUNT(DISTINCT date))
+            FROM work_log_entries
+            ''')
+
+        # Commit all of that.
+        connection.commit()
 
 # If this file is being run as a standalone script, call the main() function.
 # (Otherwise, do nothing.)
 if __name__ == "__main__":
     main()
-
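
For reviewers who want to smoke-test the result: a minimal sketch of how the generated DB might be queried, assuming the script was run as in the usage example above. The output filename and the print formatting here are illustrative, not part of this change.

#!/usr/bin/env python3
# Minimal sketch: dump the overall_analysis view from the generated DB.
# Assumes the DB was created via: ./work-log-parser.py work-log.md work-log.sqlite3
import contextlib
import sqlite3

with contextlib.closing(sqlite3.connect('work-log.sqlite3')) as connection:
    cursor = connection.cursor()
    cursor.execute('SELECT * FROM overall_analysis')
    # cursor.description holds one tuple per result column; name is item [0].
    columns = [description[0] for description in cursor.description]
    for row in cursor.fetchall():
        for column, value in zip(columns, row):
            print('{}: {}'.format(column, value))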