Converted work log parser to SQLite from CSV.
Still not 100% convinced this is a clear win, but it _feels_ like it
should be? I dunno... Either way, it was an interesting little project.
karlmdavis committed Jan 1, 2019
1 parent c27a3b2 commit 7fac4c3
Showing 4 changed files with 231 additions and 19 deletions.
5 changes: 5 additions & 0 deletions dev/.gitignore
@@ -0,0 +1,5 @@
# Ignore Python virtual env.
venv/

# Ignore GitHub personal access token.
github.secret
10 changes: 10 additions & 0 deletions dev/requirements.frozen.txt
@@ -0,0 +1,10 @@
certifi==2018.11.29
chardet==3.0.4
Deprecated==1.2.4
idna==2.7
pkg-resources==0.0.0
PyGithub==1.43.3
PyJWT==1.7.1
requests==2.20.1
urllib3==1.24.1
wrapt==1.10.11
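(Presumably these pinned dependencies are installed into the ignored venv/ directory, e.g. via python3 -m venv venv followed by venv/bin/pip install -r requirements.frozen.txt; the exact commands are an assumption, as only the ignore rule and the requirements files appear in the commit.)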
1 change: 1 addition & 0 deletions dev/requirements.txt
@@ -0,0 +1 @@
PyGithub
234 changes: 215 additions & 19 deletions dev/work-log-parser.py
@@ -1,21 +1,32 @@
#!/usr/bin/env python3

# This script parses the semi-formatted work-log.md file, and writes some of
-# the data out to a spreadsheet.
+# the data out to a SQLite DB.
#
-# The resulting spreadsheet will be structured as follows:
-# - Sheet: Issues by Days
-#   - Columns: GitHub Issues
-#   - Rows: Days
+# The resulting SQLite DB will contain all of the time log entries with
+# durations. Where noted, it will also tie together issue numbers to those time
+# log entries.
#
+# Configuration:
+# * The `GITHUB_REPO` constant in this file specifies which project issues will
+#   be pulled from.
+# * The directory this script is run from should have a `github.secret`
+#   file, containing a GitHub personal access token with `repo:public_repo`
+#   scope.
+#
# Usage:
-# $ ./work-log-parser.py work-log.md work-log.csv
+# $ ./work-log-parser.py work-log.md work-log.sqlite3

import sys
import os
import collections
import re
import datetime
+import sqlite3
+import contextlib
+
+# The GitHub repo that issues will be pulled from.
+GITHUB_REPO = 'karlmdavis/rps-tourney'

def main():
"""
@@ -30,11 +41,30 @@ def main():
        raise ValueError("File to parse not found: " + work_log_path)
    output_path = sys.argv[2]

+    # Read in the GitHub access token.
+    github_token_path = 'github.secret'
+    if not os.path.exists(github_token_path):
+        raise ValueError("GitHub token file not found: " + github_token_path)
+    with open(github_token_path, 'r') as github_token_file:
+        github_token = github_token_file.read().strip()
+
    # Parse the file.
    work_log_entries = parse_work_log(work_log_path)

-    # Write out the file as a spreadsheet.
-    write_data(work_log_entries, output_path)
+    # Create the output DB.
+    connection = create_db(output_path)
+
+    try:
+        # Insert all of the project's GitHub issues to the DB.
+        insert_github_issues(connection, github_token)
+
+        # Insert all of the time log entries to the DB.
+        insert_work_log_entries(connection, work_log_entries)
+
+        # Create some analysis utilities in the DB.
+        create_analysis_utils(connection)
+    finally:
+        connection.close()

def parse_work_log(work_log_path):
"""
@@ -92,29 +122,195 @@ def parse_work_log(work_log_path):
        log_entries.append(LogEntry(None, None, None, None, line_number, line))
    return log_entries

-def write_data(work_log_entries, output_path):
+def create_db(output_path):
    """
    Creates/recreates a blank SQLite DB at the specified path.
    Args:
        output_path (str): The path to create the DB at.
    Returns:
        A Connection handle for the new SQLite DB.
    """

    # Create/recreate the output DB.
    with contextlib.suppress(FileNotFoundError):
        os.remove(output_path)
    conn = sqlite3.connect(output_path)

    return conn

def create_cursor(connection):
    """
    Creates a Cursor handle to the specified SQLite DB.
    Args:
        connection: The SQLite DB Connection to get a Cursor for.
    Returns:
        A Cursor handle to the specified SQLite DB.
    """

    # Create the Cursor.
    cursor = connection.cursor()

    # Enable FKs, because horrifyingly, they aren't by default.
    cursor.execute('PRAGMA foreign_keys = ON')
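    # (Note: foreign_keys is per-connection state in SQLite, not a property of
    # the DB file, which is why it gets switched on for each new connection.)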

    return cursor

def insert_github_issues(connection, github_token):
    """
    Inserts the project's GitHub issues to the specified DB.
    Args:
        connection: The SQLite DB Connection to save data to.
        github_token (str): The GitHub personal access token to use.
    Returns:
        (nothing)
    """

    with contextlib.closing(create_cursor(connection)) as cursor:
        # Create DB schema.
        cursor.execute('''CREATE TABLE github_issues (
                id INTEGER PRIMARY KEY,
                title TEXT NOT NULL,
                created_at TEXT NOT NULL,
                closed_at TEXT,
                is_bug BOOLEAN NOT NULL
            )''')

        # Retrieve all of the issues from GitHub.
        from github import Github
        github_client = Github(github_token)
        repo = github_client.get_repo(GITHUB_REPO)
        issues = repo.get_issues(state='all')
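        # (Note: GitHub's issues API, and thus get_issues() here, also returns
        # pull requests as issues; nothing below filters those out.)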

        # INSERT all of the entries and issue refs.
        for issue in issues:
            cursor.execute('INSERT INTO github_issues VALUES (?,?,?,?,?)',
                    (issue.number, issue.title, issue.created_at,
                    issue.closed_at,
                    any("bug" == label.name for label in issue.labels)))
    # Commit all of that.
    connection.commit()

def insert_work_log_entries(connection, work_log_entries):
    """
-    Outputs the specified work log data to the specified file.
+    Inserts the specified work log data to the specified DB.
    Args:
-        work_log_entries (str): The list of LogEntry tuples to write out.
-        output_path (str): The file to write out to.
+        connection: The SQLite DB Connection to save data to.
+        work_log_entries: The list of LogEntry tuples to insert.
    Returns:
        (nothing)
    """

-    with open(output_path, 'w') as output_file:
-        output_file.write("date,issue_number,duration_minutes,times,line_number,text\n")
+    with contextlib.closing(create_cursor(connection)) as cursor:
+        # Create DB schema.
+        cursor.execute('''CREATE TABLE work_log_entries (
+                id INTEGER PRIMARY KEY,
+                date TEXT NOT NULL,
+                duration_minutes INTEGER NOT NULL,
+                issue_id INTEGER,
+                FOREIGN KEY(issue_id) REFERENCES github_issues(id)
+            )''')
+
+        # INSERT all of the entries and issue refs.
        for log_entry in work_log_entries:
-            output_file.write("{},{},{},\"{}\",{},\"{}\"\n".format(
-                log_entry.date or "", log_entry.issue_number or "",
-                log_entry.duration or "", log_entry.times or "",
-                log_entry.line_number, log_entry.text))
+            if log_entry.date and log_entry.duration:
+                cursor.execute('INSERT INTO work_log_entries VALUES (?,?,?,?)',
+                        (None, log_entry.date, log_entry.duration,
+                        log_entry.issue_number))
    # Commit all of that.
    connection.commit()

def create_analysis_utils(connection):
    """
    Creates some analysis utilities (e.g. views) in the specified DB.
    Args:
        connection: The SQLite DB Connection to use.
    Returns:
        (nothing)
    """

    with contextlib.closing(create_cursor(connection)) as cursor:
        # Create view for issue summaries.
        cursor.execute('''CREATE VIEW issue_analysis
                (id, title, is_closed, is_bug, duration_minutes,
                    date_worked_first, date_worked_last,
                    dates_worked_count, dates_worked_elapsed)
                AS
                SELECT
                    github_issues.id,
                    github_issues.title,
                    CASE WHEN github_issues.closed_at IS NULL
                        THEN 0
                        ELSE 1
                    END,
                    github_issues.is_bug,
                    SUM(work_log_entries.duration_minutes),
                    MIN(work_log_entries.date),
                    MAX(work_log_entries.date),
                    COUNT(DISTINCT work_log_entries.date),
                    CAST((julianday(MAX(work_log_entries.date))
                            - julianday(MIN(work_log_entries.date)))
                        AS INT) + 1
                FROM work_log_entries
                LEFT JOIN github_issues
                    ON work_log_entries.issue_id = github_issues.id
                GROUP BY github_issues.id
                UNION ALL
                SELECT
                    github_issues.id,
                    github_issues.title,
                    CASE WHEN github_issues.closed_at IS NULL
                        THEN 0
                        ELSE 1
                    END,
                    github_issues.is_bug,
                    SUM(work_log_entries.duration_minutes),
                    MIN(work_log_entries.date),
                    MAX(work_log_entries.date),
                    COUNT(DISTINCT work_log_entries.date),
                    CAST((julianday(MAX(work_log_entries.date))
                            - julianday(MIN(work_log_entries.date)))
                        AS INT) + 1
                FROM github_issues
                LEFT JOIN work_log_entries
                    ON work_log_entries.issue_id = github_issues.id
                WHERE work_log_entries.issue_id IS NULL
                GROUP BY github_issues.id
                ORDER BY github_issues.id ASC
            ''')

        # Create view for overall summary.
        cursor.execute('''CREATE VIEW overall_analysis
                (date_worked_first, date_worked_last,
                    dates_worked_count, dates_worked_elapsed,
                    total_duration_minutes, total_duration_hours,
                    average_worked_minutes_per_day_worked)
                AS
                SELECT
                    MIN(date),
                    MAX(date),
                    COUNT(DISTINCT date),
                    CAST((julianday(MAX(date))
                        - julianday(MIN(date))) AS INT) + 1,
                    SUM(duration_minutes),
                    (SUM(duration_minutes) / 60.0),
                    (SUM(duration_minutes) * 1.0 / COUNT(DISTINCT date))
                FROM work_log_entries
            ''')
    # Commit all of that.
    connection.commit()


# If this file is being run as a standalone script, call the main() function.
# (Otherwise, do nothing.)
if __name__ == "__main__":
    main()

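For a quick sanity check of the result, the views the script creates could be queried along these lines (a minimal sketch, assuming the script has already been run to produce work-log.sqlite3; the view and column names come from the diff above):

#!/usr/bin/env python3
# Minimal sketch: inspect the views created by work-log-parser.py.
import sqlite3

connection = sqlite3.connect('work-log.sqlite3')
try:
    # One-row overall summary: dates worked, total minutes/hours, etc.
    for row in connection.execute('SELECT * FROM overall_analysis'):
        print(row)

    # Per-issue summaries, most time-consuming issues first.
    query = '''SELECT id, title, duration_minutes
            FROM issue_analysis
            ORDER BY duration_minutes DESC'''
    for row in connection.execute(query):
        print(row)
finally:
    connection.close()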