Skip to content

Commit

Permalink
Merge pull request #2 from socrse/add_tests
Browse files Browse the repository at this point in the history
Parse CSV columns into a MultiIndex
  • Loading branch information
milliams authored Sep 18, 2023
2 parents 86d9f18 + a608fb9 commit 3263194
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 15 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Continuous integration: run the pytest suite on every push and pull request
# that targets the master branch.
name: Test

on:
  push:
    branches:
      - "master"
  pull_request:
    branches:
      - "master"

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          # Quoted so YAML keeps the version as the string "3.10" rather than
          # parsing it as the number 3.1.
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt -r requirements-dev.txt
      - name: Test with pytest
        run: |
          python -m pytest -v
10 changes: 4 additions & 6 deletions count_votes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,17 @@
votes = parse_google_form(args.ballots, token_col=args.token_col)
valid_tokens = parse_tokens(args.tokens)

votes = votes.loc[:, votes.columns.str.startswith(f"{args.question} [")]
votes.columns = votes.columns.str.replace(r".*\[(.*)\].*", lambda m: m.group(1), regex=True)

votes = votes[args.question]

valid_votes, invalid_votes = filter_valid(votes, valid_tokens)

# Tally up each resolution
resolutions = {}
resolution_counts = {}
for resolution in valid_votes.columns:
this_vote = valid_votes[resolution]
resolutions[resolution] = count_votes_simple(this_vote)
resolution_counts[resolution] = count_votes_simple(this_vote)

resolutions = pd.DataFrame(resolutions, index=["approve", "reject", "abstain", "total"]).transpose()
resolutions = pd.DataFrame(resolution_counts, index=["approve", "reject", "abstain", "total"]).transpose()
resolutions["total_votes"] = resolutions["approve"] + resolutions["reject"]
resolutions["approve_percent"] = (resolutions["approve"] / resolutions["total_votes"]) * 100
resolutions["reject_percent"] = (resolutions["reject"] / resolutions["total_votes"]) * 100
Expand Down
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest
mypy
pandas-stubs
32 changes: 32 additions & 0 deletions tests/test_voting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pathlib import Path

import pytest
import pandas as pd

from utils import parse_google_form, run_stv, count_votes_simple


@pytest.fixture(scope="module")
def simple_file():
    """Parse the sample ballot file that sits next to this test module.

    Module-scoped, so the CSV is parsed once and shared by the tests below.
    """
    csv_path = Path(__file__).resolve().with_name("votes.csv")
    return parse_google_form(csv_path, "Token")


def test_parse_csv(simple_file):
    """Check the shape of the parsed form: string data and two-level columns."""
    token_index = simple_file.index
    assert pd.api.types.is_string_dtype(token_index)
    assert token_index.is_unique

    # Every column must have been read as strings, not inferred numerics.
    for _, column in simple_file.items():
        assert pd.api.types.is_string_dtype(column)

    # The question is the outer column level, the option the inner one.
    assert "Resolutions" in simple_file.columns
    resolutions = simple_file["Resolutions"]
    assert "Resolution 1" in resolutions


def test_stv(simple_file):
    """A one-seat STV count over the required ranking elects "Person 1"."""
    elected = run_stv(simple_file, "Rank candidates required", 1).get_winners()
    assert len(elected) == 1
    assert elected[0].name == "Person 1"


def test_simple(simple_file):
    """Tally "Resolution 1" from the sample ballots.

    The sample data holds 2 approve, 3 reject and 2 abstain votes; the last
    element presumably counts only approve + reject -- confirm against
    ``count_votes_simple``.
    """
    resolution_1 = simple_file["Resolutions"]["Resolution 1"]
    assert count_votes_simple(resolution_1) == (2, 3, 2, 5)
8 changes: 8 additions & 0 deletions tests/votes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"Timestamp","Token","Rank candidates required [Person 3]","Rank candidates required [Person 2]","Rank candidates required [Person 4]","Rank candidates required [Person 1]","Rank candidates optional [Person 2]","Rank candidates optional [Person 1]","Rank candidates optional [Person 4]","Rank candidates optional [Person 3]","Resolutions [Resolution 1]","Resolutions [Resolution 2]","dropdown","multiple choice","Checkboxes","tickbox grid [Row 1]","tickbox grid [Row 2]","tickbox grid 2 [Row 1]","tickbox grid 2 [Row 2]"
"2023/08/10 11:10:56 am CET","jdjfghdj","3","2","4","1","2","1","4","","Approve","Reject","","","","","","",""
"2023/08/10 11:11:16 am CET","ghghdgn","2","3","4","1","","1","2","3","Reject","Approve","","","","","","",""
"2023/08/10 11:11:41 am CET","jmhmjm","4","3","2","1","1","4","3","2","Abstain","Reject","","","","","","",""
"2023/08/10 11:11:56 am CET","hfgdhdfg","1","4","2","3","","","","","Abstain","Abstain","","","","","","",""
"2023/08/10 11:56:46 am CET","dfgdfadf","3","4","1","2","3","4","2","1","Reject","Approve","","","","","","",""
"2023/08/10 11:57:11 am CET","gdgdv","4","3","2","1","","2","","3","Approve","Abstain","Option 2","Option 2","Option 1;Option 2","Column 1;Column 2","","",""
"2023/08/10 11:58:51 am CET","fghgshds","4","3","2","1","","","","2","Reject","Approve","","","","Column 1","Column 2","Column 2","Column 1"
32 changes: 23 additions & 9 deletions utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import re
from pathlib import Path

import pandas as pd
import pyrankvote as rv


def run_stv(votes: pd.DataFrame, question, seats):
votes = votes.loc[:, votes.columns.str.startswith(f"{question} [")]
votes.columns = votes.columns.str.replace(r".*\[(.*)\].*", lambda m: m.group(1), regex=True)
def run_stv(votes: pd.DataFrame, question: str, seats: int) -> rv.helpers.ElectionResults:
votes = votes[question]
votes = votes.astype(int)

candidates = {c: rv.Candidate(c) for c in votes.columns}

Expand All @@ -16,26 +17,39 @@ def run_stv(votes: pd.DataFrame, question, seats):
return r


def parse_tokens(token_file: Path) -> set[str]:
    """Read the issued voting tokens from a text file, one token per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped: otherwise a stray empty line would make the empty string a
    valid token, and ``parse_google_form`` reads a missing token cell as
    ``""``, so ballots submitted without any token would be accepted.

    Args:
        token_file: Path to the text file listing the tokens.

    Returns:
        The set of distinct, non-empty tokens found in the file.
    """
    with token_file.open() as tokens:
        stripped = (line.strip() for line in tokens)
        return {token for token in stripped if token}


def parse_google_form(csv_file: Path, token_col: str) -> pd.DataFrame:
    """Load a Google Forms CSV export into a token-indexed ballot table.

    Every cell is read as a string and empty cells stay ``""`` instead of
    becoming NaN. When a token appears on several rows only the last row is
    kept. The token column becomes the index and the ``Timestamp`` column is
    dropped.

    Column headers are turned into a two-level ``MultiIndex``:

    * ``"Question [Option]"`` becomes ``("Question", "Option")``;
    * any other header ``"Name"`` becomes ``("Name", "Name")``.

    The header pattern is anchored to the whole header, so a header that
    merely contains a bracketed part followed by more text is kept intact
    rather than being cut off after the closing bracket.

    Args:
        csv_file: Path to the exported CSV file.
        token_col: Name of the column holding each voter's token.

    Returns:
        The ballots, indexed by token, with ``(question, option)`` columns.
    """
    votes = pd.read_csv(csv_file, dtype=str, keep_default_na=False)
    votes = votes.drop_duplicates(subset=[token_col], keep="last")
    votes = votes.set_index(token_col).drop(columns=["Timestamp"])

    # DOTALL lets a header containing a line break still match as a whole.
    grid_header = re.compile(r"(.*) \[(.*)\]", re.DOTALL)
    headers = []
    for column in votes.columns:
        matched = grid_header.fullmatch(column)
        if matched is None:
            # Not a "Question [Option]" header: use the name on both levels.
            headers.append((column, column))
        else:
            headers.append(matched.groups())
    votes.columns = pd.MultiIndex.from_tuples(headers)
    return votes


def filter_valid(df: pd.DataFrame, tokens: set[str]) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split ballots into those cast with a recognised token and the rest.

    Args:
        df: Ballots whose index labels are the tokens voters submitted.
        tokens: The set of tokens that are accepted as valid.

    Returns:
        A ``(valid, invalid)`` pair: the rows whose index label is in
        *tokens*, and the rows whose index label is not.
    """
    valid = df.loc[df.index.intersection(tokens)]
    invalid = df.loc[df.index.difference(tokens)]
    return valid, invalid


def count_votes_simple(this_vote: pd.Series):
def count_votes_simple(this_vote: pd.Series) -> tuple[int, int, int, int]:
# If a person voted more than once, only the last vote is counted
keep_votes = this_vote[~this_vote.index.duplicated(keep="last")]
vote_counts = keep_votes.value_counts()
Expand Down

0 comments on commit 3263194

Please sign in to comment.