Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: filter grants collection for co-PIs of person #26

Open
wants to merge 29 commits into
base: recent_collaborators
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e396c64
initial commit of recent-collabs builder
sbillinge Jan 3, 2020
a6b9851
now extracts people it finds in author list, but only if in people co…
sbillinge Jan 3, 2020
dd4d779
WIP working and returns set of folks in people coll
sbillinge Jan 3, 2020
54e3ee1
proper name parsing in recentcollabs builder
sbillinge Jan 5, 2020
49a105b
tweaking error handling in recent_collabs
sbillinge Jan 5, 2020
8799307
adding dateutil to requirements
sbillinge Jan 10, 2020
95eff79
ENH: add needed_dbs
dragonyanglong Feb 5, 2020
2e164af
MAINT: replace sbillinge with people argument
dragonyanglong Feb 5, 2020
68145d1
catch tbd months
sbillinge Feb 15, 2020
484abe5
more friendly fail when no person is specified
sbillinge Feb 16, 2020
5db5ea0
now extracts people it finds in author list, but only if in people co…
sbillinge Jan 3, 2020
a6bc4ba
people seems to be enforced as list in p3.8]
sbillinge Feb 17, 2020
71e55dc
test file
Feb 19, 2020
f2afba3
added coa_template.xlsx
Feb 19, 2020
b6fbe30
- added script coabuilder.py filling in excel template
Feb 20, 2020
900f02f
- removed the duplicate entries
Feb 21, 2020
4ca5487
added global variable NUM_MONTHS
Feb 21, 2020
abeadf0
requirements should have python-dateutil not just dateutil
sbillinge Mar 4, 2020
0b5677d
remove missing review-man test for test_builders. This should be in a…
sbillinge Mar 4, 2020
232b17a
changing tests so that recent collabs will run with scopatz as person
sbillinge Mar 4, 2020
c7879e4
- added example outputs for testing
Mar 11, 2020
b618f81
Fixed typo and added comments about the to_date option
Mar 12, 2020
0d69ae9
Added options to test for the recent-collabs builder (comparing excel…
Mar 12, 2020
8c6e171
Merge pull request #24 from tienhungvuong/tests
sbillinge Mar 14, 2020
47849a7
adding doe template and renaming nsf template
sbillinge Mar 16, 2020
86a0e2c
Merge branch 'recent_collaborators' of github.com:sbillinge/regolith …
sbillinge Mar 16, 2020
910f290
removing old recentcollabsbuilder and changing template name in coabu…
sbillinge Mar 16, 2020
e39d6f7
version of the builder that builds the DOE template
sbillinge Mar 16, 2020
0d62d68
ENH: filter grants collection for co-PIs of person
dragonyanglong Mar 18, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
.idea

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:

matrix:
include:
- python: 3.6
- python: 3.8

install:
# Install conda
Expand Down
Empty file added news/test.rst
Empty file.
2 changes: 2 additions & 0 deletions regolith/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from regolith.builders.resumebuilder import ResumeBuilder
from regolith.builders.cpbuilder import CPBuilder
from regolith.builders.figurebuilder import FigureBuilder
from regolith.builders.coabuilder import RecentCollaboratorsBuilder


BUILDERS = {
Expand All @@ -28,6 +29,7 @@
"preslist": PresListBuilder,
"reimb": ReimbursementBuilder,
"figure": FigureBuilder,
"recent-collabs": RecentCollaboratorsBuilder,
}


Expand Down
227 changes: 227 additions & 0 deletions regolith/builders/coabuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
"""Builder for Recent Collaborators."""

import datetime as dt
import os
import sys
import openpyxl
from copy import copy
from operator import itemgetter
from dateutil.relativedelta import relativedelta
from nameparser import HumanName

from regolith.builders.basebuilder import BuilderBase
from regolith.dates import month_to_int
from regolith.sorters import doc_date_key, ene_date_key, position_key
from regolith.tools import all_docs_from_collection, filter_publications, \
month_and_year, fuzzy_retrieval, is_since


# Size, in months, of the look-back window: publications are considered
# "recent" when they fall within this many months before the end date
# (``--to`` if given, otherwise today) used by the builder below.
NUM_MONTHS = 48

def mdy_date(month, day, year, **kwargs):
    """Return a ``datetime.date`` built from month, day and year.

    A string ``month`` is first converted with ``month_to_int``; any other
    value is handed to ``datetime.date`` unchanged.  Extra keyword arguments
    are accepted and ignored.
    """
    month_number = month_to_int(month) if isinstance(month, str) else month
    return dt.date(year, month_number, day)


def mdy(month, day, year, **kwargs):
    """Return the date formatted as a ``MM/DD/YY`` string.

    ``month`` is converted with ``month_to_int``; month and day are
    zero-padded to two characters and only the last two characters of the
    year are kept.  Extra keyword arguments are accepted and ignored.
    """
    month_part = str(month_to_int(month)).zfill(2)
    day_part = str(day).zfill(2)
    year_part = str(year)[-2:]
    return "{}/{}/{}".format(month_part, day_part, year_part)


class RecentCollaboratorsBuilder(BuilderBase):
    """Build recent-collaborator (COA) spreadsheets from database entries.

    For the person selected with ``--people`` the builder gathers every
    author listed on that person's publications from the last
    ``NUM_MONTHS`` months, looks each author up in the ``people`` and then
    the ``contacts`` collection, and writes two workbooks into the build
    directory: ``<_id>_nsf.xlsx`` and ``<_id>_doe.xlsx``, each filled in from
    the matching template in the package ``templates`` directory.
    """

    btype = "recent-collabs"
    needed_dbs = ['citations', 'people', 'contacts', 'institutions']

    # Fields searched when matching a name against a collection.
    _LOOKUP_FIELDS = ["name", "aka", "_id"]
    # Row of the NSF template whose style is copied onto every added row.
    _NSF_REF_ROW = 51
    # First row of the DOE template that receives collaborator data.
    _DOE_FIRST_ROW = 8

    def __init__(self, rc):
        """Store the paths of the NSF and DOE templates and register the
        ``excel`` build command."""
        super().__init__(rc)
        template_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), "templates"
        )
        self.template = os.path.join(template_dir, "coa_template_nsf.xlsx")
        self.template2 = os.path.join(template_dir, "coa_template_doe.xlsx")
        self.cmds = ["excel"]

    def construct_global_ctx(self):
        """Load the collections used by the builder into the global context."""
        super().construct_global_ctx()
        gtx = self.gtx
        rc = self.rc

        gtx["people"] = sorted(
            all_docs_from_collection(rc.client, "people"),
            key=position_key,
            reverse=True,
        )
        gtx["contacts"] = sorted(
            all_docs_from_collection(rc.client, "contacts"),
            key=position_key,
            reverse=True,
        )
        gtx["institutions"] = all_docs_from_collection(rc.client,
                                                       "institutions")
        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
        gtx["all_docs_from_collection"] = all_docs_from_collection

    def excel(self):
        """Write the NSF and DOE collaborator workbooks for one person.

        Exits with a message (``sys.exit``) when no person was given with
        ``--people`` or when the given name matches nobody in ``people``.
        """
        rc = self.rc
        # if --to is provided:
        # use rc.to_date as the endpoint and find every publication within
        # NUM_MONTHS months of the to_date date
        # Otherwise: find every publication within NUM_MONTHS months from
        # today.
        if isinstance(rc.to_date, str):
            end_date = dt.datetime.strptime(rc.to_date, '%Y-%m-%d').date()
        else:
            end_date = dt.date.today()
        since_date = end_date - relativedelta(months=NUM_MONTHS)

        if isinstance(rc.people, str):
            rc.people = [rc.people]
        # Fail with the friendly message before indexing an empty/None value.
        if not rc.people:
            sys.exit("please rerun specifying --people PERSON")

        # Read each collection once; list() makes the documents safe to scan
        # repeatedly whatever kind of iterable the loader returns.
        people_docs = list(all_docs_from_collection(rc.client, "people"))
        contact_docs = list(all_docs_from_collection(rc.client, "contacts"))
        institution_docs = list(
            all_docs_from_collection(rc.client, "institutions"))

        person = fuzzy_retrieval(people_docs, ['aka', 'name', '_id'],
                                 rc.people[0], case_sensitive=False)
        if not person:
            sys.exit("please rerun specifying --people PERSON")

        # Name of the person's own institution, used to drop same-institution
        # collaborators from the DOE table.  Falls back to the raw
        # organization string (or None) instead of crashing when the person
        # has no employment entry or the organization is not in institutions.
        employment = person.get("employment") or [{}]
        person_org = employment[0].get("organization")
        person_inst = None
        if person_org:
            person_inst = fuzzy_retrieval(institution_docs,
                                          self._LOOKUP_FIELDS, person_org)
        person_inst_name = person_inst.get("name") if person_inst \
            else person_org

        for p in self.gtx["people"]:
            if p["_id"] != person["_id"]:
                continue
            my_names = frozenset(p.get("aka", []) + [p["name"]])
            pubs = filter_publications(self.gtx["citations"], my_names,
                                       reverse=True, bold=False)
            my_collabs = self._recent_coauthors(pubs, since_date)
            people, institutions = self._resolve_collaborators(
                my_collabs, people_docs, contact_docs, institution_docs)
            ppl_names = [(collab_person["name"], inst)
                         for collab_person, inst in zip(people, institutions)
                         if collab_person]
            self._write_nsf(person, ppl_names)
            self._write_doe(person, ppl_names, institution_docs,
                            person_inst_name)

    @staticmethod
    def _recent_coauthors(pubs, since_date):
        """Return the author entries of every publication since since_date."""
        coauthors = []
        for pub in pubs:
            if not is_since(pub.get("year"), since_date.year,
                            pub.get("month", 1), since_date.month):
                continue
            month = pub.get("month")
            if not month:
                print("WARNING: {} is missing month".format(pub["_id"]))
            # Case-fold the stored value (not the literal) so that "TBD" and
            # "Tbd" are reported too.
            if isinstance(month, str) and month.casefold() == "tbd":
                print("WARNING: month in {} is tbd".format(pub["_id"]))
            coauthors.extend(pub.get("author", []))
        return coauthors

    @classmethod
    def _resolve_collaborators(cls, collab_names, people_docs, contact_docs,
                               institution_docs):
        """Match author names to documents and institution names.

        Each name is looked up in ``people_docs`` first and ``contact_docs``
        second; names found in neither are reported and skipped.  Returns two
        parallel lists: the matched documents and their institution names.
        """
        people, institutions = [], []
        for collab in collab_names:
            collab_person = fuzzy_retrieval(people_docs, cls._LOOKUP_FIELDS,
                                            collab)
            if collab_person:
                employment = collab_person.get("employment") or [{}]
                org = employment[0].get("organization", "missing")
            else:
                collab_person = fuzzy_retrieval(contact_docs,
                                                cls._LOOKUP_FIELDS, collab)
                if not collab_person:
                    print(
                        "WARNING: {} not found in contacts. Check aka".format(
                            collab))
                    continue
                # A contact without an institution is recorded as "missing"
                # rather than raising KeyError.
                org = collab_person.get("institution", "missing")
            people.append(collab_person)
            inst = fuzzy_retrieval(institution_docs, cls._LOOKUP_FIELDS, org)
            if inst:
                institutions.append(inst["name"])
            else:
                institutions.append(org)
                print("WARNING: {} missing from institutions".format(org))
        return people, institutions

    def _write_nsf(self, person, ppl_names):
        """Fill the NSF template with ``Last, First`` names and institutions."""
        ppl = set()
        for full_name, inst_name in ppl_names:
            # reformatting the name in last name, first name
            names = full_name.split()
            last_name = names[-1]
            first_name = ' '.join(names[:-1])
            ppl.add((', '.join([last_name, first_name]), inst_name))
        ppl_sorted = sorted(ppl, key=itemgetter(0))
        num_rows = len(ppl_sorted)  # number of rows to add to the excel file

        ref_row = self._NSF_REF_ROW
        wb = openpyxl.load_workbook(self.template)
        ws = wb.worksheets[0]
        ws.delete_rows(52, amount=3)  # removing the example rows
        ws.move_range("A52:E66", rows=num_rows, cols=0, translate=True)
        style_ref_cell = ws["B{}".format(ref_row)]
        template_cell_style = {
            "font": copy(style_ref_cell.font),
            "border": copy(style_ref_cell.border),
            "fill": copy(style_ref_cell.fill),
            "alignment": copy(style_ref_cell.alignment),
        }
        for offset, (name, inst_name) in enumerate(ppl_sorted, start=1):
            row = ref_row + offset
            try:
                ws.unmerge_cells("A{}:E{}".format(row, row))
            except (ValueError, KeyError):
                # The row was not merged; nothing to undo.
                pass
            for col in ("A", "B", "C", "D", "E"):
                cell = ws["{}{}".format(col, row)]
                cell.font = template_cell_style["font"]
                cell.border = template_cell_style["border"]
                cell.fill = template_cell_style["fill"]
                cell.alignment = template_cell_style["alignment"]
            ws["A{}".format(row)].value = "A:"
            ws["B{}".format(row)].value = name
            ws["C{}".format(row)].value = inst_name
        ws.delete_rows(ref_row)  # deleting the reference row
        wb.save(os.path.join(self.bldir, "{}_nsf.xlsx".format(person["_id"])))

    def _write_doe(self, person, ppl_names, institution_docs,
                   person_inst_name):
        """Fill the DOE template, omitting the person's own institution."""
        ppl = set()
        for full_name, inst_label in ppl_names:
            inst = fuzzy_retrieval(institution_docs, ['aka', 'name', '_id'],
                                   inst_label, case_sensitive=False)
            inst_name = inst.get("name", "") if inst else inst_label
            # remove all people who are in the institution of the person
            if inst_name != person_inst_name:
                name = HumanName(full_name)
                ppl.add((name.last, name.first, inst_label))
        rows = sorted(ppl, key=itemgetter(0))

        wb = openpyxl.load_workbook(self.template2)
        ws = wb.worksheets[0]
        for row, (last, first, inst_label) in enumerate(
                rows, start=self._DOE_FIRST_ROW):
            ws["A{}".format(row)].value = last
            ws["B{}".format(row)].value = first
            ws["C{}".format(row)].value = inst_label
        wb.save(os.path.join(self.bldir, "{}_doe.xlsx".format(person["_id"])))
2 changes: 1 addition & 1 deletion regolith/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"dec.": 12,
"december": 12,
"": 1,
"tbd": 1,
"tbd": 1
}


Expand Down
Binary file added regolith/templates/coa_template_doe.xlsx
Binary file not shown.
Binary file added regolith/templates/coa_template_nsf.xlsx
Binary file not shown.
4 changes: 4 additions & 0 deletions regolith/templates/recentcollabs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
%person collaborated with
{{p['title']}} {{p['name']}}

{{ my_collabs }}
8 changes: 7 additions & 1 deletion regolith/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
If True compute sub-awards for multi PI grants, defaults to False
"""
grants = []
co_pis = {}
total_amount = 0.0
subaward_amount = 0.0
for grant in input_grants:
Expand All @@ -409,6 +410,11 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
elif multi_pi:
grant["subaward_amount"] = person.get("subaward_amount", 0.0)
grant["multi_pi"] = any(gets(grant["team"], "subaward_amount"))
if co_pis.get(person.get("name")):
current_amount = co_pis[person.get("name")]
co_pis[person.get("name")] = current_amount + person.get("subaward_amount", 0.0)
else:
co_pis[person.get("name")] = person.get("subaward_amount", 0.0)
else:
if person["position"].lower() == "pi":
continue
Expand All @@ -422,7 +428,7 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
grant["me"] = person
grants.append(grant)
grants.sort(key=ene_date_key, reverse=reverse)
return grants, total_amount, subaward_amount
return grants, total_amount, subaward_amount, co_pis


def awards_grants_honors(p):
Expand Down
1 change: 1 addition & 0 deletions requirements/run.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ xonsh
rever
openpyxl
nameparser
python-dateutil
Binary file added tests/outputs/recent-collabs/coa_table.xlsx
Binary file not shown.
19 changes: 17 additions & 2 deletions tests/test_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
"preslist",
"reimb",
"figure",
# "review-man",
"recent-collabs"
]

xls_check = ("B17", "B20", "B36")
recent_collabs_xlsx_check = ["A51", "B51", "C51"]


def prep_figure():
Expand Down Expand Up @@ -49,7 +52,7 @@ def test_builder(bm, make_db):
prep_figure()
if bm == "html":
os.makedirs("templates/static", exist_ok=True)
if bm == "reimb":
if bm == "reimb" or bm == "recent-collabs":
subprocess.run(["regolith", "build", bm, "--no-pdf", "--people",
"scopatz"], check=True, cwd=repo )
else:
Expand All @@ -63,13 +66,19 @@ def test_builder(bm, make_db):
if bm == "reimb":
actual = openpyxl.load_workbook(fn1)["T&B"]
actual = [str(actual[b]) for b in xls_check]
elif bm == "recent-collabs":
actual = openpyxl.load_workbook(fn1)["NSF COA Template"]
actual = [str(actual[cell]) for cell in recent_collabs_xlsx_check]
else:
with open(fn1, "r") as f:
actual = f.read()
fn2 = os.path.join(expected_base, bm, root, file)
if bm == "reimb":
expected = openpyxl.load_workbook(fn2)["T&B"]
expected = [str(expected[b]) for b in xls_check]
elif bm == "recent-collabs":
expected = openpyxl.load_workbook(fn2)["NSF COA Template"]
expected = [str(expected[cell]) for cell in recent_collabs_xlsx_check]
else:
with open(fn2, "r") as f:
expected = f.read()
Expand All @@ -93,7 +102,7 @@ def test_builder_python(bm, make_db):
prep_figure()
if bm == "html":
os.makedirs("templates/static", exist_ok=True)
if bm == "reimb":
if bm == "reimb" or bm == "recent-collabs":
main(["build", bm, "--no-pdf", "--people", "scopatz"])
else:
main(["build", bm, "--no-pdf"])
Expand All @@ -106,13 +115,19 @@ def test_builder_python(bm, make_db):
if bm == "reimb":
actual = openpyxl.load_workbook(fn1)["T&B"]
actual = [str(actual[b]) for b in xls_check]
elif bm == "recent-collabs":
actual = openpyxl.load_workbook(fn1)["NSF COA Template"]
actual = [str(actual[cell]) for cell in recent_collabs_xlsx_check]
else:
with open(fn1, "r") as f:
actual = f.read()
fn2 = os.path.join(expected_base, bm, root, file)
if bm == "reimb":
expected = openpyxl.load_workbook(fn2)["T&B"]
expected = [str(expected[b]) for b in xls_check]
elif bm == "recent-collabs":
expected = openpyxl.load_workbook(fn2)["NSF COA Template"]
expected = [str(expected[cell]) for cell in recent_collabs_xlsx_check]
else:
with open(fn2, "r") as f:
expected = f.read()
Expand Down