sbillinge · dragonyanglong · Jan 3, 2020 · Jan 3, 2020 · Jan 3, 2020 · Jan 5, 2020
diff --git a/.gitignore b/.gitignore
@@ -22,6 +22,7 @@ var/
 *.egg-info/
 .installed.cfg
 *.egg
+.idea
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/.travis.yml b/.travis.yml
@@ -8,7 +8,7 @@ env:
 
 matrix:
   include:
-    - python: 3.6
+    - python: 3.8
 
 install:
   # Install conda

diff --git a/news/test.rst b/news/test.rst
diff --git a/regolith/builder.py b/regolith/builder.py
@@ -12,6 +12,7 @@
 from regolith.builders.resumebuilder import ResumeBuilder
 from regolith.builders.cpbuilder import CPBuilder
 from regolith.builders.figurebuilder import FigureBuilder
+from regolith.builders.coabuilder import RecentCollaboratorsBuilder
 
 
 BUILDERS = {
@@ -28,6 +29,7 @@
     "preslist": PresListBuilder,
     "reimb": ReimbursementBuilder,
     "figure": FigureBuilder,
+    "recent-collabs": RecentCollaboratorsBuilder,
 }
 
 

diff --git a/regolith/builders/coabuilder.py b/regolith/builders/coabuilder.py
@@ -0,0 +1,227 @@
+"""Builder for Recent Collaborators."""
+
+import datetime as dt
+import os
+import sys
+import openpyxl
+from copy import copy
+from operator import itemgetter
+from dateutil.relativedelta import relativedelta
+from nameparser import HumanName
+
+from regolith.builders.basebuilder import BuilderBase
+from regolith.dates import month_to_int
+from regolith.sorters import doc_date_key, ene_date_key, position_key
+from regolith.tools import all_docs_from_collection, filter_publications, \
+    month_and_year, fuzzy_retrieval, is_since
+
+
+NUM_MONTHS = 48
+
+def mdy_date(month, day, year, **kwargs):
+    """Build a ``dt.date``; a string ``month`` is converted by ``month_to_int``."""
+    month_number = month_to_int(month) if isinstance(month, str) else month
+    return dt.date(year, month_number, day)
+
+
+def mdy(month, day, year, **kwargs):
+    """Format the date as ``MM/DD/YY`` text, zero-padding month and day."""
+    mm, dd = str(month_to_int(month)).zfill(2), str(day).zfill(2)
+    return "{}/{}/{}".format(mm, dd, str(year)[-2:])
+
+
+class RecentCollaboratorsBuilder(BuilderBase):
+    """Build NSF and DOE recent-collaborator (COA) tables for one person.
+
+    Co-authors are collected from the person's publications that fall within
+    ``NUM_MONTHS`` months of the end date, matched against the ``people`` and
+    ``contacts`` collections, and written into copies of the two templates.
+    """
+
+    btype = "recent-collabs"
+    needed_dbs = ['citations', 'people', 'contacts', 'institutions']
+    # NSF template layout: row 51 holds the reference cell style and is
+    # followed by 3 example rows; once those are deleted, A52:E66 is shifted
+    # down to make room for one row per collaborator.
+    _NSF_STYLE_ROW = 51
+    _NSF_EXAMPLE_ROWS = 3
+    _NSF_TAIL_RANGE = "A52:E66"
+    _NSF_COLUMNS = ("A", "B", "C", "D", "E")
+    # First row of the DOE template that receives collaborator data.
+    _DOE_FIRST_ROW = 8
+
+    def __init__(self, rc):
+        """Record the NSF/DOE template paths and register the build command."""
+        super().__init__(rc)
+        template_dir = os.path.join(
+            os.path.dirname(os.path.dirname(__file__)), "templates")
+        self.template = os.path.join(template_dir, "coa_template_nsf.xlsx")
+        self.template2 = os.path.join(template_dir, "coa_template_doe.xlsx")
+        self.cmds = ["excel"]
+
+    def construct_global_ctx(self):
+        """Load the collections used by this builder into ``self.gtx``."""
+        super().construct_global_ctx()
+        gtx = self.gtx
+        rc = self.rc
+        for coll in ("people", "contacts"):
+            gtx[coll] = sorted(all_docs_from_collection(rc.client, coll),
+                               key=position_key, reverse=True)
+        gtx["institutions"] = all_docs_from_collection(rc.client,
+                                                       "institutions")
+        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
+        gtx["all_docs_from_collection"] = all_docs_from_collection
+
+    def _find(self, collection, value, fields=("name", "aka", "_id"),
+              **kwargs):
+        """Look up ``value`` in ``collection`` via ``fuzzy_retrieval``."""
+        return fuzzy_retrieval(
+            all_docs_from_collection(self.rc.client, collection),
+            list(fields), value, **kwargs)
+
+    def _since_date(self):
+        """Return the start of the ``NUM_MONTHS``-month publication window.
+
+        If ``--to`` was given (``rc.to_date`` as ``YYYY-MM-DD``) the window ends
+        on that date; otherwise it ends today.
+        """
+        to_date = self.rc.to_date
+        if isinstance(to_date, str):
+            end = dt.datetime.strptime(to_date, '%Y-%m-%d').date()
+        else:
+            end = dt.date.today()
+        return end - relativedelta(months=NUM_MONTHS)
+
+    def _recent_coauthors(self, person, since_date):
+        """Return the distinct author names on ``person``'s recent papers."""
+        my_names = frozenset(person.get("aka", []) + [person["name"]])
+        pubs = filter_publications(self.gtx["citations"], my_names,
+                                   reverse=True, bold=False)
+        authors = []
+        for pub in pubs:
+            if not is_since(pub.get("year"), since_date.year,
+                            pub.get("month", 1), since_date.month):
+                continue
+            month = pub.get("month")
+            if not month:
+                print("WARNING: {} is missing month".format(pub["_id"]))
+            # casefold the stored value (not the literal) so "TBD"/"Tbd" match
+            elif str(month).casefold() == "tbd":
+                print("WARNING: month in {} is tbd".format(pub["_id"]))
+            authors.extend(pub.get('author', []))
+        # dict.fromkeys drops repeats but keeps first-seen order, so each name
+        # is looked up (and warned about) only once
+        return list(dict.fromkeys(authors))
+
+    def _resolve_collaborator(self, collab):
+        """Return ``(name, institution)`` for an author, or ``None`` if unknown.
+
+        The ``people`` collection is searched before ``contacts``.
+        """
+        doc = self._find("people", collab)
+        if doc:
+            # tolerate a missing or empty employment list
+            jobs = doc.get("employment") or [{}]
+            raw_inst = jobs[0].get("organization", "missing")
+        else:
+            doc = self._find("contacts", collab)
+            if not doc:
+                print("WARNING: {} not found in contacts. Check aka".format(
+                    collab))
+                return None
+            # .get avoids a KeyError for contacts that lack an institution
+            raw_inst = doc.get("institution", "missing")
+        inst = self._find("institutions", raw_inst)
+        if inst:
+            return doc["name"], inst["name"]
+        print("WARNING: {} missing from institutions".format(raw_inst))
+        return doc["name"], raw_inst
+
+    def _write_nsf(self, person, ppl_names):
+        """Fill the NSF template with sorted ``Last, First`` / institution rows."""
+        entries = set()
+        for full_name, inst in ppl_names:
+            # reformatting the name in last name, first name
+            *given, last_name = full_name.split()
+            entries.add((', '.join([last_name, ' '.join(given)]), inst))
+        ppl_sorted = sorted(entries, key=itemgetter(0))
+
+        ref_row = self._NSF_STYLE_ROW
+        wb = openpyxl.load_workbook(self.template)
+        ws = wb.worksheets[0]
+        # removing the example rows
+        ws.delete_rows(ref_row + 1, amount=self._NSF_EXAMPLE_ROWS)
+        ws.move_range(self._NSF_TAIL_RANGE, rows=len(ppl_sorted), cols=0,
+                      translate=True)
+        ref_cell = ws["B{}".format(ref_row)]
+        style = {attr: copy(getattr(ref_cell, attr))
+                 for attr in ("font", "border", "fill", "alignment")}
+        for row, (name, inst) in enumerate(ppl_sorted, start=ref_row + 1):
+            try:
+                ws.unmerge_cells("A{0}:E{0}".format(row))
+            except Exception:
+                # best effort: the row may simply not be merged; unlike a bare
+                # ``except`` this lets KeyboardInterrupt/SystemExit through
+                pass
+            for col in self._NSF_COLUMNS:
+                cell = ws["{}{}".format(col, row)]
+                for attr, value in style.items():
+                    setattr(cell, attr, value)
+            ws["A{}".format(row)].value = "A:"
+            ws["B{}".format(row)].value = name
+            ws["C{}".format(row)].value = inst
+        ws.delete_rows(ref_row)  # deleting the reference row
+        wb.save(os.path.join(self.bldir, "{}_nsf.xlsx".format(person["_id"])))
+
+    def _write_doe(self, person, ppl_names, person_inst_name):
+        """Fill the DOE template, omitting people at ``person_inst_name``."""
+        entries = set()
+        for full_name, inst_text in ppl_names:
+            inst = self._find("institutions", inst_text,
+                              fields=("aka", "name", "_id"),
+                              case_sensitive=False)
+            inst_name = inst.get("name", "") if inst else inst_text
+            # remove all people who are in the institution of the person
+            if inst_name != person_inst_name:
+                name = HumanName(full_name)
+                entries.add((name.last, name.first, inst_text))
+        ppl = sorted(entries, key=itemgetter(0))
+        wb = openpyxl.load_workbook(self.template2)
+        ws = wb.worksheets[0]
+        for row, (last, first, inst_text) in enumerate(
+                ppl, start=self._DOE_FIRST_ROW):
+            ws["A{}".format(row)].value = last
+            ws["B{}".format(row)].value = first
+            ws["C{}".format(row)].value = inst_text
+        wb.save(os.path.join(self.bldir, "{}_doe.xlsx".format(person["_id"])))
+
+    def excel(self):
+        """Write the NSF and DOE collaborator spreadsheets to ``self.bldir``.
+
+        Exits with a usage hint when ``--people`` does not name a known person.
+        """
+        rc = self.rc
+        since_date = self._since_date()
+        if isinstance(rc.people, str):
+            rc.people = [rc.people]
+        # check for an empty/None --people before indexing it, so the usage
+        # hint below is shown instead of a TypeError/IndexError
+        person = None
+        if rc.people:
+            person = self._find("people", rc.people[0],
+                                fields=("aka", "name", "_id"),
+                                case_sensitive=False)
+        if not person:
+            sys.exit("please rerun specifying --people PERSON")
+        home_org = (person.get("employment") or [{}])[0].get("organization")
+        home_inst = self._find("institutions", home_org)
+        # fall back to the raw organization text when it is not a known
+        # institution instead of failing on ``None.get``
+        person_inst_name = home_inst.get("name") if home_inst else home_org
+
+        # authors that cannot be resolved come back as None and are dropped
+        resolved = (self._resolve_collaborator(author) for author in
+                    self._recent_coauthors(person, since_date))
+        ppl_names = [pair for pair in resolved if pair]
+        self._write_nsf(person, ppl_names)
+        self._write_doe(person, ppl_names, person_inst_name)
diff --git a/regolith/dates.py b/regolith/dates.py
@@ -40,7 +40,7 @@
     "dec.": 12,
     "december": 12,
     "": 1,
-    "tbd": 1,
+    "tbd": 1
 }
 
 

diff --git a/regolith/templates/coa_template_doe.xlsx b/regolith/templates/coa_template_doe.xlsx
diff --git a/regolith/templates/coa_template_nsf.xlsx b/regolith/templates/coa_template_nsf.xlsx
diff --git a/regolith/templates/recentcollabs.csv b/regolith/templates/recentcollabs.csv
@@ -0,0 +1,4 @@
+%person collaborated with
+{{p['title']}} {{p['name']}}
+
+{{ my_collabs }}
diff --git a/regolith/tools.py b/regolith/tools.py
@@ -393,6 +393,7 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
         If True compute sub-awards for multi PI grants, defaults to False
     """
     grants = []
+    co_pis = {}
     total_amount = 0.0
     subaward_amount = 0.0
     for grant in input_grants:
@@ -409,6 +410,11 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
         elif multi_pi:
             grant["subaward_amount"] = person.get("subaward_amount", 0.0)
             grant["multi_pi"] = any(gets(grant["team"], "subaward_amount"))
+            if co_pis.get(person.get("name")):
+                current_amount = co_pis[person.get("name")]
+                co_pis[person.get("name")] = current_amount + person.get("subaward_amount", 0.0)
+            else:
+                 co_pis[person.get("name")] = person.get("subaward_amount", 0.0)
         else:
             if person["position"].lower() == "pi":
                 continue
@@ -422,7 +428,7 @@ def filter_grants(input_grants, names, pi=True, reverse=True, multi_pi=False):
                 grant["me"] = person
         grants.append(grant)
     grants.sort(key=ene_date_key, reverse=reverse)
-    return grants, total_amount, subaward_amount
+    return grants, total_amount, subaward_amount, co_pis
 
 
 def awards_grants_honors(p):

diff --git a/requirements/run.txt b/requirements/run.txt
@@ -7,3 +7,4 @@ xonsh
 rever
 openpyxl
 nameparser
+python-dateutil
diff --git a/tests/outputs/recent-collabs/coa_table.xlsx b/tests/outputs/recent-collabs/coa_table.xlsx
diff --git a/tests/test_builders.py b/tests/test_builders.py
@@ -19,9 +19,12 @@
     "preslist",
     "reimb",
     "figure",
+#    "review-man",
+    "recent-collabs"
 ]
 
 xls_check = ("B17", "B20", "B36")
+recent_collabs_xlsx_check = ["A51", "B51", "C51"] 
 
 
 def prep_figure():
@@ -49,7 +52,7 @@ def test_builder(bm, make_db):
         prep_figure()
     if bm == "html":
         os.makedirs("templates/static", exist_ok=True)
-    if bm == "reimb":
+    if bm == "reimb" or bm == "recent-collabs":
         subprocess.run(["regolith", "build", bm, "--no-pdf", "--people",
                         "scopatz"], check=True, cwd=repo )
     else:
@@ -63,13 +66,19 @@ def test_builder(bm, make_db):
                 if bm == "reimb":
                     actual = openpyxl.load_workbook(fn1)["T&B"]
                     actual = [str(actual[b]) for b in xls_check]
+                elif bm == "recent-collabs":
+                    actual = openpyxl.load_workbook(fn1)["NSF COA Template"]
+                    actual = [str(actual[cell]) for cell in recent_collabs_xlsx_check]
                 else:
                     with open(fn1, "r") as f:
                         actual = f.read()
                 fn2 = os.path.join(expected_base, bm, root, file)
                 if bm == "reimb":
                     expected = openpyxl.load_workbook(fn2)["T&B"]
                     expected = [str(expected[b]) for b in xls_check]
+                elif bm == "recent-collabs":
+                    expected = openpyxl.load_workbook(fn2)["NSF COA Template"]
+                    expected = [str(expected[cell]) for cell in recent_collabs_xlsx_check]
                 else:
                     with open(fn2, "r") as f:
                         expected = f.read()
@@ -93,7 +102,7 @@ def test_builder_python(bm, make_db):
         prep_figure()
     if bm == "html":
         os.makedirs("templates/static", exist_ok=True)
-    if bm == "reimb":
+    if bm == "reimb" or bm == "recent-collabs":
         main(["build", bm, "--no-pdf", "--people", "scopatz"])
     else:
         main(["build", bm, "--no-pdf"])
@@ -106,13 +115,19 @@ def test_builder_python(bm, make_db):
                 if bm == "reimb":
                     actual = openpyxl.load_workbook(fn1)["T&B"]
                     actual = [str(actual[b]) for b in xls_check]
+                elif bm == "recent-collabs":
+                    actual = openpyxl.load_workbook(fn1)["NSF COA Template"]
+                    actual = [str(actual[cell]) for cell in recent_collabs_xlsx_check]
                 else:
                     with open(fn1, "r") as f:
                         actual = f.read()
                 fn2 = os.path.join(expected_base, bm, root, file)
                 if bm == "reimb":
                     expected = openpyxl.load_workbook(fn2)["T&B"]
                     expected = [str(expected[b]) for b in xls_check]
+                elif bm == "recent-collabs":
+                    expected = openpyxl.load_workbook(fn2)["NSF COA Template"]
+                    expected = [str(expected[cell]) for cell in recent_collabs_xlsx_check]
                 else:
                     with open(fn2, "r") as f:
                         expected = f.read()