diff --git a/coldfront/core/allocation/templates/allocation/allocation_detail.html b/coldfront/core/allocation/templates/allocation/allocation_detail.html
index 5c4161349..bcb015be7 100644
--- a/coldfront/core/allocation/templates/allocation/allocation_detail.html
+++ b/coldfront/core/allocation/templates/allocation/allocation_detail.html
@@ -26,6 +26,9 @@

Allocation Detail

+{% if allocation.project.sf_zone and "tier" in allocation.get_parent_resource.name %}
+  View more information about your storage on Starfish
+{% endif %}
diff --git a/coldfront/core/department/models.py b/coldfront/core/department/models.py
index f57a39711..b068b389b 100644
--- a/coldfront/core/department/models.py
+++ b/coldfront/core/department/models.py
@@ -47,11 +47,9 @@ class Department(Organization):
     objects = DepartmentSelector()
     history = HistoricalRecords()
-
     class Meta:
         proxy = True
-
     def get_projects(self):
         """Get all projects related to the Department, either directly
         or indirectly.
         """
diff --git a/coldfront/core/department/templates/department/department_detail.html b/coldfront/core/department/templates/department/department_detail.html
index 8d67acadd..e484b7ae2 100644
--- a/coldfront/core/department/templates/department/department_detail.html
+++ b/coldfront/core/department/templates/department/department_detail.html
@@ -23,10 +23,10 @@
   {{ form.non_field_errors }}
 {% endif %}
-
+

FAS Research Computing Usage Report

-
+
diff --git a/coldfront/core/project/management/commands/add_default_project_choices.py b/coldfront/core/project/management/commands/add_default_project_choices.py
index 3b435f86a..d2ba4f8ae 100644
--- a/coldfront/core/project/management/commands/add_default_project_choices.py
+++ b/coldfront/core/project/management/commands/add_default_project_choices.py
@@ -28,9 +28,10 @@ def handle(self, *args, **options):
             AttributeType.objects.get_or_create(name=attribute_type)
 
         for name, attribute_type, has_usage, is_private in (
+            ('Starfish Zone', 'Int', False, False),
             # UBCCR defaults
-            ('Project ID', 'Text', False, False),
-            ('Account Number', 'Int', False, True),
+            # ('Project ID', 'Text', False, False),
+            # ('Account Number', 'Int', False, True),
         ):
             ProjectAttributeType.objects.update_or_create(
                 name=name,
diff --git a/coldfront/core/project/models.py b/coldfront/core/project/models.py
index a39b28a1b..50ee2425b 100644
--- a/coldfront/core/project/models.py
+++ b/coldfront/core/project/models.py
@@ -135,6 +135,10 @@ def latest_publication(self):
             return self.publication_set.order_by('-created')[0]
         return None
 
+    @property
+    def sf_zone(self):
+        return self.get_attribute('Starfish Zone')
+
     @property
     def needs_review(self):
         """
@@ -215,12 +219,37 @@ def has_perm(self, user, perm):
         perms = self.user_permissions(user)
         return perm in perms
 
+    def get_attribute(self, name):
+        """
+        Params:
+            name (str): name of the project attribute type
+
+        Returns:
+            str: value of the first attribute found for this project
+                with the specified name
+        """
+        attr = self.projectattribute_set.filter(proj_attr_type__name=name).first()
+        if attr:
+            return attr.value
+        return None
+
+    def get_attribute_list(self, name):
+        """
+        Params:
+            name (str): name of the project attribute type
+
+        Returns:
+            list: the values of all attributes found with the specified name
+        """
+        attrs = self.projectattribute_set.filter(proj_attr_type__name=name)
+        return [a.value for a in attrs]
+
     def __str__(self):
         return self.title
 
     def natural_key(self):
         return (self.title,) + self.pi.natural_key()
 
+
 class ProjectAdminComment(TimeStampedModel):
     """ A project admin comment is a comment that an admin can make on a project.
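A minimal usage sketch for the new accessors, assuming a project that carries a
'Starfish Zone' attribute (the project title below is hypothetical):

    # get_attribute returns the value of the first matching attribute, or None;
    # the sf_zone property is shorthand for the 'Starfish Zone' lookup, so
    # templates can test {% if project.sf_zone %}.
    from coldfront.core.project.models import Project

    project = Project.objects.get(title='smith_lab')  # hypothetical title
    assert project.sf_zone == project.get_attribute('Starfish Zone')
    # get_attribute_list returns every matching value (possibly an empty list)
    zone_ids = project.get_attribute_list('Starfish Zone')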

 Project Allo
 {% if storage_allocations %}
-
-Storage
-
+
+Storage
+{% if project.sf_zone %}
+View detailed storage allocation information on Starfish
+{% endif %}
+
diff --git a/coldfront/core/project/views.py b/coldfront/core/project/views.py
index 80550d652..71d221c46 100644
--- a/coldfront/core/project/views.py
+++ b/coldfront/core/project/views.py
@@ -670,7 +670,7 @@ def post(self, request, *args, **kwargs):
 
         if 'coldfront.plugins.ldap' in settings.INSTALLED_APPS:
             try:
-                ldap_conn.add_member_to_group(
+                ldap_conn.add_user_to_group(
                     user_obj.username, project_obj.title,
                 )
                 logger.info(
diff --git a/coldfront/core/utils/common.py b/coldfront/core/utils/common.py
index 66207e1a1..64a04246d 100644
--- a/coldfront/core/utils/common.py
+++ b/coldfront/core/utils/common.py
@@ -36,6 +36,11 @@ def write(self, value):
         """Write the value by returning it, instead of storing in a buffer."""
         return value
 
+def uniques_and_intersection(list1, list2):
+    intersection = list(set(list1) & set(list2))
+    list1_unique = list(set(list1) - set(list2))
+    list2_unique = list(set(list2) - set(list1))
+    return (list1_unique, intersection, list2_unique)
 
 def su_login_callback(user):
     """Only superusers are allowed to login as other users
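The helper moved into core keeps set semantics; a sketch of its contract with
sample lists (element order within each returned list is not guaranteed, since
the implementation round-trips through sets):

    # Partition two lists into left-only, shared, and right-only elements.
    from coldfront.core.utils.common import uniques_and_intersection

    cf_titles = ['alpha_lab', 'beta_lab', 'gamma_lab']  # sample data
    ad_groups = ['beta_lab', 'gamma_lab', 'delta_lab']  # sample data
    cf_only, both, ad_only = uniques_and_intersection(cf_titles, ad_groups)
    assert set(cf_only) == {'alpha_lab'}
    assert set(both) == {'beta_lab', 'gamma_lab'}
    assert set(ad_only) == {'delta_lab'}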
diff --git a/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py b/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py
index 731d4bd71..7d0f292cf 100644
--- a/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py
+++ b/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py
@@ -56,7 +56,7 @@ def handle(self, *args, **options):
         save_json(result_file, resp_json_by_lab)
 
         # Remove allocations for labs not in Coldfront, add those labs to a list
-        result_json_cleaned, proj_models = match_entries_with_projects(resp_json_by_lab)
+        result_cleaned, proj_models = match_entries_with_projects(resp_json_by_lab)
 
         redash_api = StarFishRedash()
         allocation_usages = redash_api.return_query_results(query='subdirectory')
@@ -67,7 +67,7 @@ def handle(self, *args, **options):
             project.status = ProjectStatusChoice.objects.get(name='Active')
             project.save()
 
-        for lab, allocations in result_json_cleaned.items():
+        for lab, allocations in result_cleaned.items():
             project = proj_models.get(title=lab)
             for entry in allocations:
                 lab_name = entry['lab']
diff --git a/coldfront/plugins/fasrc/utils.py b/coldfront/plugins/fasrc/utils.py
index 7d6f545e1..b1af615f8 100644
--- a/coldfront/plugins/fasrc/utils.py
+++ b/coldfront/plugins/fasrc/utils.py
@@ -31,9 +31,9 @@ def produce_query_statement(self, vol_type, volumes=None):
                 'match': "(e:Quota) MATCH (d:ConfigValue {Name: 'Quota.Invocation'})",
                 'server': 'filesystem',
                 'validation_query':
-                    "NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024)) \
-                    AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate)) \
-                    AND NOT (e.Path IS null)",
+                    "NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024)) \
+                     AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate)) \
+                     AND NOT (e.Path IS null)",
                 'r_updated': 'DotsLFSUpdateDate',
                 'storage_type': 'Quota',
                 'usedgb': 'usedGB',
@@ -49,10 +49,10 @@ def produce_query_statement(self, vol_type, volumes=None):
                 'match': "(e:IsilonPath) MATCH (d:ConfigValue {Name: 'IsilonPath.Invocation'})",
                 'server': 'Isilon',
                 'validation_query': "r.DotsUpdateDate = d.DotsUpdateDate \
-                    AND NOT (e.Path =~ '.*/rc_admin/.*')\
-                    AND (e.Path =~ '.*labs.*')\
-                    AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate)) \
-                    AND NOT (e.SizeGB = 0)",
+                     AND NOT (e.Path =~ '.*/rc_admin/.*')\
+                     AND (e.Path =~ '.*labs.*')\
+                     AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate)) \
+                     AND NOT (e.SizeGB = 0)",
                 'fs_path': 'Path',
                 'r_updated': 'DotsUpdateDate',
                 'storage_type': 'Isilon',
@@ -253,6 +253,7 @@ def pair_allocations_data(project, quota_dicts):
         dicts = [
             d for d in quota_dicts
             if d['fs_path'] and allocation.path.lower() == d['fs_path'].lower()
+            and d['server'] in allocation.resources.first().name
         ]
         if dicts:
             log_message = f'Path-based match: {allocation}, {allocation.path}, {dicts[0]}'
diff --git a/coldfront/plugins/ldap/management/commands/id_add_new_projects.py b/coldfront/plugins/ldap/management/commands/id_add_new_projects.py
index cef580423..724e60dcd 100644
--- a/coldfront/plugins/ldap/management/commands/id_add_new_projects.py
+++ b/coldfront/plugins/ldap/management/commands/id_add_new_projects.py
@@ -45,7 +45,8 @@ def handle(self, *args, **kwargs):
             'sAMAccountName': groups, 'managedBy': '*'
         }, attributes=['sAMAccountName'])
-        ad_group_names = [group['sAMAccountName'][0] for group in ad_groups] # get all AD group names
+        # get all AD group names
+        ad_group_names = [group['sAMAccountName'][0] for group in ad_groups]
         # remove AD groups that already have a corresponding ColdFront project
         ad_only = list(set(ad_group_names) - set(project_titles))
         errortracker = {
@@ -72,7 +73,7 @@ def handle(self, *args, **kwargs):
         added_projects, errortracker = add_new_projects(groupusercollections, errortracker)
         print(f"added {len(added_projects)} projects: ", [a[0] for a in added_projects])
         print("errs: ", errortracker)
-        logger.warning(errortracker)
+        logger.warning("errors: %s", errortracker)
         not_added = [
             {'title': i, 'info': k} for k, v in errortracker.items() for i in v
         ]
diff --git a/coldfront/plugins/ldap/utils.py b/coldfront/plugins/ldap/utils.py
index 03b740c2f..608201583 100644
--- a/coldfront/plugins/ldap/utils.py
+++ b/coldfront/plugins/ldap/utils.py
@@ -12,7 +12,9 @@
 from ldap3.extend.microsoft.addMembersToGroups import ad_add_members_to_groups
 from ldap3.extend.microsoft.removeMembersFromGroups import ad_remove_members_from_groups
 
-from coldfront.core.utils.common import import_from_settings
+from coldfront.core.utils.common import (
+    import_from_settings, uniques_and_intersection
+)
 from coldfront.core.field_of_science.models import FieldOfScience
 from coldfront.core.utils.fasrc import (
     id_present_missing_users,
@@ -32,8 +34,7 @@
 username_ignore_list = import_from_settings('username_ignore_list', [])
 
 class LDAPConn:
-    """
-    LDAP connection object
+    """LDAP connection object
     """
 
     def __init__(self, test=False):
@@ -159,35 +160,34 @@ def return_group_by_name(self, groupname, return_as='dict'):
             raise ValueError("no groups returned")
         return group[0]
 
-    def add_member_to_group(self, user_name, group_name):
-        # get group
+    def add_user_to_group(self, user_name, group_name):
         group = self.return_group_by_name(group_name)
-        # get user
-        try:
-            user = self.return_user_by_name(user_name)
-        except ValueError as e:
-            raise e
+        user = self.return_user_by_name(user_name)
+        self.add_member_to_group(user, group)
+
+    def add_group_to_group(self, group_name, parent_group_name):
+        group = self.return_group_by_name(group_name)
+        parent_group = self.return_group_by_name(parent_group_name)
+        self.add_member_to_group(group, parent_group)
+
+    def add_member_to_group(self, member, group):
         group_dn = group['distinguishedName']
-        user_dn = user['distinguishedName']
+        member_dn = member['distinguishedName']
         try:
-            result = ad_add_members_to_groups(self.conn, [user_dn], group_dn, fix=True)
+            result = ad_add_members_to_groups(
+                self.conn, [member_dn], group_dn, fix=True)
         except Exception as e:
             raise e
         return result
 
     def remove_member_from_group(self, user_name, group_name):
         # get group
-        try:
-            group = self.return_group_by_name(group_name)
-        except ValueError as e:
-            raise e
+        group = self.return_group_by_name(group_name)
         # get user
-        try:
-            user = self.return_user_by_name(user_name)
-        except ValueError as e:
-            raise e
+        user = self.return_user_by_name(user_name)
         if user['gidNumber'] == group['gidNumber']:
-            raise ValueError("group is user's primary group - please contact FASRC support to remove this user from your group.")
+            raise ValueError(
+                "Group is user's primary group. Please contact FASRC support "
+                "to remove this user from your group.")
         group_dn = group['distinguishedName']
         user_dn = user['distinguishedName']
         try:
@@ -327,13 +327,6 @@ def format_template_assertions(attr_search_dict, search_operator='and'):
         search_filter = f'({match_operator[search_operator]}'+search_filter+')'
     return search_filter
 
-def uniques_and_intersection(list1, list2):
-    intersection = list(set(list1) & set(list2))
-    list1_unique = list(set(list1) - set(list2))
-    list2_unique = list(set(list2) - set(list1))
-    return (list1_unique, intersection, list2_unique)
-
-
 def is_string(value):
     return isinstance(value, str)
 
@@ -348,7 +341,7 @@ def sort_dict_on_conditional(dict1, condition):
 def cleaned_membership_query(proj_membs_mans):
     search_errors, proj_membs_mans = sort_dict_on_conditional(proj_membs_mans, is_string)
     if search_errors:
-        logger.error('could not return members and manager for some groups:\n%s',
+        logger.error('could not return members and managers for some groups: %s',
                      search_errors)
     return proj_membs_mans, search_errors
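A sketch of how the refactored membership helpers compose after this change,
assuming a configured LDAP connection (user and group names are hypothetical):
both user and nested-group adds now resolve a distinguishedName and funnel
through the same add_member_to_group call.

    from coldfront.plugins.ldap.utils import LDAPConn

    ldap_conn = LDAPConn()
    ldap_conn.add_user_to_group('jsmith', 'smith_lab')           # hypothetical names
    ldap_conn.add_group_to_group('smith_lab', 'starfish_users')  # group-in-group add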
diff --git a/coldfront/plugins/sftocf/management/commands/update_zones.py b/coldfront/plugins/sftocf/management/commands/update_zones.py
new file mode 100644
index 000000000..1ff6cf026
--- /dev/null
+++ b/coldfront/plugins/sftocf/management/commands/update_zones.py
@@ -0,0 +1,162 @@
+"""Add, update, and remove Starfish zones
+- Add zones/zone paths
+    - Collect all projects that have active allocations on Starfish
+    - Separate projects by whether a zone with the same name currently exists
+    - Create zones for all projects that don't yet have one
+    - For the projects that do have zones, ensure that the corresponding zone:
+        - has the project AD group in "managing_groups"
+        - has all the allocation paths associated with the project
+- Remove zones/zone paths
+    - Collect all projects with allocations that were deactivated since the last
+      successful run of the DjangoQ task, or in the past week
+    - If the project no longer has any active allocations on Starfish, remove the zone
+"""
+import logging
+
+from requests.exceptions import HTTPError
+
+from django.core.management.base import BaseCommand
+
+from coldfront.core.project.models import Project, ProjectAttributeType
+from coldfront.plugins.sftocf.utils import StarFishServer
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = 'Add, update, and remove Starfish zones based on Coldfront projects'
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            '--dry-run',
+            action='store_true',
+            help='Make no changes to Starfish; just print the changes that would be slated',
+        )
+
+    def handle(self, *args, **options):
+        dry_run = options['dry_run']
+        if dry_run:
+            print('DRY RUN')
+
+        report = {
+            'dry_run': dry_run,
+            'deleted_zones': [],
+            'created_zones': [],
+            'allocations_missing_paths': [],
+            'added_zone_ids': [],
+            'updated_zone_paths': [],
+            'updated_zone_groups': [],
+        }
+
+        sf = StarFishServer()
+        starfish_zone_attr_type = ProjectAttributeType.objects.get(name='Starfish Zone')
+        # collect all projects that have active allocations on Starfish
+        projects_with_allocations = Project.objects.filter(
+            status__name='Active',
+            allocation__status__name='Active',
+            allocation__resources__in=sf.get_corresponding_coldfront_resources(),
+            title__in=sf.get_groups()  # confirm the projects have groups in Starfish
+        ).distinct()
+
+        projects_with_zones = projects_with_allocations.filter(
+            projectattribute__proj_attr_type=starfish_zone_attr_type,
+        )
+        # for the projects that do have zones, ensure that the corresponding zone:
+        sf_cf_vols = sf.get_volumes_in_coldfront()
+        for project in projects_with_zones:
+            zone_id = project.sf_zone
+            zone = sf.get_zones(zone_id)
+
+            # has all the allocation paths associated with the project
+            storage_allocations = project.allocation_set.filter(
+                status__name='Active',
+                resources__in=sf.get_corresponding_coldfront_resources(),
+            )
+            zone_paths_not_in_cf = [
+                p for p in zone['paths'] if p.split(':')[0] not in sf_cf_vols
+            ]
+            # don't update if any paths are missing
+            missing_paths = False
+            for a in storage_allocations:
+                if a.path == '':
+                    missing_paths = True
+                    report['allocations_missing_paths'].append(a.pk)
+                    logger.error(
+                        'Allocation %s (%s) is missing a path; cannot update zone until this is fixed',
+                        a.pk, a)
+            if missing_paths:
+                continue
+            paths = [
+                f'{a.resources.first().name.split("/")[0]}:{a.path}'
+                for a in storage_allocations
+            ] + zone_paths_not_in_cf
+            if set(paths) != set(zone['paths']):
+                if not dry_run:
+                    sf.update_zone(zone['name'], paths=paths)
+                report['updated_zone_paths'].append({
+                    'zone': zone['name'],
+                    'old_paths': zone['paths'],
+                    'new_paths': paths,
+                })
+
+            # has the project AD group in "managing_groups"
+            update_groups = zone['managing_groups']
+            zone_group_names = [g['groupname'] for g in update_groups]
+            if project.title not in zone_group_names:
+                if not dry_run:
+                    update_groups.append({'groupname': project.title})
+                    sf.update_zone(zone['name'], managing_groups=update_groups)
+                report['updated_zone_groups'].append({
+                    'zone': zone['name'],
+                    'old_groups': zone_group_names,
+                    'new_groups': zone_group_names + [project.title],
+                })
+
+        # if a project lacks the "Starfish Zone" attribute, create or locate the
+        # zone and save the zone id to a "Starfish Zone" ProjectAttribute
+        projects_without_zones = projects_with_allocations.exclude(
+            projectattribute__proj_attr_type=starfish_zone_attr_type,
+        )
+        for project in projects_without_zones:
+            if not dry_run:
+                try:
+                    zone = sf.zone_from_project(project)
+                    report['created_zones'].append(project.title)
+                except HTTPError as e:
+                    if e.response.status_code == 409:
+                        logger.warning(
+                            'zone for %s already exists; adding zone id to Project',
+                            project.title)
+                        zone = sf.get_zone_by_name(project.title)
+                        report['added_zone_ids'].append([project.title, zone['id']])
+                    elif e.response.status_code == 402:
+                        logger.error('zone quota reached; can no longer add any zones.')
+                        continue
+                    else:
+                        err = f'unclear error prevented creation of zone for project {project.title}. error: {e.response}'
+                        logger.error(err)
+                        print(err)
+                        continue
+                except ValueError as e:
+                    err = f"error for project {project.title} (if no groups were returned, the LDAP group doesn't exist): {e}"
+                    logger.error(err)
+                    print(err)
+                    continue
+                project.projectattribute_set.get_or_create(
+                    proj_attr_type=starfish_zone_attr_type,
+                    value=zone['id'],
+                )
+            else:
+                report['created_zones'].append(project.title)
+
+        # check whether to delete zones of projects with no active SF storage allocations
+        potential_delete_zone_attr_projs = Project.objects.filter(
+            projectattribute__proj_attr_type__name='Starfish Zone'
+        ).exclude(
+            title__in=[p.title for p in projects_with_allocations]
+        )
+        for project in potential_delete_zone_attr_projs:
+            zone = sf.get_zones(project.sf_zone)
+            zone_paths_not_in_cf = [
+                p for p in zone['paths'] if p.split(':')[0] not in sf_cf_vols
+            ]
+            # delete any zone that has no paths outside Coldfront's purview
+            if not zone_paths_not_in_cf:
+                if not dry_run:
+                    sf.delete_zone(zone['id'])
+                    # delete the projectattribute
+                    project.projectattribute_set.get(
+                        proj_attr_type=starfish_zone_attr_type,
+                    ).delete()
+                report['deleted_zones'].append(zone['name'])
+        print(report)
+        logger.warning(report)
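A possible way to exercise the new command, assuming the sftocf plugin is
installed; with dry_run=True the command only reports the zone changes it
would make:

    # Hypothetical invocation from a Django shell or a scheduled DjangoQ task;
    # dry_run=True maps to the --dry-run flag.
    from django.core.management import call_command

    call_command('update_zones', dry_run=True)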
diff --git a/coldfront/plugins/sftocf/utils.py b/coldfront/plugins/sftocf/utils.py
index f6985a033..e583b1ee0 100644
--- a/coldfront/plugins/sftocf/utils.py
+++ b/coldfront/plugins/sftocf/utils.py
@@ -1,15 +1,19 @@
-import re
+import operator
 from operator import itemgetter
 from itertools import groupby
+from functools import reduce
 import time
 import logging
 from pathlib import Path
 from datetime import datetime, timedelta
 
 import requests
+from django.conf import settings
+from django.db.models import Q
 from django.utils import timezone
 
-from coldfront.core.utils.common import import_from_settings
+from coldfront.core.utils.common import (
+    import_from_settings, uniques_and_intersection)
 from coldfront.core.utils.fasrc import (
     read_json,
     save_json,
@@ -19,6 +23,7 @@
     locate_or_create_dirpath,
 )
 from coldfront.core.resource.models import Resource, ResourceAttributeType
+from coldfront.core.project.models import Project
 from coldfront.core.allocation.models import (
     Allocation,
     AllocationUser,
@@ -26,10 +31,12 @@
     AllocationAttributeUsage,
     AllocationUserStatusChoice,
 )
+if 'coldfront.plugins.ldap' in settings.INSTALLED_APPS:
+    from coldfront.plugins.ldap.utils import LDAPConn
 
 DATESTR = datetime.today().strftime('%Y%m%d')
-DATAPATH = "./coldfront/plugins/sftocf/data/"
-STARFISH_SERVER = import_from_settings('STARFISH_SERVER', '')
+DATAPATH = './coldfront/plugins/sftocf/data/'
+STARFISH_SERVER = import_from_settings('STARFISH_SERVER', 'starfish')
 
 logger = logging.getLogger(__name__)
 
@@ -50,11 +57,83 @@ def call(*args, **kwargs):
         return call
 
 
-# Define filter for allocations to be slated for collection here as you see fit
-ALLOCATIONS_FOR_COLLECTION = Allocation.objects.filter(
-    status__name__in=['Active'],
-    resources__resource_type__name='Storage'
-)
+def zone_report():
+    """Check SF zone alignment with pipeline specs.
+    Report on:
+        Coldfront projects with storage allocations vs. SF zones
+        AD groups corresponding to SF zones that don't belong to AD group starfish_groups
+        SF zones that lack allocations corresponding to Coldfront project allocations
+        SF zones that don't have groups
+        SF zones that have users as opposed to groups
+    """
+    report = {
+        'projects_with_allocations_no_zones': [],
+        'zones_with_no_projects': [],
+        'zones_with_no_groups': [],
+        'zones_with_users': [],
+    }
+    # start by getting all zones
+    server = StarFishServer(STARFISH_SERVER)
+    # get list of all zones on the server
+    zones = server.get_zones()
+
+    # get all projects with at least one storage allocation
+    projects = Project.objects.filter(
+        allocation__status__name__in=['Active', 'Updated', 'Ready for Review'],
+        allocation__resources__in=server.get_corresponding_coldfront_resources(),
+    ).distinct()
+    # check which of these projects have zones
+    project_titles = [p.title for p in projects]
+    zone_names = [z['name'] for z in zones]
+    projs_no_zones, projs_with_zones, zones_no_projs = uniques_and_intersection(
+        project_titles, zone_names)
+    report['projs_with_zones'] = {
+        z['name']: z['id'] for z in zones if z['name'] in projs_with_zones
+    }
+    report['projects_with_allocations_no_zones'] = projs_no_zones
+    report['zones_with_no_projects'] = zones_no_projs
+    no_group_zones = [z['name'] for z in zones if not z['managing_groups']]
+    report['zones_with_no_groups'] = no_group_zones
+    user_zones = [z for z in zones if z['managers']]
+    report['zones_with_users'] = [
+        f"{z['name']}: {z['managers']}" for z in user_zones
+    ]
+    report_nums = {k: len(v) for k, v in report.items()}
+    for r in [report, report_nums]:
+        print(r)
+        logger.warning(r)
+
+def allocation_to_zone(allocation):
+    """
+    1. Check whether the allocation is in Starfish.
+    2. If so, check whether a zone exists for the allocation's project.
+    3. If not, create a zone for the allocation's project.
+    4. Add the allocation to the zone.
+    """
+    server = StarFishServer(STARFISH_SERVER)
+    resource = allocation.resources.first()
+    if not any(sf_res in resource.title for sf_res in server.volumes):
+        return None
+    project = allocation.project
+    zone = server.get_zone_by_name(project.title)
+    if zone:
+        zone_paths = zone['paths']
+        new_path = f"{allocation.resources.first().name.split('/')[0]}:{allocation.path}"
+        zone_paths.append(new_path)
+        server.update_zone(zone['name'], paths=zone_paths)
+    else:
+        zone = server.zone_from_project(project)
+    return zone
+
+def add_zone_group_to_ad(group_name):
+    if 'coldfront.plugins.ldap' in settings.INSTALLED_APPS:
+        ldap_conn = LDAPConn()
+        try:
+            ldap_conn.add_group_to_group(group_name, 'starfish_users')
+        except Exception as e:
+            # no exception is raised if the group is already a member;
+            # an exception is raised if the group doesn't exist
+            error = f'Error adding {group_name} to starfish_users: {e}'
+            print(error)
+            logger.warning(error)
+            raise
 
 
 class StarFishServer:
@@ -87,11 +166,89 @@ def get_auth_token(self):
 
     def get_volume_names(self):
         """Generate a list of the volumes available on the server.
         """
-        url = self.api_url + 'storage/'
-        response = return_get_json(url, self.headers)
-        volnames = [i['name'] for i in response['items']]
+        response = self.get_volume_attributes()
+        volnames = [i['vol'] for i in response]
         return volnames
 
+    def get_groups(self):
+        """Get the set of group names on Starfish"""
+        url = self.api_url + 'mapping/group/'
+        response = return_get_json(url, self.headers)
+        groupnames = {g['name'] for g in response}
+        return groupnames
+
+    def get_zones(self, zone_id=''):
+        """Get all zones from the API, or the zone with the corresponding ID"""
+        url = self.api_url + f'zone/{zone_id}'
+        response = return_get_json(url, self.headers)
+        return response
+
+    def get_zone_by_name(self, zone_name):
+        """Get a zone by name"""
+        zones = self.get_zones()
+        return next((z for z in zones if z['name'] == zone_name), None)
+
+    def create_zone(self, zone_name, paths, managers, managing_groups):
+        """Create a zone via the API"""
+        url = self.api_url + 'zone/'
+        data = {
+            "name": zone_name,
+            "paths": paths,
+            "managers": managers,
+            "managing_groups": managing_groups,
+        }
+        logger.debug(data)
+        response = return_post_json(url, data=data, headers=self.headers)
+        logger.debug(response)
+        return response
+
+    def delete_zone(self, zone_id, zone_name=None):
+        """Delete a zone via the API"""
+        if not zone_id:
+            zone = self.get_zone_by_name(zone_name)
+            zone_id = zone['id']
+        url = self.api_url + f'zone/{zone_id}'
+        response = requests.delete(url, headers=self.headers)
+        return response
+
+    def update_zone(self, zone_name, paths=None, managers=None, managing_groups=None):
+        """Update a zone via the API. Unspecified fields keep their current values."""
+        zone_data = self.get_zone_by_name(zone_name)
+        zone_id = zone_data['id']
+        url = self.api_url + f'zone/{zone_id}/'
+        data = {'name': zone_name}
+        data['paths'] = paths if paths else zone_data['paths']
+        data['managers'] = managers if managers else zone_data['managers']
+        data['managing_groups'] = managing_groups if managing_groups else zone_data['managing_groups']
+        for group in data['managing_groups']:
+            add_zone_group_to_ad(group['groupname'])
+        response = return_put_json(url, data=data, headers=self.headers)
+        return response
+
+    def zone_from_project(self, project_obj):
+        """Create a zone from a project object"""
+        zone_name = project_obj.title
+        paths = [
+            f"{a.resources.first().name.split('/')[0]}:{a.path}"
+            for a in project_obj.allocation_set.filter(
+                status__name__in=['Active', 'New', 'Updated', 'Ready for Review'],
+                resources__in=self.get_corresponding_coldfront_resources()
+            )
+            if a.path
+        ]
+        managing_groups = [{'groupname': project_obj.title}]
+        add_zone_group_to_ad(project_obj.title)
+        # zones are managed through AD groups rather than individual users
+        return self.create_zone(zone_name, paths, [], managing_groups)
+
+    def get_corresponding_coldfront_resources(self):
+        resources = Resource.objects.filter(
+            reduce(operator.or_, (Q(name__contains=x) for x in self.volumes))
+        )
+        return resources
+
     def get_volumes_in_coldfront(self):
         resource_volume_list = [r.name.split('/')[0] for r in Resource.objects.all()]
         return [v for v in self.volumes if v in resource_volume_list]
@@ -109,7 +266,7 @@ def get_tags(self):
 
     def get_scans(self):
         """Collect scans of all volumes in Coldfront
         """
-        volumes = '&'.join([f'volume={volume}' for volume in self.get_volumes_in_coldfront()])
+        volumes = '&'.join([f'volume={v}' for v in self.get_volumes_in_coldfront()])
         url = self.api_url + 'scan/?' + volumes
         response_raw = requests.get(url, headers=self.headers)
         response = response_raw.json()
@@ -124,7 +281,8 @@ def get_most_recent_scans(self):
         volumes = self.get_volumes_in_coldfront()
         for volume in volumes:
             latest_time = max(
-                s['creation_time'] for s in scans['scans'] if s['volume'] == volume
+                s['creation_time'] for s in scans['scans']
+                if s['volume'] == volume
             )
             latest_scan = next(
                 s for s in scans['scans']
@@ -208,6 +366,13 @@ def __init__(self, server_name=STARFISH_SERVER):
         self.base_url = f'https://{server_name}.rc.fas.harvard.edu/redash/api/'
         self.queries = import_from_settings('REDASH_API_KEYS')
 
+    def get_corresponding_coldfront_resources(self):
+        volumes = [r['volume_name'] for r in self.get_vol_stats()]
+        resources = Resource.objects.filter(
+            reduce(operator.or_, (Q(name__contains=x) for x in volumes))
+        )
+        return resources
+
     def submit_query(self, queryname):
         """submit a query and return a json of the results.
         """
@@ -223,7 +388,7 @@ def get_vol_stats(self):
             k.replace(' ', '_').replace('(','').replace(')','') : v
             for k, v in d.items()
         } for d in result]
         resource_names = [
-            re.sub(r'\/.+','',n) for n in Resource.objects.values_list('name',flat=True)
+            n.split('/')[0] for n in Resource.objects.values_list('name',flat=True)
         ]
         result = [r for r in result if r['volume_name'] in resource_names]
         return result
@@ -269,8 +434,7 @@ def post_async_query(self, query, group_by, volpath, qformat='parent_path +aggrs'):
             'humanize_nested': 'false',
             'mount_agent': 'None',
         }
-        r = requests.post(query_url, params=params, headers=self.headers)
-        response = r.json()
+        response = return_post_json(query_url, params=params, headers=self.headers)
         logger.debug('response: %s', response)
         return response['query_id']
 
@@ -294,14 +458,20 @@ def return_get_json(url, headers):
     response = requests.get(url, headers=headers)
     return response.json()
 
+def return_put_json(url, data, headers):
+    response = requests.put(url, json=data, headers=headers)
+    response.raise_for_status()
+    return response.json()
+
+def return_post_json(url, params=None, data=None, headers=None):
+    response = requests.post(url, params=params, json=data, headers=headers)
+    response.raise_for_status()
+    return response.json()
+
 def generate_headers(token):
     """Generate 'headers' attribute by using the 'token' attribute.
     """
-    headers = {
-        'accept': 'application/json',
-        'Authorization': 'Bearer {}'.format(token),
-    }
-    return headers
+    return {'accept': 'application/json', 'Authorization': f'Bearer {token}'}
 
 
 class AllocationQueryMatch:
@@ -397,9 +567,16 @@ class UsageDataPipelineBase:
     """Base class for usage data pipeline classes."""
 
     def __init__(self, volume=None):
-        self.volume = volume
-        self.resource_volumes = None
         self.connection_obj = self.return_connection_obj()
+        if volume:
+            self.volumes = [volume]
+        else:
+            self.volumes = self.connection_obj.get_corresponding_coldfront_resources()
+
+        self.allocations = Allocation.objects.filter(
+            status__name__in=['Active', 'New', 'Updated', 'Ready for Review'],
+            resources__in=self.volumes
+        )
         # self.collection_filter = self.set_collection_parameters()
         self.sf_user_data = self.collect_sf_user_data()
         self.sf_usage_data = self.collect_sf_usage_data()
@@ -425,12 +602,7 @@ def clean_collected_data(self):
         logger.debug('allocation_usage:\n%s', self.sf_usage_data)
 
         # limit allocations to those in the volumes collected
-        vols_collected = {e['volume'] for e in self.sf_usage_data}
-        searched_resources = [Resource.objects.get(name__contains=vol) for vol in vols_collected]
-        allocations = Allocation.objects.filter(
-            resources__in=searched_resources,
-            status__name__in=['Active', 'New', 'Updated', 'Ready for Review']
-        ).prefetch_related('project','allocationattribute_set', 'allocationuser_set')
+        allocations = self.allocations.prefetch_related(
+            'project', 'allocationattribute_set', 'allocationuser_set')
         allocationquerymatch_objects = match_allocations_with_usage_entries(
             allocations, self.sf_user_data, self.sf_usage_data
         )
@@ -541,10 +713,8 @@ def return_connection_obj(self):
 
     def collect_sf_user_data(self):
         """Collect starfish data using the Redash API. Return the results."""
-        # 1. grab data from redash
-        resource_volume_list = [r.name.split('/')[0] for r in Resource.objects.all()]
         user_usage = self.connection_obj.return_query_results(
-            query='path_usage_query', volumes=resource_volume_list
+            query='path_usage_query', volumes=self.volumes
         )
         for d in user_usage:
             d['username'] = d.pop('user_name')
@@ -553,9 +723,8 @@
         return user_usage
 
     def collect_sf_usage_data(self):
-        resource_volume_list = [r.name.split('/')[0] for r in Resource.objects.all()]
         allocation_usage = self.connection_obj.return_query_results(
-            query='subdirectory', volumes=resource_volume_list
+            query='subdirectory', volumes=self.volumes
         )
         for d in allocation_usage:
             d['username'] = d.pop('user_name')
@@ -569,6 +738,7 @@ class RESTDataPipeline(UsageDataPipelineBase):
     def return_connection_obj(self):
         return StarFishServer()
 
+    @record_process
     def produce_lab_dict(self):
         """Create dict of lab/volume combinations to collect and the volumes
         associated with them.
@@ -582,7 +752,7 @@ def produce_lab_dict(self):
         Structured as follows:
         'lab_name': [('volume', 'path'), ('volume', 'path')]
         """
-        pr_objs = ALLOCATIONS_FOR_COLLECTION.only('id', 'project')
+        pr_objs = self.allocations.only('id', 'project')
         labs_resources = {allocation.project.title: [] for allocation in pr_objs}
         for allocation in pr_objs:
             proj_name = allocation.project.title
@@ -590,9 +760,11 @@
             if resource:
                 vol_name = resource.name.split('/')[0]
             else:
-                print("no resource for allocation owned by", proj_name)
+                message = f'no resource for allocation owned by {proj_name}'
+                print(message)
+                logger.error(message)
                 continue
-            if self.volume and resource != self.volume:
+            if resource not in self.volumes:
                 continue
             if allocation.path:
                 labs_resources[proj_name].append((vol_name, allocation.path))
@@ -620,7 +792,7 @@ def check_volume_collection(self, lr):
         filepaths = []
         to_collect = []
         labs_resources = [(l, res) for l, r in lr.items() for res in r]
-        logger.debug("labs_resources:%s", labs_resources)
+        logger.debug('labs_resources:%s', labs_resources)
 
         yesterdaystr = (datetime.today()-timedelta(1)).strftime("%Y%m%d")
         dates = [yesterdaystr, DATESTR]
@@ -642,10 +814,12 @@ def clean_dirs_data(self, data):
         """Clean sequence for the data produced from the usage query.
         """
         data = [d for d in data if d['username'] != 'root']
+        items_to_pop = ['physical_nlinks_size_sum', 'rec_aggrs', 'fn', 'count',
+            'physical_nlinks_size_sum_hum', 'size_sum_hum', 'volume_display_name']
         for entry in data:
             # entry['size_sum'] = entry['rec_aggrs']['size']
             # entry['full_path'] = entry['parent_path']+'/'+entry['fn']
-            for item in ['physical_nlinks_size_sum', 'rec_aggrs', 'fn', 'count', 'physical_nlinks_size_sum_hum', 'size_sum_hum', 'volume_display_name']:
+            for item in items_to_pop:
                 entry.pop(item, None)
         # remove any directory that is a subdirectory of a directory owned by the same user
         return data
@@ -662,7 +836,7 @@ def collect_sf_user_data(self):
         for volume in vols:
             # volumepath = svp["volumes"][volume]
             projects = [t for t in to_collect if t[1] == volume]
-            logger.debug("vol: %s\nto_collect_subset: %s", volume, projects)
+            logger.debug('vol: %s\nto_collect_subset: %s', volume, projects)
 
             ### OLD METHOD ###
             for tup in projects:
@@ -716,10 +890,12 @@ def collect_sf_usage_data(self):
         vol_set = {i[1] for i in lab_res}
         vols = [vol for vol in vol_set if vol in svp['volumes']]
         entries = []
+        items_to_remove = ['size_sum_hum', 'rec_aggrs', 'physical_nlinks_size_sum',
+            'physical_nlinks_size_sum_hum', 'volume_display_name', 'count', 'fn']
         for volume in vols:
-            volumepath = svp["volumes"][volume]
+            volumepath = svp['volumes'][volume]
             projects = [t for t in lab_res if t[1] == volume]
-            logger.debug("vol: %s\nto_collect_subset: %s", volume, projects)
+            logger.debug('vol: %s\nto_collect_subset: %s', volume, projects)
 
             ### OLD METHOD ###
             for tup in projects:
@@ -753,27 +929,46 @@ def collect_sf_usage_data(self):
                     'project': p,
                     'date': DATESTR,
                 })
-                for item in ['count', 'size_sum_hum','rec_aggrs','fn', 'volume_display_name', 'physical_nlinks_size_sum', 'physical_nlinks_size_sum_hum']:
+                for item in items_to_remove:
                     entry.pop(item)
                 entries.append(entry)
         return entries
 
-def pull_resource_data():
+def pull_resource_data(source='rest_api'):
     """Pull data from starfish and save to ResourceAttribute objects"""
-    starfish_redash = StarFishRedash(STARFISH_SERVER)
-    volumes = starfish_redash.get_vol_stats()
-    volumes = [
-        {
-            'name': vol['volume_name'],
-            'attrs': {
-                'capacity_tb': vol['capacity_TB'],
-                'free_tb': vol['free_TB'],
-                'file_count': vol['regular_files'],
+    if source == 'rest_api':
+        sf = StarFishServer(STARFISH_SERVER)
+        volumes = sf.get_volume_attributes()
+        volumes = [
+            {
+                'name': vol['vol'],
+                'attrs': {
+                    'capacity_tb': vol['total_capacity']/(1024**4),
+                    'free_tb': vol['free_space']/(1024**4),
+                    'file_count': vol['number_of_files'],
+                }
             }
-        }
-        for vol in volumes
-    ]
+            for vol in volumes
+        ]
+    elif source == 'redash':
+        sf = StarFishRedash(STARFISH_SERVER)
+        volumes = sf.get_vol_stats()
+        volumes = [
+            {
+                'name': vol['volume_name'],
+                'attrs': {
+                    'capacity_tb': vol['capacity_TB'],
+                    'free_tb': vol['free_TB'],
+                    'file_count': vol['regular_files'],
+                }
+            }
+            for vol in volumes
+        ]
+    else:
+        raise ValueError('source must be "rest_api" or "redash"')
+
     # collect user and lab counts, allocation sizes for each volume
     res_attr_types = ResourceAttributeType.objects.all()
@@ -781,8 +976,9 @@ def pull_resource_data():
         resource = Resource.objects.get(name__contains=volume['name'])
 
         for attr_name, attr_val in volume['attrs'].items():
-            attr_type_obj = res_attr_types.get(name=attr_name)
-            resource.resourceattribute_set.update_or_create(
-                resource_attribute_type=attr_type_obj,
-                defaults={'value': attr_val}
-            )
+            if attr_val:
+                attr_type_obj = res_attr_types.get(name=attr_name)
+                resource.resourceattribute_set.update_or_create(
+                    resource_attribute_type=attr_type_obj,
+                    defaults={'value': attr_val}
+                )
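A sketch of the read-modify-write semantics update_zone now implements (zone
name and path below are hypothetical): any field left as None is re-sent with
its current server-side value, so a caller can change paths without touching
managers or managing_groups.

    from coldfront.plugins.sftocf.utils import StarFishServer

    server = StarFishServer()
    server.update_zone(
        'smith_lab',                     # hypothetical zone name
        paths=['holylfs10:/smith_lab'],  # hypothetical 'volume:path' entry
    )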
diff --git a/coldfront/plugins/slurm/management/commands/slurm_sync.py b/coldfront/plugins/slurm/management/commands/slurm_sync.py
index 8531eaf68..0b7af1dce 100644
--- a/coldfront/plugins/slurm/management/commands/slurm_sync.py
+++ b/coldfront/plugins/slurm/management/commands/slurm_sync.py
@@ -41,6 +41,7 @@ def _cluster_from_dump(self, cluster, file=None):
                 slurm_cluster = SlurmCluster.new_from_stream(fh)
         except SlurmError as e:
             logger.error('Failed to dump Slurm cluster %s: %s', cluster, e)
+            raise
         slurm_cluster.pull_fairshares()
         return slurm_cluster