diff --git a/coldfront/core/allocation/views.py b/coldfront/core/allocation/views.py index f78a11608..27681a291 100644 --- a/coldfront/core/allocation/views.py +++ b/coldfront/core/allocation/views.py @@ -68,7 +68,7 @@ from coldfront.core.project.models import (Project, ProjectPermission, ProjectUserStatusChoice) from coldfront.core.resource.models import Resource -from coldfront.core.utils.common import get_domain_url, import_from_settings +from coldfront.core.utils.common import import_from_settings from coldfront.core.utils.mail import send_allocation_admin_email, send_allocation_customer_email @@ -393,8 +393,7 @@ def post(self, request, *args, **kwargs): ) send_allocation_customer_email( - allocation_obj, 'Allocation Activated', - 'email/allocation_activated.txt', domain_url=get_domain_url(self.request) + allocation_obj, 'Allocation Activated', 'email/allocation_activated.txt' ) if action == 'approve': messages.success(request, 'Allocation Activated!') @@ -425,7 +424,6 @@ def post(self, request, *args, **kwargs): allocation_obj, f'Allocation {allocation_obj.status.name}', f'email/allocation_{allocation_obj.status.name.lower()}.txt', - domain_url=get_domain_url(self.request), ) messages.success(request, f'Allocation {allocation_obj.status.name}!') else: @@ -748,7 +746,6 @@ def form_valid(self, form): allocation_obj, 'New Allocation Request', 'email/new_allocation_request.txt', - domain_url=get_domain_url(self.request), url_path=reverse('allocation-detail', kwargs={'pk': allocation_obj.pk}), other_vars=other_vars, ) @@ -1420,7 +1417,6 @@ def post(self, request, *args, **kwargs): allocation_obj, 'Allocation Renewed', 'email/allocation_renewed.txt', - domain_url=get_domain_url(self.request), ) messages.success(request, 'Allocation renewed successfully') @@ -1946,7 +1942,6 @@ def post(self, request, *args, **kwargs): alloc_change_obj.allocation, 'Allocation Change Denied', 'email/allocation_change_denied.txt', - domain_url=get_domain_url(self.request), ) save_and_redirect = True @@ -2056,7 +2051,6 @@ def post(self, request, *args, **kwargs): alloc_change_obj.allocation, 'Allocation Change Approved', 'email/allocation_change_approved.txt', - domain_url=get_domain_url(self.request), ) message = make_allocation_change_message(alloc_change_obj, 'APPROVED') @@ -2293,7 +2287,6 @@ def post(self, request, *args, **kwargs): 'allocation-change-detail', kwargs={'pk': allocation_change_request_obj.pk}, ), - domain_url=get_domain_url(self.request), other_vars=email_vars, ) return HttpResponseRedirect(reverse('allocation-detail', kwargs={'pk': pk})) diff --git a/coldfront/core/resource/management/commands/add_resource_defaults.py b/coldfront/core/resource/management/commands/add_resource_defaults.py index 4c3758c0c..16e772014 100644 --- a/coldfront/core/resource/management/commands/add_resource_defaults.py +++ b/coldfront/core/resource/management/commands/add_resource_defaults.py @@ -114,7 +114,7 @@ def handle(self, *args, **options): defaults={'value': default_value} ) - quantity_label = "Quantity in TB" + quantity_label = "TB" if default_value == 20: quantity_label += " in 20T increments" diff --git a/coldfront/core/utils/fasrc.py b/coldfront/core/utils/fasrc.py index d63cd7d0b..199532982 100644 --- a/coldfront/core/utils/fasrc.py +++ b/coldfront/core/utils/fasrc.py @@ -2,22 +2,27 @@ """ import os import json +import logging import operator from functools import reduce from datetime import datetime import pandas as pd +from django.conf import settings from django.db.models import Q from django.contrib.auth import get_user_model +from django.urls import reverse from ifxbilling.models import Product from coldfront.core.utils.common import import_from_settings from coldfront.core.project.models import Project from coldfront.core.resource.models import Resource +from coldfront.core.utils.mail import send_allocation_manager_email, build_link +logger = logging.getLogger(__name__) -MISSING_DATA_DIR = './local_data/missing/' - +MISSING_DATA_DIR = import_from_settings('MISSING_DATA_DIR', './local_data/missing/') +DATA_MANAGERS = import_from_settings('DATA_MANAGERS', ['General Manager, Storage Manager']) username_ignore_list = import_from_settings('username_ignore_list', []) groupname_ignore_list = import_from_settings('groupname_ignore_list', []) @@ -29,6 +34,7 @@ def get_quarter_start_end(): quarter = (datetime.today().month-1)//3 return (quarter_starts[quarter], quarter_ends[quarter]) + def sort_by(list1, sorter, how='attr'): """split one list into two on basis of each item's ability to meet a condition Parameters @@ -50,6 +56,7 @@ def sort_by(list1, sorter, how='attr'): raise Exception('unclear sorting method') return is_true, is_false + def select_one_project_allocation(project_obj, resource_obj, dirpath=None): """ Get one allocation for a given project/resource pairing; handle return of @@ -69,13 +76,15 @@ def select_one_project_allocation(project_obj, resource_obj, dirpath=None): allocation_query = project_obj.allocation_set.filter(**filter_vals) if allocation_query.count() == 1: allocation_obj = allocation_query.first() - if allocation_obj.path and dirpath and allocation_obj.path not in dirpath and dirpath not in allocation_obj.path: - return None + if allocation_obj.path and allocation_obj.path != dirpath: + logger.error("directory path mismatch:", allocation_obj.path, dirpath) elif allocation_query.count() < 1: allocation_obj = None elif allocation_query.count() > 1: - allocation_obj = next((a for a in allocation_query if a.path.lower() in dirpath.lower()), - None) + allocation_obj = next( + (a for a in allocation_query if a.path.lower() in dirpath.lower()), + 'MultiAllocationError' + ) return allocation_obj @@ -90,6 +99,7 @@ def determine_size_fmt(byte_num): byte_num /= 1024.0 return(round(byte_num, 3), unit) + def convert_size_fmt(num, target_unit, source_unit='B'): units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB'] diff = units.index(target_unit) - units.index(source_unit) @@ -103,6 +113,7 @@ def convert_size_fmt(num, target_unit, source_unit='B'): num*=1024.0 return round(num, 3) + def get_resource_rate(resource): """find Product with the name provided and return the associated rate""" try: @@ -121,6 +132,58 @@ def get_resource_rate(resource): price = convert_size_fmt(rate_obj.price, 'TB', source_unit=rate_obj.units) return round(price/100, 2) + +def allocation_reaching_capacity_operations(allocation_obj, new_byte_usage): + """if allocation_obj.usage is <80/90% of allocation_obj.limit and new_byte_usage + >80/90% of allocation_obj.limit, send email to pi and data manager. + """ + threshold = None + size_bytes = float(allocation_obj.size_exact) + for limit in [80, 90]: + if ( + allocation_obj.usage_exact/size_bytes < limit/100 + and new_byte_usage/size_bytes > limit/100 + ): + threshold = limit + if threshold: + resource = allocation_obj.get_parent_resource + # send email to pi and data manager + # define: center_name, threshold, resource, allocation_url, request_allocation_url, starfish_url, starfish_docs_url, signature + allocation_pk_dict = {'pk': allocation_obj.pk} + other_vars = { + 'threshold': threshold, + 'project_title': allocation_obj.project.title, + 'allocation_quota': f'{allocation_obj.size} {allocation_obj.get_parent_resource.quantity_label}', + 'resource': resource.name, + 'change_request_url': build_link(reverse('allocation-change', kwargs=allocation_pk_dict)), + } + if ( + 'coldfront.plugins.sftocf' in settings.INSTALLED_APPS + and 'tape' not in resource.name + and 'tier' in resource.name + and allocation_obj.project.sf_zone + ): + other_vars['starfish'] = True + STARFISH_SERVER = import_from_settings('STARFISH_SERVER') + starfish_url = f'https://{STARFISH_SERVER}.rc.fas.harvard.edu' + other_vars['starfish_url'] = starfish_url + other_vars['starfish_docs_url'] = 'https://docs.rc.fas.harvard.edu/kb/starfish-data-management/' + subject = f'Allocation Usage Warning for {allocation_obj.project.title} on {other_vars["resource"]}' + send_allocation_manager_email( + allocation_obj, + subject, + 'email/allocation_usage_high.txt', + url_path=reverse('allocation-detail', kwargs=allocation_pk_dict), + manager_types=DATA_MANAGERS, + other_vars=other_vars + ) + msg = f"email sent for allocation {allocation_obj.pk} - old usage {allocation_obj.usage_exact} ({allocation_obj.usage_exact/size_bytes}%), new usage {new_byte_usage} ({new_byte_usage/size_bytes}%), quota {allocation_obj.size_exact}" + logger.info(msg) + print(msg) + return True + return False + + def id_present_missing_resources(resourceserver_list): """ Collect all Resource entries with resources in param resourceserver_list; @@ -202,6 +265,7 @@ def log_missing(modelname, missing): """ update_csv(missing, MISSING_DATA_DIR, f'missing_{modelname}s.csv') + def slate_for_check(log_entries): """Add an issue encountered during runtime to a CSV for administrative review. @@ -214,6 +278,7 @@ def slate_for_check(log_entries): """ update_csv(log_entries, 'local_data/', 'program_error_checks.csv') + def update_csv(new_entries, dirpath, csv_name, date_update='date'): """Add or update entries in CSV, order CSV by descending date and save. diff --git a/coldfront/core/utils/mail.py b/coldfront/core/utils/mail.py index 1885d0037..2a9ce367e 100644 --- a/coldfront/core/utils/mail.py +++ b/coldfront/core/utils/mail.py @@ -49,6 +49,8 @@ def send_email(subject, body, sender, receiver_list, cc=[]): try: email = EmailMessage(subject, body, sender, receiver_list, cc=cc) + logger.info('Email sent to %s from %s with subject %s', + ','.join(receiver_list+cc), sender, subject) email.send(fail_silently=False) except SMTPException: logger.error('Failed to send email to %s from %s with subject %s', @@ -84,9 +86,9 @@ def build_link(url_path, domain_url=''): def send_allocation_admin_email( allocation_obj, subject, template_name, - url_path='', domain_url='', other_vars=None + url_path='', domain_url='', cc=[], other_vars=None ): - """Send allocation admin emails + """Send allocation-related email to system admins """ url_path = url_path or reverse('allocation-request-list') @@ -101,7 +103,6 @@ def send_allocation_admin_email( ctx['resource'] = resource_name ctx['url'] = url - cc = [] if ctx.get('user'): cc.append(ctx.get('user').email) send_email_template( @@ -113,6 +114,40 @@ def send_allocation_admin_email( cc=cc ) +def send_allocation_manager_email( + allocation_obj, subject, template_name, + url_path='', manager_types=[], domain_url='', other_vars=None +): + """Send allocation-related email to allocation pi, cc'ing managers + """ + url_path = url_path or reverse('allocation-request-list') + + url = build_link(url_path, domain_url=domain_url) + pi = allocation_obj.project.pi + pi_name = f'{pi.first_name} {pi.last_name}' + resource_name = allocation_obj.get_parent_resource + + ctx = email_template_context(other_vars) + ctx['pi_name'] = pi_name + ctx['pi_username'] = f'{pi.username}' + ctx['resource'] = resource_name + ctx['url'] = url + + cc = [] + if manager_types: + managers = allocation_obj.project.projectuser_set.filter( + role__name__in=manager_types) + cc.extend([manager.user.email for manager in managers]) + send_email_template( + f'{subject}: {pi_name} - {resource_name}', + template_name, + ctx, + EMAIL_SENDER, + [pi.email], + cc=cc + ) + + def send_allocation_customer_email( allocation_obj, subject, template_name, url_path='', domain_url='' diff --git a/coldfront/core/utils/tests.py b/coldfront/core/utils/tests.py index 3f0fa4dc8..cb265a940 100644 --- a/coldfront/core/utils/tests.py +++ b/coldfront/core/utils/tests.py @@ -10,15 +10,19 @@ email_template_context, send_allocation_admin_email, send_allocation_customer_email, - CENTER_BASE_URL, build_link, logger ) +from coldfront.core.utils.fasrc import allocation_reaching_capacity_operations @patch('coldfront.core.utils.mail.EMAIL_ENABLED', True) @patch('coldfront.config.email.EMAIL_BACKEND', 'django.core.mail.backends.locmem.EmailBackend') @patch('coldfront.core.utils.mail.EMAIL_SENDER', 'test-admin@coldfront.org') @patch('coldfront.core.utils.mail.EMAIL_TICKET_SYSTEM_ADDRESS', 'tickets@example.org') +@patch('coldfront.core.utils.mail.EMAIL_CENTER_NAME', 'HPC Center') +@patch('coldfront.core.utils.mail.CENTER_BASE_URL', 'https://centerbaseexampleurl.org') +@patch('coldfront.core.utils.mail.EMAIL_SIGNATURE', 'HPC Center Team') +# @patch('coldfront.config.base.INSTALLED_APPS', 'coldfront.plugins') class EmailFunctionsTestCase(TestCase): def setUp(self): @@ -105,14 +109,31 @@ def test_build_link(self): domain_url = 'https://example.com' expected_url = f'{domain_url}{url_path}' self.assertEqual(build_link(url_path, domain_url), expected_url) - self.assertEqual(build_link(url_path), f'{CENTER_BASE_URL}{url_path}') + self.assertEqual(build_link(url_path), f'https://centerbaseexampleurl.org{url_path}') + + def test_allocation_reaching_capacity_operations(self): + allocation_obj = MagicMock() + allocation_obj.pk = 1 + allocation_obj.project.title = 'Test Project' + allocation_obj.project.pi.first_name = 'John' + allocation_obj.project.pi.last_name = 'Doe' + allocation_obj.project.pi.username = 'jdoe' + allocation_obj.project.pi.email = 'jdoe@test_project.edu' + allocation_obj.get_parent_resource.name = 'Test Resource' + allocation_obj.size_exact = 140140566725200 + allocation_obj.usage_exact = 100 + new_usage = 140140566625100 + result = allocation_reaching_capacity_operations(allocation_obj, new_usage) + self.assertEqual(result, True) + self.assertEqual(len(mail.outbox), 1) + print(mail.outbox[0].__dict__) def test_send_allocation_admin_email(self): allocation_obj = MagicMock() allocation_obj.project.pi.first_name = 'John' allocation_obj.project.pi.last_name = 'Doe' allocation_obj.project.pi.username = 'jdoe' - allocation_obj.get_parent_resource = 'Test Resource' + allocation_obj.get_parent_resource.name = 'Test Resource' send_allocation_admin_email(allocation_obj, self.subject, self.template_name) self.assertEqual(len(mail.outbox), 1) diff --git a/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py b/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py index 4f933aade..b65beebb4 100644 --- a/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py +++ b/coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py @@ -76,7 +76,8 @@ def handle(self, *args, **options): resource = Resource.objects.get(name__contains=entry['server']) alloc_obj = select_one_project_allocation(project, resource, dirpath=lab_path) - if alloc_obj is not None: + if alloc_obj == 'MultiAllocationError': + logger.warning('Multiple allocations found for %s %s %s', lab_name, lab_server, lab_path) continue lab_usage_entries = [ i for i in allocation_usages if i['vol_name'] == lab_server diff --git a/coldfront/plugins/fasrc/tasks.py b/coldfront/plugins/fasrc/tasks.py index 1c63b37ec..524356d91 100644 --- a/coldfront/plugins/fasrc/tasks.py +++ b/coldfront/plugins/fasrc/tasks.py @@ -1,6 +1,6 @@ -from coldfront.plugins.fasrc.utils import pull_push_quota_data import logging from django.core import management +from coldfront.plugins.fasrc.utils import pull_push_quota_data def import_quotas(volumes=None): diff --git a/coldfront/plugins/fasrc/utils.py b/coldfront/plugins/fasrc/utils.py index cf5fff796..f1bb3960e 100644 --- a/coldfront/plugins/fasrc/utils.py +++ b/coldfront/plugins/fasrc/utils.py @@ -9,6 +9,7 @@ log_missing, read_json, save_json, + allocation_reaching_capacity_operations, id_present_missing_projects ) from coldfront.core.resource.models import Resource @@ -182,11 +183,11 @@ def stage_user_member_query(self, groupsearch, pi=False): match_vars = '(u:User)-[r:MemberOf|ManagedBy]-(g:Group) WHERE' return_vars = 'type(r) AS relationship, g.ADManaged_By AS group_manager' if pi: - match_vars = '(g:Group) WITH g MATCH (u:User)\ + match_vars = '(g:Group) WITH g MATCH (u:User) \ WHERE u.ADSamAccountName = g.ADManaged_By AND' return_vars = 'u.ADParentCanonicalName AS path, u.ADDepartment AS department' query = {'statements': [{ - 'statement': f"MATCH {match_vars} (g.ADSamAccountName =~ '({groupsearch})')\ + 'statement': f"MATCH {match_vars} (g.ADSamAccountName =~ '({groupsearch})') \ RETURN \ u.ADgivenName AS first_name, \ u.ADsurname AS last_name, \ @@ -323,6 +324,7 @@ def push_quota_data(result_file): 'no byte_allocation value for allocation %s, lab %s on resource %s', allocation.pk, lab, data_dict['server'] ) + allocation_reaching_capacity_operations(allocation, data_dict['byte_usage']) for k, v in allocation_values.items(): allocation_attr_type_obj = allocation_attribute_types.get(name=k) alloc_attr_obj, _ = allocation.allocationattribute_set.update_or_create( diff --git a/coldfront/plugins/sftocf/management/commands/update_zones.py b/coldfront/plugins/sftocf/management/commands/update_zones.py index 686c08ed3..f563607d4 100644 --- a/coldfront/plugins/sftocf/management/commands/update_zones.py +++ b/coldfront/plugins/sftocf/management/commands/update_zones.py @@ -147,9 +147,13 @@ def handle(self, *args, **options): print(project, project.pk) zone = sf.get_zones(project.sf_zone) zone_paths_not_in_cf = [p for p in zone['paths'] if p.split(':')[0] not in sf_cf_vols] - # delete any zones that have no paths - if not zone_paths_not_in_cf: + # delete any zones that have no paths or belong to inactive projects + if not zone_paths_not_in_cf or project.status.name == 'Archived': if not dry_run: + if project.status.name == 'Archived': + logger.warning('Project %s is archived; deleting zone', project) + elif not zone_paths_not_in_cf: + logger.warning('Zone %s has no paths; deleting', zone['name']) sf.delete_zone(zone['id']) # delete projectattribute project.projectattribute_set.get( diff --git a/coldfront/plugins/sftocf/utils.py b/coldfront/plugins/sftocf/utils.py index e46f24cbf..e8b198138 100644 --- a/coldfront/plugins/sftocf/utils.py +++ b/coldfront/plugins/sftocf/utils.py @@ -22,6 +22,7 @@ save_json, log_missing, determine_size_fmt, + allocation_reaching_capacity_operations, id_present_missing_users, locate_or_create_dirpath, ) @@ -579,19 +580,13 @@ def __init__(self, volume=None): self._allocations = None # self.collection_filter = self.set_collection_parameters() - self.sf_user_data = self.collect_sf_user_data() - self.sf_usage_data = self.collect_sf_usage_data() + self._sf_user_data = None + self._sf_usage_data = None self._allocationquerymatches = None def return_connection_obj(self): raise NotImplementedError - def collect_sf_user_data(self): - raise NotImplementedError - - def collect_sf_usage_data(self): - raise NotImplementedError - @property def allocations(self): if self._allocations: @@ -601,6 +596,7 @@ def allocations(self): status__name__in=allocation_statuses, resources__in=self.connection_obj.get_corresponding_coldfront_resources() ) + return self._allocations @property def allocationquerymatches(self): @@ -612,6 +608,8 @@ def allocationquerymatches(self): allocation_list = [ (a.get_parent_resource.name.split('/')[0], a.path) for a in allocations ] + # if usage is true, collect and update usage + allocation_usage_grouped = {} total_sort_key = itemgetter('path','volume') allocation_usage_grouped = return_dict_of_groupings(self.sf_usage_data, total_sort_key) missing_allocations = [ @@ -646,7 +644,6 @@ def clean_collected_data(self): user_usernames = {d['username'] for d in self.sf_user_data} user_models, missing_usernames = id_present_missing_users(user_usernames) missing_username_list = [d['username'] for d in missing_usernames] - logger.debug('allocation_usage:\n%s', self.sf_usage_data) # identify and remove allocation users that are no longer in the AD group for obj in self.allocationquerymatches: @@ -664,26 +661,30 @@ def clean_collected_data(self): obj.user_usage_entries.remove(i) return self.allocationquerymatches, user_models - def update_coldfront_objects(self, user_models): - """update coldfront objects""" + def update_coldfront_allocation_usage(self): + """update coldfront allocation usage""" allocation_attribute_types = AllocationAttributeType.objects.all() - quota_b_attrtype = allocation_attribute_types.get(name='Quota_In_Bytes') quota_tb_attrtype = allocation_attribute_types.get(name='Storage Quota (TB)') - # 3. iterate across allocations for obj in self.allocationquerymatches: - logger.debug('updating allocation %s %s (path %s)', - obj.lab, obj.volume, obj.allocation.path - ) + allocation_reaching_capacity_operations(obj.allocation, obj.total_usage_entry['total_size']) obj.update_usage_attr(quota_b_attrtype, obj.total_usage_entry['total_size']) obj.update_usage_attr(quota_tb_attrtype, obj.total_usage_tb) - logger.info('allocation usage for allocation %s: %s bytes, %s terabytes', obj.allocation.pk, obj.total_usage_entry['total_size'], obj.total_usage_tb ) + + def update_coldfront_objects(self, user_models, usage=False): + """update coldfront allocation objects""" + # 3. iterate across allocations + if usage: + self.update_coldfront_allocation_usage() + for obj in self.allocationquerymatches: + logger.debug('updating allocation %s %s (path %s)', + obj.lab, obj.volume, obj.allocation.path + ) # identify and remove allocation users that are no longer in the AD group self.zero_out_absent_allocationusers(obj.query_usernames, obj.allocation) - for userdict in obj.user_usage_entries: user = next( u for u in user_models if userdict['username'].lower() == u.username.lower() @@ -716,25 +717,31 @@ def return_connection_obj(self): # 1. grab data from redash return StarFishRedash() - def collect_sf_user_data(self): + @property + def sf_user_data(self): """Collect starfish data using the Redash API. Return the results.""" - user_usage = self.connection_obj.return_query_results( - query='path_usage_query', volumes=self.volumes - ) - for d in user_usage: - d['username'] = d.pop('user_name') - d['volume'] = d.pop('vol_name') - d['path'] = d.pop('lab_path') - return user_usage - - def collect_sf_usage_data(self): - allocation_usage = self.connection_obj.return_query_results( - query='subdirectory', volumes=self.volumes - ) - for d in allocation_usage: - d['username'] = d.pop('user_name') - d['volume'] = d.pop('vol_name') - return allocation_usage + if not self._sf_user_data: + user_usage = self.connection_obj.return_query_results( + query='path_usage_query', volumes=self.volumes + ) + for d in user_usage: + d['username'] = d.pop('user_name') + d['volume'] = d.pop('vol_name') + d['path'] = d.pop('lab_path') + self._sf_user_data = user_usage + return self._sf_user_data + + @property + def sf_usage_data(self): + if not self._sf_usage_data: + allocation_usage = self.connection_obj.return_query_results( + query='subdirectory', volumes=self.volumes + ) + for d in allocation_usage: + d['username'] = d.pop('user_name') + d['volume'] = d.pop('vol_name') + self._sf_usage_data = allocation_usage + return self._sf_usage_data def collect_sf_data_for_lab(self, lab_name, volume_name): """Collect user-level and allocation-level usage data for a specific lab.""" @@ -838,105 +845,111 @@ def items_to_pop(self): return ['size_sum_hum', 'rec_aggrs', 'physical_nlinks_size_sum', 'physical_nlinks_size_sum_hum', 'volume_display_name', 'count', 'fn'] - def collect_sf_user_data(self): + @property + def sf_user_data(self): """Collect starfish data using the REST API. Return the results.""" - # 1. produce dict of all labs to be collected & volumes on which their data is located - lab_res = self.produce_lab_dict() - # 2. produce list of files collected & list of lab/volume/filename tuples to collect - filepaths, to_collect = self.check_volume_collection(lab_res) - # 3. produce set of all volumes to be queried - vol_set = {i[1] for i in to_collect} - vols = [vol for vol in vol_set if vol in svp['volumes']] - for volume in vols: - projects = [t for t in to_collect if t[1] == volume] - logger.debug('vol: %s\nto_collect_subset: %s', volume, projects) - - ### OLD METHOD ### - for tup in projects: - p = tup[0] - filepath = tup[3] - lab_volpath = tup[2] #volumepath[0] if '_l3' not in p else volumepath[1] - logger.debug('filepath: %s lab: %s volpath: %s', filepath, p, lab_volpath) - usage_query = self.connection_obj.create_query( - f'groupname={p} type=f', - 'volume,username,groupname', - f'{volume}:{lab_volpath}', - ) - data = self.return_usage_query_data(usage_query.result) - if data: - contents = [d for d in data if d['username'] != 'root'] - for entry in contents: - # entry['size_sum'] = entry['rec_aggrs']['size'] - # entry['full_path'] = entry['parent_path']+'/'+entry['fn'] - for item in self.items_to_pop: - entry.pop(item, None) - record = { - 'server': self.connection_obj.name, - 'volume': volume, - 'path': lab_volpath, - 'project': p, - 'date': DATESTR, - 'contents': contents, - } - save_json(filepath, record) - filepaths.append(filepath) - - collected_data = [] - for filepath in filepaths: - content = read_json(filepath) - for user in content['contents']: - user.update({ - 'volume': content['volume'], - 'path': content['path'], - 'project': content['project'], - }) - collected_data.append(user) - return collected_data - - def collect_sf_usage_data(self): - """Collect usage data from starfish for all labs in the lab list.""" - # 1. produce dict of all labs to be collected & volumes on which their data is located - lab_res = self.produce_lab_dict() - lab_res = [(k, i[0], i[1]) for k, v in lab_res.items() for i in v] - # 2. produce set of all volumes to be queried - vol_set = {i[1] for i in lab_res} - vols = [vol for vol in vol_set if vol in svp['volumes']] - entries = [] - for volume in vols: - volumepath = svp['volumes'][volume] - projects = [t for t in lab_res if t[1] == volume] - logger.debug('vol: %s\nto_collect_subset: %s', volume, projects) - - ### OLD METHOD ### - for tup in projects: - p = tup[0] - lab_volpath = volumepath[0] if '_l3' not in p else volumepath[1] - logger.debug('lab: %s volpath: %s', p, lab_volpath) - usage_query = self.connection_obj.create_query( - f'groupname={p} type=d depth=1', - 'volume,parent_path,groupname,rec_aggrs.size,fn', - f'{volume}:{lab_volpath}', - qformat='parent_path +aggrs.by_gid', - ) - data = self.return_usage_query_data(usage_query.result) - if data: - if len(data) > 1: - logger.error('too many data entries for %s: %s', p, data) - continue - entry = data[0] - entry.update({ - 'size_sum': entry['rec_aggrs']['size'], - 'full_path': entry['parent_path']+'/'+entry['fn'], - 'server': self.connection_obj.name, - 'volume': volume, - 'path': lab_volpath, - 'project': p, - 'date': DATESTR, + if not self._sf_user_data: + # 1. produce dict of all labs to be collected & volumes on which their data is located + lab_res = self.produce_lab_dict() + # 2. produce list of files collected & list of lab/volume/filename tuples to collect + filepaths, to_collect = self.check_volume_collection(lab_res) + # 3. produce set of all volumes to be queried + vol_set = {i[1] for i in to_collect} + vols = [vol for vol in vol_set if vol in svp['volumes']] + for volume in vols: + projects = [t for t in to_collect if t[1] == volume] + logger.debug('vol: %s\nto_collect_subset: %s', volume, projects) + + ### OLD METHOD ### + for tup in projects: + p = tup[0] + filepath = tup[3] + lab_volpath = tup[2] #volumepath[0] if '_l3' not in p else volumepath[1] + logger.debug('filepath: %s lab: %s volpath: %s', filepath, p, lab_volpath) + usage_query = self.connection_obj.create_query( + f'groupname={p} type=f', + 'volume,username,groupname', + f'{volume}:{lab_volpath}', + ) + data = self.return_usage_query_data(usage_query.result) + if data: + contents = [d for d in data if d['username'] != 'root'] + for entry in contents: + # entry['size_sum'] = entry['rec_aggrs']['size'] + # entry['full_path'] = entry['parent_path']+'/'+entry['fn'] + for item in self.items_to_pop: + entry.pop(item, None) + record = { + 'server': self.connection_obj.name, + 'volume': volume, + 'path': lab_volpath, + 'project': p, + 'date': DATESTR, + 'contents': contents, + } + save_json(filepath, record) + filepaths.append(filepath) + + collected_data = [] + for filepath in filepaths: + content = read_json(filepath) + for user in content['contents']: + user.update({ + 'volume': content['volume'], + 'path': content['path'], + 'project': content['project'], }) - for item in self.items_to_pop: - entry.pop(item) - entries.append(entry) - return entries + collected_data.append(user) + self._sf_user_data = collected_data + return self._sf_user_data + + @property + def sf_usage_data(self): + """Collect usage data from starfish for all labs in the lab list.""" + if not self._sf_usage_data: + # 1. produce dict of all labs to be collected & volumes containing their data + lab_res = self.produce_lab_dict() + lab_res = [(k, i[0], i[1]) for k, v in lab_res.items() for i in v] + # 2. produce set of all volumes to be queried + vol_set = {i[1] for i in lab_res} + vols = [vol for vol in vol_set if vol in svp['volumes']] + entries = [] + for volume in vols: + volumepath = svp['volumes'][volume] + projects = [t for t in lab_res if t[1] == volume] + logger.debug('vol: %s\nto_collect_subset: %s', volume, projects) + + ### OLD METHOD ### + for tup in projects: + p = tup[0] + lab_volpath = volumepath[0] if '_l3' not in p else volumepath[1] + logger.debug('lab: %s volpath: %s', p, lab_volpath) + usage_query = self.connection_obj.create_query( + f'groupname={p} type=d depth=1', + 'volume,parent_path,groupname,rec_aggrs.size,fn', + f'{volume}:{lab_volpath}', + qformat='parent_path +aggrs.by_gid', + ) + data = self.return_usage_query_data(usage_query.result) + if data: + if len(data) > 1: + logger.error('too many data entries for %s: %s', p, data) + continue + entry = data[0] + entry.update({ + 'size_sum': entry['rec_aggrs']['size'], + 'full_path': entry['parent_path']+'/'+entry['fn'], + 'server': self.connection_obj.name, + 'volume': volume, + 'path': lab_volpath, + 'project': p, + 'date': DATESTR, + }) + for item in self.items_to_pop: + entry.pop(item) + entries.append(entry) + self._sf_usage_data = entries + return self._sf_usage_data @receiver(allocation_activate) def update_allocation(sender, **kwargs): diff --git a/coldfront/templates/email/allocation_usage_high.txt b/coldfront/templates/email/allocation_usage_high.txt new file mode 100644 index 000000000..f457763c2 --- /dev/null +++ b/coldfront/templates/email/allocation_usage_high.txt @@ -0,0 +1,12 @@ +This is a notice that usage of {{project_title}}’s allocation on {{resource}} has exceeded {{threshold}}% of its quota of {{allocation_quota}}. +You can view more information about the allocation here: {{url}}. + +To avoid reaching quota capacity, we strongly encourage either reducing allocation usage or requesting an allocation quota increase. +You can request an allocation quota increase here: {{change_request_url}}. +{% if starfish %} +The Starfish platform can assist in identifying allocation directories and files that have not been used recently. You can access Starfish here: {{starfish_url}}. +Some tips for using Starfish to identify files that have not been accessed recently or may no longer be in use are available here: {{starfish_docs_url}} +{% endif %} + +Thank you, +{{signature}} diff --git a/container_startup.sh b/container_startup.sh index ceb7bd4fc..b7d58f5aa 100644 --- a/container_startup.sh +++ b/container_startup.sh @@ -14,5 +14,5 @@ python ./manage.py collectstatic --noinput if [ "$BUILD_ENV" == 'dev' ]; then python ./manage.py runserver 0.0.0.0:80 --insecure else - gunicorn coldfront.config.wsgi:application --bind 0.0.0.0:80 --error-logfile /var/log/gunicorn_errors.log --reload --timeout 144000 --workers=3 + gunicorn coldfront.config.wsgi:application --bind 0.0.0.0:80 --error-logfile /var/log/gunicorn_errors.log --reload --timeout 144000 --workers=5 fi