Skip to content

Commit

Permalink
Merge pull request #327 from fasrc/cp_sfpathcheck
Browse files Browse the repository at this point in the history
Require path match for allocation pipelines
  • Loading branch information
claire-peters authored Aug 22, 2024
2 parents 0e70d88 + f204250 commit 91972b8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 72 deletions.
36 changes: 16 additions & 20 deletions coldfront/core/utils/fasrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,34 +50,30 @@ def sort_by(list1, sorter, how='attr'):
raise Exception('unclear sorting method')
return is_true, is_false

def select_one_project_allocation(project_obj, resource_obj, dirpath):
    """Return the single allocation for a project/resource/path triple.

    Parameters
    ----------
    project_obj
        Project whose ``allocation_set`` is searched.
    resource_obj
        Resource the allocation must be connected to (matched by id).
    dirpath : str
        Filesystem path; an allocation matches when its ``path`` is a
        substring of ``dirpath`` or vice versa.

    Returns
    -------
    Allocation or None
        The unique matching allocation, or ``None`` when the project has
        no allocations for the resource or none match ``dirpath``.

    Raises
    ------
    Exception
        If more than one allocation matches the project/resource/path
        pairing.
    """
    allocation_query = project_obj.allocation_set.filter(
        resources__id=resource_obj.id)
    # Guard clause: no allocations at all for this resource.
    if allocation_query.count() < 1:
        return None
    # Keep only allocations whose path and dirpath overlap in either
    # direction; both values must be truthy for a match to count.
    matches = [
        a for a in allocation_query
        if a.path and dirpath and (a.path in dirpath or dirpath in a.path)
    ]
    if len(matches) > 1:
        raise Exception('multiple allocations found for project/resource/path pairing')
    # Explicit None when nothing matched the path (previously this was an
    # implicit fall-through return, which was easy to misread).
    return matches[0] if matches else None

def determine_size_fmt(byte_num):
"""Return the correct human-readable byte measurement.
Expand All @@ -86,9 +82,9 @@ def determine_size_fmt(byte_num):
for u in units:
unit = u
if abs(byte_num) < 1024.0:
return round(byte_num, 3), unit
return (round(byte_num, 3), unit)
byte_num /= 1024.0
return(round(byte_num, 3), unit)
return (round(byte_num, 3), unit)

def convert_size_fmt(num, target_unit, source_unit='B'):
units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def handle(self, *args, **options):
lab_path = entry['fs_path'].replace(f'/n/{entry["server"]}/', '')

resource = Resource.objects.get(name__contains=entry['server'])
alloc_obj = select_one_project_allocation(project, resource, dirpath=lab_path)
alloc_obj = select_one_project_allocation(project, resource, lab_path)
if alloc_obj is not None:
continue
lab_usage_entries = [
Expand Down
85 changes: 36 additions & 49 deletions coldfront/plugins/fasrc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ def produce_query_statement(self, vol_type, volumes=None):

query_dict = {
'quota': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier0')),
'relation': 'HasQuota',
'match': "(e:Quota) MATCH (d:ConfigValue {Name: 'Quota.Invocation'})",
'server': 'filesystem',
'validation_query':
"NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024)) \
AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate)) \
"NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024))\
AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate))\
AND NOT (e.Path IS null)",
'r_updated': 'DotsLFSUpdateDate',
'storage_type': 'Quota',
Expand All @@ -42,17 +43,18 @@ def produce_query_statement(self, vol_type, volumes=None):
'usedbytes': 'usedBytes',
'fs_path': 'Path',
'server_replace': '/n/',
'path_replace': '/n//',
'path_def': "substring(e.Path, size('/n/') + size(split(e.Path, '/')[2]) + 1)",
'unique':'datetime(e.DotsLFSUpdateDate) as begin_date'
},
'isilon': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier1')),
'relation': 'Owns',
'match': "(e:IsilonPath) MATCH (d:ConfigValue {Name: 'IsilonPath.Invocation'})",
'server': 'Isilon',
'validation_query': "r.DotsUpdateDate = d.DotsUpdateDate \
'validation_query': "r.DotsUpdateDate = d.DotsUpdateDate\
AND NOT (e.Path =~ '.*/rc_admin/.*')\
AND (e.Path =~ '.*labs.*')\
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate)) \
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate))\
AND NOT (e.SizeGB = 0)",
'fs_path':'Path',
'r_updated': 'DotsUpdateDate',
Expand All @@ -61,48 +63,43 @@ def produce_query_statement(self, vol_type, volumes=None):
'sizebytes': 'SizeBytes',
'usedbytes': 'UsedBytes',
'server_replace': '01.rc.fas.harvard.edu',
'path_replace': '/ifs/',
'path_def': "replace(e.Path, '/ifs/', '')",
'unique': 'datetime(e.DotsUpdateDate) as begin_date'
},
'volume': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier2')),
'relation': 'Owns',
'match': '(e:Volume)',
'server': 'Hostname',
'validation_query': 'NOT (e.SizeGB = 0)',
'r_updated': 'DotsLVSUpdateDate',
'storage_type': 'Volume',
'fs_path': 'LogicalVolume',
'path_replace': '/dev/data/',
'path_def': "replace(e.LogicalVolume, '/dev/data/', '')",
'usedgb': 'UsedGB',
'sizebytes': 'SizeGB * 1000000000',
'usedbytes': 'UsedGB * 1000000000',
'server_replace': '.rc.fas.harvard.edu',
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date, \
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date,\
datetime(e.DotsLVDisplayUpdateDate) as display_date'
},
}
d = query_dict[vol_type]

if not volumes:
volumes = [
r.name.split('/')[0] for r in Resource.objects.filter(resource_type__name='Storage')
]
volumes = '|'.join(volumes)
where = f"(e.{d['server']} =~ '.*({volumes}).*')"
statement = {
'statement': f"MATCH p=(g:Group)-[r:{d['relation']}]-{d['match']} \
WHERE {where} AND {d['validation_query']}\
'statement': f"MATCH p=(g:Group)-[r:{d['relation']}]-{d['match']}\
WHERE (e.{d['server']} =~ '.*({d['volumes']}).*') AND {d['validation_query']}\
AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\
RETURN \
{d['unique']}, \
RETURN\
{d['unique']},\
g.ADSamAccountName as lab,\
(e.SizeGB / 1024.0) as tb_allocation, \
(e.SizeGB / 1024.0) as tb_allocation,\
e.{d['sizebytes']} as byte_allocation,\
e.{d['usedbytes']} as byte_usage,\
(e.{d['usedgb']} / 1024.0) as tb_usage,\
replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \
'{d['storage_type']}' as storage_type, \
datetime(r.{d['r_updated']}) as rel_updated, \
{d['path_def']} as fs_path,\
'{d['storage_type']}' as storage_type,\
datetime(r.{d['r_updated']}) as rel_updated,\
replace(e.{d['server']}, '{d['server_replace']}', '') as server"
}
self.queries['statements'].append(statement)
Expand Down Expand Up @@ -141,14 +138,14 @@ def _standardize_nesefile(self):
headers = headers_df.columns.values.tolist()
data = pd.read_csv(datafile, names=headers, sep='\s+')
data = data.loc[data['pool'].str.contains('1')]
data['fs_path'] = data['pool']
data['lab'] = data['pool'].str.replace('1', '').str.replace('hugl', '').str.replace('hus3', '')
data['server'] = 'nesetape'
data['storage_type'] = 'tape'
data['byte_allocation'] = data['mib_capacity'] * 1048576
data['byte_usage'] = data['mib_used'] * 1048576
data['tb_allocation'] = round(((data['mib_capacity']+ data['mib_capacity']*0.025) / 953674.3164), -1)
data['tb_usage'] = data['mib_used'] / 953674.3164
data['fs_path'] = None
data = data[[
'lab', 'server', 'storage_type', 'byte_allocation',
'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path',
Expand Down Expand Up @@ -187,17 +184,17 @@ def stage_user_member_query(self, groupsearch, pi=False):
return_vars = 'u.ADParentCanonicalName AS path, u.ADDepartment AS department'
query = {'statements': [{
'statement': f"MATCH {match_vars} (g.ADSamAccountName =~ '({groupsearch})')\
RETURN \
u.ADgivenName AS first_name, \
u.ADsurname AS last_name, \
u.ADSamAccountName AS user_name, \
u.ADenabled AS user_enabled, \
RETURN\
u.ADgivenName AS first_name,\
u.ADsurname AS last_name,\
u.ADSamAccountName AS user_name,\
u.ADenabled AS user_enabled,\
g.ADSamAccountName AS group_name,\
{return_vars},\
g.ADManaged_By AS group_manager, \
u.ADgidNumber AS user_gid_number, \
u.ADTitle AS title, \
u.ADCompany AS company, \
g.ADManaged_By AS group_manager,\
u.ADgidNumber AS user_gid_number,\
u.ADTitle AS title,\
u.ADCompany AS company,\
g.ADgidNumber AS group_gid_number"
}]}
resp_json = self.post_query(query)
Expand Down Expand Up @@ -247,37 +244,27 @@ def matched_dict_processing(allocation, data_dicts, paired_allocs, log_message):
def pair_allocations_data(project, quota_dicts):
"""pair allocations with usage dicts"""
logger = logging.getLogger('coldfront.import_quotas')
unpaired_allocs = project.allocation_set.filter(
allocs = project.allocation_set.filter(
status__name__in=['Active','Pending Deactivation'],
resources__resource_type__name='Storage'
)
paired_allocs = {}
# first, pair allocations with those that have same
for allocation in unpaired_allocs:
for allocation in allocs:
dicts = [
d for d in quota_dicts
if d['fs_path'] and allocation.path.lower() == d['fs_path'].lower()
if d['fs_path'] and d['fs_path'].lower() == allocation.path.replace('HDD/', '').replace('SSD-HGST/', '').replace('SSD/', '').lower()
and d['server'] in allocation.resources.first().name
]
if dicts:
log_message = f'Path-based match: {allocation}, {allocation.path}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
unpaired_allocs = [
a for a in unpaired_allocs if a not in paired_allocs
]
unpaired_allocs = [a for a in allocs if a not in paired_allocs]
unpaired_dicts = [d for d in quota_dicts if d not in paired_allocs.values()]
for allocation in unpaired_allocs:
dicts = [
d for d in unpaired_dicts if d['server'] in allocation.resources.first().name
]
if dicts:
log_message = f'Resource-based match: {allocation}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
unpaired_allocs = [
a for a in unpaired_allocs if a not in paired_allocs and a.status.name == 'Active'
]
unpaired_dicts = [d for d in unpaired_dicts if d not in paired_allocs.values()]
if unpaired_dicts or unpaired_allocs:
print(
f"unpaired allocation data. Allocation: {unpaired_allocs} | Dict: {unpaired_dicts}"
)
logger.warning(
"unpaired allocation data. Allocation: %s | Dict: %s", unpaired_allocs, unpaired_dicts
)
Expand Down
3 changes: 1 addition & 2 deletions coldfront/plugins/sftocf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ def get_zone_by_name(self, zone_name):
zones = self.get_zones()
return next((z for z in zones if z['name'] == zone_name), None)


def create_zone(self, zone_name, paths, managers, managing_groups):
"""Create a zone via the API"""
url = self.api_url + 'zone/'
Expand Down Expand Up @@ -616,7 +615,7 @@ def allocationquerymatches(self):
total_sort_key = itemgetter('path','volume')
allocation_usage_grouped = return_dict_of_groupings(self.sf_usage_data, total_sort_key)
missing_allocations = [
(k,a) for k, a in allocation_usage_grouped if k not in allocation_list
(k,a) for k, a in allocation_usage_grouped if (a, k) not in allocation_list
]
print("missing_allocations:", missing_allocations)
logger.warning('starfish allocations missing in coldfront: %s', missing_allocations)
Expand Down

0 comments on commit 91972b8

Please sign in to comment.