Skip to content

Commit

Permalink
Test util db (#3406)
Browse files Browse the repository at this point in the history
* Update to DB qiita.slurm_resource_allocations

* connected tests to database

* Update util.py

* debugging changes to test

* Update test_util.py

* Update test_util.py

* Tests update

* Update test_meta_util.py

* Updates to @antgonza comments

* Updates to @charles-cowart comments
  • Loading branch information
Gossty authored May 21, 2024
1 parent c0cdb4b commit 55e460e
Show file tree
Hide file tree
Showing 6 changed files with 988 additions and 28 deletions.
930 changes: 927 additions & 3 deletions qiita_db/support_files/patches/test_db_sql/92.sql

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion qiita_db/test/test_meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _get_daily_stats():
('num_studies_ebi', b'1', r_client.get),
('num_samples_ebi', b'27', r_client.get),
('number_samples_ebi_prep', b'54', r_client.get),
('num_processing_jobs', b'14', r_client.get)
('num_processing_jobs', b'474', r_client.get)
# not testing img/time for simplicity
# ('img', r_client.get),
# ('time', r_client.get)
Expand Down
13 changes: 10 additions & 3 deletions qiita_db/test/test_software.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,13 +494,20 @@ def test_processing_jobs(self):
'6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f',
'063e553b-327c-4818-ab4a-adfe58e49860',
'ac653cb5-76a6-4a45-929e-eb9b2dee6b63']
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
self.assertCountEqual(qdb.software.Command(1).processing_jobs, exp)

jobs = qdb.software.Command(1).processing_jobs
set_jobs = set(jobs)

# the number of jobs must equal the number of unique jobs, i.e. the
# command must not report the same job more than once
self.assertEqual(len(jobs), len(set_jobs))

exp = set([qdb.processing_job.ProcessingJob(j) for j in exp_jids])
self.assertEqual(len(set_jobs & exp), len(exp_jids))

exp_jids = ['bcc7ebcd-39c1-43e4-af2d-822e3589f14d']
exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids]
self.assertCountEqual(qdb.software.Command(2).processing_jobs, exp)

self.assertCountEqual(qdb.software.Command(4).processing_jobs, [])


Expand Down
7 changes: 0 additions & 7 deletions qiita_db/test/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,17 +491,10 @@ def test_jobs(self):
limit=1, ignore_status=ignore_status), [
PJ('b72369f9-a886-4193-8d3d-f7b504168e75')])

# no jobs
self.assertEqual(qdb.user.User('[email protected]').jobs(
ignore_status=ignore_status), [])

# generates expected jobs
jobs = qdb.user.User('[email protected]').jobs()
self.assertEqual(jobs, [])

# no jobs
self.assertEqual(qdb.user.User('[email protected]').jobs(), [])

def test_update_email(self):
user = qdb.user.User('[email protected]')
with self.assertRaisesRegex(IncorrectEmailError, 'Bad email given:'):
Expand Down
22 changes: 13 additions & 9 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1309,19 +1309,23 @@ def test_quick_mounts_purge(self):

class ResourceAllocationPlotTests(TestCase):
def setUp(self):

self.PATH_TO_DATA = ('./qiita_db/test/test_data/'
'jobs_2024-02-21.tsv.gz')
self.CNAME = "Validate"
self.SNAME = "Diversity types - alpha_vector"
self.CNAME = "Split libraries FASTQ"
self.SNAME = "QIIMEq2"
self.col_name = 'samples * columns'
self.df = pd.read_csv(self.PATH_TO_DATA, sep='\t',
dtype={'extra_info': str})
self.columns = [
"sName", "sVersion", "cID", "cName", "processing_job_id",
"parameters", "samples", "columns", "input_size", "extra_info",
"MaxRSSRaw", "ElapsedRaw"]

# df is a dataframe that represents a table with columns specified in
# self.columns
self.df = qdb.util._retrieve_resource_data(
self.CNAME, self.SNAME, self.columns)

def test_plot_return(self):
# check the plot returns correct objects
fig1, axs1 = qdb.util.resource_allocation_plot(
self.PATH_TO_DATA, self.CNAME, self.SNAME, self.col_name)
self.df, self.CNAME, self.SNAME, self.col_name)
self.assertIsInstance(
fig1, Figure,
"Returned object fig1 is not a Matplotlib Figure")
Expand All @@ -1346,7 +1350,7 @@ def test_minimize_const(self):
failures_df = qdb.util._resource_allocation_failures(
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
failures = failures_df.shape[0]
self.assertEqual(bm, qdb.util.mem_model4, msg="""Best memory model
self.assertEqual(bm, qdb.util.mem_model3, msg="""Best memory model
doesn't match""")
self.assertEqual(failures, 0, "Number of failures must be 0")

Expand Down
42 changes: 37 additions & 5 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# memory constant functions defined for @resource_allocation_plot
Expand Down Expand Up @@ -2341,7 +2341,7 @@ def send_email(to, subject, body):
smtp.close()


def resource_allocation_plot(file, cname, sname, col_name):
def resource_allocation_plot(df, cname, sname, col_name):
"""Builds resource allocation plot for given filename and jobs
Parameters
Expand All @@ -2361,9 +2361,6 @@ def resource_allocation_plot(file, cname, sname, col_name):
Returns a matplotlib object with a plot
"""

df = pd.read_csv(file, sep='\t', dtype={'extra_info': str})
df['ElapsedRawTime'] = pd.to_timedelta(df.ElapsedRawTime)
df = df[(df.cName == cname) & (df.sName == sname)]
df.dropna(subset=['samples', 'columns'], inplace=True)
df[col_name] = df.samples * df['columns']
df[col_name] = df[col_name].astype(int)
Expand All @@ -2383,6 +2380,41 @@ def resource_allocation_plot(file, cname, sname, col_name):
return fig, axs


def _retrieve_resource_data(cname, sname, columns):
    """Fetch the resource allocation records of one software command

    Joins qiita.processing_job with qiita.software_command, qiita.software
    and qiita.slurm_resource_allocations, keeping only the rows whose command
    name equals `cname` and whose software name equals `sname`.

    Parameters
    ----------
    cname : str
        Command name, matched against qiita.software_command.name
    sname : str
        Software name, matched against qiita.software.name
    columns : list of str
        Labels assigned, in order, to the columns of the query result

    Returns
    -------
    pd.DataFrame
        The rows returned by the query, labeled with `columns`
    """
    # the query text is reproduced verbatim, hence the flush-left layout
    query = """
SELECT
s.name AS sName,
s.version AS sVersion,
sc.command_id AS cID,
sc.name AS cName,
pr.processing_job_id AS processing_job_id,
pr.command_parameters AS parameters,
sra.samples AS samples,
sra.columns AS columns,
sra.input_size AS input_size,
sra.extra_info AS extra_info,
sra.memory_used AS memory_used,
sra.walltime_used AS walltime_used
FROM
qiita.processing_job pr
JOIN
qiita.software_command sc ON pr.command_id = sc.command_id
JOIN
qiita.software s ON sc.software_id = s.software_id
JOIN
qiita.slurm_resource_allocations sra
ON pr.processing_job_id = sra.processing_job_id
WHERE
sc.name = %s
AND s.name = %s;
"""
    with qdb.sql_connection.TRN:
        # both names travel as bound parameters, never interpolated
        qdb.sql_connection.TRN.add(query, sql_args=[cname, sname])
        rows = qdb.sql_connection.TRN.execute_fetchindex()
        return pd.DataFrame(rows, columns=columns)


def _resource_allocation_plot_helper(
df, ax, cname, sname, curr, models, col_name):
"""Helper function for resource allocation plot. Builds plot for MaxRSSRaw
Expand Down

0 comments on commit 55e460e

Please sign in to comment.