some new tests, and reconfiguring

- CromwellApi: added new methods for search, validate and abort - utils: added new past_date fxn to calculate a datetime in the past (or future) - metadata tests: add tests for keys on dicts, treating final states and non-final separately - new test files for: abort, validate, failures, search - new wdl directories: badFile, parseBatchFile
FredHutch · Dec 20, 2024 · 072bd69 · 072bd69
1 parent 925b957
commit 072bd69
Show file tree

Hide file tree

Showing 10 changed files with 273 additions and 10 deletions.
diff --git a/badFile/badFile.wdl b/badFile/badFile.wdl
@@ -0,0 +1,19 @@
+version 1.0
+## This is a test workflow that fails against womtool.
+## From https://github.com/broadinstitute/cromwell
+
+#### WORKFLOW DEFINITION
+
+# Single-call workflow wrapping the task below; per the header above, this
+# file is a fixture that is meant to fail womtool validation.
+workflow oops {
+  call oopsie
+}
+
+#### TASK DEFINITIONS
+
+# Echoes its single string input.
+task oopsie {
+  input {
+    String str
+  }
+  command { echo ${str} }
+  # `docker_image` is never declared anywhere in this file. Leave it that way:
+  # the validate test expects the "Cannot lookup value 'docker_image'" error.
+  runtime { docker: docker_image }
+}
diff --git a/parseBatchFile/README.md b/parseBatchFile/README.md
@@ -0,0 +1,15 @@
+# parseBatchFile WDL Workflow
+
+## Overview
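+Reads a tab-separated batch file and scatters over its rows, running one task per row with that row's `sampleName`, `bamLocation`, and `bedLocation` values.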
+
+## Workflow Components
+
+### Workflow: `parseBatchFile`
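+Takes a single `batchFile` input, calls the `test` task once per row (the task echoes its three inputs to stdout), and collects the per-row stdout files in `outputArray`.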
+
+## Purpose
+To check how Cromwell behaves when a workflow fails
+
+## Version
+WDL 1.0
diff --git a/parseBatchFile/parseBatchFile.wdl b/parseBatchFile/parseBatchFile.wdl
@@ -0,0 +1,46 @@
+version 1.0
+# This workflow takes a tab separated file where each row is a set of data to be used in each 
+# of the independent scattered task series that you have as your workflow process.  This file 
+# will, for example, have column names `sampleName`, `bamLocation`, and `bedLocation`.  This
+# allows you to know that regardless of the order of the columns in your batch file, the correct
+# inputs will be used for the tasks you define.  
+workflow parseBatchFile {
+  input {
+    File batchFile
+  }
+
+  # One Object per row of the batch file, with members named after its columns
+  Array[Object] batchInfo = read_objects(batchFile)
+
+  scatter (job in batchInfo) {
+    String sampleName = job.sampleName
+    File bamFile = job.bamLocation
+    File bedFile = job.bedLocation
+
+    ## INSERT YOUR WORKFLOW TO RUN PER LINE IN YOUR BATCH FILE HERE!!!!
+    call test {
+      input:
+        in1 = sampleName,
+        in2 = bamFile,
+        in3 = bedFile
+    }
+  }  # End scatter over the batch file
+
+  # Outputs that will be retained when execution is complete
+  output {
+    Array[File] outputArray = test.item_out
+  }
+}  # End workflow
+
+#### TASK DEFINITIONS
+# echo some text to stdout, treats files as strings just to echo them as a dummy example
+task test {
+  input {
+    String in1
+    String in2
+    String in3
+  }
+
+  # Each input is echoed on its own line
+  command {
+    echo ~{in1}
+    echo ~{in2}
+    echo ~{in3}
+  }
+
+  # The captured stdout is the task's only output
+  output {
+    File item_out = stdout()
+  }
+}
diff --git a/tests/cromwell-api/cromwell.py b/tests/cromwell-api/cromwell.py
@@ -5,7 +5,7 @@
     stop_after_attempt,
     wait_exponential,
 )
-from utils import TOKEN
+from utils import TOKEN, past_date
 
 
 def as_file_object(path=None):
@@ -20,6 +20,12 @@ def my_before_sleep(state):
     )
 
 
+def path_as_string(x):
+    """Return the absolute path of ``x`` as a string, or None when ``x`` is falsy."""
+    return str(x.absolute()) if x else None
+
+
 class CromwellApi(object):
     """CromwellApi class"""
 
@@ -35,25 +41,26 @@ def submit_workflow(
         params=None,
     ):
         files = {
-            "workflowSource": as_file_object(str(wdl_path.absolute())),
-            "workflowInputs": as_file_object(batch),
-            "workflowInputs_2": as_file_object(params),
+            "workflowSource": as_file_object(path_as_string(wdl_path)),
+            "workflowInputs": as_file_object(path_as_string(batch)),
+            "workflowInputs_2": as_file_object(path_as_string(params)),
         }
         files = {k: v for k, v in files.items() if v}
         res = httpx.post(
             f"{self.base_url}/api/workflows/v1", headers=self.headers, files=files
         )
         res.raise_for_status()
-        return res.json()
+        data = res.json()
+        data["path"] = str(wdl_path)
+        return data
 
     @retry(
         retry=retry_if_exception_type(httpx.HTTPStatusError),
         stop=stop_after_attempt(3),
         wait=wait_exponential(multiplier=1, min=4, max=10),
         before_sleep=my_before_sleep,
     )
-    def metadata(self, workflow_id):
-        params = {"expandSubWorkflows": False, "excludeKey": "calls"}
+    def metadata(self, workflow_id, params={}):
         res = httpx.get(
             f"{self.base_url}/api/workflows/v1/{workflow_id}/metadata",
             headers=self.headers,
@@ -69,3 +76,37 @@ def version(self):
         )
         res.raise_for_status()
         return res.json()
+
+    def validate(self, wdl_path, inputs=None):
+        """POST a WDL (and optional inputs) to the womtool describe endpoint.
+
+        ``wdl_path`` is sent as ``workflowSource`` and ``inputs`` as
+        ``workflowInputs``. Returns the decoded JSON body; an error status
+        is raised by ``raise_for_status``.
+        """
+        candidates = {
+            "workflowSource": as_file_object(path_as_string(wdl_path)),
+            "workflowInputs": as_file_object(path_as_string(inputs)),
+        }
+        # Only entries that resolved to something truthy are uploaded.
+        files = {key: handle for key, handle in candidates.items() if handle}
+        url = f"{self.base_url}/api/womtool/v1/describe"
+        response = httpx.post(url, headers=self.headers, files=files)
+        response.raise_for_status()
+        return response.json()
+
+    def abort(self, workflow_id):
+        """POST to the abort endpoint for ``workflow_id`` and return the JSON reply.
+
+        An error status is raised by ``raise_for_status``.
+        """
+        url = f"{self.base_url}/api/workflows/v1/{workflow_id}/abort"
+        response = httpx.post(url, headers=self.headers)
+        response.raise_for_status()
+        return response.json()
+
+    def search(self, days=1, name=None, status=None):
+        """Query the workflows endpoint and return the decoded JSON reply.
+
+        The ``submission`` filter is the date ``days`` days back (as produced
+        by ``past_date``) with ``T00:00Z`` appended. ``name`` and ``status``
+        are sent only when truthy.
+        """
+        candidates = {
+            "submission": f"{past_date(days)}T00:00Z",
+            "name": name,
+            "status": status,
+        }
+        # Leave unset filters out of the query string entirely.
+        query = {key: value for key, value in candidates.items() if value}
+        url = f"{self.base_url}/api/workflows/v1/query"
+        response = httpx.get(url, headers=self.headers, params=query)
+        response.raise_for_status()
+        return response.json()
diff --git a/tests/cromwell-api/test-abort.py b/tests/cromwell-api/test-abort.py
@@ -0,0 +1,13 @@
+import pytest
+from utils import make_path
+
+wdls = ["helloHostname", "helloDockerHostname", "helloModuleHostname"]
+
+
+@pytest.mark.parametrize("wdl", wdls)
+def test_abort(cromwell_api, wdl):
+    """Submit a workflow, abort it straight away, and check the reported status."""
+    submitted = cromwell_api.submit_workflow(wdl_path=make_path(wdl))
+    response = cromwell_api.abort(submitted["id"])
+    assert isinstance(response, dict)
+    assert response["status"] == "Aborted"
diff --git a/tests/cromwell-api/test-failures.py b/tests/cromwell-api/test-failures.py
@@ -0,0 +1,29 @@
+# import re
+
+
+def test_failures_initial_state(cromwell_api, submit_wdls):
+    """Checking for failures works for initial state.
+
+    Picks the submitted ``parseBatchFile`` workflow out of ``submit_wdls``,
+    requests only the ``failures`` and ``jobId`` metadata keys, and expects
+    an empty dict back.
+    """
+    # A dict literal cannot repeat a key (the last value silently wins, so
+    # "failures" was never requested). Both keys go in one list instead;
+    # httpx encodes a list value as a repeated query parameter:
+    # includeKey=failures&includeKey=jobId.
+    params = {"includeKey": ["failures", "jobId"]}
+    fail = [wf for wf in submit_wdls if "parseBatchFile" in wf["path"]]
+    res = cromwell_api.metadata(fail[0]["id"], params=params)
+    assert isinstance(res, dict)
+    assert res == {}
+
+
+# def test_failures_final_state(cromwell_api, submit_wdls):
+#     """Checking for failures works for final state"""
+#     params = {"includeKey": "failures", "includeKey": "jobId"}
+#     fail = list(filter(lambda x: "parseBatchFile" in x["path"], submit_wdls))
+#     print(f"fail0: {fail[0]}")
+#     res = cromwell_api.metadata(fail[0]["id"], params=params)
+#     print(f"cromwell_api.metadata output:{res}")
+#     fail_causedby_mssg = res["failures"]["causedBy"]["message"]
+#     fail_mssg = res["failures"]["message"]
+#     assert isinstance(res, dict)
+#     assert list(res.keys()) == [
+#         "failures",
+#         "calls",
+#         "id",
+#     ]
+#     assert re.search("not specified", fail_causedby_mssg) is not None
+#     assert re.search("failed", fail_mssg) is not None
diff --git a/tests/cromwell-api/test-metadata.py b/tests/cromwell-api/test-metadata.py
@@ -1,4 +1,4 @@
-workflow_meta_keys = [
+meta_keys_state_submitted = [
     "submittedFiles",
     "calls",
     "outputs",
@@ -8,12 +8,46 @@
     "labels",
     "submission",
 ]
+# Metadata keys, in order, that test_metadata expects once a workflow's
+# status is one of states_final.
+meta_keys_state_final = [
+    "workflowName",
+    "workflowProcessingEvents",
+    "actualWorkflowLanguageVersion",
+    "submittedFiles",
+    "calls",
+    "outputs",
+    "workflowRoot",
+    "actualWorkflowLanguage",
+    "status",
+    "end",
+    "start",
+    "id",
+    "inputs",
+    "labels",
+    "submission",
+]
+# Statuses for which test_metadata compares against meta_keys_state_submitted.
+states_not_final = [
+    "Running",
+    "Submitted",
+    "Pending",
+]
+# Statuses for which test_metadata compares against meta_keys_state_final.
+states_final = [
+    "Failed",
+    "Aborted",
+    "Succeeded",
+]
 
 
 def test_metadata(cromwell_api, submit_wdls):
     """Getting workflow metadata works"""
+    # The query options are chosen here and passed through to metadata().
+    params = {"expandSubWorkflows": False, "excludeKey": "calls"}
     ids = [wf["id"] for wf in submit_wdls]
     for x in ids:
-        res = cromwell_api.metadata(x)
+        res = cromwell_api.metadata(x, params=params)
         assert isinstance(res, dict)
-        # assert list(res.keys()) == workflow_meta_keys
+
+        # The expected key list depends on whether the workflow has reached a
+        # final state; list equality makes the comparison order-sensitive.
+        if res["status"] in states_not_final:
+            assert list(res.keys()) == meta_keys_state_submitted
+        elif res["status"] in states_final:
+            assert list(res.keys()) == meta_keys_state_final
+        else:
+            # A status outside both lists is not checked.
+            pass
diff --git a/tests/cromwell-api/test-search.py b/tests/cromwell-api/test-search.py
@@ -0,0 +1,39 @@
+import pytest
+
+# Every key a single search result item may carry; the keys of each returned
+# item are checked to be a subset of this list.
+search_result_each = [
+    "end",
+    "id",
+    "metadataArchiveStatus",
+    "name",
+    "start",
+    "status",
+    "submission",
+]
+
+
+# Search results carry anywhere from 4 to 7 keys depending on the item,
+# so they are checked for being a subset rather than for equality
+def contains_sublist(list1, list2):
+    """Return True when every element of ``list2`` also occurs in ``list1``."""
+    return set(list1) >= set(list2)
+
+
+def test_search_no_results(cromwell_api):
+    """Checking that search works with no results"""
+    # A negative ``days`` puts the submission cutoff in the future,
+    # so no job can match the query
+    found = cromwell_api.search(days=-2)
+    assert found["totalResultsCount"] == 0
+    assert found["results"] == []
+
+
+def test_search_results(cromwell_api):
+    """Checking that search works when there MIGHT be results"""
+    found = cromwell_api.search(days=1)
+    # There may or may not be results, we can't guarantee it
+    if found["totalResultsCount"] == 0:
+        pytest.skip("no results for search, skipping tests")
+    # pytest.skip raises, so the checks below only run when results exist
+    assert isinstance(found["totalResultsCount"], int)
+    assert len(found["results"]) > 0
+    for entry in found["results"]:
+        assert contains_sublist(search_result_each, list(entry.keys()))
diff --git a/tests/cromwell-api/test-validate.py b/tests/cromwell-api/test-validate.py
@@ -0,0 +1,20 @@
+from utils import make_path
+
+
+def test_validate_good_wdl(cromwell_api):
+    """Validating a well-formed WDL reports it as valid and runnable."""
+    report = cromwell_api.validate(wdl_path=make_path("helloHostname"))
+    assert isinstance(report, dict)
+    for flag in ("valid", "validWorkflow", "isRunnableWorkflow"):
+        assert report[flag]
+
+
+def test_validate_bad_wdl(cromwell_api):
+    """Validating the broken ``badFile`` WDL reports it as invalid, with the lookup error."""
+    report = cromwell_api.validate(wdl_path=make_path("badFile"))
+    assert isinstance(report, dict)
+    for flag in ("valid", "validWorkflow", "isRunnableWorkflow"):
+        assert not report[flag]
+    first_error = report["errors"][0]
+    assert "Cannot lookup value 'docker_image'" in first_error
diff --git a/tests/cromwell-api/utils.py b/tests/cromwell-api/utils.py
@@ -1,4 +1,5 @@
 import os
+from datetime import datetime, timedelta
 from pathlib import Path
 
 PROOF_BASE_URL = "https://proof-api-dev.fredhutch.org"
@@ -8,3 +9,9 @@
 def make_path(file):
     """Return ``<root>/<file>/<file>.wdl``, where root is two directories above this file's directory."""
     repo_root = Path(__file__).parents[2].resolve()
     return repo_root.joinpath(f"{file}/{file}.wdl")
+
+
+def past_date(days):
+    """Return the local date ``days`` days before now, formatted ``YYYY-MM-DD``.
+
+    A negative ``days`` yields a date in the future.
+    """
+    target = datetime.now() - timedelta(days=days)
+    return target.strftime("%Y-%m-%d")