Skip to content

Commit

Permalink
More helpers for reasoning about gxformat2 steps.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Jul 22, 2024
1 parent 241a181 commit 94cc478
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 51 deletions.
55 changes: 6 additions & 49 deletions gxformat2/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,23 @@

from ._labels import Labels
from .model import (
append_step_id_to_step_list_elements,
clean_connection,
convert_dict_to_id_list_if_needed,
ensure_step_position,
get_native_step_type,
inputs_as_native_steps,
pop_connect_from_step_dict,
setup_connected_values,
steps_as_list,
SUPPORT_LEGACY_CONNECTIONS,
with_step_ids,
)
from .yaml import ordered_load

# Human-readable summary of what this conversion script does.
SCRIPT_DESCRIPTION = "\nConvert a Format 2 Galaxy workflow description into a native format.\n"

# Step type names accepted after alias resolution.
STEP_TYPES = [
    "subworkflow",
    "data_input",
    "data_collection_input",
    "tool",
    "pause",
    "parameter_input",
]

# Short step type names mapped onto the entry of STEP_TYPES they stand for.
STEP_TYPE_ALIASES = {
    "input": "data_input",
    "input_collection": "data_collection_input",
    "parameter": "parameter_input",
}

RUN_ACTIONS_TO_STEPS = {
'GalaxyWorkflow': 'run_workflow_to_step',
'GalaxyTool': 'run_tool_to_step',
Expand Down Expand Up @@ -130,34 +117,6 @@ def python_to_workflow(as_python, galaxy_interface, workflow_directory=None, imp
return converted


# move to a utils file?
def steps_as_list(format2_workflow: dict, add_ids: bool = False, inputs_offset: int = 0, mutate: bool = False):
    """Return the workflow's ``steps`` entry in list form.

    The ``steps`` value is passed through ``convert_dict_to_id_list_if_needed``
    with ``add_label=True``, so a mapping of steps becomes a list (the mapping
    keys presumably end up as step labels - confirm against that helper).

    When ``add_ids`` is true every step is also given an ``id``, offset by
    ``inputs_offset``: in place if ``mutate`` is true, otherwise through
    ``with_step_ids``.

    Raises ``Exception`` if ``format2_workflow`` has no ``steps`` key.
    """
    if "steps" not in format2_workflow:
        raise Exception(f"No 'steps' key in dict, keys are {format2_workflow.keys()}")

    step_list = convert_dict_to_id_list_if_needed(format2_workflow["steps"], add_label=True, mutate=mutate)
    if not add_ids:
        return step_list
    if not mutate:
        return with_step_ids(step_list, inputs_offset=inputs_offset)
    # Caller allowed mutation: assign the missing ids directly on the step dicts.
    _append_step_id_to_step_list_elements(step_list, inputs_offset=inputs_offset)
    return step_list


def _append_step_id_to_step_list_elements(steps: list, inputs_offset: int = 0):
    """Give every step in ``steps`` an ``id`` key, mutating the steps in place.

    A step without an ``id`` receives its list position plus ``inputs_offset``;
    an existing ``id`` is left alone but must not be ``None``.
    """
    assert isinstance(steps, list)
    for index, step in enumerate(steps):
        step_has_id = "id" in step
        if not step_has_id:
            step["id"] = index + inputs_offset
        assert step["id"] is not None


def _python_to_workflow(as_python, conversion_context):

if "class" not in as_python:
Expand All @@ -182,7 +141,7 @@ def _python_to_workflow(as_python, conversion_context):
convert_inputs_to_steps(as_python, steps)

if isinstance(steps, list):
_append_step_id_to_step_list_elements(steps)
append_step_id_to_step_list_elements(steps)
steps_as_dict: Dict[str, Any] = {}
for i, step in enumerate(steps):
steps_as_dict[str(i)] = step
Expand Down Expand Up @@ -214,10 +173,8 @@ def _python_to_workflow(as_python, conversion_context):
del step["run"]

for step in steps.values():
step_type = step.get("type", "tool")
step_type = STEP_TYPE_ALIASES.get(step_type, step_type)
if step_type not in STEP_TYPES:
raise Exception(f"Unknown step type encountered {step_type}")
step_type = get_native_step_type(step)
# in case it was an alias or default - set it back up in the resulting dict
step["type"] = step_type
eval(f"transform_{step_type}")(conversion_context, step)

Expand Down
79 changes: 78 additions & 1 deletion gxformat2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
Union,
)

from typing_extensions import TypedDict
from typing_extensions import (
Literal,
TypedDict,
)

log = logging.getLogger(__name__)

Expand All @@ -20,6 +23,52 @@

EmbeddedLink = TypedDict("EmbeddedLink", {"$link": str})

# Step type names that get_native_step_type() may return.
NativeGalaxyStepType = Literal[
    "subworkflow",
    "data_input",
    "data_collection_input",
    "tool",
    "pause",
    "parameter_input",
]
# Short names accepted in gxformat2 documents; each maps to a native type
# through STEP_TYPE_ALIASES below.
GxFormat2StepTypeAlias = Literal[
    "input",
    "input_collection",
    "parameter",
]
StepTypes = Union[NativeGalaxyStepType, GxFormat2StepTypeAlias]


# Runtime list of the values of NativeGalaxyStepType (keep the two in sync).
STEP_TYPES = [
    "subworkflow",
    "data_input",
    "data_collection_input",
    "tool",
    "pause",
    "parameter_input",
]
# Runtime mapping from each gxformat2 alias to the native type it stands for.
STEP_TYPE_ALIASES: Dict[GxFormat2StepTypeAlias, NativeGalaxyStepType] = {
    "input": "data_input",
    "input_collection": "data_collection_input",
    "parameter": "parameter_input",
}


def get_native_step_type(gxformat2_step_dict: dict) -> NativeGalaxyStepType:
    """Infer native galaxy step type from the gxformat2 step as a dict.

    An explicit ``type`` entry wins. Without one, the step defaults to
    ``"subworkflow"`` when it has a truthy ``run`` entry and to ``"tool"``
    otherwise. Aliases listed in ``STEP_TYPE_ALIASES`` are resolved to their
    native name.

    Raises ``Exception`` when the step type is neither a native type nor a
    known alias. This includes non-string values such as lists or dicts.
    """
    specifies_subworkflow_run = bool(gxformat2_step_dict.get("run"))
    step_type_default = "subworkflow" if specifies_subworkflow_run else "tool"
    raw_step_type = gxformat2_step_dict.get("type", step_type_default)
    # Check for str first: an unhashable value (e.g. a YAML list) would make
    # the dict membership test below raise an opaque TypeError instead of the
    # intended "unknown step type" error.
    is_known = isinstance(raw_step_type, str) and (
        raw_step_type in STEP_TYPES or raw_step_type in STEP_TYPE_ALIASES
    )
    if not is_known:
        raise Exception(f"Unknown step type encountered {raw_step_type}")
    if raw_step_type in STEP_TYPE_ALIASES:
        return STEP_TYPE_ALIASES[cast(GxFormat2StepTypeAlias, raw_step_type)]
    return cast(NativeGalaxyStepType, raw_step_type)


# Opt-in flag for the legacy connection syntax: `source: step#output` and
# `$link: step#output` instead of `outputSource: step/output` and
# `$link: step/output`. Enabled only when the environment variable
# GXFORMAT2_SUPPORT_LEGACY_CONNECTIONS is exactly "1".
SUPPORT_LEGACY_CONNECTIONS = os.getenv("GXFORMAT2_SUPPORT_LEGACY_CONNECTIONS") == "1"

Expand Down Expand Up @@ -318,3 +367,31 @@ def outputs_as_list(as_python: dict) -> list:
outputs = as_python.get("outputs", [])
outputs = convert_dict_to_id_list_if_needed(outputs)
return outputs


def steps_as_list(format2_workflow: dict, add_ids: bool = False, inputs_offset: int = 0, mutate: bool = False) -> List[Dict[str, Any]]:
    """Return the workflow's ``steps`` entry in list form.

    The ``steps`` value is passed through ``convert_dict_to_id_list_if_needed``
    with ``add_label=True``, so a mapping of steps becomes a list (the mapping
    keys presumably end up as step labels - confirm against that helper).

    When ``add_ids`` is true every step is also given an ``id``, offset by
    ``inputs_offset``: in place if ``mutate`` is true, otherwise through
    ``with_step_ids``.

    Raises ``Exception`` if ``format2_workflow`` has no ``steps`` key.
    """
    if "steps" not in format2_workflow:
        raise Exception(f"No 'steps' key in dict, keys are {format2_workflow.keys()}")

    step_list = convert_dict_to_id_list_if_needed(format2_workflow["steps"], add_label=True, mutate=mutate)
    if not add_ids:
        return step_list
    if not mutate:
        return with_step_ids(step_list, inputs_offset=inputs_offset)
    # Caller allowed mutation: assign the missing ids directly on the step dicts.
    append_step_id_to_step_list_elements(step_list, inputs_offset=inputs_offset)
    return step_list


def append_step_id_to_step_list_elements(steps: List[Dict[str, Any]], inputs_offset: int = 0) -> None:
    """Ensure every step dict in ``steps`` carries an ``id``, mutating in place.

    A step without an ``id`` receives its list position plus ``inputs_offset``.
    An existing ``id`` is kept but must not be ``None``.
    """
    assert isinstance(steps, list)
    for position, step in enumerate(steps):
        # setdefault only writes when the key is absent, so explicit ids survive.
        step.setdefault("id", position + inputs_offset)
        assert step["id"] is not None
3 changes: 2 additions & 1 deletion tests/_helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import copy
import os

from gxformat2.converter import python_to_workflow, STEP_TYPES, yaml_to_workflow
from gxformat2.converter import python_to_workflow, yaml_to_workflow
from gxformat2.export import from_galaxy_native
from gxformat2.interface import ImporterGalaxyInterface
from gxformat2.model import STEP_TYPES

# Absolute path of the directory containing this helpers module.
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
# Example workflows live in "examples" next to this file unless the
# GXFORMAT2_INTEROP_EXAMPLES environment variable points somewhere else.
_DEFAULT_INTEROP_EXAMPLES = os.path.join(TEST_PATH, "examples")
TEST_INTEROP_EXAMPLES = os.environ.get("GXFORMAT2_INTEROP_EXAMPLES", _DEFAULT_INTEROP_EXAMPLES)
Expand Down

0 comments on commit 94cc478

Please sign in to comment.