Skip to content

Commit

Permalink
Generalize finding tool sheds from runnable
Browse files Browse the repository at this point in the history
Should now work for workflow_edit, test, run, etc. when targeting a local or
Docker instance.
  • Loading branch information
mvdbeek committed May 3, 2024
1 parent 1e81b3e commit 361610b
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 71 deletions.
40 changes: 2 additions & 38 deletions planemo/autoupdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import itertools
import re
import xml.etree.ElementTree as ET
from string import Template
from typing import (
Any,
DefaultDict,
Expand All @@ -26,7 +25,8 @@

import planemo.conda
from planemo.galaxy.workflows import (
guess_tool_shed_url,
get_tool_ids_for_workflow,
get_toolshed_url_for_tool_id,
MAIN_TOOLSHED_URL,
)
from planemo.io import (
Expand Down Expand Up @@ -294,14 +294,6 @@ def get_newest_tool_id(tool_ids: List[str]) -> str:
)[-1]


def get_toolshed_url_for_tool_id(tool_id: str) -> Optional[str]:
    """Return the Tool Shed URL guessed for ``tool_id``.

    The text before the first ``/repos`` in the id is passed to
    ``guess_tool_shed_url`` as the Tool Shed FQDN.

    :param tool_id: tool id to inspect
    :return: result of ``guess_tool_shed_url``, or ``None`` when the id
        does not contain ``/repos``
    """
    fqdn, separator, _ = tool_id.partition("/repos")
    if not separator:
        # No "/repos" in the id, so there is no Tool Shed to look up.
        return None
    return guess_tool_shed_url(tool_shed_fqdn=fqdn)


def outdated_tools( # noqa: C901
ctx: "PlanemoCliContext", wf_dict: Dict[str, Any], tools_to_skip: List[str]
) -> Dict[str, Dict[str, str]]:
Expand Down Expand Up @@ -355,22 +347,6 @@ def outdated_tools_rec(wf_dict: Dict[str, Any]) -> None:
return outdated_tool_dict


def get_tool_ids_for_workflow(wf_dict: Dict[str, Any], tool_ids: Optional[List[str]] = None) -> List[str]:
    """Collect the ids of all tools used by a workflow, including subworkflows.

    ``wf_dict["steps"]`` may be a mapping (its values are the steps) or a
    list of steps. Subworkflows are followed both through the ``subworkflow``
    key of steps whose type is ``"subworkflow"`` and through a ``run`` mapping
    whose ``class`` is ``"GalaxyWorkflow"``.

    :param wf_dict: workflow description containing a ``"steps"`` entry
    :param tool_ids: optional accumulator that is extended in place; used to
        gather ids across recursive calls
    :return: the collected tool ids, de-duplicated, in first-seen order
    """
    tool_ids = [] if tool_ids is None else tool_ids
    steps = wf_dict["steps"]
    if isinstance(steps, dict):
        steps = steps.values()
    for step in steps:
        run = step.get("run")
        # ``run`` is only an embedded workflow when it is a mapping of class
        # GalaxyWorkflow; any other value (missing, None, non-mapping) is not.
        embedded_workflow = run if isinstance(run, dict) and run.get("class") == "GalaxyWorkflow" else None
        if step.get("type", "tool") == "tool" and embedded_workflow is None:
            # Steps without a usable tool id are skipped rather than raising
            # KeyError or polluting the result with None.
            tool_id = step.get("tool_id")
            if tool_id:
                tool_ids.append(tool_id)
        elif step.get("type") == "subworkflow":  # GA SWF
            get_tool_ids_for_workflow(step["subworkflow"], tool_ids=tool_ids)
        elif embedded_workflow is not None:  # gxformat2 SWF
            get_tool_ids_for_workflow(embedded_workflow, tool_ids=tool_ids)
    return list(dict.fromkeys(tool_ids))


def get_tools_to_update(
ctx: "PlanemoCliContext", workflow: "Runnable", tools_to_skip: List[str]
) -> Dict[str, Dict[str, str]]:
Expand All @@ -383,18 +359,6 @@ def get_tools_to_update(
return outdated_tools(ctx, wf_dict, tools_to_skip)


def get_shed_tools_conf_string_for_tool_ids(tool_ids: List[str]) -> str:
    """Build the ``<tool_sheds>`` XML configuration for the given tool ids.

    Each non-empty tool id is resolved with ``get_toolshed_url_for_tool_id``;
    ids that resolve to ``None`` contribute nothing. Every distinct URL
    becomes one ``<tool_shed>`` element whose ``name`` is the URL without its
    scheme.

    :param tool_ids: tool ids to inspect; falsy entries are ignored
    :return: the XML document as a string
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import quoteattr

    resolved_urls = {get_toolshed_url_for_tool_id(tool_id) for tool_id in tool_ids if tool_id}
    tool_shed_urls = {url for url in resolved_urls if url is not None}
    # sort tool_shed_urls from shortest to longest, as https://github.com/galaxyproject/galaxy/blob/c7cb47a1b18ccd5b39075a705bbd2f34572755fe/lib/galaxy/util/tool_shed/tool_shed_registry.py#L106-L118
    # has a bug where a toolshed that is an exact substring of another registered toolshed would wrongly be selected.
    tool_shed_lines: List[str] = []
    for tool_shed_url in sorted(tool_shed_urls, key=len):
        name = tool_shed_url.split("://")[-1]
        # quoteattr escapes XML special characters so an unusual URL cannot
        # produce malformed XML; ordinary URLs are emitted exactly as before.
        tool_shed_lines.append(f"<tool_shed name={quoteattr(name)} url={quoteattr(tool_shed_url)} />")
    joined_lines = "".join(tool_shed_lines)
    return f"<tool_sheds>{joined_lines}</tool_sheds>"


def autoupdate_wf(ctx: "PlanemoCliContext", config: "LocalGalaxyConfig", wf: "Runnable") -> Dict[str, Any]:
workflow_id = config.workflow_id_for_runnable(wf)
_update_wf(config, workflow_id)
Expand Down
8 changes: 0 additions & 8 deletions planemo/commands/cmd_autoupdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,6 @@ def cli(ctx, paths, **kwds): # noqa C901
kwds["install_repository_dependencies"] = False
kwds["shed_install"] = True

tool_ids = []
for workflow in modified_workflows:
with open(workflow.path) as fh:
wf_dict = yaml.safe_load(fh)
tool_ids.extend(autoupdate.get_tool_ids_for_workflow(wf_dict=wf_dict))
tool_ids = list(dict.fromkeys(tool_ids))
kwds["tool_sheds_config_content"] = autoupdate.get_shed_tools_conf_string_for_tool_ids(tool_ids)

with engine_context(ctx, **kwds) as galaxy_engine:
with galaxy_engine.ensure_runnables_served(modified_workflows) as config:
for workflow in modified_workflows:
Expand Down
62 changes: 52 additions & 10 deletions planemo/galaxy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
from planemo.config import OptionSource
from planemo.deps import ensure_dependency_resolvers_conf_configured
from planemo.docker import docker_host_args
from planemo.galaxy.workflows import remote_runnable_to_workflow_id
from planemo.galaxy.workflows import (
get_toolshed_url_for_tool_id,
remote_runnable_to_workflow_id,
)
from planemo.io import (
communicate,
kill_pid_file,
Expand All @@ -48,6 +51,7 @@
write_file,
)
from planemo.mulled import build_involucro_context
from planemo.runnable import RunnableType
from planemo.shed import tool_shed_url
from .api import (
DEFAULT_ADMIN_API_KEY,
Expand Down Expand Up @@ -258,6 +262,10 @@ def config_join(*args):
shed_tool_path = kwds.get("shed_tool_path") or config_join("shed_tools")
_ensure_directory(shed_tool_path)

# Find tool sheds to add to config
tool_sheds_config_content = get_tool_sheds_conf_for_runnables(runnables)
if tool_sheds_config_content:
kwds["tool_sheds_config_content"] = tool_sheds_config_content
sheds_config_path = _configure_sheds_config_file(ctx, config_directory, **kwds)
port = _get_port(kwds)
properties = _shared_galaxy_properties(config_directory, kwds, for_tests=for_tests)
Expand Down Expand Up @@ -326,6 +334,7 @@ def local_galaxy_config(ctx, runnables, for_tests=False, **kwds):
test_data_dir = _find_test_data(runnables, **kwds)
tool_data_tables = _find_tool_data_table(runnables, test_data_dir=test_data_dir, **kwds)
data_manager_config_paths = [r.data_manager_conf_path for r in runnables if r.data_manager_conf_path]

galaxy_root = _find_galaxy_root(ctx, **kwds)
install_galaxy = kwds.get("install_galaxy", False)
if galaxy_root is not None:
Expand Down Expand Up @@ -389,6 +398,10 @@ def config_join(*args):
shed_tool_path = kwds.get("shed_tool_path") or config_join("shed_tools")
_ensure_directory(shed_tool_path)

# Find tool sheds to add to config
tool_sheds_config_content = get_tool_sheds_conf_for_runnables(runnables)
if tool_sheds_config_content:
kwds["tool_sheds_config_content"] = tool_sheds_config_content
sheds_config_path = _configure_sheds_config_file(ctx, config_directory, **kwds)

database_location = config_join("galaxy.sqlite")
Expand Down Expand Up @@ -568,19 +581,27 @@ def _all_tool_paths(
all_tool_paths = {r.path for r in runnables if r.has_tools and not r.data_manager_conf_path}
extra_tools = _expand_paths(galaxy_root, extra_tools=extra_tools)
all_tool_paths.update(extra_tools)
for runnable in runnables:
if runnable.type.name == "galaxy_workflow":
tool_ids = find_tool_ids(runnable.path)
for tool_id in tool_ids:
tool_paths = DISTRO_TOOLS_ID_TO_PATH.get(tool_id)
if tool_paths:
if isinstance(tool_paths, str):
tool_paths = [tool_paths]
all_tool_paths.update(tool_paths)
for tool_id in get_tool_ids_for_runnables(runnables):
tool_paths = DISTRO_TOOLS_ID_TO_PATH.get(tool_id)
if tool_paths:
if isinstance(tool_paths, str):
tool_paths = [tool_paths]
all_tool_paths.update(tool_paths)

return all_tool_paths


def get_workflow_runnables(runnables: List[Runnable]) -> List[Runnable]:
    """Return the runnables that are Galaxy workflows and have a path.

    :param runnables: runnables to filter
    :return: new list with the matching runnables, in their original order
    """
    workflow_runnables = []
    for runnable in runnables:
        if runnable.type == RunnableType.galaxy_workflow and runnable.has_path:
            workflow_runnables.append(runnable)
    return workflow_runnables


def get_tool_ids_for_runnables(runnables) -> List[str]:
    """Return the tool ids used by the workflow runnables in ``runnables``.

    Only runnables selected by ``get_workflow_runnables`` are inspected; the
    ids found by ``find_tool_ids`` for each of them are merged, keeping the
    first occurrence of every id.

    :param runnables: runnables to inspect
    :return: de-duplicated tool ids in first-seen order
    """
    seen = {}
    for runnable in get_workflow_runnables(runnables):
        for tool_id in find_tool_ids(runnable.path):
            # dict keys keep insertion order, which preserves first-seen order
            seen.setdefault(tool_id, None)
    return list(seen)


def _shared_galaxy_properties(config_directory, kwds, for_tests):
"""Setup properties useful for local and Docker Galaxy instances.
Expand Down Expand Up @@ -1201,6 +1222,27 @@ def _search_tool_path_for(path, target, extra_paths=None):
return None


def get_tool_sheds_conf_for_runnables(runnables: Optional[List[Runnable]]) -> Optional[str]:
    """Return tool sheds configuration content for ``runnables``.

    :param runnables: runnables to inspect; may be ``None`` or empty
    :return: the string built by ``get_shed_tools_conf_string_for_tool_ids``
        from the runnables' tool ids, or ``None`` when no runnables are given
    """
    if not runnables:
        return None
    return get_shed_tools_conf_string_for_tool_ids(get_tool_ids_for_runnables(runnables))


def get_shed_tools_conf_string_for_tool_ids(tool_ids: List[str]) -> str:
    """Build the ``<tool_sheds>`` XML configuration for the given tool ids.

    Each non-empty tool id is resolved with ``get_toolshed_url_for_tool_id``;
    ids that resolve to ``None`` contribute nothing. The main Tool Shed is
    always included. Every distinct URL becomes one ``<tool_shed>`` element
    whose ``name`` is the URL without its scheme.

    :param tool_ids: tool ids to inspect; falsy entries are ignored
    :return: the XML document as a string
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import quoteattr

    resolved_urls = {get_toolshed_url_for_tool_id(tool_id) for tool_id in tool_ids if tool_id}
    # always add main toolshed
    # NOTE(review): if get_toolshed_url_for_tool_id can return the main Tool
    # Shed URL without the trailing slash, the main shed would be listed
    # twice -- confirm against the value it returns.
    resolved_urls.add("https://toolshed.g2.bx.psu.edu/")
    tool_shed_urls = {url for url in resolved_urls if url is not None}
    # sort tool_shed_urls from shortest to longest, as https://github.com/galaxyproject/galaxy/blob/c7cb47a1b18ccd5b39075a705bbd2f34572755fe/lib/galaxy/util/tool_shed/tool_shed_registry.py#L106-L118
    # has a bug where a toolshed that is an exact substring of another registered toolshed would wrongly be selected.
    tool_shed_lines: List[str] = []
    for shed_url in sorted(tool_shed_urls, key=len):
        name = shed_url.split("://")[-1]
        # quoteattr escapes XML special characters so an unusual URL cannot
        # produce malformed XML; ordinary URLs are emitted exactly as before.
        tool_shed_lines.append(f"<tool_shed name={quoteattr(name)} url={quoteattr(shed_url)} />")
    joined_lines = "".join(tool_shed_lines)
    return f"<tool_sheds>{joined_lines}</tool_sheds>"


def _configure_sheds_config_file(ctx, config_directory, **kwds):
contents = kwds.get("tool_sheds_config_content")
if not contents:
Expand Down
37 changes: 25 additions & 12 deletions planemo/galaxy/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def guess_tool_shed_url(tool_shed_fqdn: str) -> Optional[str]:
return None


def get_toolshed_url_for_tool_id(tool_id: str) -> Optional[str]:
    """Return the Tool Shed URL guessed for ``tool_id``.

    The text before the first ``/repos`` in the id is passed to
    ``guess_tool_shed_url`` as the Tool Shed FQDN.

    :param tool_id: tool id to inspect
    :return: result of ``guess_tool_shed_url``, or ``None`` when the id
        does not contain ``/repos``
    """
    fqdn, separator, _ = tool_id.partition("/repos")
    if not separator:
        # No "/repos" in the id, so there is no Tool Shed to look up.
        return None
    return guess_tool_shed_url(tool_shed_fqdn=fqdn)


def load_shed_repos(runnable):
if runnable.type.name != "galaxy_workflow":
return []
Expand Down Expand Up @@ -164,20 +172,25 @@ def _raw_dict(path, importer=None):
return workflow


def find_tool_ids(path):
tool_ids = set()
workflow = _raw_dict(path)

def register_tool_ids(tool_ids, workflow):
for step in workflow["steps"].values():
if step.get("subworkflow"):
register_tool_ids(tool_ids, step["subworkflow"])
elif step.get("tool_id"):
tool_ids.add(step["tool_id"])
def get_tool_ids_for_workflow(wf_dict: Dict[str, Any], tool_ids: Optional[List[str]] = None) -> List[str]:
    """Collect the ids of all tools used by a workflow, including subworkflows.

    ``wf_dict["steps"]`` may be a mapping (its values are the steps) or a
    list of steps. Subworkflows are followed both through the ``subworkflow``
    key of steps whose type is ``"subworkflow"`` and through a ``run`` mapping
    whose ``class`` is ``"GalaxyWorkflow"``.

    :param wf_dict: workflow description containing a ``"steps"`` entry
    :param tool_ids: optional accumulator that is extended in place; used to
        gather ids across recursive calls
    :return: the collected tool ids, de-duplicated, in first-seen order
    """
    tool_ids = [] if tool_ids is None else tool_ids
    steps = wf_dict["steps"]
    if isinstance(steps, dict):
        steps = steps.values()
    for step in steps:
        run = step.get("run")
        # ``run`` is only an embedded workflow when it is a mapping of class
        # GalaxyWorkflow; any other value (missing, None, non-mapping) is not.
        embedded_workflow = run if isinstance(run, dict) and run.get("class") == "GalaxyWorkflow" else None
        if step.get("type", "tool") == "tool" and embedded_workflow is None:
            # Steps without a usable tool id are skipped rather than raising
            # KeyError or polluting the result with None.
            tool_id = step.get("tool_id")
            if tool_id:
                tool_ids.append(tool_id)
        elif step.get("type") == "subworkflow":  # GA SWF
            get_tool_ids_for_workflow(step["subworkflow"], tool_ids=tool_ids)
        elif embedded_workflow is not None:  # gxformat2 SWF
            get_tool_ids_for_workflow(embedded_workflow, tool_ids=tool_ids)
    return list(dict.fromkeys(tool_ids))

register_tool_ids(tool_ids, workflow)

return list(tool_ids)
def find_tool_ids(path):
    """Return the tool ids used by the workflow at ``path``.

    The workflow is loaded with ``_raw_dict`` and its tool ids are gathered
    by ``get_tool_ids_for_workflow``.
    """
    return get_tool_ids_for_workflow(_raw_dict(path))


WorkflowOutput = namedtuple("WorkflowOutput", ["order_index", "output_name", "label", "optional"])
Expand Down
6 changes: 3 additions & 3 deletions tests/test_autoupdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

from galaxy.util import parse_xml_string

from planemo.autoupdate import (
get_newest_tool_id,
get_shed_tools_conf_string_for_tool_ids,
from planemo.autoupdate import get_newest_tool_id
from planemo.galaxy.config import get_shed_tools_conf_string_for_tool_ids
from planemo.galaxy.workflows import (
get_tool_ids_for_workflow,
get_toolshed_url_for_tool_id,
)
Expand Down

0 comments on commit 361610b

Please sign in to comment.