diff --git a/beam/__init__.py b/beam/__init__.py
index a4dea37..965ee1f 100644
--- a/beam/__init__.py
+++ b/beam/__init__.py
@@ -14,6 +14,7 @@
 
 """
 
+import argparse
 import json
 import multiprocessing.managers
 import multiprocessing.shared_memory
@@ -25,6 +26,7 @@
 import urllib.request
 import venv
 from enum import Enum
+from importlib import metadata
 from pathlib import Path
 from types import ModuleType
 from typing import Any, Callable, Optional
@@ -46,6 +48,43 @@ class SupportedInstallationMethod(Enum):
     CONDA = "conda"
 
 
+def run_beam():
+    """Command-line entry point: parse arguments and run ``extract`` in CLI mode."""
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument(
+        "--version",
+        action="version",
+        version=f'%(prog)s version {metadata.version("datatractor_beam")}',
+    )
+
+    # Required positionals: argparse never uses ``default`` for these, so none is given.
+    argparser.add_argument(
+        "filetype",
+        help="FileType.ID of the input file",
+    )
+
+    argparser.add_argument(
+        "infile",
+        help="Path of the input file",
+    )
+
+    argparser.add_argument(
+        "--outfile",
+        "-o",
+        help="Optional path of the output file",
+        default=None,
+    )
+
+    args = argparser.parse_args()
+
+    extract(
+        input_path=args.infile,
+        input_type=args.filetype,
+        output_path=args.outfile,
+        preferred_mode=SupportedExecutionMethod.CLI,
+    )
+
+
 def extract(
     input_path: Path | str,
     input_type: str,
@@ -61,15 +100,16 @@ def extract(
 
     Parameters:
         input_path: The path or URL of the file to parse.
-        input_type: The ID of the `FileType` in the registry.
+        input_type: The ID of the ``FileType`` in the registry.
         output_path: The path to write the output to.
             If not provided, the output will be requested to be written
-            to a file with the same name as the input file, but with a .json extension.
+            to a file with the same name as the input file, but with an extension as
+            defined using the ``output_type``, or ``.out`` if no ``output_type`` is given.
         output_type: A string specifying the desired output type.
         preferred_mode: The preferred execution method.
             If the extractor supports both Python and CLI, this will be used to determine
             which to use. If the extractor only supports one method, this will be ignored.
-            Accepts the `SupportedExecutionMethod` values of "cli" or "python".
+            Accepts the ``SupportedExecutionMethod`` values of "cli" or "python".
         install: Whether to install the extractor package before running it. Defaults to True.
         extractor_definition: A dictionary containing the extractor definition to use instead
             of a registry lookup.
@@ -265,7 +305,8 @@ def execute(
         )
 
         if output_path is None:
-            output_path = input_path.with_suffix(".json")
+            suffix = ".out" if output_type is None else f".{output_type}"
+            output_path = input_path.with_suffix(suffix)
 
         command = self.apply_template_args(
             command,
diff --git a/pyproject.toml b/pyproject.toml
index 81ed56b..60b5cf9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,9 @@ dev = [
 [project.urls]
 repository = "https://github.com/datatractor/beam"
 
+[project.scripts]
+beam = "beam:run_beam"
+
 [tool.ruff]
 extend-exclude = [
     "providers",
diff --git a/tests/test_mpr.py b/tests/test_mpr.py
index e6edd71..a8df145 100644
--- a/tests/test_mpr.py
+++ b/tests/test_mpr.py
@@ -1,3 +1,4 @@
+import subprocess
 import urllib.request
 from pathlib import Path
 
@@ -146,3 +147,14 @@ def test_extractorplan_python_method():
     function, args, kwargs = ExtractorPlan._prepare_python(
         'extract(filename="example.txt", type={"test": "example", "dictionary": "example"})'
     )
+
+
+def test_biologic_beam(tmp_path, test_mprs):
+    """Run the ``beam`` console script on one test file and check the output exists."""
+    for test_mpr in test_mprs:
+        input_path = tmp_path / test_mpr
+        output_path = tmp_path / test_mpr.name.replace(".mpr", ".nc")
+        task = ["beam", "biologic-mpr", str(input_path), "--outfile", str(output_path)]
+        subprocess.run(task, check=True)
+        assert output_path.exists()
+        break