diff --git a/README.md b/README.md index 1d393aa..46f138d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ For the time being, there are two sets of annotation generators: ## Adding an annotation -- Add the command in the dictionary in (https://github.com/binpash/annotations/blob/main/pash_annotations/annotation_generation/AnnotationGeneration.py#L13) +- Add the command in the dictionary in (https://github.com/binpash/annotations/blob/main/pash_annotations/annotation_generation/annotation_generation.py#L19) - Add a json file with the command flags in (https://github.com/binpash/annotations/tree/main/pash_annotations/parser/command_flag_option_info/data). This could be used to generate a first version of it: (https://github.com/binpash/annotations/blob/main/pash_annotations/parser/command_flag_option_info/manpage-to-json.sh). - Add an `InputOutputInfoGeneratorXXX.py` in (https://github.com/binpash/annotations/tree/main/pash_annotations/annotation_generation/annotation_generators) - (Optionally) add a `ParallelizabilityInfoGeneratorXXX.py` in (https://github.com/binpash/annotations/tree/main/pash_annotations/annotation_generation/annotation_generators) diff --git a/pash_annotations/annotation_cli.py b/pash_annotations/annotation_cli.py index 1a57178..44bf019 100644 --- a/pash_annotations/annotation_cli.py +++ b/pash_annotations/annotation_cli.py @@ -2,28 +2,40 @@ from typing import Optional # create parser -from annotation_generation.AnnotationGeneration import get_input_output_info_from_cmd_invocation, \ - get_parallelizability_info_from_cmd_invocation +from pash_annotations.annotation_generation.annotation_generation import ( + get_input_output_info_from_cmd_invocation, + get_parallelizability_info_from_cmd_invocation, +) -from annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from datatypes.CommandInvocationInitial import CommandInvocationInitial 
-from parser.parser import parse +from pash_annotations.annotation_generation.datatypes.input_output_info import InputOutputInfo +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ParallelizabilityInfo +from pash_annotations.datatypes.command_invocation_initial import CommandInvocationInitial +from pash_annotations.parser.parser import parse parser = argparse.ArgumentParser() # add arguments to the parser -parser.add_argument('--command_invocation', metavar='STRING', type=str, required=True, - help='specifies the command invocation to check (enclosed by \")') -parser.add_argument('--save_to', metavar='FILE', type=str, default=None, - help='store output in file (relative to where the script is called from); ' - 'will not overwrite existing files but then print instead') +parser.add_argument( + "--command_invocation", + metavar="STRING", + type=str, + required=True, + help='specifies the command invocation to check (enclosed by ")', +) +parser.add_argument( + "--save_to", + metavar="FILE", + type=str, + default=None, + help="store output in file (relative to where the script is called from); " + "will not overwrite existing files but then print instead", +) # parse the arguments options = parser.parse_args() script_path = __file__ -split_path = script_path.rpartition('/') +split_path = script_path.rpartition("/") script_prefix = "".join(split_path[:2]) where_to_save = options.save_to @@ -38,7 +50,9 @@ except IOError: shall_we_write_to_file = False if not shall_we_write_to_file: - print("There exists a file on the provided path so the result will be output (only). ") + print( + "There exists a file on the provided path so the result will be output (only). 
" + ) result = "" @@ -49,18 +63,26 @@ result += str(command_invocation) + "\n" result += ">>> INPUT-OUTPUT INFORMATION (applied to command invocation if possible): \n" -io_info: Optional[InputOutputInfo] = get_input_output_info_from_cmd_invocation(command_invocation) +io_info: Optional[InputOutputInfo] = get_input_output_info_from_cmd_invocation( + command_invocation +) if io_info is None: result += f"Information not provided so considered side-effectful." elif io_info.has_other_outputs(): result += f"Provided command has outputs other than streaming." else: - command_invocation_with_io = io_info.apply_input_output_info_to_command_invocation(command_invocation) + command_invocation_with_io = io_info.apply_input_output_info_to_command_invocation( + command_invocation + ) result += str(command_invocation_with_io) result += "\n" -para_info: Optional[ParallelizabilityInfo] = get_parallelizability_info_from_cmd_invocation(command_invocation) +para_info: Optional[ + ParallelizabilityInfo +] = get_parallelizability_info_from_cmd_invocation(command_invocation) if para_info is None: - para_info = ParallelizabilityInfo() # defaults to no parallelizer's and all properties False + para_info = ( + ParallelizabilityInfo() + ) # defaults to no parallelizer's and all properties False result += ">>> PARALLELIZABILITY INFORMATION: \n" # TODO: change representation when we move commutativity into parallelizers result += str(para_info) diff --git a/pash_annotations/annotation_generation/AnnotationGeneration.py b/pash_annotations/annotation_generation/AnnotationGeneration.py deleted file mode 100644 index 21b721a..0000000 --- a/pash_annotations/annotation_generation/AnnotationGeneration.py +++ /dev/null @@ -1,98 +0,0 @@ -import os -import sys -from typing import Optional - -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from 
pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -### directory paths -ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')) -ANNOTATION_GENERATORS = "pash_annotations.annotation_generation.annotation_generators" - -DICT_CMD_NAME_TO_REPRESENTATION_IN_MODULE_NAMES = { - "alt_bigrams_aux": "AltBigramsAux", - "alt_bigram_aux_reduce": "AltBigramAuxReduce", - "awk": "Awk", - "bigrams_aux": "BigramsAux", - "bigram_aux_map": "BigramAuxMap", - "bigram_aux_reduce": "BigramAuxReduce", - "cat": "Cat", - "col": "Col", - "comm": "Comm", - "custom_sort": "CustomSort", - "custom_tr": "CustomTr", - "cut": "Cut", - "diff": "Diff", - "grep": "Grep", - "head": "Head", - "mkfifo": "Mkfifo", - "mv": "Mv", - "rm": "Rm", - "sed": "Sed", - "set_diff": "SetDiff", - "seq": "Seq", - "sort": "Sort", - "tail": "Tail", - "tee": "Tee", - "test_one": "TestOne", - "test_two": "TestTwo", - "tr": "Tr", - "uniq": "Uniq", - "wc": "Wc", - "xargs": "Xargs" -} - -INPUTOUTPUT_INFO_FILENAME_MODULE_PREFIX = "InputOutputInfoGenerator" -inputoutput_info_generator_prefix_abs = ANNOTATION_GENERATORS + '.' + INPUTOUTPUT_INFO_FILENAME_MODULE_PREFIX -inputoutput_info_generator_file_module_names = \ - [(inputoutput_info_generator_prefix_abs + name, INPUTOUTPUT_INFO_FILENAME_MODULE_PREFIX + name) - for name in DICT_CMD_NAME_TO_REPRESENTATION_IN_MODULE_NAMES.values()] - -PARALLELIZABILITY_INFO_FILENAME_MODULE_PREFIX = "ParallelizabilityInfoGenerator" -parallelizability_info_generator_prefix_abs = ANNOTATION_GENERATORS + '.' 
+ PARALLELIZABILITY_INFO_FILENAME_MODULE_PREFIX -parallelizability_info_generator_file_and_module_names = \ - [(parallelizability_info_generator_prefix_abs + name, PARALLELIZABILITY_INFO_FILENAME_MODULE_PREFIX + name) - for name in DICT_CMD_NAME_TO_REPRESENTATION_IN_MODULE_NAMES.values()] - -for FILENAME_MODULE_PAIR in inputoutput_info_generator_file_module_names + \ - parallelizability_info_generator_file_and_module_names: - FILENAME, MODULE = FILENAME_MODULE_PAIR - import_str = "from " + FILENAME + " import " + MODULE - try: - exec(import_str) - except ModuleNotFoundError: - pass # it's fine if some do not exist, we catch that later - - -# cannot be merged due to types -def get_input_output_info_from_cmd_invocation(cmd_invocation : CommandInvocationInitial) -> Optional[InputOutputInfo]: - # Get the Generator, info_generator_class_for_cmd_repr, info_generator_class_for_cmd_repr - info_generator_class_for_cmd_repr = DICT_CMD_NAME_TO_REPRESENTATION_IN_MODULE_NAMES.get(cmd_invocation.cmd_name) - try: - info_generator_class_for_cmd = str_to_class(str(INPUTOUTPUT_INFO_FILENAME_MODULE_PREFIX) + str(info_generator_class_for_cmd_repr)) - # Initialize the info generator object - info_generator_object = info_generator_class_for_cmd(cmd_invocation) - # Generate info - info_generator_object.generate_info() - return info_generator_object.get_info() - except Exception as sth: # module does not exist - return None - -def get_parallelizability_info_from_cmd_invocation(cmd_invocation : CommandInvocationInitial) -> Optional[ParallelizabilityInfo]: - # Get the Generator - info_generator_class_for_cmd_repr = str(PARALLELIZABILITY_INFO_FILENAME_MODULE_PREFIX) + str(DICT_CMD_NAME_TO_REPRESENTATION_IN_MODULE_NAMES.get(cmd_invocation.cmd_name)) - try: - info_generator_class_for_cmd = str_to_class(info_generator_class_for_cmd_repr) - # Initialize the info generator object - # TODO: be more rigorous and allow empty parallelization annotation: return ParallelizabilityInfo with [] as default 
- info_generator_object = info_generator_class_for_cmd(cmd_invocation) - # Generate info - info_generator_object.generate_info() - return info_generator_object.get_info() - except Exception: # module does not exist - return None - - -def str_to_class(classname): - return getattr(sys.modules[__name__], classname) diff --git a/pash_annotations/annotation_generation/annotation_generation.py b/pash_annotations/annotation_generation/annotation_generation.py new file mode 100644 index 0000000..3af82d1 --- /dev/null +++ b/pash_annotations/annotation_generation/annotation_generation.py @@ -0,0 +1,110 @@ +import os +import importlib +from typing import Optional + +from collections import namedtuple + +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) + +### directory paths +ANNOTATION_GENERATORS = "pash_annotations.annotation_generation.annotation_generators" +CMD_NAMES = [ + "alt_bigrams_aux", + "alt_bigram_aux_reduce", + "awk", + "bigrams_aux", + "bigram_aux_map", + "bigram_aux_reduce", + "cat", + "col", + "comm", + "custom_sort", + "custom_tr", + "cut", + "diff", + "grep", + "head", + "mkfifo", + "mv", + "rm", + "sed", + "set_diff", + "seq", + "sort", + "tail", + "tee", + "test_one", + "test_two", + "tr", + "uniq", + "xarg", + "wc", +] + +IO_INFO_PREFIX = "input_output_info_generator" +PAR_INFO_PREFIX = "parallelizability_info_generator" + +FileModulePair = namedtuple("FileModulePair", ["file", "module"]) + + +class AnnotationGenerator: + # cannot be merged due to types + def get_input_output_info_from_cmd_invocation( + self, + cmd_invocation: CommandInvocationInitial, + ) -> Optional[InputOutputInfo]: + try: + # Get the Generator, info_generator_class_for_cmd_repr, info_generator_class_for_cmd_repr + cmd = 
cmd_invocation.cmd_name + module = importlib.import_module( + f"{ANNOTATION_GENERATORS}.{IO_INFO_PREFIX}_{cmd}" + ) + info_gen_class = getattr( + module, self.to_pascal_case(IO_INFO_PREFIX) + self.to_pascal_case(cmd) + ) + # Initialize the info generator object + info_generator_object = info_gen_class(cmd_invocation) + # Generate info + info_generator_object.generate_info() + return info_generator_object.get_info() + except: + return None + + def get_parallelizability_info_from_cmd_invocation( + self, + cmd_invocation: CommandInvocationInitial, + ) -> Optional[ParallelizabilityInfo]: + # Get the Generator + try: + cmd = cmd_invocation.cmd_name + module = importlib.import_module( + f"{ANNOTATION_GENERATORS}.{PAR_INFO_PREFIX}_{cmd}" + ) + print(module) + info_gen_class = getattr( + module, self.to_pascal_case(PAR_INFO_PREFIX) + self.to_pascal_case(cmd) + ) + # Initialize the info generator object + info_generator_object = info_gen_class(cmd_invocation) + # Initialize the info generator object + # TODO: be more rigorous and allow empty parallelization annotation: return ParallelizabilityInfo with [] as default + # Generate info + info_generator_object.generate_info() + return info_generator_object.get_info() + except: + return None + + @staticmethod + def to_pascal_case(string: str) -> str: + """ + Turns a snake_case string to PascalCase + """ + return string.replace("_", " ").title().replace(" ", "") diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorAltBigramsAux.py b/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorAltBigramsAux.py deleted file mode 100644 index 2cccf74..0000000 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorAltBigramsAux.py +++ /dev/null @@ -1,15 +0,0 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import 
ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import \ - make_mapper_spec_custom_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import make_parallelizer_round_robin, \ - make_parallelizer_consec_chunks - -class ParallelizabilityInfoGeneratorAltBigramsAux(ParallelizabilityInfoGeneratorInterface): - - def generate_info(self) -> None: - mapper_spec = make_mapper_spec_custom_from_string_representation("alt_bigrams_aux", is_implemented=True) - agg_spec = make_aggregator_spec_custom_2_ary_from_string_representation("alt_bigram_aux_reduce", is_implemented=True) - parallelizer_cc = make_parallelizer_consec_chunks(mapper_spec=mapper_spec, aggregator_spec=agg_spec) - self.append_to_parallelizer_list(parallelizer_cc) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomSort.py b/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomSort.py deleted file mode 100644 index b614186..0000000 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomSort.py +++ /dev/null @@ -1,28 +0,0 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import \ - make_parallelizer_indiv_files, make_parallelizer_consec_chunks -from 
pash_annotations.annotation_generation.datatypes.parallelizability.TransformerFlagOptionList import TransformerFlagOptionListFilter, \ - TransformerFlagOptionListAdd, ChainTransformerFlagOptionList -from pash_annotations.datatypes.BasicDatatypes import Flag, Option, ArgStringType - - -class ParallelizabilityInfoGeneratorCustomSort(ParallelizabilityInfoGeneratorInterface): - - def generate_info(self) -> None: - # copied from sort - self.set_commutative() - if self.does_flag_option_list_contain_at_least_one_of(["-c", "-C", "-u", "-z", "-R", "-s", "-m", "--files0-from", "--random-source"]): - pass # no parallelization - else: - # Build aggregator spec: keep certain flags with filtering and add -m - transformer_filter = TransformerFlagOptionListFilter(["-b", "-d", "-f", "-g", "-i", "-M", "-h", "-n", "-r", "--sort", "-V", "-k", "-t"]) - transformer_add = TransformerFlagOptionListAdd([Flag("-m")]) - chained_transformers = ChainTransformerFlagOptionList([transformer_filter, transformer_add]) - # TODO: change this to n instead of 2 but we keep this for testing aggregator trees for now - aggregator_spec = make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( - flag_option_list_transformer=chained_transformers, is_implemented=True) - # Build parallelizers and append - parallelizer_cc_seq_cus = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) - self.append_to_parallelizer_list(parallelizer_cc_seq_cus) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSed.py b/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSed.py deleted file mode 100644 index 8ee05c2..0000000 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSed.py +++ /dev/null @@ -1,17 +0,0 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from 
pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import \ - make_parallelizer_round_robin -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_adj_lines_func_from_string_representation - - -class ParallelizabilityInfoGeneratorSed(ParallelizabilityInfoGeneratorInterface): - - def generate_info(self) -> None: - # TODO: Logic copied from PaSh, but does this not depend how the script is given? - if self.does_first_operand_start_with("-") \ - or self.does_first_operand_start_with("s") \ - or not (self.does_first_operand_contain("d") or self.does_first_operand_contain("q")): - self.append_to_parallelizer_list_cc_seq_conc() - self.append_to_parallelizer_list_rr_seq_conc() - diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSort.py b/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSort.py deleted file mode 100644 index c6eadf0..0000000 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSort.py +++ /dev/null @@ -1,29 +0,0 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import \ - make_parallelizer_indiv_files, make_parallelizer_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.TransformerFlagOptionList import TransformerFlagOptionListFilter, \ - TransformerFlagOptionListAdd, ChainTransformerFlagOptionList -from pash_annotations.datatypes.BasicDatatypes import Flag, Option, ArgStringType - - -class 
ParallelizabilityInfoGeneratorSort(ParallelizabilityInfoGeneratorInterface): - - # Which ones do affect parallelizability? - - def generate_info(self) -> None: - self.set_commutative() - if self.does_flag_option_list_contain_at_least_one_of(["-c", "-C", "-u", "-z", "-R", "-s", "-m", "--files0-from", "--random-source"]): - pass # no parallelization - else: - # Build aggregator spec: keep certain flags with filtering and add -m - transformer_filter = TransformerFlagOptionListFilter(["-b", "-d", "-f", "-g", "-i", "-M", "-h", "-n", "-r", "--sort", "-V", "-k", "-t"]) - transformer_add = TransformerFlagOptionListAdd([Flag("-m")]) - chained_transformers = ChainTransformerFlagOptionList([transformer_filter, transformer_add]) - # TODO: change this to n instead of 2 but we keep this for testing aggregator trees for now - aggregator_spec = make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( - flag_option_list_transformer=chained_transformers, is_implemented=True) - # Build parallelizers and append - parallelizer_cc_seq_cus = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) - self.append_to_parallelizer_list(parallelizer_cc_seq_cus) diff --git a/pash_annotations/annotation_generation/annotation_generators/Generator_Interface.py b/pash_annotations/annotation_generation/annotation_generators/generator_interface.py similarity index 66% rename from pash_annotations/annotation_generation/annotation_generators/Generator_Interface.py rename to pash_annotations/annotation_generation/annotation_generators/generator_interface.py index 5f6cb1e..230475d 100644 --- a/pash_annotations/annotation_generation/annotation_generators/Generator_Interface.py +++ b/pash_annotations/annotation_generation/annotation_generators/generator_interface.py @@ -1,11 +1,13 @@ from typing import List from abc import ABC, abstractmethod -from pash_annotations.datatypes.BasicDatatypes import FlagOption -from pash_annotations.datatypes.CommandInvocationInitial import 
CommandInvocationInitial +from pash_annotations.datatypes.basic_datatypes import FlagOption +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) -class Generator_Interface(ABC): +class Generator_Interface(ABC): def __init__(self, cmd_invocation: CommandInvocationInitial) -> None: self.cmd_inv: CommandInvocationInitial = cmd_invocation @@ -19,11 +21,19 @@ def get_info(self): ## HELPERS/Library functions: to check conditions - def does_flag_option_list_contain_at_least_one_of(self, list_names: List[str]) -> bool: + def does_flag_option_list_contain_at_least_one_of( + self, list_names: List[str] + ) -> bool: return len(self.get_flag_option_list_filtered_with(list_names)) > 0 - def get_flag_option_list_filtered_with(self, list_names: List[str]) -> List[FlagOption]: - return [flagoption for flagoption in self.cmd_inv.flag_option_list if flagoption.get_name() in list_names] + def get_flag_option_list_filtered_with( + self, list_names: List[str] + ) -> List[FlagOption]: + return [ + flagoption + for flagoption in self.cmd_inv.flag_option_list + if flagoption.get_name() in list_names + ] def get_operand_list_length(self): return len(self.cmd_inv.operand_list) @@ -41,4 +51,4 @@ def does_first_operand_start_with(self, arg): def does_first_operand_contain(self, arg): first_operand_name = self.get_first_operand_name_as_string() - return (arg in first_operand_name) + return arg in first_operand_name diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramAuxReduce.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigram_aux_reduce.py similarity index 77% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramAuxReduce.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigram_aux_reduce.py index 58b4972..4d13df1 100644 --- 
a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramAuxReduce.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigram_aux_reduce.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorAltBigramAuxReduce(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.all_operands_are_streaming_inputs() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramsAux.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigrams_aux.py similarity index 76% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramsAux.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigrams_aux.py index a2c653d..03a58a8 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAltBigramsAux.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_alt_bigrams_aux.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorAltBigramsAux(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git 
a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAwk.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_awk.py similarity index 81% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAwk.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_awk.py index 5a43f70..3505b7c 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorAwk.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_awk.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorAwk(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdout() @@ -11,7 +10,7 @@ def generate_info(self) -> None: if self.get_operand_list_length() == 0: self.set_implicit_use_of_stdin() else: - self.all_operands_are_streaming_inputs() # this is true also if empty + self.all_operands_are_streaming_inputs() # this is true also if empty else: self.set_first_operand_as_config_arg_type_string() if self.get_operand_list_length() == 1: diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorBigramsAux.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_bigrams_aux.py similarity index 76% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorBigramsAux.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_bigrams_aux.py index 7aa5110..2d9422c 100644 --- 
a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorBigramsAux.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_bigrams_aux.py @@ -1,7 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface -class InputOutputInfoGeneratorBigramsAux(InputOutputInfoGeneratorInterface): +class InputOutputInfoGeneratorBigramsAux(InputOutputInfoGeneratorInterface): def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCat.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cat.py similarity index 88% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCat.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cat.py index f245396..b21e049 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCat.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cat.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorCat(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-A", "-b", "-e", "-E", "--number", "-s", "t", "-T", "-u", "-v", "--help", "--version"] # list_of_all_options = [] diff --git 
a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCol.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_col.py similarity index 75% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCol.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_col.py index 29edbae..184c68b 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCol.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_col.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorCol(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorComm.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_comm.py similarity index 78% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorComm.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_comm.py index 6cd80d8..f415b6f 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorComm.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_comm.py @@ -1,4 +1,4 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from 
pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorComm(InputOutputInfoGeneratorInterface): @@ -12,5 +12,5 @@ class InputOutputInfoGeneratorComm(InputOutputInfoGeneratorInterface): def generate_info(self) -> None: self.set_implicit_use_of_stdout() - assert(self.get_operand_list_length() == 2) # needs two files to compare; + assert self.get_operand_list_length() == 2 # needs two files to compare; self.all_operands_are_streaming_inputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomSort.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_sort.py similarity index 61% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomSort.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_sort.py index 38ca559..50f5d10 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomSort.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_sort.py @@ -1,13 +1,17 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorCustomSort(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: # copied from sort if not self.does_flag_option_list_contain_at_least_one_of(["-o"]): self.set_implicit_use_of_stdout() - if self.get_operand_list_length() == 0 and not self.does_flag_option_list_contain_at_least_one_of(["--files0-from"]): + if ( + self.get_operand_list_length() == 0 + and not 
self.does_flag_option_list_contain_at_least_one_of( + ["--files0-from"] + ) + ): self.set_implicit_use_of_stdin() else: self.all_operands_are_streaming_inputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomTr.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_tr.py similarity index 68% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomTr.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_tr.py index a31a78d..9aacd5c 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCustomTr.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_custom_tr.py @@ -1,9 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface -from pash_annotations.datatypes.BasicDatatypes import ArgStringType +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorCustomTr(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCut.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cut.py similarity index 88% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCut.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cut.py index b9aa2ed..81b2097 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorCut.py +++ 
b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_cut.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorCut(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-n", "--complement", "-s", "-z", "--help", "--version"] # list_of_all_options = ["-b", "-c", "-d", "-f", "--output-delimiter", ] diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorDiff.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_diff.py similarity index 62% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorDiff.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_diff.py index b85e282..a2c01f0 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorDiff.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_diff.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorDiff(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdout() - self.all_operands_are_streaming_inputs() \ No newline at end of file + self.all_operands_are_streaming_inputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorGrep.py 
b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_grep.py similarity index 91% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorGrep.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_grep.py index b3640b7..3b94d07 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorGrep.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_grep.py @@ -1,9 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface -from pash_annotations.datatypes.BasicDatatypes import Operand +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorGrep(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-V", "--help", "-E", "-F", "-G", "-P", "-i", "--no-ignore-case", "-v", "-w", # "-x", "-y", "-c", "-L", "-l", "-o", "-q", "-s", "-b", "-H", "-h", "-n", "-T", "-Z", # "--no-group-separator", "-a", "-I", "-r", "-R", "--line-buffered", "-U", "-z"] @@ -25,7 +23,7 @@ def generate_info(self) -> None: if self.get_operand_list_length() == 0: self.set_implicit_use_of_stdin() else: - self.all_operands_are_streaming_inputs() # this is true also if empty + self.all_operands_are_streaming_inputs() # this is true also if empty else: self.set_first_operand_as_config_arg_type_string() if self.get_operand_list_length() == 1: diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorHead.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_head.py similarity index 85% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorHead.py rename to 
pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_head.py index 00a9420..d043ec8 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorHead.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_head.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorHead(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-q", "-v", "-z", "--help", "--version"] # list_of_all_options = ["-c", "-n"] diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGenerator_Interface.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_interface.py similarity index 63% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGenerator_Interface.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_interface.py index d34b4b8..d8f2043 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGenerator_Interface.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_interface.py @@ -2,37 +2,46 @@ from pash_annotations.config.definitions import INDICATORS_FOR_FILENAMES -from pash_annotations.datatypes.BasicDatatypes import Flag, Option, WhichClassForArg -from pash_annotations.datatypes.AccessKind import AccessKind, get_access_from_string -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO - -from 
pash_annotations.annotation_generation.annotation_generators.Generator_Interface import Generator_Interface +from pash_annotations.datatypes.basic_datatypes import Flag, Option, WhichClassForArg +from pash_annotations.datatypes.access_kind import AccessKind, get_access_from_string +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) + +from pash_annotations.annotation_generation.annotation_generators.generator_interface import ( + Generator_Interface, +) from abc import ABC, abstractmethod -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) from pash_annotations.parser.util_parser import get_json_data class InputOutputInfoGeneratorInterface(Generator_Interface, ABC): - # here, we only need to specify information about operands and implicitly used resources # information about option arguments are provided by parsing infrastructure # ASSUMPTION: No implicit information should be exploited since internal implementation may change - def __init__(self, cmd_invocation: CommandInvocationInitial) -> None: super().__init__(cmd_invocation=cmd_invocation) - flagoption_list_typer: List[Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None], - Tuple[Literal[WhichClassForArg.PLAINSTRING], None]]] \ - = self.get_flagoption_list_typer_for_specific_list() + flagoption_list_typer: List[ + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + Tuple[Literal[WhichClassForArg.PLAINSTRING], None], + ] + ] = self.get_flagoption_list_typer_for_specific_list() self.input_output_info: InputOutputInfo = InputOutputInfo( - flagoption_list_typer=flagoption_list_typer, - 
number_of_operands=len(cmd_invocation.operand_list) - ) + flagoption_list_typer=flagoption_list_typer, + number_of_operands=len(cmd_invocation.operand_list), + ) @abstractmethod def generate_info(self) -> None: @@ -41,29 +50,57 @@ def generate_info(self) -> None: def get_info(self) -> InputOutputInfo: return self.input_output_info - def get_cmd_inv_with_io(self, cmd_inv: CommandInvocationInitial) -> CommandInvocationWithIO: - return self.input_output_info.apply_input_output_info_to_command_invocation(cmd_inv) - - def get_flagoption_list_typer_for_specific_list(self) -> \ - List[Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None], - Tuple[Literal[WhichClassForArg.PLAINSTRING], None]]]: - dict_option_to_class_for_arg: Dict[str, Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None]]] = self.get_dict_option_to_class_for_arg() + def get_cmd_inv_with_io( + self, cmd_inv: CommandInvocationInitial + ) -> CommandInvocationWithIO: + return self.input_output_info.apply_input_output_info_to_command_invocation( + cmd_inv + ) + + def get_flagoption_list_typer_for_specific_list( + self, + ) -> List[ + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + Tuple[Literal[WhichClassForArg.PLAINSTRING], None], + ] + ]: + dict_option_to_class_for_arg: Dict[ + str, + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + ], + ] = self.get_dict_option_to_class_for_arg() flagoption_list_typer = [] for flagoption in self.cmd_inv.flag_option_list: if isinstance(flagoption, Flag): flagoption_list_typer.append((WhichClassForArg.PLAINSTRING, None)) elif isinstance(flagoption, Option): - flagoption_list_typer.append(dict_option_to_class_for_arg[flagoption.get_name()]) + flagoption_list_typer.append( + dict_option_to_class_for_arg[flagoption.get_name()] + ) 
else: raise Exception("neither Flag nor Option") return flagoption_list_typer - def get_dict_option_to_class_for_arg(self) -> Dict[str, Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None]]]: - dict_option_to_class_for_arg: Dict[str, Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None]]] = dict() + def get_dict_option_to_class_for_arg( + self, + ) -> Dict[ + str, + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + ], + ]: + dict_option_to_class_for_arg: Dict[ + str, + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + ], + ] = dict() json_data = get_json_data(self.cmd_inv.cmd_name) for option_data in json_data["option"]: option_name = option_data[0] @@ -75,14 +112,25 @@ def get_dict_option_to_class_for_arg(self) -> Dict[str, Union[Tuple[Literal[Whic access: AccessKind = get_access_from_string(option_arg_access_str) if option_arg_type in INDICATORS_FOR_FILENAMES: # for now, we do not allow to have '-' for stdin in option arguments - dict_option_to_class_for_arg[option_name] = (WhichClassForArg.FILESTD, access) + dict_option_to_class_for_arg[option_name] = ( + WhichClassForArg.FILESTD, + access, + ) else: - dict_option_to_class_for_arg[option_name] = (WhichClassForArg.ARGSTRING, None) + dict_option_to_class_for_arg[option_name] = ( + WhichClassForArg.ARGSTRING, + None, + ) else: option_arg_type: str = option_arg_info - assert(not option_arg_type in INDICATORS_FOR_FILENAMES) # filenames need to declare access pattern, no default + assert ( + not option_arg_type in INDICATORS_FOR_FILENAMES + ) # filenames need to declare access pattern, no default # access: AccessKind = AccessKind.make_config_input() - dict_option_to_class_for_arg[option_name] = (WhichClassForArg.ARGSTRING, None) + dict_option_to_class_for_arg[option_name] = 
( + WhichClassForArg.ARGSTRING, + None, + ) return dict_option_to_class_for_arg ## Library functions @@ -99,7 +147,6 @@ def if_no_operands_given_stdin_implicitly_used(self) -> None: if len(self.cmd_inv.operand_list) == 0: self.set_implicit_use_of_stdin(True) - # forwarded to InputOutputInfo # Assumption: streaming inputs are always filenames or stdin # Assumption: (streaming) outputs are always filenames or stdout diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMkfifo.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mkfifo.py similarity index 73% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMkfifo.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mkfifo.py index f09d83c..f4141f0 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMkfifo.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mkfifo.py @@ -1,7 +1,6 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorMkfifo(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.all_operands_are_other_outputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMv.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mv.py similarity index 84% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMv.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mv.py index 
85d72fa..bd962fe 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorMv.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_mv.py @@ -1,11 +1,9 @@ -from typing import List, Union -from pash_annotations.datatypes.BasicDatatypes import FlagOption -from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from typing import List +from pash_annotations.datatypes.basic_datatypes import FlagOption +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorMv(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-b", "-f", "-i", "-n", "--strip-trailing-slashes", "-T", # "-u", "-v", "-Z", "--help", "--version"] # list_of_all_options = ["--backup", "-S", "-t"] @@ -33,7 +31,9 @@ class InputOutputInfoGeneratorMv(InputOutputInfoGeneratorInterface): def generate_info(self) -> None: # -T shall treat destination as file, not directory, not considered currently # -t gives destination directory as an argument to option and determines how operands are interpreted - list_options_t : List[FlagOption] = self.get_flag_option_list_filtered_with(["-t"]) + list_options_t: List[FlagOption] = self.get_flag_option_list_filtered_with( + ["-t"] + ) if len(list_options_t) == 0: self.all_but_last_operand_is_other_input() self.only_last_operand_is_other_output() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorRm.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_rm.py similarity index 57% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorRm.py rename to 
pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_rm.py index 1dacfc9..ed244d5 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorRm.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_rm.py @@ -1,7 +1,6 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorRm(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: - self.all_operands_are_other_outputs() \ No newline at end of file + self.all_operands_are_other_outputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSed.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sed.py similarity index 86% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSed.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sed.py index b72ed43..848d1a6 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSed.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sed.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorSed(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() @@ -11,4 +10,3 @@ 
def generate_info(self) -> None: self.all_but_first_operand_is_streaming_input() else: self.all_operands_are_streaming_inputs() - diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSeq.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_seq.py similarity index 77% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSeq.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_seq.py index f52d4c9..d097bdc 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSeq.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_seq.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorSeq(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdout() self.set_all_operands_as_config_arg_type_string() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSetDiff.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_set_diff.py similarity index 68% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSetDiff.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_set_diff.py index 4c0f0f0..ca290ec 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSetDiff.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_set_diff.py @@ -1,10 +1,9 @@ 
-from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorSetDiff(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() - assert(self.get_operand_list_length() == 1) - self.set_first_operand_as_config_arg_type_filename_or_std_descriptor() \ No newline at end of file + assert self.get_operand_list_length() == 1 + self.set_first_operand_as_config_arg_type_filename_or_std_descriptor() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSort.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sort.py similarity index 64% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSort.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sort.py index 7d860c5..42701e9 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorSort.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_sort.py @@ -1,15 +1,19 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorSort(InputOutputInfoGeneratorInterface): - # Which ones do affect input/output? 
def generate_info(self) -> None: # self.set_multiple_inputs_possible() if not self.does_flag_option_list_contain_at_least_one_of(["-o"]): self.set_implicit_use_of_stdout() - if self.get_operand_list_length() == 0 and not self.does_flag_option_list_contain_at_least_one_of(["--files0-from"]): + if ( + self.get_operand_list_length() == 0 + and not self.does_flag_option_list_contain_at_least_one_of( + ["--files0-from"] + ) + ): self.set_implicit_use_of_stdin() else: self.all_operands_are_streaming_inputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTail.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tail.py similarity index 88% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTail.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tail.py index 3b3f1b9..860f68e 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTail.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tail.py @@ -1,4 +1,4 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorTail(InputOutputInfoGeneratorInterface): diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTee.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tee.py similarity index 69% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTee.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tee.py index 
68b7705..cc71ae7 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTee.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tee.py @@ -1,10 +1,9 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorTee(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() # copies to both operands and stdout self.set_implicit_use_of_stdout() - self.all_operands_are_streaming_outputs() \ No newline at end of file + self.all_operands_are_streaming_outputs() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestOne.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_one.py similarity index 76% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestOne.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_one.py index a77c966..91928bd 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestOne.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_one.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorTestOne(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() 
self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestTwo.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_two.py similarity index 76% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestTwo.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_two.py index f584d2b..aa417b5 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTestTwo.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_test_two.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorTestTwo(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTr.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tr.py similarity index 78% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTr.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tr.py index 85917c8..305d85b 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorTr.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_tr.py @@ -1,9 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import 
InputOutputInfoGeneratorInterface -from pash_annotations.datatypes.BasicDatatypes import ArgStringType +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorTr(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-c", "-d", "-s", "-t", "--help", "--version", # list_of_all_options = [] diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorUniq.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_uniq.py similarity index 87% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorUniq.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_uniq.py index 5024561..0a70dad 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorUniq.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_uniq.py @@ -1,8 +1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorUniq(InputOutputInfoGeneratorInterface): - # list_of_all_flags = ["-c", "-d", "-D", "-i", "-u", "-z", "--help", "--version"] # list_of_all_options = ["--all-repeated", "-f", "--group", "-s", "-w"] @@ -22,4 +21,4 @@ def generate_info(self) -> None: self.all_but_last_operand_is_streaming_input() self.only_last_operand_is_stream_output() else: - raise Exception('extra operand for uniq, the 3rd one') + raise Exception("extra operand for uniq, the 3rd one") diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorWc.py 
b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_wc.py similarity index 71% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorWc.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_wc.py index 126cdb1..3a96f4b 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorWc.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_wc.py @@ -1,12 +1,13 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorWc(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: if self.does_flag_option_list_contain_at_least_one_of(["--files0-from"]): - raise Exception('wc with --files0-from is considered side-effectful for now') - + raise Exception( + "wc with --files0-from is considered side-effectful for now" + ) + if self.get_operand_list_length() == 0: self.set_implicit_use_of_stdin() self.set_implicit_use_of_stdout() diff --git a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorXargs.py b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_xargs.py similarity index 84% rename from pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorXargs.py rename to pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_xargs.py index 2faab83..5bae4dc 100644 --- a/pash_annotations/annotation_generation/annotation_generators/InputOutputInfoGeneratorXargs.py +++ b/pash_annotations/annotation_generation/annotation_generators/input_output_info_generator_xargs.py @@ -1,8 
+1,7 @@ -from pash_annotations.annotation_generation.annotation_generators.InputOutputInfoGenerator_Interface import InputOutputInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.input_output_info_generator_interface import InputOutputInfoGeneratorInterface class InputOutputInfoGeneratorXargs(InputOutputInfoGeneratorInterface): - def generate_info(self) -> None: # TODO: this is not quite true, and we actually would need to recursively call the respective annotation generator? self.set_all_operands_as_arg_string() diff --git a/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_alt_bigrams_aux.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_alt_bigrams_aux.py new file mode 100644 index 0000000..e5ce2d9 --- /dev/null +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_alt_bigrams_aux.py @@ -0,0 +1,28 @@ +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_2_ary_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_custom_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_consec_chunks, +) + + +class ParallelizabilityInfoGeneratorAltBigramsAux( + ParallelizabilityInfoGeneratorInterface +): + def generate_info(self) -> None: + mapper_spec = make_mapper_spec_custom_from_string_representation( + "alt_bigrams_aux", is_implemented=True + ) + agg_spec = make_aggregator_spec_custom_2_ary_from_string_representation( + "alt_bigram_aux_reduce", is_implemented=True + ) + 
parallelizer_cc = make_parallelizer_consec_chunks( + mapper_spec=mapper_spec, aggregator_spec=agg_spec + ) + self.append_to_parallelizer_list(parallelizer_cc) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorBigramsAux.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_bigrams_aux.py similarity index 54% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorBigramsAux.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_bigrams_aux.py index 4db5582..f792a4c 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorBigramsAux.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_bigrams_aux.py @@ -1,15 +1,27 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import \ - make_mapper_spec_custom_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import make_parallelizer_consec_chunks +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import ( + make_aggregator_spec_custom_2_ary_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import ( + make_mapper_spec_custom_from_string_representation, +) +from 
pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import ( + make_parallelizer_consec_chunks, +) -class ParallelizabilityInfoGeneratorBigramsAux(ParallelizabilityInfoGeneratorInterface): +class ParallelizabilityInfoGeneratorBigramsAux(ParallelizabilityInfoGeneratorInterface): def generate_info(self) -> None: - map_spec = make_mapper_spec_custom_from_string_representation("bigram_aux_map", is_implemented=True) - agg_spec = make_aggregator_spec_custom_2_ary_from_string_representation("bigram_aux_reduce", is_implemented=True) - parallelizer_cc = make_parallelizer_consec_chunks(mapper_spec=map_spec, aggregator_spec=agg_spec) + map_spec = make_mapper_spec_custom_from_string_representation( + "bigram_aux_map", is_implemented=True + ) + agg_spec = make_aggregator_spec_custom_2_ary_from_string_representation( + "bigram_aux_reduce", is_implemented=True + ) + parallelizer_cc = make_parallelizer_consec_chunks( + mapper_spec=map_spec, aggregator_spec=agg_spec + ) parallelizer_cc.info_mapper_aggregator = 2 self.append_to_parallelizer_list(parallelizer_cc) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCol.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_col.py similarity index 74% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCol.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_col.py index fd3f1bf..d20bfe4 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCol.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_col.py @@ -1,10 +1,9 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface +from 
pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) -class ParallelizabilityInfoGeneratorCol(ParallelizabilityInfoGeneratorInterface): +class ParallelizabilityInfoGeneratorCol(ParallelizabilityInfoGeneratorInterface): def generate_info(self) -> None: self.append_to_parallelizer_list_cc_seq_conc() self.append_to_parallelizer_list_rr_seq_conc() - - - diff --git a/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_sort.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_sort.py new file mode 100644 index 0000000..dc5945f --- /dev/null +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_sort.py @@ -0,0 +1,70 @@ +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.transformer_flag_option_list import ( + TransformerFlagOptionListFilter, + TransformerFlagOptionListAdd, + ChainTransformerFlagOptionList, +) +from pash_annotations.datatypes.basic_datatypes import Flag + + +class ParallelizabilityInfoGeneratorCustomSort(ParallelizabilityInfoGeneratorInterface): + def generate_info(self) -> None: + # copied from sort + self.set_commutative() + if self.does_flag_option_list_contain_at_least_one_of( + [ + "-c", + "-C", + "-u", + "-z", + "-R", + "-s", + "-m", + "--files0-from", + "--random-source", + ] + ): + pass # no parallelization + 
else: + # Build aggregator spec: keep certain flags with filtering and add -m + transformer_filter = TransformerFlagOptionListFilter( + [ + "-b", + "-d", + "-f", + "-g", + "-i", + "-M", + "-h", + "-n", + "-r", + "--sort", + "-V", + "-k", + "-t", + ] + ) + transformer_add = TransformerFlagOptionListAdd([Flag("-m")]) + chained_transformers = ChainTransformerFlagOptionList( + [transformer_filter, transformer_add] + ) + # TODO: change this to n instead of 2 but we keep this for testing aggregator trees for now + aggregator_spec = ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( + flag_option_list_transformer=chained_transformers, + is_implemented=True, + ) + ) + # Build parallelizers and append + parallelizer_cc_seq_cus = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) + self.append_to_parallelizer_list(parallelizer_cc_seq_cus) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomTr.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_tr.py similarity index 74% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomTr.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_tr.py index 1857695..8e6bd33 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCustomTr.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_custom_tr.py @@ -1,8 +1,9 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) class 
ParallelizabilityInfoGeneratorCustomTr(ParallelizabilityInfoGeneratorInterface): - def generate_info(self) -> None: self.append_to_parallelizer_list_cc_seq_conc() self.append_to_parallelizer_list_rr_seq_conc() diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCut.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_cut.py similarity index 69% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCut.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_cut.py index 0290e7f..e47bd12 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorCut.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_cut.py @@ -1,9 +1,9 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) class ParallelizabilityInfoGeneratorCut(ParallelizabilityInfoGeneratorInterface): - # list_of_all_flags = ["-n", "--complement", "-s", "-z", "--help", "--version"] # list_of_all_options = ["-b", "-c", "-d", "-f", "--output-delimiter", ] diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorGrep.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_grep.py similarity index 56% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorGrep.py rename to 
pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_grep.py index b8b9e4a..f22d079 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorGrep.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_grep.py @@ -1,13 +1,20 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import \ - AdditionalInfoSplitterToMapper, make_parallelizer_round_robin, make_parallelizer_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import make_mapper_spec_custom -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_string_representation +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + AdditionalInfoSplitterToMapper, + make_parallelizer_round_robin, + make_parallelizer_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_custom, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_2_ary_from_string_representation, +) class ParallelizabilityInfoGeneratorGrep(ParallelizabilityInfoGeneratorInterface): - # list_of_all_flags = ["-V", "--help", "-E", "-F", "-G", "-P", "-i", "--no-ignore-case", "-v", "-w", # "-x", "-y", "-c", "-L", "-l", "-o", "-q", "-s", "-b", "-H", "-h", "-n", "-T", "-Z", # "--no-group-separator", "-a", "-I", "-r", "-R", "--line-buffered", "-U", "-z"] @@ 
-31,41 +38,66 @@ def generate_info(self) -> None: aggregator_spec = None add_info_from_splitter = None # CA to decide which mapper or aggregator, parallelizer added after CA - if not self.does_flag_option_list_contain_at_least_one_of(["-A", "-B", "-C", "-m"]): + if not self.does_flag_option_list_contain_at_least_one_of( + ["-A", "-B", "-C", "-m"] + ): if self.does_flag_option_list_contain_at_least_one_of(["-L", "-l"]): # the output for both options is either empty or the filename (same for both if so) # for "-l": if there was a match in one part, the filename will propagate; if not, not # for "-L": if there was no match in one part, the filename will propagate; it not, not - aggregator_spec = make_aggregator_spec_custom_2_ary_from_string_representation(cmd_inv_as_str='PLACEHOLDER:merge_keeping_longer_output', - is_implemented=False) + aggregator_spec = ( + make_aggregator_spec_custom_2_ary_from_string_representation( + cmd_inv_as_str="PLACEHOLDER:merge_keeping_longer_output", + is_implemented=False, + ) + ) elif self.does_flag_option_list_contain_at_least_one_of(["-c"]): - aggregator_spec = make_aggregator_spec_custom_2_ary_from_string_representation(cmd_inv_as_str='PLACEHOLDER:sum_indiv_results_up', - is_implemented=False) + aggregator_spec = ( + make_aggregator_spec_custom_2_ary_from_string_representation( + cmd_inv_as_str="PLACEHOLDER:sum_indiv_results_up", + is_implemented=False, + ) + ) # cat input1 input2 | paste -s -d + - | bc - elif self.does_flag_option_list_contain_at_least_one_of(["-n"]) and self.does_flag_option_list_contain_at_least_one_of(["-b"]): - mapper_spec = make_mapper_spec_custom('PLACEHOLDER:grep_add_line_number_and_byte_offset', - is_implemented=False) - add_info_from_splitter=AdditionalInfoSplitterToMapper.LINE_NUM_AND_BYTE_OFFSET + elif self.does_flag_option_list_contain_at_least_one_of( + ["-n"] + ) and self.does_flag_option_list_contain_at_least_one_of(["-b"]): + mapper_spec = make_mapper_spec_custom( + 
"PLACEHOLDER:grep_add_line_number_and_byte_offset", + is_implemented=False, + ) + add_info_from_splitter = ( + AdditionalInfoSplitterToMapper.LINE_NUM_AND_BYTE_OFFSET + ) elif self.does_flag_option_list_contain_at_least_one_of(["-n"]): - mapper_spec = make_mapper_spec_custom('PLACEHOLDER:grep_add_line_number_offset', - is_implemented=False) - add_info_from_splitter=AdditionalInfoSplitterToMapper.LINE_NUM_OFFSET + mapper_spec = make_mapper_spec_custom( + "PLACEHOLDER:grep_add_line_number_offset", is_implemented=False + ) + add_info_from_splitter = ( + AdditionalInfoSplitterToMapper.LINE_NUM_OFFSET + ) elif self.does_flag_option_list_contain_at_least_one_of(["-b"]): - mapper_spec = make_mapper_spec_custom('PLACEHOLDER:grep_add_byte_offset', - is_implemented=False) + mapper_spec = make_mapper_spec_custom( + "PLACEHOLDER:grep_add_byte_offset", is_implemented=False + ) add_info_from_splitter = AdditionalInfoSplitterToMapper.BYTE_OFFSET - else: # none of the above affecting flags - pass #just keep mapper and aggregator None and thus add RR_SEQ_CONC + else: # none of the above affecting flags + pass # just keep mapper and aggregator None and thus add RR_SEQ_CONC # we exploit that mapper_spec becomes seq and aggregator_spec becomes conc if given None # check can be removed once all are implemented! 
(exploits short-circuiting) # TODO: we should be able to remove the `is_implemented`-check as it is checked later - if (mapper_spec is None or mapper_spec.is_implemented) and \ - (aggregator_spec is None or aggregator_spec.is_implemented): - parallelizer_cc = make_parallelizer_consec_chunks(mapper_spec=mapper_spec, - aggregator_spec=aggregator_spec, - info_splitter_mapper=add_info_from_splitter) + if (mapper_spec is None or mapper_spec.is_implemented) and ( + aggregator_spec is None or aggregator_spec.is_implemented + ): + parallelizer_cc = make_parallelizer_consec_chunks( + mapper_spec=mapper_spec, + aggregator_spec=aggregator_spec, + info_splitter_mapper=add_info_from_splitter, + ) self.append_to_parallelizer_list(parallelizer_cc) - parallelizer_rr = make_parallelizer_round_robin(mapper_spec=mapper_spec, - aggregator_spec=aggregator_spec, - info_splitter_mapper=add_info_from_splitter) + parallelizer_rr = make_parallelizer_round_robin( + mapper_spec=mapper_spec, + aggregator_spec=aggregator_spec, + info_splitter_mapper=add_info_from_splitter, + ) self.append_to_parallelizer_list(parallelizer_rr) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGenerator_Interface.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_interface.py similarity index 67% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGenerator_Interface.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_interface.py index 6e91a3f..1aea362 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGenerator_Interface.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_interface.py @@ -1,18 +1,30 @@ from abc import ABC, abstractmethod -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial - 
-from pash_annotations.annotation_generation.annotation_generators.Generator_Interface import Generator_Interface - -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer, \ - make_parallelizer_indiv_files, make_parallelizer_consec_chunks, make_parallelizer_round_robin -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import AggregatorSpec, \ - make_aggregator_spec_adj_lines_merge, make_aggregator_spec_adj_lines_seq, make_aggregator_spec_adj_lines_func_from_string_representation -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) + +from pash_annotations.annotation_generation.annotation_generators.generator_interface import ( + Generator_Interface, +) + +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, + make_parallelizer_indiv_files, + make_parallelizer_consec_chunks, + make_parallelizer_round_robin, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_adj_lines_merge, + make_aggregator_spec_adj_lines_seq, + make_aggregator_spec_adj_lines_func_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) class ParallelizabilityInfoGeneratorInterface(Generator_Interface, ABC): - def __init__(self, cmd_invocation: CommandInvocationInitial) -> None: self.cmd_inv = cmd_invocation self.parallelizability_info: ParallelizabilityInfo = ParallelizabilityInfo() @@ -37,7 +49,9 @@ def append_to_parallelizer_list_if_seq_conc(self) -> None: def append_to_parallelizer_list_if_seq_adjm(self) -> None: aggregator_spec = make_aggregator_spec_adj_lines_merge() - parallelizer_rr_seq_adjm = 
make_parallelizer_indiv_files(aggregator_spec=aggregator_spec) + parallelizer_rr_seq_adjm = make_parallelizer_indiv_files( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_rr_seq_adjm) def append_to_parallelizer_list_cc_seq_conc(self) -> None: @@ -46,7 +60,9 @@ def append_to_parallelizer_list_cc_seq_conc(self) -> None: def append_to_parallelizer_list_cc_seq_adjm(self) -> None: aggregator_spec = make_aggregator_spec_adj_lines_merge() - parallelizer_cc_seq_adjm = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) + parallelizer_cc_seq_adjm = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_cc_seq_adjm) def append_to_parallelizer_list_rr_seq_conc(self) -> None: @@ -55,27 +71,49 @@ def append_to_parallelizer_list_rr_seq_conc(self) -> None: def append_to_parallelizer_list_rr_seq_adjm(self) -> None: aggregator_spec = make_aggregator_spec_adj_lines_merge() - parallelizer_rr_seq_adjm = make_parallelizer_round_robin(aggregator_spec=aggregator_spec) + parallelizer_rr_seq_adjm = make_parallelizer_round_robin( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_rr_seq_adjm) def append_to_parallelizer_list_rr_seq_adjs(self) -> None: aggregator_spec = make_aggregator_spec_adj_lines_seq() - parallelizer_rr_seq_adjs = make_parallelizer_round_robin(aggregator_spec=aggregator_spec) + parallelizer_rr_seq_adjs = make_parallelizer_round_robin( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_rr_seq_adjs) def append_to_parallelizer_list_cc_seq_adjs(self) -> None: aggregator_spec = make_aggregator_spec_adj_lines_seq() - parallelizer_cc_seq_adjs = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) + parallelizer_cc_seq_adjs = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_cc_seq_adjs) - def 
append_to_parallelizer_list_rr_seq_adjf(self, string_repr_func: str, is_implemented: bool) -> None: - aggregator_spec = make_aggregator_spec_adj_lines_func_from_string_representation(string_repr_func, is_implemented) - parallelizer_rr_seq_adjs = make_parallelizer_round_robin(aggregator_spec=aggregator_spec) + def append_to_parallelizer_list_rr_seq_adjf( + self, string_repr_func: str, is_implemented: bool + ) -> None: + aggregator_spec = ( + make_aggregator_spec_adj_lines_func_from_string_representation( + string_repr_func, is_implemented + ) + ) + parallelizer_rr_seq_adjs = make_parallelizer_round_robin( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_rr_seq_adjs) - def append_to_parallelizer_list_cc_seq_adjf(self, string_repr_func: str, is_implemented: bool) -> None: - aggregator_spec = make_aggregator_spec_adj_lines_func_from_string_representation(string_repr_func, is_implemented) - parallelizer_cc_seq_adjs = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) + def append_to_parallelizer_list_cc_seq_adjf( + self, string_repr_func: str, is_implemented: bool + ) -> None: + aggregator_spec = ( + make_aggregator_spec_adj_lines_func_from_string_representation( + string_repr_func, is_implemented + ) + ) + parallelizer_cc_seq_adjs = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_cc_seq_adjs) def set_commutative(self) -> None: diff --git a/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sed.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sed.py new file mode 100644 index 0000000..1c6f494 --- /dev/null +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sed.py @@ -0,0 +1,18 @@ +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + 
ParallelizabilityInfoGeneratorInterface, +) + + +class ParallelizabilityInfoGeneratorSed(ParallelizabilityInfoGeneratorInterface): + def generate_info(self) -> None: + # TODO: Logic copied from PaSh, but does this not depend how the script is given? + if ( + self.does_first_operand_start_with("-") + or self.does_first_operand_start_with("s") + or not ( + self.does_first_operand_contain("d") + or self.does_first_operand_contain("q") + ) + ): + self.append_to_parallelizer_list_cc_seq_conc() + self.append_to_parallelizer_list_rr_seq_conc() diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSetDiff.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_set_diff.py similarity index 78% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSetDiff.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_set_diff.py index ec9906c..8e7ac28 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorSetDiff.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_set_diff.py @@ -1,7 +1,9 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) -class ParallelizabilityInfoGeneratorSetDiff(ParallelizabilityInfoGeneratorInterface): +class ParallelizabilityInfoGeneratorSetDiff(ParallelizabilityInfoGeneratorInterface): def generate_info(self) -> None: self.append_to_parallelizer_list_cc_seq_conc() # issues with RR (also in branch future - due to tee I think) diff --git 
a/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sort.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sort.py new file mode 100644 index 0000000..2e72908 --- /dev/null +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_sort.py @@ -0,0 +1,71 @@ +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.transformer_flag_option_list import ( + TransformerFlagOptionListFilter, + TransformerFlagOptionListAdd, + ChainTransformerFlagOptionList, +) +from pash_annotations.datatypes.basic_datatypes import Flag + + +class ParallelizabilityInfoGeneratorSort(ParallelizabilityInfoGeneratorInterface): + # Which ones do affect parallelizability? 
+ + def generate_info(self) -> None: + self.set_commutative() + if self.does_flag_option_list_contain_at_least_one_of( + [ + "-c", + "-C", + "-u", + "-z", + "-R", + "-s", + "-m", + "--files0-from", + "--random-source", + ] + ): + pass # no parallelization + else: + # Build aggregator spec: keep certain flags with filtering and add -m + transformer_filter = TransformerFlagOptionListFilter( + [ + "-b", + "-d", + "-f", + "-g", + "-i", + "-M", + "-h", + "-n", + "-r", + "--sort", + "-V", + "-k", + "-t", + ] + ) + transformer_add = TransformerFlagOptionListAdd([Flag("-m")]) + chained_transformers = ChainTransformerFlagOptionList( + [transformer_filter, transformer_add] + ) + # TODO: change this to n instead of 2 but we keep this for testing aggregator trees for now + aggregator_spec = ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( + flag_option_list_transformer=chained_transformers, + is_implemented=True, + ) + ) + # Build parallelizers and append + parallelizer_cc_seq_cus = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) + self.append_to_parallelizer_list(parallelizer_cc_seq_cus) diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestOne.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_one.py similarity index 74% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestOne.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_one.py index 59b88b6..de0f500 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestOne.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_one.py @@ -1,15 +1,20 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import 
ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_n_ary_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import make_parallelizer_round_robin, \ - make_parallelizer_consec_chunks +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_n_ary_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_consec_chunks, +) class ParallelizabilityInfoGeneratorTestOne(ParallelizabilityInfoGeneratorInterface): - def generate_info(self) -> None: - agg_spec = make_aggregator_spec_custom_n_ary_from_string_representation("runtime/agg/opt/concat.sh") - parallelizer_cc = make_parallelizer_consec_chunks(aggregator_spec = agg_spec) + agg_spec = make_aggregator_spec_custom_n_ary_from_string_representation( + "runtime/agg/opt/concat.sh" + ) + parallelizer_cc = make_parallelizer_consec_chunks(aggregator_spec=agg_spec) self.append_to_parallelizer_list(parallelizer_cc) # RR does not work since we check that the aggregator is cat or adj_line_* # we could circumvent this with a field that indicates compatibility with RR diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestTwo.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_two.py similarity index 75% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestTwo.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_two.py index 
585c4f2..8e3191c 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTestTwo.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_test_two.py @@ -1,15 +1,20 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_n_ary_from_string_representation -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import make_parallelizer_round_robin, \ - make_parallelizer_consec_chunks +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_n_ary_from_string_representation, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_consec_chunks, +) class ParallelizabilityInfoGeneratorTestTwo(ParallelizabilityInfoGeneratorInterface): - def generate_info(self) -> None: - agg_spec = make_aggregator_spec_custom_n_ary_from_string_representation("runtime/agg/opt/concat.sh") - parallelizer_cc = make_parallelizer_consec_chunks(aggregator_spec = agg_spec) + agg_spec = make_aggregator_spec_custom_n_ary_from_string_representation( + "runtime/agg/opt/concat.sh" + ) + parallelizer_cc = make_parallelizer_consec_chunks(aggregator_spec=agg_spec) self.append_to_parallelizer_list(parallelizer_cc) # RR does not work since we assert that the aggregator is cat or adj_line_*; # we could circumvent this with a field that indicates compatibility with RR diff --git 
a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTr.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_tr.py similarity index 63% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTr.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_tr.py index 562f815..eb2df96 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorTr.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_tr.py @@ -1,9 +1,10 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.datatypes.BasicDatatypes import Operand +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.datatypes.basic_datatypes import Operand class ParallelizabilityInfoGeneratorTr(ParallelizabilityInfoGeneratorInterface): - # list_of_all_flags = ["-c", "-d", "-s", "-t", "--help", "--version", # list_of_all_options = [] @@ -23,19 +24,27 @@ def generate_info(self) -> None: # print("before res: ") # res = self.does_last_set_effectively_contain_newline() # print("res: " + res) - does_delete_newlines: bool = self.does_flag_option_list_contain_at_least_one_of(["-d"]) and \ - self.does_last_set_effectively_contain_newline() # only allowed to have a single set + does_delete_newlines: bool = ( + self.does_flag_option_list_contain_at_least_one_of(["-d"]) + and self.does_last_set_effectively_contain_newline() + ) # only allowed to have a single set # check for squeezing newlines - does_squeeze_newlines: bool = self.does_flag_option_list_contain_at_least_one_of(["-s"]) and \ - 
self.does_last_set_effectively_contain_newline() + does_squeeze_newlines: bool = ( + self.does_flag_option_list_contain_at_least_one_of(["-s"]) + and self.does_last_set_effectively_contain_newline() + ) if does_delete_newlines: # for RR, we need an adjacent aggregator self.append_to_parallelizer_list_cc_seq_adjm() self.append_to_parallelizer_list_rr_seq_adjm() # TO CHECK: at the end, it should be a single line; this should work if we fold over results elif does_squeeze_newlines: - self.append_to_parallelizer_list_cc_seq_adjf("PLACEHOLDER: remove first line if empty", is_implemented = False) - self.append_to_parallelizer_list_rr_seq_adjf("PLACEHOLDER: remove first line if empty", is_implemented = False) + self.append_to_parallelizer_list_cc_seq_adjf( + "PLACEHOLDER: remove first line if empty", is_implemented=False + ) + self.append_to_parallelizer_list_rr_seq_adjf( + "PLACEHOLDER: remove first line if empty", is_implemented=False + ) else: # for RR, we can just use concatenation self.append_to_parallelizer_list_cc_seq_conc() @@ -46,9 +55,11 @@ def does_last_set_effectively_contain_newline(self) -> bool: # is contained if (a) no -c and in set, or (b) -c and not in set last_operand_contains_newline: bool = last_operand.contains_new_line() last_operand_contains_null_char: bool = last_operand.contains_null_char() - if len(self.cmd_inv.operand_list) == 1 and self.does_flag_option_list_contain_at_least_one_of(["-c"]): - return not (last_operand_contains_newline or last_operand_contains_null_char) + if len( + self.cmd_inv.operand_list + ) == 1 and self.does_flag_option_list_contain_at_least_one_of(["-c"]): + return not ( + last_operand_contains_newline or last_operand_contains_null_char + ) else: # '-c' (if existent) does not refer to the given set - return (last_operand_contains_newline or last_operand_contains_null_char) - - + return last_operand_contains_newline or last_operand_contains_null_char diff --git 
a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorUniq.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_uniq.py similarity index 70% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorUniq.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_uniq.py index 87f6959..3b5392c 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorUniq.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_uniq.py @@ -1,12 +1,16 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import \ - make_parallelizer_round_robin, make_parallelizer_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_adj_lines_func_from_string_representation +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + make_parallelizer_round_robin, + make_parallelizer_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_adj_lines_func_from_string_representation, +) class ParallelizabilityInfoGeneratorUniq(ParallelizabilityInfoGeneratorInterface): - # list_of_all_flags = ["-c", "-d", "-D", "-i", "-u", "-z", "--help", "--version"] # list_of_all_options = ["--all-repeated", "-f", "--group", "-s", "-w"] @@ -20,12 +24,23 @@ class 
ParallelizabilityInfoGeneratorUniq(ParallelizabilityInfoGeneratorInterface def generate_info(self) -> None: # check for flags/options that make it hard to parallelize - if not self.does_flag_option_list_contain_at_least_one_of(["-d", "-D", "--all-repeated"]): + if not self.does_flag_option_list_contain_at_least_one_of( + ["-d", "-D", "--all-repeated"] + ): if self.does_flag_option_list_contain_at_least_one_of(["-c"]): # we need a special merge - aggregator_spec = make_aggregator_spec_adj_lines_func_from_string_representation(cmd_inv_as_str='PLACEHOLDER:uniq_merge_count_uniq', is_implemented=False) - parallelizer_cc_seq_adjf = make_parallelizer_consec_chunks(aggregator_spec=aggregator_spec) - parallelizer_rr_seq_adjf = make_parallelizer_round_robin(aggregator_spec=aggregator_spec) + aggregator_spec = ( + make_aggregator_spec_adj_lines_func_from_string_representation( + cmd_inv_as_str="PLACEHOLDER:uniq_merge_count_uniq", + is_implemented=False, + ) + ) + parallelizer_cc_seq_adjf = make_parallelizer_consec_chunks( + aggregator_spec=aggregator_spec + ) + parallelizer_rr_seq_adjf = make_parallelizer_round_robin( + aggregator_spec=aggregator_spec + ) self.append_to_parallelizer_list(parallelizer_cc_seq_adjf) self.append_to_parallelizer_list(parallelizer_rr_seq_adjf) else: diff --git a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorXargs.py b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_xargs.py similarity index 78% rename from pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorXargs.py rename to pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_xargs.py index 0d459ab..f97c95a 100644 --- a/pash_annotations/annotation_generation/annotation_generators/ParallelizabilityInfoGeneratorXargs.py +++ b/pash_annotations/annotation_generation/annotation_generators/parallelizability_info_generator_xargs.py @@ 
-1,12 +1,10 @@ -from pash_annotations.annotation_generation.annotation_generators.ParallelizabilityInfoGenerator_Interface import ParallelizabilityInfoGeneratorInterface +from pash_annotations.annotation_generation.annotation_generators.parallelizability_info_generator_interface import ( + ParallelizabilityInfoGeneratorInterface, +) -class ParallelizabilityInfoGeneratorXargs(ParallelizabilityInfoGeneratorInterface): +class ParallelizabilityInfoGeneratorXargs(ParallelizabilityInfoGeneratorInterface): def generate_info(self) -> None: # in original, there is a case distinction but same info self.append_to_parallelizer_list_cc_seq_conc() self.append_to_parallelizer_list_rr_seq_conc() - - - - diff --git a/pash_annotations/annotation_generation/datatypes/InputOutputInfo.py b/pash_annotations/annotation_generation/datatypes/InputOutputInfo.py deleted file mode 100644 index 49c7cab..0000000 --- a/pash_annotations/annotation_generation/datatypes/InputOutputInfo.py +++ /dev/null @@ -1,189 +0,0 @@ -import itertools - -from pash_annotations.util_standard import standard_repr -from typing import List, Tuple, Union, Optional, Literal -from pash_annotations.datatypes.AccessKind import AccessKind, make_stream_input, make_other_input, make_stream_output, \ - make_other_output, make_config_input -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.BasicDatatypes import Operand, ArgStringType, FileNameOrStdDescriptor, FileName, StdDescriptor, \ - Flag, Option, FlagOption, WhichClassForArg, get_stdout_fd, get_stdin_fd -from pash_annotations.datatypes.BasicDatatypesWithIO import FileNameOrStdDescriptorWithIOInfo, \ - OptionWithIO, get_from_original_stddescriptor_with_ioinfo, \ - get_from_original_filename_with_ioinfo -from pash_annotations.util import compute_actual_el_for_input, compute_actual_el_for_output - -class 
InputOutputInfo: - - def __init__(self, - flagoption_list_typer : List[Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None], - Tuple[Literal[WhichClassForArg.PLAINSTRING], None]]], - number_of_operands : int, - implicit_use_of_streaming_input : Optional[FileNameOrStdDescriptorWithIOInfo] = None, - implicit_use_of_streaming_output : Optional[FileNameOrStdDescriptorWithIOInfo] = None, - ) -> None: - self.flagoption_list_typer: List[Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None], - Tuple[Literal[WhichClassForArg.PLAINSTRING], None]]] = flagoption_list_typer - self.operand_list_typer: List[Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None]]] = \ - [(WhichClassForArg.FILESTD, make_stream_input())] * number_of_operands - self.implicit_use_of_streaming_input : Optional[FileNameOrStdDescriptorWithIOInfo] = implicit_use_of_streaming_input - self.implicit_use_of_streaming_output : Optional[FileNameOrStdDescriptorWithIOInfo] = implicit_use_of_streaming_output - - def __repr__(self) -> str: - return standard_repr(self) - - # the InputOutputInfoGenerator Interface automatically calls this function to set this - def set_typer_for_flagoptions(self, list_typer_for_flagoptions): - self.flagoption_list_typer = list_typer_for_flagoptions - - # Assumption: for operand_list_types, we assume no double assignment - - def set_implicit_use_of_stdin(self, value: bool) -> None: - if value: - stdin_with_io = get_from_original_stddescriptor_with_ioinfo(get_stdin_fd(), make_stream_input()) - self.set_implicit_use_of_streaming_input(stdin_with_io) - else: - pass # since default value is None anyway - - def set_implicit_use_of_streaming_input(self, implicit_input: Optional[FileNameOrStdDescriptorWithIOInfo]) -> None: - self.implicit_use_of_streaming_input = implicit_input - - def set_implicit_use_of_stdout(self, 
value: bool) -> None: - if value: - stdout_with_io = get_from_original_stddescriptor_with_ioinfo(get_stdout_fd(), make_stream_output()) - self.set_implicit_use_of_streaming_output(stdout_with_io) - else: - pass # since default value is None anyway - - def set_implicit_use_of_streaming_output(self, implicit_output: Optional[FileNameOrStdDescriptorWithIOInfo]) -> None: - self.implicit_use_of_streaming_output = implicit_output - - def all_operands_are_streaming_inputs(self) -> None: - pass # since this is the default in the constructor - - def all_operands_are_streaming_outputs(self) -> None: - number_of_operands = len(self.operand_list_typer) - self.operand_list_typer = [(WhichClassForArg.FILESTD, make_stream_output())] * number_of_operands - - def all_operands_are_other_inputs(self) -> None: - number_of_operands = len(self.operand_list_typer) - self.operand_list_typer = [(WhichClassForArg.FILESTD, make_other_input())] * number_of_operands - - def all_operands_are_other_outputs(self) -> None: - number_of_operands = len(self.operand_list_typer) - self.operand_list_typer = [(WhichClassForArg.FILESTD, make_other_output())] * number_of_operands - - def all_but_last_operand_is_streaming_input(self) -> None: - pass # since this is the default in the constructor, and we assume the last is assigned somewhere else - - def all_but_last_operand_is_other_input(self) -> None: - original_last_entry = self.operand_list_typer[-1] - self.all_operands_are_other_inputs() - self.operand_list_typer[-1] = original_last_entry - - def all_but_first_operand_is_streaming_input(self) -> None: - pass # since this is the default in the constructor and we assume the last is assigned somewhere else - - def all_but_first_operand_is_streaming_output(self) -> None: - original_first_entry = self.operand_list_typer[0] - self.all_operands_are_streaming_outputs() - self.operand_list_typer[0] = original_first_entry - - def all_but_first_operand_is_other_input(self) -> None: - original_first_entry = 
self.operand_list_typer[0] - number_of_operands = len(self.operand_list_typer) - self.operand_list_typer = [(WhichClassForArg.FILESTD, make_other_input())] * number_of_operands - self.operand_list_typer[0] = original_first_entry - - def only_last_operand_is_stream_output(self) -> None: - self.operand_list_typer[-1] = (WhichClassForArg.FILESTD, make_stream_output()) - - def only_last_operand_is_other_output(self) -> None: - self.operand_list_typer[-1] = (WhichClassForArg.FILESTD, make_other_output()) - - def set_all_operands_as_config_arg_type_string(self) -> None: - number_of_operands = len(self.operand_list_typer) - self.operand_list_typer = [(WhichClassForArg.ARGSTRING, None)] * number_of_operands - - def set_first_operand_as_config_arg_type_string(self) -> None: - self.operand_list_typer[0] = (WhichClassForArg.ARGSTRING, None) - - def set_first_operand_as_config_arg_type_filename_or_std_descriptor(self) -> None: - self.operand_list_typer[0] = (WhichClassForArg.FILESTD, make_config_input()) - - # TODO: this is only used for operands of XARGS but should be refactored once a solution for xargs is found - def set_all_operands_as_arg_string(self) -> None: - # not correct but sound approximation for cases where input is provided by xargs via stdin - self.operand_list_typer = [(WhichClassForArg.ARGSTRING, None)] * len(self.operand_list_typer) - - # methods to apply the InputOutputInfo to a command invocation - - def apply_input_output_info_to_command_invocation(self, cmd_inv: CommandInvocationInitial) \ - -> CommandInvocationWithIO: - # 1) transform flagoption list - flagoption_list_original: List[FlagOption] = cmd_inv.flag_option_list - flagoption_list_with_io: List[Union[Flag, OptionWithIO]] = \ - [InputOutputInfo.apply_typer_to_flagoption(flagoption, typer) for (flagoption, typer) in zip(flagoption_list_original, self.flagoption_list_typer)] - # 2) transform operand list - operand_list_original: List[Operand] = cmd_inv.operand_list - operand_list_with_io_full: 
List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]] = \ - [InputOutputInfo.apply_typer_to_arg(operand.get_name(), typer) for (operand, typer) in zip(operand_list_original, self.operand_list_typer)] - # 3) build the command invocation with io and return - cmd_inv_io_full = CommandInvocationWithIO( - cmd_name=cmd_inv.cmd_name, - flag_option_list=flagoption_list_with_io, - operand_list=operand_list_with_io_full, - implicit_use_of_streaming_input=self.implicit_use_of_streaming_input, - implicit_use_of_streaming_output=self.implicit_use_of_streaming_output - ) - return cmd_inv_io_full - - @staticmethod - def apply_typer_to_arg(arg: str, - typer: Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None]]) \ - -> Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]: - if typer[0] == WhichClassForArg.FILESTD: - access: AccessKind = typer[1] - if access.is_any_input(): - filename_or_stddescriptor: FileNameOrStdDescriptor = compute_actual_el_for_input(arg) - elif access.is_any_output(): - filename_or_stddescriptor: FileNameOrStdDescriptor = compute_actual_el_for_output(arg) - else: - raise Exception("access which is neither any input nor output") - if isinstance(filename_or_stddescriptor, FileName): - return get_from_original_filename_with_ioinfo(filename_or_stddescriptor, access) - elif isinstance(filename_or_stddescriptor, StdDescriptor): - return get_from_original_stddescriptor_with_ioinfo(filename_or_stddescriptor, access) - elif typer[0] == WhichClassForArg.ARGSTRING: - return ArgStringType(arg) - else: - raise Exception("no valid option for argument type WhichClassForArg: " + str(typer[0])) - - @staticmethod - def apply_typer_to_flagoption(flagoption: FlagOption, - typer: Union[Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], - Tuple[Literal[WhichClassForArg.ARGSTRING], None], - Tuple[Literal[WhichClassForArg.PLAINSTRING], None]]) \ - -> Union[Flag, OptionWithIO]: - if isinstance(flagoption, Flag): 
- return flagoption - elif isinstance(flagoption, Option): - assert (typer[0] == WhichClassForArg.FILESTD) or (typer[0] == WhichClassForArg.ARGSTRING) # PLAINSTRING only for flags - option_arg = flagoption.get_arg() - option_arg_new: Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo] = InputOutputInfo.apply_typer_to_arg(option_arg, typer) - return OptionWithIO(flagoption.get_name(), option_arg_new) - else: - raise Exception("neither flag nor option") - - def has_other_outputs(self): - typer_lists = list(itertools.chain(self.flagoption_list_typer, self.operand_list_typer)) - for _, pot_accesskind in typer_lists: - if pot_accesskind is not None: - # is not None -> of type AccessKind - if pot_accesskind.is_other_output(): - return True - return False diff --git a/pash_annotations/annotation_generation/datatypes/CommandProperties.py b/pash_annotations/annotation_generation/datatypes/command_properties.py similarity index 82% rename from pash_annotations/annotation_generation/datatypes/CommandProperties.py rename to pash_annotations/annotation_generation/datatypes/command_properties.py index dedc9ae..1b3dee3 100644 --- a/pash_annotations/annotation_generation/datatypes/CommandProperties.py +++ b/pash_annotations/annotation_generation/datatypes/command_properties.py @@ -2,11 +2,13 @@ from pash_annotations.util import return_default_if_none_else_itself from typing import Dict, Any, Optional + # TODO: use this in construction of Info (under the hood with setter-functions) class CommandProperties: - def __init__(self, dict_prop_name_value: Optional[Dict[str, Any]] = None) -> None: - self.dict_prop_name_value: Dict[str, Any] = return_default_if_none_else_itself(dict_prop_name_value, dict()) + self.dict_prop_name_value: Dict[str, Any] = return_default_if_none_else_itself( + dict_prop_name_value, dict() + ) def __repr__(self) -> str: return standard_repr(self) @@ -15,4 +17,4 @@ def set_property(self, prop_name: str, value: Any) -> None: 
self.dict_prop_name_value[prop_name] = value def get_property_value(self, prop_name: str): - return self.dict_prop_name_value.get(prop_name, None) \ No newline at end of file + return self.dict_prop_name_value.get(prop_name, None) diff --git a/pash_annotations/annotation_generation/datatypes/input_output_info.py b/pash_annotations/annotation_generation/datatypes/input_output_info.py new file mode 100644 index 0000000..3bcf979 --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/input_output_info.py @@ -0,0 +1,287 @@ +import itertools + +from pash_annotations.util_standard import standard_repr +from typing import List, Tuple, Union, Optional, Literal +from pash_annotations.datatypes.access_kind import ( + AccessKind, + make_stream_input, + make_other_input, + make_stream_output, + make_other_output, + make_config_input, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.basic_datatypes import ( + Operand, + ArgStringType, + FileNameOrStdDescriptor, + FileName, + StdDescriptor, + Flag, + Option, + FlagOption, + WhichClassForArg, + get_stdout_fd, + get_stdin_fd, +) +from pash_annotations.datatypes.basic_datatypes_with_io import ( + FileNameOrStdDescriptorWithIOInfo, + OptionWithIO, + get_from_original_stddescriptor_with_ioinfo, + get_from_original_filename_with_ioinfo, +) +from pash_annotations.util import ( + compute_actual_el_for_input, + compute_actual_el_for_output, +) + + +class InputOutputInfo: + def __init__( + self, + flagoption_list_typer: List[ + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + Tuple[Literal[WhichClassForArg.PLAINSTRING], None], + ] + ], + number_of_operands: int, + implicit_use_of_streaming_input: Optional[ + FileNameOrStdDescriptorWithIOInfo + ] = None, + 
implicit_use_of_streaming_output: Optional[ + FileNameOrStdDescriptorWithIOInfo + ] = None, + ) -> None: + self.flagoption_list_typer: List[ + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + Tuple[Literal[WhichClassForArg.PLAINSTRING], None], + ] + ] = flagoption_list_typer + self.operand_list_typer: List[ + Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + ] + ] = [(WhichClassForArg.FILESTD, make_stream_input())] * number_of_operands + self.implicit_use_of_streaming_input: Optional[ + FileNameOrStdDescriptorWithIOInfo + ] = implicit_use_of_streaming_input + self.implicit_use_of_streaming_output: Optional[ + FileNameOrStdDescriptorWithIOInfo + ] = implicit_use_of_streaming_output + + def __repr__(self) -> str: + return standard_repr(self) + + # the InputOutputInfoGenerator Interface automatically calls this function to set this + def set_typer_for_flagoptions(self, list_typer_for_flagoptions): + self.flagoption_list_typer = list_typer_for_flagoptions + + # Assumption: for operand_list_types, we assume no double assignment + + def set_implicit_use_of_stdin(self, value: bool) -> None: + if value: + stdin_with_io = get_from_original_stddescriptor_with_ioinfo( + get_stdin_fd(), make_stream_input() + ) + self.set_implicit_use_of_streaming_input(stdin_with_io) + else: + pass # since default value is None anyway + + def set_implicit_use_of_streaming_input( + self, implicit_input: Optional[FileNameOrStdDescriptorWithIOInfo] + ) -> None: + self.implicit_use_of_streaming_input = implicit_input + + def set_implicit_use_of_stdout(self, value: bool) -> None: + if value: + stdout_with_io = get_from_original_stddescriptor_with_ioinfo( + get_stdout_fd(), make_stream_output() + ) + self.set_implicit_use_of_streaming_output(stdout_with_io) + else: + pass # since default value is None anyway + + def set_implicit_use_of_streaming_output( + self, 
implicit_output: Optional[FileNameOrStdDescriptorWithIOInfo] + ) -> None: + self.implicit_use_of_streaming_output = implicit_output + + def all_operands_are_streaming_inputs(self) -> None: + pass # since this is the default in the constructor + + def all_operands_are_streaming_outputs(self) -> None: + number_of_operands = len(self.operand_list_typer) + self.operand_list_typer = [ + (WhichClassForArg.FILESTD, make_stream_output()) + ] * number_of_operands + + def all_operands_are_other_inputs(self) -> None: + number_of_operands = len(self.operand_list_typer) + self.operand_list_typer = [ + (WhichClassForArg.FILESTD, make_other_input()) + ] * number_of_operands + + def all_operands_are_other_outputs(self) -> None: + number_of_operands = len(self.operand_list_typer) + self.operand_list_typer = [ + (WhichClassForArg.FILESTD, make_other_output()) + ] * number_of_operands + + def all_but_last_operand_is_streaming_input(self) -> None: + pass # since this is the default in the constructor, and we assume the last is assigned somewhere else + + def all_but_last_operand_is_other_input(self) -> None: + original_last_entry = self.operand_list_typer[-1] + self.all_operands_are_other_inputs() + self.operand_list_typer[-1] = original_last_entry + + def all_but_first_operand_is_streaming_input(self) -> None: + pass # since this is the default in the constructor and we assume the last is assigned somewhere else + + def all_but_first_operand_is_streaming_output(self) -> None: + original_first_entry = self.operand_list_typer[0] + self.all_operands_are_streaming_outputs() + self.operand_list_typer[0] = original_first_entry + + def all_but_first_operand_is_other_input(self) -> None: + original_first_entry = self.operand_list_typer[0] + number_of_operands = len(self.operand_list_typer) + self.operand_list_typer = [ + (WhichClassForArg.FILESTD, make_other_input()) + ] * number_of_operands + self.operand_list_typer[0] = original_first_entry + + def only_last_operand_is_stream_output(self) 
-> None: + self.operand_list_typer[-1] = (WhichClassForArg.FILESTD, make_stream_output()) + + def only_last_operand_is_other_output(self) -> None: + self.operand_list_typer[-1] = (WhichClassForArg.FILESTD, make_other_output()) + + def set_all_operands_as_config_arg_type_string(self) -> None: + number_of_operands = len(self.operand_list_typer) + self.operand_list_typer = [ + (WhichClassForArg.ARGSTRING, None) + ] * number_of_operands + + def set_first_operand_as_config_arg_type_string(self) -> None: + self.operand_list_typer[0] = (WhichClassForArg.ARGSTRING, None) + + def set_first_operand_as_config_arg_type_filename_or_std_descriptor(self) -> None: + self.operand_list_typer[0] = (WhichClassForArg.FILESTD, make_config_input()) + + # TODO: this is only used for operands of XARGS but should be refactored once a solution for xargs is found + def set_all_operands_as_arg_string(self) -> None: + # not correct but sound approximation for cases where input is provided by xargs via stdin + self.operand_list_typer = [(WhichClassForArg.ARGSTRING, None)] * len( + self.operand_list_typer + ) + + # methods to apply the InputOutputInfo to a command invocation + + def apply_input_output_info_to_command_invocation( + self, cmd_inv: CommandInvocationInitial + ) -> CommandInvocationWithIO: + # 1) transform flagoption list + flagoption_list_original: List[FlagOption] = cmd_inv.flag_option_list + flagoption_list_with_io: List[Union[Flag, OptionWithIO]] = [ + InputOutputInfo.apply_typer_to_flagoption(flagoption, typer) + for (flagoption, typer) in zip( + flagoption_list_original, self.flagoption_list_typer + ) + ] + # 2) transform operand list + operand_list_original: List[Operand] = cmd_inv.operand_list + operand_list_with_io_full: List[ + Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo] + ] = [ + InputOutputInfo.apply_typer_to_arg(operand.get_name(), typer) + for (operand, typer) in zip(operand_list_original, self.operand_list_typer) + ] + # 3) build the command invocation with 
io and return + cmd_inv_io_full = CommandInvocationWithIO( + cmd_name=cmd_inv.cmd_name, + flag_option_list=flagoption_list_with_io, + operand_list=operand_list_with_io_full, + implicit_use_of_streaming_input=self.implicit_use_of_streaming_input, + implicit_use_of_streaming_output=self.implicit_use_of_streaming_output, + ) + return cmd_inv_io_full + + @staticmethod + def apply_typer_to_arg( + arg: str, + typer: Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + ], + ) -> Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]: + if typer[0] == WhichClassForArg.FILESTD: + access: AccessKind = typer[1] + if access.is_any_input(): + filename_or_stddescriptor: FileNameOrStdDescriptor = ( + compute_actual_el_for_input(arg) + ) + elif access.is_any_output(): + filename_or_stddescriptor: FileNameOrStdDescriptor = ( + compute_actual_el_for_output(arg) + ) + else: + raise Exception("access which is neither any input nor output") + if isinstance(filename_or_stddescriptor, FileName): + return get_from_original_filename_with_ioinfo( + filename_or_stddescriptor, access + ) + elif isinstance(filename_or_stddescriptor, StdDescriptor): + return get_from_original_stddescriptor_with_ioinfo( + filename_or_stddescriptor, access + ) + elif typer[0] == WhichClassForArg.ARGSTRING: + return ArgStringType(arg) + else: + raise Exception( + "no valid option for argument type WhichClassForArg: " + str(typer[0]) + ) + + @staticmethod + def apply_typer_to_flagoption( + flagoption: FlagOption, + typer: Union[ + Tuple[Literal[WhichClassForArg.FILESTD], AccessKind], + Tuple[Literal[WhichClassForArg.ARGSTRING], None], + Tuple[Literal[WhichClassForArg.PLAINSTRING], None], + ], + ) -> Union[Flag, OptionWithIO]: + if isinstance(flagoption, Flag): + return flagoption + elif isinstance(flagoption, Option): + assert (typer[0] == WhichClassForArg.FILESTD) or ( + typer[0] == WhichClassForArg.ARGSTRING + ) # PLAINSTRING only for flags + 
option_arg = flagoption.get_arg() + option_arg_new: Union[ + ArgStringType, FileNameOrStdDescriptorWithIOInfo + ] = InputOutputInfo.apply_typer_to_arg(option_arg, typer) + return OptionWithIO(flagoption.get_name(), option_arg_new) + else: + raise Exception("neither flag nor option") + + def has_other_outputs(self): + typer_lists = list( + itertools.chain(self.flagoption_list_typer, self.operand_list_typer) + ) + for _, pot_accesskind in typer_lists: + if pot_accesskind is not None: + # is not None -> of type AccessKind + if pot_accesskind.is_other_output(): + return True + return False diff --git a/pash_annotations/annotation_generation/datatypes/Inputs.py b/pash_annotations/annotation_generation/datatypes/inputs.py similarity index 96% rename from pash_annotations/annotation_generation/datatypes/Inputs.py rename to pash_annotations/annotation_generation/datatypes/inputs.py index bbd56c0..5a83347 100644 --- a/pash_annotations/annotation_generation/datatypes/Inputs.py +++ b/pash_annotations/annotation_generation/datatypes/inputs.py @@ -7,7 +7,6 @@ class InputsEnum(Enum): class Inputs: - def __init__(self, kind: InputsEnum, values): self.kind = kind self.values = values @@ -29,4 +28,4 @@ def get_all_inputs(self): streaming_inputs = self.values[1] return conf_inputs + streaming_inputs else: - assert(False) \ No newline at end of file + assert False diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/Aggregator.py b/pash_annotations/annotation_generation/datatypes/parallelizability/Aggregator.py deleted file mode 100644 index 8a19799..0000000 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/Aggregator.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Optional, List, Union - -from pash_annotations.datatypes.BasicDatatypesWithIOVar import IOVar, OptionWithIOVar -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars -from pash_annotations.util_standard import standard_repr, 
standard_eq -from pash_annotations.util import return_empty_flag_option_list_if_none_else_itself, return_empty_pos_config_list_if_none_else_itself - -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import AggregatorKindEnum - -from pash_annotations.datatypes.BasicDatatypes import FlagOption, OptionArgPosConfigType, Flag, ArgStringType - - -class Aggregator(CommandInvocationWithIOVars): - - def __init__(self, - # depending on kind, the aggregator function will be applied to different inputs, e.g. lines - kind: AggregatorKindEnum, - cmd_name: str, - flag_option_list: List[Union[Flag, OptionWithIOVar]], - operand_list: List[Union[ArgStringType, IOVar]], - implicit_use_of_streaming_input: Optional[IOVar], - implicit_use_of_streaming_output: Optional[IOVar], - access_map - ) -> None: - self.kind = kind - CommandInvocationWithIOVars.__init__(self, cmd_name, flag_option_list, operand_list, implicit_use_of_streaming_input, implicit_use_of_streaming_output, access_map) - - def __eq__(self, other) -> bool: - return standard_eq(self, other) - - def __repr__(self) -> str: - return standard_repr(self) - - def is_aggregator_concatenate(self): - return self.kind == AggregatorKindEnum.CONCATENATE - - @classmethod - def make_aggregator_from_cmd_inv_with_io(cls, cmd_inv: CommandInvocationWithIOVars, kind: AggregatorKindEnum): - return cls(kind, cmd_inv.cmd_name, cmd_inv.flag_option_list, cmd_inv.operand_list, - cmd_inv.implicit_use_of_streaming_input, cmd_inv.implicit_use_of_streaming_output, cmd_inv.access_map) - - # @classmethod - # def make_aggregator_concatenate(cls) -> Aggregator: - # return cls(AggregatorKindEnum.CONCATENATE, - # cmd_name='cat') - # - # @classmethod - # def make_aggregator_adj_lines_merge(cls) -> Aggregator: - # return cls(AggregatorKindEnum.ADJ_LINES_MERGE, - # cmd_name='adj_lines_merge') - # - # @classmethod - # def make_aggregator_custom_2_ary(cls, - # cmd_name: str, - # flag_option_list: List[FlagOption], - # 
positional_config_list: Optional[List[OptionArgPosConfigType]] = None, - # ) -> Aggregator: - # return cls(AggregatorKindEnum.CUSTOM_2_ARY, - # cmd_name=cmd_name, - # flag_option_list=flag_option_list, - # positional_config_list=positional_config_list) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorKind.py b/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorKind.py deleted file mode 100644 index 70c4d86..0000000 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorKind.py +++ /dev/null @@ -1,15 +0,0 @@ -from enum import Enum - -class AggregatorKindEnum(Enum): - # Assumption: for multiple inputs, given in order of appearance: flagoption_list, operand_list - # Assumption: for ADJ_LINES_* and CUSTOM_2_ARY, PaSh concatenates the resulting streams - # Assumption: for ADJ_LINES_*, both lines are given in one input stream - # Question: is this "streaming inputs" a resonable choice as condition for the command invocation? - CONCATENATE = 1 # hard-coded; CommandInvocationWithIO with n streaming inputs (just operand list as cat) and 1 streaming output - ADJ_LINES_MERGE = 2 # hard-coded; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output - ADJ_LINES_SEQ = 3 # hard-coded; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output - # should the function be a CommandInvocationWithIO itself? - ADJ_LINES_FUNC = 4 # function for adjacent lines; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output - CUSTOM_2_ARY = 5 # function for two inputs; CommandInvocationWithIO with 2 streaming inputs and 1 streaming output - CUSTOM_N_ARY = 6 # function for n inputs; CommandInvocationWithIO with n streaming inputs and 1 streaming output - # How to know where to provide the n streaming inputs? One option or operand list? 
diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorSpec.py b/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorSpec.py deleted file mode 100644 index f45e508..0000000 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/AggregatorSpec.py +++ /dev/null @@ -1,332 +0,0 @@ -from copy import deepcopy -from typing import Optional, List, Literal, Union - -from abc import ABC, abstractmethod - -from pash_annotations.datatypes.BasicDatatypesWithIOVar import IOVar, OptionWithIOVar -from pash_annotations.parser.parser import parse -from pash_annotations.util_standard import standard_repr, standard_eq - -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars -from pash_annotations.datatypes.BasicDatatypes import FileNameOrStdDescriptor, ArgStringType, Flag -from pash_annotations.datatypes.AccessKind import AccessKind, make_stream_input, make_stream_output -from pash_annotations.annotation_generation.datatypes.parallelizability.TransformerFlagOptionList import TransformerFlagOptionList, \ - return_transformer_flagoption_list_same_as_seq_if_none_else_itself, TransformerFlagOptionListCustom -# from annotation_generation.datatypes.parallelizability.TransformerPosConfigList import TransformerPosConfigList -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import AggregatorKindEnum -from pash_annotations.annotation_generation.datatypes.parallelizability.Aggregator import Aggregator -from pash_annotations.util import return_default_if_none_else_itself - -# What spec needs to contain for which one: - -# CONCATENATE: only kind -# ADJ_LINES_MERGE: only kind -# ADJ_LINES_SEQ: only kind, return can be computed from parameters - -# ADJ_LINES_FUNC: function for adjacent lines (2 inputs) -# CUSTOM_2_ARY: function for two blocks (2 inputs) -# could be given as transformation of original command or parsed from string representation - 
-# CUSTOM_N_ARY: function for all blocks (multiple inputs) -# as for 2 inputs but some way to specify additional inputs, -# hard-coded appended to operand list? currently no use case anyway... - -class AggregatorSpec(ABC): - - def __init__(self, - kind: AggregatorKindEnum, - # spec_agg_cmd_name: str, - # for now, we keep everything in operand list as it is but substitute streaming input and output - is_implemented: bool = False - ) -> None: - self.kind: AggregatorKindEnum = kind - # self.spec_agg_cmd_name: str = spec_agg_cmd_name # for the rest, it should be specified - # self.flag_option_list_transformer: TransformerFlagOptionList = \ - # TransformerFlagOptionList.return_transformer_empty_if_none_else_itself(flag_option_list_transformer) - self.is_implemented = is_implemented - - - def __eq__(self, other) -> bool: - return standard_eq(self, other) - - def __repr__(self) -> str: - return standard_repr(self) - - def is_aggregator_spec_concatenate(self): - return self.kind == AggregatorKindEnum.CONCATENATE - - def is_aggregator_spec_adj_lines_merge(self): - return self.kind == AggregatorKindEnum.ADJ_LINES_MERGE - - def is_aggregator_spec_adj_lines_seq(self): - return self.kind == AggregatorKindEnum.ADJ_LINES_SEQ - - def is_aggregator_spec_adj_lines_func(self): - return self.kind == AggregatorKindEnum.ADJ_LINES_FUNC - - def is_aggregator_spec_custom_2_ary(self): - return self.kind == AggregatorKindEnum.CUSTOM_2_ARY - - def is_aggregator_spec_custom_n_ary(self): - return self.kind == AggregatorKindEnum.CUSTOM_N_ARY - - # Spec shall be hold by PaSh and once needed, gets actual aggregator from this function - # return value None if it is not yet implemented - # PaSh ought to provide the correct input based on the kind of aggregator, e.g., line - # for CONCATENATE and CUSTOM_N_ARY, we need to provide the number of inputs to give back - @abstractmethod - def get_aggregator(self, - original_cmd_invocation: CommandInvocationWithIOVars, - inputs_from: List[Union[IOVar, 
ArgStringType]], - # ArgStringType needed for typing, only IOVar provided - output_to: IOVar - ) -> Optional[Aggregator]: - pass - - @abstractmethod - def get_actual_2_ary_aggregator_with_aux(self, - fst_normal_input: FileNameOrStdDescriptor, - fst_aux_inputs_from: List[FileNameOrStdDescriptor], - snd_normal_input: FileNameOrStdDescriptor, - snd_aux_inputs_from: List[FileNameOrStdDescriptor], - output_to: FileNameOrStdDescriptor, - aux_outputs_to: List[FileNameOrStdDescriptor] - ): - pass - - def get_kind(self): - return self.kind - -def make_aggregator_spec_concatenate() -> AggregatorSpec: - return AggregatorSpecNonFunc(AggregatorKindEnum.CONCATENATE, is_implemented=True) - -def make_aggregator_spec_adj_lines_merge() -> AggregatorSpec: - return AggregatorSpecNonFunc(AggregatorKindEnum.ADJ_LINES_MERGE, is_implemented=False) - -def make_aggregator_spec_adj_lines_seq() -> AggregatorSpec: - return AggregatorSpecNonFunc(AggregatorKindEnum.ADJ_LINES_SEQ, is_implemented=False) - -def make_aggregator_spec_adj_lines_func_from_cmd_inv_with_transformers( - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, - # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, - is_implemented: bool = False) -> AggregatorSpec: - return AggregatorSpecFuncTransformer(kind=AggregatorKindEnum.ADJ_LINES_FUNC, - flag_option_list_transformer=flag_option_list_transformer, - # pos_config_list_transformer=pos_config_list_transformer, - is_implemented=is_implemented) - -def make_aggregator_spec_adj_lines_func_from_string_representation( - cmd_inv_as_str: str, - is_implemented: bool= False) -> AggregatorSpec: - return AggregatorSpecFuncStringRepresentation(kind=AggregatorKindEnum.ADJ_LINES_FUNC, - cmd_inv_as_str=cmd_inv_as_str, - is_implemented=is_implemented) - -def make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, - # pos_config_list_transformer: 
Optional[TransformerPosConfigList] = None, - is_implemented: bool = False) -> AggregatorSpec: - return AggregatorSpecFuncTransformer(kind=AggregatorKindEnum.CUSTOM_2_ARY, - flag_option_list_transformer=flag_option_list_transformer, - # pos_config_list_transformer=pos_config_list_transformer, - is_implemented=is_implemented) - -def make_aggregator_spec_custom_2_ary_from_string_representation( - cmd_inv_as_str: str, - is_implemented: bool = False) -> AggregatorSpec: - return AggregatorSpecFuncStringRepresentation(kind=AggregatorKindEnum.CUSTOM_2_ARY, - cmd_inv_as_str=cmd_inv_as_str, - is_implemented=is_implemented) - -def make_aggregator_spec_custom_n_ary_from_cmd_inv_with_transformers( - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, - # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, - is_implemented: bool = False) -> AggregatorSpec: - return AggregatorSpecFuncTransformer(kind=AggregatorKindEnum.CUSTOM_N_ARY, - flag_option_list_transformer=flag_option_list_transformer, - # pos_config_list_transformer=pos_config_list_transformer, - is_implemented=is_implemented) - -def make_aggregator_spec_custom_n_ary_from_string_representation( - cmd_inv_as_str: str, - is_implemented: bool = False) -> AggregatorSpec: - return AggregatorSpecFuncStringRepresentation(kind=AggregatorKindEnum.CUSTOM_N_ARY, - cmd_inv_as_str=cmd_inv_as_str, - is_implemented=is_implemented) - -def return_aggregator_conc_if_none_else_itself(arg: Optional[AggregatorSpec]) -> AggregatorSpec: - return return_default_if_none_else_itself(arg, make_aggregator_spec_concatenate()) - - - -class AggregatorSpecNonFunc(AggregatorSpec): - - def __init__(self, - kind: Literal[AggregatorKindEnum.CONCATENATE, AggregatorKindEnum.ADJ_LINES_MERGE, AggregatorKindEnum.ADJ_LINES_SEQ], - is_implemented: bool) -> None: - AggregatorSpec.__init__(self, kind, is_implemented) - - def get_aggregator(self, - original_cmd_invocation: CommandInvocationWithIOVars, - inputs_from: 
List[Union[IOVar, ArgStringType]], - # ArgStringType needed for typing, only IOVar provided - output_to: IOVar - ) -> Optional[Aggregator]: - if not self.is_implemented: - return None - if self.kind == AggregatorKindEnum.CONCATENATE: - cmd_inv_cat = CommandInvocationWithIOVars.make_cat_command_invocation_with_io_vars(inputs_from, output_to) - return Aggregator.make_aggregator_from_cmd_inv_with_io(cmd_inv_cat, self.kind) - elif self.kind == AggregatorKindEnum.ADJ_LINES_MERGE: - assert(len(inputs_from) == 1) - assert(False) - # TODO - # tr -d '\n' | sed '$a\' seems to do the job -> @KK: Can we join this in one command so no sequence of commands? - return None - elif self.kind == AggregatorKindEnum.ADJ_LINES_SEQ: - assert(len(inputs_from) == 1) - assert(False) - # TODO - return None - - def get_actual_2_ary_aggregator_with_aux(self, - fst_normal_input: FileNameOrStdDescriptor, - fst_aux_inputs_from: List[FileNameOrStdDescriptor], - snd_normal_input: FileNameOrStdDescriptor, - snd_aux_inputs_from: List[FileNameOrStdDescriptor], - output_to: FileNameOrStdDescriptor, - aux_outputs_to: List[FileNameOrStdDescriptor] - ): - raise Exception("Auxiliary information from mapper to aggregator only supported for aggregators given as string") - - -class AggregatorSpecFuncTransformer(AggregatorSpec): - - def __init__(self, - kind: Literal[AggregatorKindEnum.ADJ_LINES_FUNC, AggregatorKindEnum.CUSTOM_2_ARY, AggregatorKindEnum.CUSTOM_N_ARY], - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, # None translates to same as seq - # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, - is_implemented: bool=False) -> None: - AggregatorSpec.__init__(self, kind, is_implemented) - # for now, we keep everything in operand list as it is but substitute streaming input and output - self.flag_option_list_transformer: TransformerFlagOptionList = \ - return_transformer_flagoption_list_same_as_seq_if_none_else_itself(flag_option_list_transformer) - - def 
get_aggregator(self, - original_cmd_invocation: CommandInvocationWithIOVars, - inputs_from: List[Union[IOVar, ArgStringType]], - # ArgStringType needed for typing, only IOVar provided - output_to: IOVar - ) -> Optional[Aggregator]: - if not self.is_implemented: - return None - # sanity checks - if self.kind == AggregatorKindEnum.ADJ_LINES_FUNC: - assert(len(inputs_from) == 1) - elif self.kind == AggregatorKindEnum.CUSTOM_2_ARY: - assert(len(inputs_from) == 2) - aggregator_cmd_inv = deepcopy(original_cmd_invocation) - aggregator_cmd_inv.flag_option_list = self.flag_option_list_transformer.get_flag_option_list_after_transformer_application(original_cmd_invocation.flag_option_list) - # access map modifications - # Hard-coded how to provide input and get output -> TODO: move to spec - aggregator_cmd_inv.remove_streaming_inputs() - aggregator_cmd_inv.operand_list = inputs_from - for input_id in inputs_from: - assert not input_id in aggregator_cmd_inv.access_map and isinstance(input_id, IOVar) - aggregator_cmd_inv.access_map[input_id] = make_stream_input() - aggregator_cmd_inv.replace_var(aggregator_cmd_inv.implicit_use_of_streaming_output, output_to) - return Aggregator.make_aggregator_from_cmd_inv_with_io(aggregator_cmd_inv, self.kind) - - def get_actual_2_ary_aggregator_with_aux(self, - fst_normal_input: FileNameOrStdDescriptor, - fst_aux_inputs_from: List[FileNameOrStdDescriptor], - snd_normal_input: FileNameOrStdDescriptor, - snd_aux_inputs_from: List[FileNameOrStdDescriptor], - output_to: FileNameOrStdDescriptor, - aux_outputs_to: List[FileNameOrStdDescriptor] - ): - raise Exception("Auxiliary information from mapper to aggregator only supported for aggregators given as string") - - -class AggregatorSpecFuncStringRepresentation(AggregatorSpec): - - def __init__(self, - kind: Literal[AggregatorKindEnum.ADJ_LINES_FUNC, AggregatorKindEnum.CUSTOM_2_ARY, AggregatorKindEnum.CUSTOM_N_ARY], - cmd_inv_as_str: str, - is_implemented: bool=False) -> None: - 
AggregatorSpec.__init__(self, kind, is_implemented) - # for now, we keep everything in operand list as it is but substitute streaming input and output - self.cmd_inv_as_str = cmd_inv_as_str - - def get_aggregator(self, - original_cmd_invocation: CommandInvocationWithIOVars, - inputs_from: List[Union[IOVar, ArgStringType]], - # ArgStringType needed for typing, only IOVar provided - output_to: IOVar - ) -> Optional[Aggregator]: - if not self.is_implemented: - return None - agg_cmd_inv = parse(self.cmd_inv_as_str) - # currently, we assume no file names in option arguments - # this is why we do not convert them properly but need to do this trick for typing - # later, if option arguments contain file names, they need to get IOVar from PaSh - new_flagoption_list : List[Union[Flag, OptionWithIOVar]] = [] - for x in agg_cmd_inv.flag_option_list: - assert isinstance(x, Flag) - new_flagoption_list.append(x) - # Assumption: inputs are given as operands and output is stdout - # Assumption: no inputs or outputs given (also as config) since we do not do access map things etc... - access_map = {input_id: make_stream_input() for input_id in inputs_from} - access_map[output_to] = make_stream_output() - agg_cmd_inv_with_io_vars = Aggregator(kind=self.kind, - access_map=access_map, - cmd_name = agg_cmd_inv.cmd_name, - flag_option_list=new_flagoption_list, - operand_list=inputs_from, - implicit_use_of_streaming_input=None, - implicit_use_of_streaming_output=output_to) - if self.kind == AggregatorKindEnum.ADJ_LINES_FUNC: - assert(len(inputs_from) == 1) - # TODO: isn't it 2 here? 
- raise Exception("case not yet implemented") - elif self.kind == AggregatorKindEnum.CUSTOM_2_ARY: - assert(len(inputs_from) == 2) - return agg_cmd_inv_with_io_vars - - def get_actual_2_ary_aggregator_with_aux(self, - fst_normal_input: IOVar, - fst_aux_inputs_from: List[IOVar], - snd_normal_input: IOVar, - snd_aux_inputs_from: List[IOVar], - output_to: IOVar, - aux_outputs_to: List[IOVar] - ): - assert(len(fst_aux_inputs_from) == len(snd_aux_inputs_from)) - assert(len(fst_aux_inputs_from) == len(aux_outputs_to)) - agg_cmd_inv = parse(self.cmd_inv_as_str) - # currently, we assume no file names in option arguments - # this is why we do not convert them properly but need to do this trick for typing - # later, if option arguments contain file names, they need to get IOVar from PaSh - new_flagoption_list : List[Union[Flag, OptionWithIOVar]] = [] - for x in agg_cmd_inv.flag_option_list: - assert isinstance(x, Flag) - new_flagoption_list.append(x) - all_inputs = [fst_normal_input] + fst_aux_inputs_from + [snd_normal_input] + snd_aux_inputs_from - all_outputs = [output_to] + aux_outputs_to - access_map = dict() - for input_id in all_inputs: - access_map[input_id] = make_stream_input() - for output_id in all_outputs: - access_map[output_id] = make_stream_output() - new_operand_list : List[Union[ArgStringType, IOVar]] = [] - # trick for typing... 
- for x in [fst_normal_input] + fst_aux_inputs_from + [snd_normal_input] + snd_aux_inputs_from + [output_to] + aux_outputs_to: - new_operand_list.append(x) - agg_cmd_inv_with_io_vars = Aggregator(kind=self.kind, - access_map=access_map, - cmd_name = agg_cmd_inv.cmd_name, - flag_option_list=new_flagoption_list, - operand_list=new_operand_list, - implicit_use_of_streaming_input=None, - implicit_use_of_streaming_output=None) - return agg_cmd_inv_with_io_vars diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/Mapper.py b/pash_annotations/annotation_generation/datatypes/parallelizability/Mapper.py deleted file mode 100644 index 46a6385..0000000 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/Mapper.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Optional, List, Union - -from pash_annotations.datatypes.BasicDatatypesWithIOVar import OptionWithIOVar, IOVar -from pash_annotations.util_standard import standard_repr, standard_eq - -from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType -from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO, FileNameOrStdDescriptorWithIOInfo -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars - -class Mapper(CommandInvocationWithIOVars): - - # Assumption: 1 streaming input and 1 streaming output, to substitute the (new) input and output - - def __init__(self, - cmd_name: str, - flag_option_list : List[Union[Flag, OptionWithIOVar]], - operand_list : List[Union[ArgStringType, IOVar]], - implicit_use_of_streaming_input: Optional[IOVar], - implicit_use_of_streaming_output: Optional[IOVar], - access_map - ) -> None: - CommandInvocationWithIOVars.__init__(self, - cmd_name, - flag_option_list, - operand_list, - implicit_use_of_streaming_input, - implicit_use_of_streaming_output, - access_map) - - def __eq__(self, other) -> bool: - return standard_eq(self, other) - - def __repr__(self) -> str: - return 
standard_repr(self) - - @staticmethod - def make_same_as_seq_mapper_from_command_invocation(command_invocation_with_io: CommandInvocationWithIOVars): - return Mapper(command_invocation_with_io.cmd_name, - command_invocation_with_io.flag_option_list, - command_invocation_with_io.operand_list, - command_invocation_with_io.implicit_use_of_streaming_input, - command_invocation_with_io.implicit_use_of_streaming_output, - command_invocation_with_io.access_map) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/Parallelizer.py b/pash_annotations/annotation_generation/datatypes/parallelizability/Parallelizer.py deleted file mode 100644 index 24c9a0a..0000000 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/Parallelizer.py +++ /dev/null @@ -1,135 +0,0 @@ -from copy import deepcopy -from typing import Optional, List, Union - -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars -from pash_annotations.util_standard import standard_eq -from pash_annotations.util import return_default_if_none_else_itself - -from pash_annotations.datatypes.BasicDatatypesWithIOVar import IOVar -from pash_annotations.datatypes.BasicDatatypes import FileNameOrStdDescriptor, ArgStringType -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import Splitter, make_splitter_consec_chunks, \ - make_splitter_indiv_files, make_splitter_round_robin, make_splitter_round_robin_with_unwrap -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import MapperSpec, \ - return_mapper_spec_seq_if_none_else_itself -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import AggregatorSpec, \ - return_aggregator_conc_if_none_else_itself -from pash_annotations.annotation_generation.datatypes.parallelizability.Mapper import Mapper -from pash_annotations.annotation_generation.datatypes.parallelizability.Aggregator import Aggregator - 
-from enum import Enum - -# this will probably become its own class with more information later -class AdditionalInfoSplitterToMapper(Enum): - NO_ADD_INPUT = 'no_add_input' - LINE_NUM_OFFSET = 'line_num_offset' - BYTE_OFFSET = 'byte_offset' - LINE_NUM_AND_BYTE_OFFSET = 'line_num_and_byte_offset' - - -class Parallelizer: - - def __init__(self, - splitter: Splitter, - # , we only store MapperSpec and AggregatorSpec, actual ones to be retrieved with CMDInvPref in Pash - core_mapper_spec: MapperSpec, - core_aggregator_spec: AggregatorSpec, - info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper], - info_mapper_aggregator: int # the number of pipes to connect - # if this is - ) -> None: - self.splitter: Splitter = splitter - self.core_mapper_spec: MapperSpec = core_mapper_spec - self.core_aggregator_spec: AggregatorSpec = core_aggregator_spec - self.info_splitter_mapper: AdditionalInfoSplitterToMapper = return_default_if_none_else_itself(info_splitter_mapper, AdditionalInfoSplitterToMapper.NO_ADD_INPUT) - self.info_mapper_aggregator = info_mapper_aggregator - # sanity check that round robin is only applied with following aggregators: - if self.splitter.is_splitter_round_robin(): - assert(self.core_aggregator_spec.is_aggregator_spec_concatenate() or - self.core_aggregator_spec.is_aggregator_spec_adj_lines_merge() or - self.core_aggregator_spec.is_aggregator_spec_adj_lines_seq() or - self.core_aggregator_spec.is_aggregator_spec_adj_lines_func()) - - def __eq__(self, other) -> bool: - return standard_eq(self, other) - - def __repr__(self) -> str: - return f'Parallizer: \n' \ - f'splitter: {self.splitter} \n' \ - f'mapper attr: {self.core_mapper_spec} \n' \ - f'aggregator attr: {self.core_aggregator_spec} \n' - - def get_splitter(self) -> Splitter: - return self.splitter - - def get_mapper_spec(self) -> MapperSpec: - return self.core_mapper_spec - - def get_actual_mapper(self, - cmd_invocation: CommandInvocationWithIOVars, - input_from: IOVar, - output_to: IOVar, - 
aux_output_tos: List[IOVar]) \ - -> Optional[Mapper]: - assert(len(aux_output_tos) == self.info_mapper_aggregator) - return self.core_mapper_spec.get_mapper(cmd_invocation, input_from, output_to, aux_output_tos) - - def get_aggregator_spec(self) -> AggregatorSpec: - return self.core_aggregator_spec - - def get_actual_aggregator(self, - cmd_invocation: CommandInvocationWithIOVars, - inputs_from: List[Union[IOVar, ArgStringType]], - # ArgStringType needed for typing, only IOVar provided - output_to: IOVar - ) -> Optional[Aggregator]: - return self.core_aggregator_spec.get_aggregator(cmd_invocation, inputs_from, output_to) - - def get_actual_2_ary_aggregator_with_aux(self, - fst_normal_input: FileNameOrStdDescriptor, - fst_aux_inputs_from: List[FileNameOrStdDescriptor], - snd_normal_input: FileNameOrStdDescriptor, - snd_aux_inputs_from: List[FileNameOrStdDescriptor], - output_to: FileNameOrStdDescriptor, - aux_outputs_to: List[FileNameOrStdDescriptor] - ): - return self.core_aggregator_spec.get_actual_2_ary_aggregator_with_aux(fst_normal_input, fst_aux_inputs_from, - snd_normal_input, snd_aux_inputs_from, - output_to, aux_outputs_to) - - def get_info_mapper_aggregator(self) -> int: - return self.info_mapper_aggregator - - def are_all_parts_implemented(self): - return self.core_mapper_spec.is_implemented and self.core_aggregator_spec.is_implemented - -def make_parallelizer_indiv_files(mapper_spec: Optional[MapperSpec]=None, - aggregator_spec: Optional[AggregatorSpec]=None, - info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper]=None, - info_mapper_aggregator: int = 0 - ) -> Parallelizer: - mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) - aggregator_spec = return_aggregator_conc_if_none_else_itself(aggregator_spec) - return Parallelizer(make_splitter_indiv_files(), mapper_spec, aggregator_spec, info_splitter_mapper, info_mapper_aggregator) - -def make_parallelizer_round_robin(mapper_spec: Optional[MapperSpec]=None, - aggregator_spec: 
Optional[AggregatorSpec]=None, - info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper]=None, - info_mapper_aggregator: int = 0 - ) -> Parallelizer: - mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) - aggregator_spec = return_aggregator_conc_if_none_else_itself(aggregator_spec) - return Parallelizer(make_splitter_round_robin(), mapper_spec, aggregator_spec, info_splitter_mapper, info_mapper_aggregator) - -def make_parallelizer_round_robin_with_unwrap_from_other(parallelizer): - new_parallelizer = deepcopy(parallelizer) - new_parallelizer.splitter = make_splitter_round_robin_with_unwrap() - return new_parallelizer - -def make_parallelizer_consec_chunks(mapper_spec: Optional[MapperSpec]=None, - aggregator_spec: Optional[AggregatorSpec]=None, - info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper] = None, - info_mapper_aggregator: int = 0 - ) -> Parallelizer: - mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) - aggregator_spec = return_aggregator_conc_if_none_else_itself(aggregator_spec) - return Parallelizer(make_splitter_consec_chunks(), mapper_spec, aggregator_spec, info_splitter_mapper, info_mapper_aggregator) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator.py b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator.py new file mode 100644 index 0000000..1365df2 --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator.py @@ -0,0 +1,79 @@ +from typing import Optional, List, Union + +from pash_annotations.datatypes.basic_datatypes import Flag, ArgStringType +from pash_annotations.datatypes.basic_datatypes_with_iovar import IOVar, OptionWithIOVar +from pash_annotations.datatypes.command_invocation_with_iovars import ( + CommandInvocationWithIOVars, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_kind import ( + AggregatorKindEnum, +) +from 
pash_annotations.util_standard import standard_repr, standard_eq + + +class Aggregator(CommandInvocationWithIOVars): + def __init__( + self, + # depending on kind, the aggregator function will be applied to different inputs, e.g. lines + kind: AggregatorKindEnum, + cmd_name: str, + flag_option_list: List[Union[Flag, OptionWithIOVar]], + operand_list: List[Union[ArgStringType, IOVar]], + implicit_use_of_streaming_input: Optional[IOVar], + implicit_use_of_streaming_output: Optional[IOVar], + access_map, + ) -> None: + self.kind = kind + CommandInvocationWithIOVars.__init__( + self, + cmd_name, + flag_option_list, + operand_list, + implicit_use_of_streaming_input, + implicit_use_of_streaming_output, + access_map, + ) + + def __eq__(self, other) -> bool: + return standard_eq(self, other) + + def __repr__(self) -> str: + return standard_repr(self) + + def is_aggregator_concatenate(self): + return self.kind == AggregatorKindEnum.CONCATENATE + + @classmethod + def make_aggregator_from_cmd_inv_with_io( + cls, cmd_inv: CommandInvocationWithIOVars, kind: AggregatorKindEnum + ): + return cls( + kind, + cmd_inv.cmd_name, + cmd_inv.flag_option_list, + cmd_inv.operand_list, + cmd_inv.implicit_use_of_streaming_input, + cmd_inv.implicit_use_of_streaming_output, + cmd_inv.access_map, + ) + + # @classmethod + # def make_aggregator_concatenate(cls) -> Aggregator: + # return cls(AggregatorKindEnum.CONCATENATE, + # cmd_name='cat') + # + # @classmethod + # def make_aggregator_adj_lines_merge(cls) -> Aggregator: + # return cls(AggregatorKindEnum.ADJ_LINES_MERGE, + # cmd_name='adj_lines_merge') + # + # @classmethod + # def make_aggregator_custom_2_ary(cls, + # cmd_name: str, + # flag_option_list: List[FlagOption], + # positional_config_list: Optional[List[OptionArgPosConfigType]] = None, + # ) -> Aggregator: + # return cls(AggregatorKindEnum.CUSTOM_2_ARY, + # cmd_name=cmd_name, + # flag_option_list=flag_option_list, + # positional_config_list=positional_config_list) diff --git 
a/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_kind.py b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_kind.py new file mode 100644 index 0000000..9e81b62 --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_kind.py @@ -0,0 +1,16 @@ +from enum import Enum + + +class AggregatorKindEnum(Enum): + # Assumption: for multiple inputs, given in order of appearance: flagoption_list, operand_list + # Assumption: for ADJ_LINES_* and CUSTOM_2_ARY, PaSh concatenates the resulting streams + # Assumption: for ADJ_LINES_*, both lines are given in one input stream + # Question: is this "streaming inputs" a resonable choice as condition for the command invocation? + CONCATENATE = 1 # hard-coded; CommandInvocationWithIO with n streaming inputs (just operand list as cat) and 1 streaming output + ADJ_LINES_MERGE = 2 # hard-coded; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output + ADJ_LINES_SEQ = 3 # hard-coded; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output + # should the function be a CommandInvocationWithIO itself? + ADJ_LINES_FUNC = 4 # function for adjacent lines; CommandInvocationWithIO with 1 streaming inputs and 1 streaming output + CUSTOM_2_ARY = 5 # function for two inputs; CommandInvocationWithIO with 2 streaming inputs and 1 streaming output + CUSTOM_N_ARY = 6 # function for n inputs; CommandInvocationWithIO with n streaming inputs and 1 streaming output + # How to know where to provide the n streaming inputs? One option or operand list? 
diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_spec.py b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_spec.py new file mode 100644 index 0000000..813ccf3 --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/aggregator_spec.py @@ -0,0 +1,435 @@ +from copy import deepcopy +from typing import Optional, List, Literal, Union + +from abc import ABC, abstractmethod + +from pash_annotations.datatypes.basic_datatypes import ( + FileNameOrStdDescriptor, + ArgStringType, + Flag, +) +from pash_annotations.datatypes.basic_datatypes_with_iovar import IOVar, OptionWithIOVar +from pash_annotations.datatypes.command_invocation_with_iovars import ( + CommandInvocationWithIOVars, +) +from pash_annotations.datatypes.access_kind import make_stream_input, make_stream_output + +from pash_annotations.util import return_default_if_none_else_itself +from pash_annotations.parser.parser import parse +from pash_annotations.util_standard import standard_repr, standard_eq + +from pash_annotations.annotation_generation.datatypes.parallelizability.transformer_flag_option_list import ( + TransformerFlagOptionList, + return_transformer_flagoption_list_same_as_seq_if_none_else_itself, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_kind import ( + AggregatorKindEnum, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator import ( + Aggregator, +) + + +# What spec needs to contain for which one: + +# CONCATENATE: only kind +# ADJ_LINES_MERGE: only kind +# ADJ_LINES_SEQ: only kind, return can be computed from parameters + +# ADJ_LINES_FUNC: function for adjacent lines (2 inputs) +# CUSTOM_2_ARY: function for two blocks (2 inputs) +# could be given as transformation of original command or parsed from string representation + +# CUSTOM_N_ARY: function for all blocks (multiple inputs) +# as for 2 inputs but some way to specify 
additional inputs, +# hard-coded appended to operand list? currently no use case anyway... + + +class AggregatorSpec(ABC): + def __init__( + self, + kind: AggregatorKindEnum, + # spec_agg_cmd_name: str, + # for now, we keep everything in operand list as it is but substitute streaming input and output + is_implemented: bool = False, + ) -> None: + self.kind: AggregatorKindEnum = kind + # self.spec_agg_cmd_name: str = spec_agg_cmd_name # for the rest, it should be specified + # self.flag_option_list_transformer: TransformerFlagOptionList = \ + # TransformerFlagOptionList.return_transformer_empty_if_none_else_itself(flag_option_list_transformer) + self.is_implemented = is_implemented + + def __eq__(self, other) -> bool: + return standard_eq(self, other) + + def __repr__(self) -> str: + return standard_repr(self) + + def is_aggregator_spec_concatenate(self): + return self.kind == AggregatorKindEnum.CONCATENATE + + def is_aggregator_spec_adj_lines_merge(self): + return self.kind == AggregatorKindEnum.ADJ_LINES_MERGE + + def is_aggregator_spec_adj_lines_seq(self): + return self.kind == AggregatorKindEnum.ADJ_LINES_SEQ + + def is_aggregator_spec_adj_lines_func(self): + return self.kind == AggregatorKindEnum.ADJ_LINES_FUNC + + def is_aggregator_spec_custom_2_ary(self): + return self.kind == AggregatorKindEnum.CUSTOM_2_ARY + + def is_aggregator_spec_custom_n_ary(self): + return self.kind == AggregatorKindEnum.CUSTOM_N_ARY + + # Spec shall be hold by PaSh and once needed, gets actual aggregator from this function + # return value None if it is not yet implemented + # PaSh ought to provide the correct input based on the kind of aggregator, e.g., line + # for CONCATENATE and CUSTOM_N_ARY, we need to provide the number of inputs to give back + @abstractmethod + def get_aggregator( + self, + original_cmd_invocation: CommandInvocationWithIOVars, + inputs_from: List[Union[IOVar, ArgStringType]], + # ArgStringType needed for typing, only IOVar provided + output_to: IOVar, + ) -> 
Optional[Aggregator]: + pass + + @abstractmethod + def get_actual_2_ary_aggregator_with_aux( + self, + fst_normal_input: FileNameOrStdDescriptor, + fst_aux_inputs_from: List[FileNameOrStdDescriptor], + snd_normal_input: FileNameOrStdDescriptor, + snd_aux_inputs_from: List[FileNameOrStdDescriptor], + output_to: FileNameOrStdDescriptor, + aux_outputs_to: List[FileNameOrStdDescriptor], + ): + pass + + def get_kind(self): + return self.kind + + +def make_aggregator_spec_concatenate() -> AggregatorSpec: + return AggregatorSpecNonFunc(AggregatorKindEnum.CONCATENATE, is_implemented=True) + + +def make_aggregator_spec_adj_lines_merge() -> AggregatorSpec: + return AggregatorSpecNonFunc( + AggregatorKindEnum.ADJ_LINES_MERGE, is_implemented=False + ) + + +def make_aggregator_spec_adj_lines_seq() -> AggregatorSpec: + return AggregatorSpecNonFunc(AggregatorKindEnum.ADJ_LINES_SEQ, is_implemented=False) + + +def make_aggregator_spec_adj_lines_func_from_cmd_inv_with_transformers( + flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, + # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, + is_implemented: bool = False, +) -> AggregatorSpec: + return AggregatorSpecFuncTransformer( + kind=AggregatorKindEnum.ADJ_LINES_FUNC, + flag_option_list_transformer=flag_option_list_transformer, + # pos_config_list_transformer=pos_config_list_transformer, + is_implemented=is_implemented, + ) + + +def make_aggregator_spec_adj_lines_func_from_string_representation( + cmd_inv_as_str: str, is_implemented: bool = False +) -> AggregatorSpec: + return AggregatorSpecFuncStringRepresentation( + kind=AggregatorKindEnum.ADJ_LINES_FUNC, + cmd_inv_as_str=cmd_inv_as_str, + is_implemented=is_implemented, + ) + + +def make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( + flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, + # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, + is_implemented: bool = False, +) -> 
AggregatorSpec: + return AggregatorSpecFuncTransformer( + kind=AggregatorKindEnum.CUSTOM_2_ARY, + flag_option_list_transformer=flag_option_list_transformer, + # pos_config_list_transformer=pos_config_list_transformer, + is_implemented=is_implemented, + ) + + +def make_aggregator_spec_custom_2_ary_from_string_representation( + cmd_inv_as_str: str, is_implemented: bool = False +) -> AggregatorSpec: + return AggregatorSpecFuncStringRepresentation( + kind=AggregatorKindEnum.CUSTOM_2_ARY, + cmd_inv_as_str=cmd_inv_as_str, + is_implemented=is_implemented, + ) + + +def make_aggregator_spec_custom_n_ary_from_cmd_inv_with_transformers( + flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, + # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, + is_implemented: bool = False, +) -> AggregatorSpec: + return AggregatorSpecFuncTransformer( + kind=AggregatorKindEnum.CUSTOM_N_ARY, + flag_option_list_transformer=flag_option_list_transformer, + # pos_config_list_transformer=pos_config_list_transformer, + is_implemented=is_implemented, + ) + + +def make_aggregator_spec_custom_n_ary_from_string_representation( + cmd_inv_as_str: str, is_implemented: bool = False +) -> AggregatorSpec: + return AggregatorSpecFuncStringRepresentation( + kind=AggregatorKindEnum.CUSTOM_N_ARY, + cmd_inv_as_str=cmd_inv_as_str, + is_implemented=is_implemented, + ) + + +def return_aggregator_conc_if_none_else_itself( + arg: Optional[AggregatorSpec], +) -> AggregatorSpec: + return return_default_if_none_else_itself(arg, make_aggregator_spec_concatenate()) + + +class AggregatorSpecNonFunc(AggregatorSpec): + def __init__( + self, + kind: Literal[ + AggregatorKindEnum.CONCATENATE, + AggregatorKindEnum.ADJ_LINES_MERGE, + AggregatorKindEnum.ADJ_LINES_SEQ, + ], + is_implemented: bool, + ) -> None: + AggregatorSpec.__init__(self, kind, is_implemented) + + def get_aggregator( + self, + original_cmd_invocation: CommandInvocationWithIOVars, + inputs_from: List[Union[IOVar, 
ArgStringType]], + # ArgStringType needed for typing, only IOVar provided + output_to: IOVar, + ) -> Optional[Aggregator]: + if not self.is_implemented: + return None + if self.kind == AggregatorKindEnum.CONCATENATE: + cmd_inv_cat = ( + CommandInvocationWithIOVars.make_cat_command_invocation_with_io_vars( + inputs_from, output_to + ) + ) + return Aggregator.make_aggregator_from_cmd_inv_with_io( + cmd_inv_cat, self.kind + ) + elif self.kind == AggregatorKindEnum.ADJ_LINES_MERGE: + assert len(inputs_from) == 1 + assert False + # TODO + # tr -d '\n' | sed '$a\' seems to do the job -> @KK: Can we join this in one command so no sequence of commands? + return None + elif self.kind == AggregatorKindEnum.ADJ_LINES_SEQ: + assert len(inputs_from) == 1 + assert False + # TODO + return None + + def get_actual_2_ary_aggregator_with_aux( + self, + fst_normal_input: FileNameOrStdDescriptor, + fst_aux_inputs_from: List[FileNameOrStdDescriptor], + snd_normal_input: FileNameOrStdDescriptor, + snd_aux_inputs_from: List[FileNameOrStdDescriptor], + output_to: FileNameOrStdDescriptor, + aux_outputs_to: List[FileNameOrStdDescriptor], + ): + raise Exception( + "Auxiliary information from pash_annotations.annotation_generation.datatypes.parallelizability.mapper to aggregator only supported for aggregators given as string" + ) + + +class AggregatorSpecFuncTransformer(AggregatorSpec): + def __init__( + self, + kind: Literal[ + AggregatorKindEnum.ADJ_LINES_FUNC, + AggregatorKindEnum.CUSTOM_2_ARY, + AggregatorKindEnum.CUSTOM_N_ARY, + ], + flag_option_list_transformer: Optional[ + TransformerFlagOptionList + ] = None, # None translates to same as seq + # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, + is_implemented: bool = False, + ) -> None: + AggregatorSpec.__init__(self, kind, is_implemented) + # for now, we keep everything in operand list as it is but substitute streaming input and output + self.flag_option_list_transformer: TransformerFlagOptionList = ( + 
return_transformer_flagoption_list_same_as_seq_if_none_else_itself( + flag_option_list_transformer + ) + ) + + def get_aggregator( + self, + original_cmd_invocation: CommandInvocationWithIOVars, + inputs_from: List[Union[IOVar, ArgStringType]], + # ArgStringType needed for typing, only IOVar provided + output_to: IOVar, + ) -> Optional[Aggregator]: + if not self.is_implemented: + return None + # sanity checks + if self.kind == AggregatorKindEnum.ADJ_LINES_FUNC: + assert len(inputs_from) == 1 + elif self.kind == AggregatorKindEnum.CUSTOM_2_ARY: + assert len(inputs_from) == 2 + aggregator_cmd_inv = deepcopy(original_cmd_invocation) + aggregator_cmd_inv.flag_option_list = self.flag_option_list_transformer.get_flag_option_list_after_transformer_application( + original_cmd_invocation.flag_option_list + ) + # access map modifications + # Hard-coded how to provide input and get output -> TODO: move to spec + aggregator_cmd_inv.remove_streaming_inputs() + aggregator_cmd_inv.operand_list = inputs_from + for input_id in inputs_from: + assert not input_id in aggregator_cmd_inv.access_map and isinstance( + input_id, IOVar + ) + aggregator_cmd_inv.access_map[input_id] = make_stream_input() + aggregator_cmd_inv.replace_var( + aggregator_cmd_inv.implicit_use_of_streaming_output, output_to + ) + return Aggregator.make_aggregator_from_cmd_inv_with_io( + aggregator_cmd_inv, self.kind + ) + + def get_actual_2_ary_aggregator_with_aux( + self, + fst_normal_input: FileNameOrStdDescriptor, + fst_aux_inputs_from: List[FileNameOrStdDescriptor], + snd_normal_input: FileNameOrStdDescriptor, + snd_aux_inputs_from: List[FileNameOrStdDescriptor], + output_to: FileNameOrStdDescriptor, + aux_outputs_to: List[FileNameOrStdDescriptor], + ): + raise Exception( + "Auxiliary information from pash_annotations.annotation_generation.datatypes.parallelizability.mapper to aggregator only supported for aggregators given as string" + ) + + +class AggregatorSpecFuncStringRepresentation(AggregatorSpec): + def 
__init__( + self, + kind: Literal[ + AggregatorKindEnum.ADJ_LINES_FUNC, + AggregatorKindEnum.CUSTOM_2_ARY, + AggregatorKindEnum.CUSTOM_N_ARY, + ], + cmd_inv_as_str: str, + is_implemented: bool = False, + ) -> None: + AggregatorSpec.__init__(self, kind, is_implemented) + # for now, we keep everything in operand list as it is but substitute streaming input and output + self.cmd_inv_as_str = cmd_inv_as_str + + def get_aggregator( + self, + original_cmd_invocation: CommandInvocationWithIOVars, + inputs_from: List[Union[IOVar, ArgStringType]], + # ArgStringType needed for typing, only IOVar provided + output_to: IOVar, + ) -> Optional[Aggregator]: + if not self.is_implemented: + return None + agg_cmd_inv = parse(self.cmd_inv_as_str) + # currently, we assume no file names in option arguments + # this is why we do not convert them properly but need to do this trick for typing + # later, if option arguments contain file names, they need to get IOVar from PaSh + new_flagoption_list: List[Union[Flag, OptionWithIOVar]] = [] + for x in agg_cmd_inv.flag_option_list: + assert isinstance(x, Flag) + new_flagoption_list.append(x) + # Assumption: inputs are given as operands and output is stdout + # Assumption: no inputs or outputs given (also as config) since we do not do access map things etc... + access_map = {input_id: make_stream_input() for input_id in inputs_from} + access_map[output_to] = make_stream_output() + agg_cmd_inv_with_io_vars = Aggregator( + kind=self.kind, + access_map=access_map, + cmd_name=agg_cmd_inv.cmd_name, + flag_option_list=new_flagoption_list, + operand_list=inputs_from, + implicit_use_of_streaming_input=None, + implicit_use_of_streaming_output=output_to, + ) + if self.kind == AggregatorKindEnum.ADJ_LINES_FUNC: + assert len(inputs_from) == 1 + # TODO: isn't it 2 here? 
+ raise Exception("case not yet implemented") + elif self.kind == AggregatorKindEnum.CUSTOM_2_ARY: + assert len(inputs_from) == 2 + return agg_cmd_inv_with_io_vars + + def get_actual_2_ary_aggregator_with_aux( + self, + fst_normal_input: IOVar, + fst_aux_inputs_from: List[IOVar], + snd_normal_input: IOVar, + snd_aux_inputs_from: List[IOVar], + output_to: IOVar, + aux_outputs_to: List[IOVar], + ): + assert len(fst_aux_inputs_from) == len(snd_aux_inputs_from) + assert len(fst_aux_inputs_from) == len(aux_outputs_to) + agg_cmd_inv = parse(self.cmd_inv_as_str) + # currently, we assume no file names in option arguments + # this is why we do not convert them properly but need to do this trick for typing + # later, if option arguments contain file names, they need to get IOVar from PaSh + new_flagoption_list: List[Union[Flag, OptionWithIOVar]] = [] + for x in agg_cmd_inv.flag_option_list: + assert isinstance(x, Flag) + new_flagoption_list.append(x) + all_inputs = ( + [fst_normal_input] + + fst_aux_inputs_from + + [snd_normal_input] + + snd_aux_inputs_from + ) + all_outputs = [output_to] + aux_outputs_to + access_map = dict() + for input_id in all_inputs: + access_map[input_id] = make_stream_input() + for output_id in all_outputs: + access_map[output_id] = make_stream_output() + new_operand_list: List[Union[ArgStringType, IOVar]] = [] + # trick for typing... 
+ for x in ( + [fst_normal_input] + + fst_aux_inputs_from + + [snd_normal_input] + + snd_aux_inputs_from + + [output_to] + + aux_outputs_to + ): + new_operand_list.append(x) + agg_cmd_inv_with_io_vars = Aggregator( + kind=self.kind, + access_map=access_map, + cmd_name=agg_cmd_inv.cmd_name, + flag_option_list=new_flagoption_list, + operand_list=new_operand_list, + implicit_use_of_streaming_input=None, + implicit_use_of_streaming_output=None, + ) + return agg_cmd_inv_with_io_vars diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/mapper.py b/pash_annotations/annotation_generation/datatypes/parallelizability/mapper.py new file mode 100644 index 0000000..9a0b200 --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/mapper.py @@ -0,0 +1,54 @@ +from typing import Optional, List, Union + +from pash_annotations.datatypes.basic_datatypes_with_iovar import ( + OptionWithIOVar, + IOVar, +) +from pash_annotations.datatypes.basic_datatypes import Flag, ArgStringType +from pash_annotations.datatypes.command_invocation_with_iovars import ( + CommandInvocationWithIOVars, +) + +from pash_annotations.util_standard import standard_repr, standard_eq + + +class Mapper(CommandInvocationWithIOVars): + # Assumption: 1 streaming input and 1 streaming output, to substitute the (new) input and output + + def __init__( + self, + cmd_name: str, + flag_option_list: List[Union[Flag, OptionWithIOVar]], + operand_list: List[Union[ArgStringType, IOVar]], + implicit_use_of_streaming_input: Optional[IOVar], + implicit_use_of_streaming_output: Optional[IOVar], + access_map, + ) -> None: + CommandInvocationWithIOVars.__init__( + self, + cmd_name, + flag_option_list, + operand_list, + implicit_use_of_streaming_input, + implicit_use_of_streaming_output, + access_map, + ) + + def __eq__(self, other) -> bool: + return standard_eq(self, other) + + def __repr__(self) -> str: + return standard_repr(self) + + @staticmethod + def 
make_same_as_seq_mapper_from_command_invocation( + command_invocation_with_io: CommandInvocationWithIOVars, + ): + return Mapper( + command_invocation_with_io.cmd_name, + command_invocation_with_io.flag_option_list, + command_invocation_with_io.operand_list, + command_invocation_with_io.implicit_use_of_streaming_input, + command_invocation_with_io.implicit_use_of_streaming_output, + command_invocation_with_io.access_map, + ) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/MapperSpec.py b/pash_annotations/annotation_generation/datatypes/parallelizability/mapper_spec.py similarity index 52% rename from pash_annotations/annotation_generation/datatypes/parallelizability/MapperSpec.py rename to pash_annotations/annotation_generation/datatypes/parallelizability/mapper_spec.py index eab2f9d..b49e75e 100644 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/MapperSpec.py +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/mapper_spec.py @@ -2,38 +2,55 @@ from enum import Enum -from pash_annotations.datatypes.AccessKind import make_stream_input, make_stream_output -from pash_annotations.datatypes.BasicDatatypesWithIOVar import IOVar, OptionWithIOVar +from pash_annotations.datatypes.command_invocation_with_iovars import ( + CommandInvocationWithIOVars, +) +from pash_annotations.datatypes.basic_datatypes import Flag +from pash_annotations.datatypes.access_kind import make_stream_input, make_stream_output +from pash_annotations.datatypes.basic_datatypes_with_iovar import ( + IOVar, + OptionWithIOVar, +) + +from pash_annotations.annotation_generation.datatypes.parallelizability.transformer_flag_option_list import ( + TransformerFlagOptionList, + return_transformer_flagoption_list_same_as_seq_if_none_else_itself, + TransformerFlagOptionListCustom, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper import ( + Mapper, +) + from pash_annotations.parser.parser import parse from 
pash_annotations.util_standard import standard_repr, standard_eq - -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars -from pash_annotations.datatypes.BasicDatatypes import FileNameOrStdDescriptor, Flag -from pash_annotations.annotation_generation.datatypes.parallelizability.TransformerFlagOptionList import TransformerFlagOptionList, \ - return_transformer_flagoption_list_same_as_seq_if_none_else_itself, TransformerFlagOptionListCustom -# from annotation_generation.datatypes.parallelizability.TransformerPosConfigList import TransformerPosConfigList -from pash_annotations.annotation_generation.datatypes.parallelizability.Mapper import Mapper from pash_annotations.util import return_default_if_none_else_itself class MapperSpecKindEnum(Enum): - SAME_AS_SEQ = 'same_as_seq' - CUSTOM = 'custom' + SAME_AS_SEQ = "same_as_seq" + CUSTOM = "custom" class MapperSpec: - - def __init__(self, - kind: MapperSpecKindEnum = MapperSpecKindEnum.SAME_AS_SEQ, - spec_mapper_cmd_name: Optional[str] = None, # None translates to original command name - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, # None translates to same as seq transformer - # for now, we keep everything in operand list as it is but substitute streaming input and output - is_implemented: bool = False - ) -> None: + def __init__( + self, + kind: MapperSpecKindEnum = MapperSpecKindEnum.SAME_AS_SEQ, + spec_mapper_cmd_name: Optional[ + str + ] = None, # None translates to original command name + flag_option_list_transformer: Optional[ + TransformerFlagOptionList + ] = None, # None translates to same as seq transformer + # for now, we keep everything in operand list as it is but substitute streaming input and output + is_implemented: bool = False, + ) -> None: self.kind: MapperSpecKindEnum = kind self.spec_mapper_cmd_name: Optional[str] = spec_mapper_cmd_name - self.flag_option_list_transformer: TransformerFlagOptionList = \ - 
return_transformer_flagoption_list_same_as_seq_if_none_else_itself(flag_option_list_transformer) + self.flag_option_list_transformer: TransformerFlagOptionList = ( + return_transformer_flagoption_list_same_as_seq_if_none_else_itself( + flag_option_list_transformer + ) + ) self.is_implemented = is_implemented # sanity check if kind == MapperSpecKindEnum.SAME_AS_SEQ: @@ -45,7 +62,6 @@ def __init__(self, else: raise Exception("unknown kind for MapperSpec") - def __eq__(self, other) -> bool: return standard_eq(self, other) @@ -54,39 +70,53 @@ def __repr__(self) -> str: # Spec shall be hold by PaSh and once needed, gets actual mapper from this function # return value None if it is not yet implemented - def get_mapper(self, - original_cmd_invocation: CommandInvocationWithIOVars, - input_from: IOVar, - output_to: IOVar, - aux_output_tos: List[IOVar] - ) -> Optional[Mapper]: + def get_mapper( + self, + original_cmd_invocation: CommandInvocationWithIOVars, + input_from: IOVar, + output_to: IOVar, + aux_output_tos: List[IOVar], + ) -> Optional[Mapper]: if not self.is_implemented: # this is a handle to specify future mappers without the need to implement them return None if self.kind == MapperSpecKindEnum.SAME_AS_SEQ: - mapper = Mapper.make_same_as_seq_mapper_from_command_invocation(original_cmd_invocation) + mapper = Mapper.make_same_as_seq_mapper_from_command_invocation( + original_cmd_invocation + ) elif self.kind == MapperSpecKindEnum.CUSTOM: - cmd_name = return_default_if_none_else_itself(self.spec_mapper_cmd_name, original_cmd_invocation.cmd_name) + cmd_name = return_default_if_none_else_itself( + self.spec_mapper_cmd_name, original_cmd_invocation.cmd_name + ) flag_option_list = self.flag_option_list_transformer.get_flag_option_list_after_transformer_application( - original_cmd_invocation.flag_option_list) - mapper = Mapper(cmd_name= cmd_name, - flag_option_list= flag_option_list, - operand_list=original_cmd_invocation.operand_list, - implicit_use_of_streaming_input = 
original_cmd_invocation.implicit_use_of_streaming_input, - implicit_use_of_streaming_output = original_cmd_invocation.implicit_use_of_streaming_output, - access_map = original_cmd_invocation.access_map) + original_cmd_invocation.flag_option_list + ) + mapper = Mapper( + cmd_name=cmd_name, + flag_option_list=flag_option_list, + operand_list=original_cmd_invocation.operand_list, + implicit_use_of_streaming_input=original_cmd_invocation.implicit_use_of_streaming_input, + implicit_use_of_streaming_output=original_cmd_invocation.implicit_use_of_streaming_output, + access_map=original_cmd_invocation.access_map, + ) else: raise Exception("MapperSpec with unknown kind!") - if len(aux_output_tos) == 0: # we previously checked that they are as many as specified in the parallelizer - mapper.substitute_inputs_and_outputs_in_cmd_invocation([input_from], [output_to]) + if ( + len(aux_output_tos) == 0 + ): # we previously checked that they are as many as specified in the parallelizer + mapper.substitute_inputs_and_outputs_in_cmd_invocation( + [input_from], [output_to] + ) else: # ASSUMPTION: operands given in that order: input, output, aux_output_1, aux_output_2, ... for operand in mapper.operand_list: - assert(isinstance(operand, IOVar)) + assert isinstance(operand, IOVar) mapper.access_map.pop(operand) # trick for typing... 
new_operand_list = [] - for x in [input_from, output_to] + [aux_output_to for aux_output_to in aux_output_tos]: + for x in [input_from, output_to] + [ + aux_output_to for aux_output_to in aux_output_tos + ]: new_operand_list.append(x) mapper.operand_list = new_operand_list mapper.access_map[input_from] = make_stream_input() @@ -97,32 +127,42 @@ def get_mapper(self, mapper.implicit_use_of_streaming_input = None return mapper + ## factory methods def make_mapper_spec_seq() -> MapperSpec: return MapperSpec(MapperSpecKindEnum.SAME_AS_SEQ, is_implemented=True) + def return_mapper_spec_seq_if_none_else_itself(mapper_spec) -> MapperSpec: return return_default_if_none_else_itself(mapper_spec, make_mapper_spec_seq()) -def make_mapper_spec_custom(spec_mapper_cmd_name: str, - flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, - # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, - num_outputs: int = 1, - is_implemented: bool = False - ) -> MapperSpec: - actual_flag_option_list_transformer: TransformerFlagOptionList = \ - return_transformer_flagoption_list_same_as_seq_if_none_else_itself(flag_option_list_transformer) + +def make_mapper_spec_custom( + spec_mapper_cmd_name: str, + flag_option_list_transformer: Optional[TransformerFlagOptionList] = None, + # pos_config_list_transformer: Optional[TransformerPosConfigList] = None, + num_outputs: int = 1, + is_implemented: bool = False, +) -> MapperSpec: + actual_flag_option_list_transformer: TransformerFlagOptionList = ( + return_transformer_flagoption_list_same_as_seq_if_none_else_itself( + flag_option_list_transformer + ) + ) # actual_pos_config_list_transformer: TransformerPosConfigList = \ # TransformerPosConfigList.return_transformer_same_as_seq_if_none_else_itself(pos_config_list_transformer) - return MapperSpec(MapperSpecKindEnum.CUSTOM, - spec_mapper_cmd_name, - flag_option_list_transformer= actual_flag_option_list_transformer, - # pos_config_list_transformer= 
actual_pos_config_list_transformer, - is_implemented= is_implemented) + return MapperSpec( + MapperSpecKindEnum.CUSTOM, + spec_mapper_cmd_name, + flag_option_list_transformer=actual_flag_option_list_transformer, + # pos_config_list_transformer= actual_pos_config_list_transformer, + is_implemented=is_implemented, + ) + def make_mapper_spec_custom_from_string_representation( - cmd_inv_as_str: str, - is_implemented: bool = False) -> MapperSpec: + cmd_inv_as_str: str, is_implemented: bool = False +) -> MapperSpec: cmd_inv = parse(cmd_inv_as_str) # currently, we assume no file names in option arguments # this is why we do not convert them properly but need to do this trick for typing @@ -132,6 +172,8 @@ def make_mapper_spec_custom_from_string_representation( assert isinstance(x, Flag) new_flagoption_list.append(x) FlagOptionListTransformer = TransformerFlagOptionListCustom(new_flagoption_list) - return make_mapper_spec_custom(spec_mapper_cmd_name=cmd_inv.cmd_name, - flag_option_list_transformer=FlagOptionListTransformer, - is_implemented=is_implemented) + return make_mapper_spec_custom( + spec_mapper_cmd_name=cmd_inv.cmd_name, + flag_option_list_transformer=FlagOptionListTransformer, + is_implemented=is_implemented, + ) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/parallelizer.py b/pash_annotations/annotation_generation/datatypes/parallelizability/parallelizer.py new file mode 100644 index 0000000..7e3d5da --- /dev/null +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/parallelizer.py @@ -0,0 +1,203 @@ +from enum import Enum +from typing import Optional, List, Union + +from copy import deepcopy + +from pash_annotations.datatypes.command_invocation_with_iovars import ( + CommandInvocationWithIOVars, +) +from pash_annotations.datatypes.basic_datatypes_with_iovar import IOVar +from pash_annotations.datatypes.basic_datatypes import ( + FileNameOrStdDescriptor, + ArgStringType, +) + +from 
pash_annotations.annotation_generation.datatypes.parallelizability.mapper import ( + Mapper, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator import ( + Aggregator, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + MapperSpec, + return_mapper_spec_seq_if_none_else_itself, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + AggregatorSpec, + return_aggregator_conc_if_none_else_itself, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + Splitter, + make_splitter_consec_chunks, + make_splitter_indiv_files, + make_splitter_round_robin, + make_splitter_round_robin_with_unwrap, +) + +from pash_annotations.util_standard import standard_eq +from pash_annotations.util import return_default_if_none_else_itself + + +# this will probably become its own class with more information later +class AdditionalInfoSplitterToMapper(Enum): + NO_ADD_INPUT = "no_add_input" + LINE_NUM_OFFSET = "line_num_offset" + BYTE_OFFSET = "byte_offset" + LINE_NUM_AND_BYTE_OFFSET = "line_num_and_byte_offset" + + +class Parallelizer: + def __init__( + self, + splitter: Splitter, + # , we only store MapperSpec and AggregatorSpec, actual ones to be retrieved with CMDInvPref in Pash + core_mapper_spec: MapperSpec, + core_aggregator_spec: AggregatorSpec, + info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper], + info_mapper_aggregator: int # the number of pipes to connect + # if this is + ) -> None: + self.splitter: Splitter = splitter + self.core_mapper_spec: MapperSpec = core_mapper_spec + self.core_aggregator_spec: AggregatorSpec = core_aggregator_spec + self.info_splitter_mapper: AdditionalInfoSplitterToMapper = ( + return_default_if_none_else_itself( + info_splitter_mapper, AdditionalInfoSplitterToMapper.NO_ADD_INPUT + ) + ) + self.info_mapper_aggregator = info_mapper_aggregator + # sanity check that round robin is 
only applied with following aggregators: + if self.splitter.is_splitter_round_robin(): + assert ( + self.core_aggregator_spec.is_aggregator_spec_concatenate() + or self.core_aggregator_spec.is_aggregator_spec_adj_lines_merge() + or self.core_aggregator_spec.is_aggregator_spec_adj_lines_seq() + or self.core_aggregator_spec.is_aggregator_spec_adj_lines_func() + ) + + def __eq__(self, other) -> bool: + return standard_eq(self, other) + + def __repr__(self) -> str: + return ( + f"Parallizer: \n" + f"splitter: {self.splitter} \n" + f"mapper attr: {self.core_mapper_spec} \n" + f"aggregator attr: {self.core_aggregator_spec} \n" + ) + + def get_splitter(self) -> Splitter: + return self.splitter + + def get_mapper_spec(self) -> MapperSpec: + return self.core_mapper_spec + + def get_actual_mapper( + self, + cmd_invocation: CommandInvocationWithIOVars, + input_from: IOVar, + output_to: IOVar, + aux_output_tos: List[IOVar], + ) -> Optional[Mapper]: + assert len(aux_output_tos) == self.info_mapper_aggregator + return self.core_mapper_spec.get_mapper( + cmd_invocation, input_from, output_to, aux_output_tos + ) + + def get_aggregator_spec(self) -> AggregatorSpec: + return self.core_aggregator_spec + + def get_actual_aggregator( + self, + cmd_invocation: CommandInvocationWithIOVars, + inputs_from: List[Union[IOVar, ArgStringType]], + # ArgStringType needed for typing, only IOVar provided + output_to: IOVar, + ) -> Optional[Aggregator]: + return self.core_aggregator_spec.get_aggregator( + cmd_invocation, inputs_from, output_to + ) + + def get_actual_2_ary_aggregator_with_aux( + self, + fst_normal_input: FileNameOrStdDescriptor, + fst_aux_inputs_from: List[FileNameOrStdDescriptor], + snd_normal_input: FileNameOrStdDescriptor, + snd_aux_inputs_from: List[FileNameOrStdDescriptor], + output_to: FileNameOrStdDescriptor, + aux_outputs_to: List[FileNameOrStdDescriptor], + ): + return self.core_aggregator_spec.get_actual_2_ary_aggregator_with_aux( + fst_normal_input, + fst_aux_inputs_from, 
+ snd_normal_input, + snd_aux_inputs_from, + output_to, + aux_outputs_to, + ) + + def get_info_mapper_aggregator(self) -> int: + return self.info_mapper_aggregator + + def are_all_parts_implemented(self): + return ( + self.core_mapper_spec.is_implemented + and self.core_aggregator_spec.is_implemented + ) + + +def make_parallelizer_indiv_files( + mapper_spec: Optional[MapperSpec] = None, + aggregator_spec: Optional[AggregatorSpec] = None, + info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper] = None, + info_mapper_aggregator: int = 0, +) -> Parallelizer: + mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) + aggregator_spec = return_aggregator_conc_if_none_else_itself(aggregator_spec) + return Parallelizer( + make_splitter_indiv_files(), + mapper_spec, + aggregator_spec, + info_splitter_mapper, + info_mapper_aggregator, + ) + + +def make_parallelizer_round_robin( + mapper_spec: Optional[MapperSpec] = None, + aggregator_spec: Optional[AggregatorSpec] = None, + info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper] = None, + info_mapper_aggregator: int = 0, +) -> Parallelizer: + mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) + aggregator_spec = return_aggregator_conc_if_none_else_itself(aggregator_spec) + return Parallelizer( + make_splitter_round_robin(), + mapper_spec, + aggregator_spec, + info_splitter_mapper, + info_mapper_aggregator, + ) + + +def make_parallelizer_round_robin_with_unwrap_from_other(parallelizer): + new_parallelizer = deepcopy(parallelizer) + new_parallelizer.splitter = make_splitter_round_robin_with_unwrap() + return new_parallelizer + + +def make_parallelizer_consec_chunks( + mapper_spec: Optional[MapperSpec] = None, + aggregator_spec: Optional[AggregatorSpec] = None, + info_splitter_mapper: Optional[AdditionalInfoSplitterToMapper] = None, + info_mapper_aggregator: int = 0, +) -> Parallelizer: + mapper_spec = return_mapper_spec_seq_if_none_else_itself(mapper_spec) + aggregator_spec = 
return_aggregator_conc_if_none_else_itself(aggregator_spec) + return Parallelizer( + make_splitter_consec_chunks(), + mapper_spec, + aggregator_spec, + info_splitter_mapper, + info_mapper_aggregator, + ) diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/Splitter.py b/pash_annotations/annotation_generation/datatypes/parallelizability/splitter.py similarity index 97% rename from pash_annotations/annotation_generation/datatypes/parallelizability/Splitter.py rename to pash_annotations/annotation_generation/datatypes/parallelizability/splitter.py index 6d5930d..7194c0a 100644 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/Splitter.py +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/splitter.py @@ -9,7 +9,6 @@ class SplitterKindEnum(Enum): class Splitter: - def __init__(self, kind: SplitterKindEnum) -> None: self.kind = kind @@ -17,7 +16,7 @@ def __eq__(self, other) -> bool: return self.kind == other.kind def __repr__(self) -> str: - return f'{self.kind}' + return f"{self.kind}" def is_splitter_round_robin(self) -> bool: return self.kind == SplitterKindEnum.ROUND_ROBIN_PLAIN @@ -28,16 +27,19 @@ def is_splitter_round_robin_with_unwrap_flag(self) -> bool: def is_splitter_consec_chunks(self) -> bool: return self.kind == SplitterKindEnum.CONSEC_CHUNKS + # currently not used def make_splitter_indiv_files() -> Splitter: return Splitter(SplitterKindEnum.INDIV_FILES) + def make_splitter_round_robin() -> Splitter: return Splitter(SplitterKindEnum.ROUND_ROBIN_PLAIN) + def make_splitter_round_robin_with_unwrap() -> Splitter: return Splitter(SplitterKindEnum.ROUND_ROBIN_UNWRAP_FLAG) + def make_splitter_consec_chunks() -> Splitter: return Splitter(SplitterKindEnum.CONSEC_CHUNKS) - diff --git a/pash_annotations/annotation_generation/datatypes/parallelizability/TransformerFlagOptionList.py b/pash_annotations/annotation_generation/datatypes/parallelizability/transformer_flag_option_list.py similarity index 
52% rename from pash_annotations/annotation_generation/datatypes/parallelizability/TransformerFlagOptionList.py rename to pash_annotations/annotation_generation/datatypes/parallelizability/transformer_flag_option_list.py index 89f7d89..6226ff7 100644 --- a/pash_annotations/annotation_generation/datatypes/parallelizability/TransformerFlagOptionList.py +++ b/pash_annotations/annotation_generation/datatypes/parallelizability/transformer_flag_option_list.py @@ -1,9 +1,9 @@ from typing import Optional, List, Union -from pash_annotations.datatypes.BasicDatatypes import Flag from abc import ABC, abstractmethod -from pash_annotations.datatypes.BasicDatatypesWithIOVar import OptionWithIOVar +from pash_annotations.datatypes.basic_datatypes import Flag +from pash_annotations.datatypes.basic_datatypes_with_iovar import OptionWithIOVar from pash_annotations.util_standard import standard_repr, standard_eq from pash_annotations.util import foldl @@ -19,7 +19,6 @@ class TransformerFlagOptionList(ABC): - def __eq__(self, other) -> bool: return standard_eq(self, other) @@ -27,113 +26,137 @@ def __repr__(self) -> str: return standard_repr(self) @abstractmethod - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: pass -def return_transformer_flagoption_list_empty_if_none_else_itself(arg: Optional[TransformerFlagOptionList]) \ - -> TransformerFlagOptionList: + +def return_transformer_flagoption_list_empty_if_none_else_itself( + arg: Optional[TransformerFlagOptionList], +) -> TransformerFlagOptionList: if arg is None: return make_transformer_empty() else: return arg -def return_transformer_flagoption_list_same_as_seq_if_none_else_itself(arg: Optional[TransformerFlagOptionList]) \ - -> 
TransformerFlagOptionList: + +def return_transformer_flagoption_list_same_as_seq_if_none_else_itself( + arg: Optional[TransformerFlagOptionList], +) -> TransformerFlagOptionList: if arg is None: return make_transformer_same_as_seq() else: return arg -def apply_individual_transformer_flagoption_list(transformer: TransformerFlagOptionList, - current_list: List[Union[Flag, OptionWithIOVar]] - ) -> List[Union[Flag, OptionWithIOVar]]: + +def apply_individual_transformer_flagoption_list( + transformer: TransformerFlagOptionList, + current_list: List[Union[Flag, OptionWithIOVar]], +) -> List[Union[Flag, OptionWithIOVar]]: return transformer.get_flag_option_list_after_transformer_application(current_list) + # TODO: retrieve access info for option arguments from man-page-file -class TransformerFlagOptionListSeq(TransformerFlagOptionList): +class TransformerFlagOptionListSeq(TransformerFlagOptionList): def __init__(self) -> None: pass - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: return original_flag_option_list -class TransformerFlagOptionListAdd(TransformerFlagOptionList): +class TransformerFlagOptionListAdd(TransformerFlagOptionList): def __init__(self, list_to_add: List[Union[Flag, OptionWithIOVar]]) -> None: self.list_to_add: List[Union[Flag, OptionWithIOVar]] = list_to_add - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: - list_of_flagoptions_without_the_ones_in_original_one = [flagoption - for flagoption in self.list_to_add - if flagoption not in original_flag_option_list] - return original_flag_option_list + 
list_of_flagoptions_without_the_ones_in_original_one + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: + list_of_flagoptions_without_the_ones_in_original_one = [ + flagoption + for flagoption in self.list_to_add + if flagoption not in original_flag_option_list + ] + return ( + original_flag_option_list + + list_of_flagoptions_without_the_ones_in_original_one + ) -class TransformerFlagOptionListRemove(TransformerFlagOptionList): +class TransformerFlagOptionListRemove(TransformerFlagOptionList): def __init__(self, list_to_remove: List[str]) -> None: self.list_to_remove = list_to_remove - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: - return [flagoption for flagoption in original_flag_option_list - if flagoption.get_name() not in self.list_to_remove] + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: + return [ + flagoption + for flagoption in original_flag_option_list + if flagoption.get_name() not in self.list_to_remove + ] -class TransformerFlagOptionListFilter(TransformerFlagOptionList): +class TransformerFlagOptionListFilter(TransformerFlagOptionList): def __init__(self, list_filter: List[str]) -> None: self.list_filter = list_filter - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: - return [flagoption for flagoption in original_flag_option_list - if flagoption.get_name() in self.list_filter] + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: + return [ + flagoption + 
for flagoption in original_flag_option_list + if flagoption.get_name() in self.list_filter + ] -class TransformerFlagOptionListEmpty(TransformerFlagOptionList): +class TransformerFlagOptionListEmpty(TransformerFlagOptionList): def __init__(self) -> None: pass - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: return [] -class TransformerFlagOptionListCustom(TransformerFlagOptionList): +class TransformerFlagOptionListCustom(TransformerFlagOptionList): def __init__(self, list_custom: List[Union[Flag, OptionWithIOVar]]) -> None: self.list_custom = list_custom - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]]) \ - -> List[Union[Flag, OptionWithIOVar]]: + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: return self.list_custom + ## factory methods to hide details for API def make_transformer_same_as_seq() -> TransformerFlagOptionList: return TransformerFlagOptionListSeq() + def make_transformer_add(list_to_add) -> TransformerFlagOptionList: return TransformerFlagOptionListAdd(list_to_add) + def make_transformer_remove(list_to_remove) -> TransformerFlagOptionList: return TransformerFlagOptionListRemove(list_to_remove) + def make_transformer_empty() -> TransformerFlagOptionList: return TransformerFlagOptionListEmpty() + def make_transformer_filter(list_to_filter) -> TransformerFlagOptionList: return TransformerFlagOptionListFilter(list_to_filter) + def make_transformer_custom(list_custom) -> TransformerFlagOptionList: return TransformerFlagOptionListCustom(list_custom) @@ -141,12 +164,14 @@ 
def make_transformer_custom(list_custom) -> TransformerFlagOptionList: # a class which allows to chain multiple Transformers # implements the same method as the others for uniformity class ChainTransformerFlagOptionList(TransformerFlagOptionList): - def __init__(self, list_transformers) -> None: self.list_transformers = list_transformers - def get_flag_option_list_after_transformer_application(self, - original_flag_option_list: List[Union[Flag, OptionWithIOVar]] - ) -> List[Union[Flag, OptionWithIOVar]]: - return foldl(apply_individual_transformer_flagoption_list, original_flag_option_list, self.list_transformers) - + def get_flag_option_list_after_transformer_application( + self, original_flag_option_list: List[Union[Flag, OptionWithIOVar]] + ) -> List[Union[Flag, OptionWithIOVar]]: + return foldl( + apply_individual_transformer_flagoption_list, + original_flag_option_list, + self.list_transformers, + ) diff --git a/pash_annotations/annotation_generation/datatypes/ParallelizabilityInfo.py b/pash_annotations/annotation_generation/datatypes/parallelizability_info.py similarity index 56% rename from pash_annotations/annotation_generation/datatypes/ParallelizabilityInfo.py rename to pash_annotations/annotation_generation/datatypes/parallelizability_info.py index c329b81..bb2459b 100644 --- a/pash_annotations/annotation_generation/datatypes/ParallelizabilityInfo.py +++ b/pash_annotations/annotation_generation/datatypes/parallelizability_info.py @@ -1,20 +1,28 @@ -from pash_annotations.util_standard import standard_repr from typing import List, Optional, Tuple -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer, \ - make_parallelizer_round_robin_with_unwrap_from_other +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, + make_parallelizer_round_robin_with_unwrap_from_other, +) + +from pash_annotations.util_standard import standard_repr from 
pash_annotations.util import return_empty_list_if_none_else_itself -class ParallelizabilityInfo: - def __init__(self, - parallelizer_list: Optional[List[Parallelizer]] = None, # None translates to empty list - # TODO: remove RR_comp_with_cat - round_robin_compatible_with_cat: bool = False, - is_commutative: bool = False - ) -> None: +class ParallelizabilityInfo: + def __init__( + self, + parallelizer_list: Optional[ + List[Parallelizer] + ] = None, # None translates to empty list + # TODO: remove RR_comp_with_cat + round_robin_compatible_with_cat: bool = False, + is_commutative: bool = False, + ) -> None: # TODO: remove direct uses of parallelizer list and use getter function which infers additional ones - self.parallelizer_list = return_empty_list_if_none_else_itself(parallelizer_list) + self.parallelizer_list = return_empty_list_if_none_else_itself( + parallelizer_list + ) self.round_robin_compatible_with_cat = round_robin_compatible_with_cat self.is_commutative = is_commutative @@ -31,9 +39,18 @@ def get_inferred_parallelizer_list(self): # add round robin with unwrap parallelizer for commutative command parallelizer_list = self.parallelizer_list for parallelizer in self.parallelizer_list: - if self.is_commutative and parallelizer.splitter.is_splitter_consec_chunks(): - parallelizer_list.append(make_parallelizer_round_robin_with_unwrap_from_other(parallelizer)) + if ( + self.is_commutative + and parallelizer.splitter.is_splitter_consec_chunks() + ): + parallelizer_list.append( + make_parallelizer_round_robin_with_unwrap_from_other(parallelizer) + ) return parallelizer_list def unpack_info(self) -> Tuple[List[Parallelizer], bool, bool]: - return self.get_inferred_parallelizer_list(), self.round_robin_compatible_with_cat, self.is_commutative + return ( + self.get_inferred_parallelizer_list(), + self.round_robin_compatible_with_cat, + self.is_commutative, + ) diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_cat.py 
b/pash_annotations/annotation_generation/tests/test_ann_gen_cat.py index 1c506c9..ff71c20 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_cat.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_cat.py @@ -1,121 +1,188 @@ from typing import List, Optional from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "cat" + def test_cat_1() -> None: args: List[FlagOption] = [make_arg_simple(["-b"]), make_arg_simple(["-e"])] - operands: 
List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None + def test_cat_2() -> None: args = [] - operands = [Operand("in1.txt"), - Operand("-"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands = 
[Operand("in1.txt"), Operand("-"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 3 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_cat_3() -> None: args = [make_arg_simple(["-n"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = 
CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_cat_4() -> None: args = [make_arg_simple(["-n"]), make_arg_simple(["-s"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, 
cmd_inv.flag_option_list, []) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_cat_5() -> None: args = [make_arg_simple(["-s"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = 
AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_comm.py b/pash_annotations/annotation_generation/tests/test_ann_gen_comm.py index 476f3c8..409b73c 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_comm.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_comm.py @@ -1,47 +1,68 @@ from typing import Optional from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from 
pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.datatypes.basic_datatypes import Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "comm" def test_comm_1() -> None: args = [make_arg_simple(["-1"]), make_arg_simple(["-2"])] - operands = [Operand("tocomm1.txt"), - Operand("tocomm2.txt")] + operands = [Operand("tocomm1.txt"), Operand("tocomm2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 
assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_comm_2() -> None: args = [] # illegal to have more than 2 files to compare - operands = [Operand("tocomm1.txt"), - Operand("tocomm2.txt"), - Operand("tocomm3.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands = [Operand("tocomm1.txt"), Operand("tocomm2.txt"), Operand("tocomm3.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is None diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_cut.py b/pash_annotations/annotation_generation/tests/test_ann_gen_cut.py index 78cb7d8..7a3611f 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_cut.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_cut.py @@ -1,22 +1,40 @@ from typing import List, Optional from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from 
pash_annotations.datatypes.BasicDatatypesWithIO import \ - make_stdout_with_access_output, make_stdin_with_access_stream_input -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import make_splitter_round_robin, \ - make_splitter_indiv_files, make_splitter_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import make_mapper_spec_seq -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import AggregatorSpec, \ - make_aggregator_spec_concatenate +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, + make_stdin_with_access_stream_input, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, +) +from 
pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + make_splitter_round_robin, + make_splitter_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_seq, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + AggregatorSpec, + make_aggregator_spec_concatenate, +) -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "cut" @@ -25,23 +43,35 @@ def test_cut_1() -> None: args: List[FlagOption] = [] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert 
len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -59,21 +89,37 @@ def test_cut_1() -> None: def test_cut_2() -> None: args = [make_arg_simple(["-z"])] operands = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert 
len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_grep.py b/pash_annotations/annotation_generation/tests/test_ann_gen_grep.py index 05d4bbe..be697c9 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_grep.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_grep.py @@ -1,45 +1,81 @@ -from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, ArgStringType, Operand, FileName -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from 
pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer, AdditionalInfoSplitterToMapper -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import \ - make_splitter_round_robin, make_splitter_indiv_files, make_splitter_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import \ - make_mapper_spec_custom, make_mapper_spec_seq -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_concatenate, make_aggregator_spec_custom_2_ary_from_string_representation - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration + +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import ( + FlagOption, + Operand, + FileName, + ArgStringType, +) +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + make_splitter_round_robin, + make_splitter_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + 
make_mapper_spec_seq, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_concatenate, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "grep" def test_grep_1() -> None: - args: List[FlagOption] = [make_arg_simple(["-L"]), make_arg_simple(["-f", FileName("dict.txt")])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + args: List[FlagOption] = [ + make_arg_simple(["-L"]), + make_arg_simple(["-f", FileName("dict.txt")]), + ] + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + 
cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 0 # currently not implemented and thus not added to parallelizer_list # parallelizer1: Parallelizer = para_info.parallelizer_list[0] @@ -58,26 +94,40 @@ def test_grep_1() -> None: def test_grep_2() -> None: - args: List[FlagOption] = [make_arg_simple(["-f", FileName("dict.txt")]), - make_arg_simple(["-e", "*"]), - make_arg_simple(["-b"])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + args: List[FlagOption] = [ + make_arg_simple(["-f", FileName("dict.txt")]), + make_arg_simple(["-e", "*"]), + make_arg_simple(["-b"]), + ] + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + 
io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 0 # currently not implemented and thus not added to parallelizer_list # parallelizer1: Parallelizer = para_info.parallelizer_list[0] @@ -96,26 +146,40 @@ def test_grep_2() -> None: def test_grep_3() -> None: - args = [make_arg_simple(["-f", FileName("dict.txt")]), - make_arg_simple(["-e", ArgStringType("*")]), - make_arg_simple(["-f", FileName("dict2.txt")])] - operands = [Operand("in1.txt"), - Operand("-")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + args = [ + make_arg_simple(["-f", FileName("dict.txt")]), + make_arg_simple(["-e", ArgStringType("*")]), + make_arg_simple(["-f", FileName("dict2.txt")]), + ] + operands = [Operand("in1.txt"), Operand("-")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: 
Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -129,29 +193,42 @@ def test_grep_3() -> None: def test_grep_4() -> None: - args = [make_arg_simple(["-f", FileName("dict.txt")]), - make_arg_simple(["-e", ArgStringType("*")]), - make_arg_simple(["-f", FileName("dict2.txt")]), - make_arg_simple(["-n"]), - make_arg_simple(["-b"])] - operands = [Operand("in1.txt"), - Operand("in2.txt"), - Operand("in3.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + 
args = [ + make_arg_simple(["-f", FileName("dict.txt")]), + make_arg_simple(["-e", ArgStringType("*")]), + make_arg_simple(["-f", FileName("dict2.txt")]), + make_arg_simple(["-n"]), + make_arg_simple(["-b"]), + ] + operands = [Operand("in1.txt"), Operand("in2.txt"), Operand("in3.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 3 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 0 # currently not implemented and thus not added to parallelizer_list # parallelizer1: Parallelizer = para_info.parallelizer_list[0] @@ -170,16 +247,20 @@ def 
test_grep_4() -> None: def test_grep_5() -> None: args = [make_arg_simple(["-q"]), make_arg_simple(["-s"])] - operands = [Operand("*"), - Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands = [Operand("*"), Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 @@ -187,9 +268,12 @@ def test_grep_5() -> None: assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 0 + # test case removed for now (has to do with the case analysis on what to do when no file is given) # def test_grep_6() -> None: # args: List[FlagOption] = [] @@ -198,7 +282,7 @@ def test_grep_5() -> None: # cmd_inv_pref: 
CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) # # # IO Info -# io_info: InputOutputInfo = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) +# io_info: InputOutputInfo = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) # cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) # print(cmd_inv_with_io.operand_list) # assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 @@ -208,7 +292,7 @@ def test_grep_5() -> None: # assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() # # # Parallelizability Info -# para_info: ParallelizabilityInfo = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) +# para_info: ParallelizabilityInfo = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) # assert len(para_info.parallelizer_list) == 2 # parallelizer1: Parallelizer = para_info.parallelizer_list[0] # parallelizer2: Parallelizer = para_info.parallelizer_list[1] diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_head.py b/pash_annotations/annotation_generation/tests/test_ann_gen_head.py index 1babe34..a8eb736 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_head.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_head.py @@ -1,60 +1,96 @@ -from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from 
pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator + cmd_name = "head" # TODO: individual file splitter needs the header here, where else? 
+ def test_head_1() -> None: args: List[FlagOption] = [make_arg_simple(["-q"])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_head_2() -> None: args: 
List[FlagOption] = [make_arg_simple(["--version"])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("-"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("-"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 3 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # assert not io_info.multiple_inputs_possible # changes the result due to headers # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = 
AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_mv.py b/pash_annotations/annotation_generation/tests/test_ann_gen_mv.py index 64bdd03..4182521 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_mv.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_mv.py @@ -1,35 +1,41 @@ -from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, FileName, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import StdDescriptorWithIOInfo -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer, AdditionalInfoSplitterToMapper -from pash_annotations.annotation_generation.datatypes.parallelizability.Mapper import Mapper -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import MapperSpec -from pash_annotations.annotation_generation.datatypes.parallelizability.Aggregator import Aggregator -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import AggregatorSpec +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand, FileName +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from 
pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "mv" def test_mv_1() -> None: args: List[FlagOption] = [make_arg_simple(["-t", FileName("dest")])] - operands: List[Operand] = [Operand("tomove1.txt"), - Operand("tomove2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands: List[Operand] = [Operand("tomove1.txt"), Operand("tomove2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_other_input()) == 2 @@ -40,22 +46,32 @@ def test_mv_1() -> None: assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = 
AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_mv_2() -> None: args: List[FlagOption] = [make_arg_simple(["-v"])] - operands: List[Operand] = [Operand("tomove1.txt"), - Operand("tomove2.txt"), - Operand("dest.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands: List[Operand] = [ + Operand("tomove1.txt"), + Operand("tomove2.txt"), + Operand("dest.txt"), + ] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_other_input()) == 2 @@ -65,18 +81,28 @@ def test_mv_2() -> None: assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_mv_3() -> 
None: - args: List[FlagOption] = [make_arg_simple(["-t", FileName("dest1.txt")]), - make_arg_simple(["-t", FileName("dest2.txt")])] + args: List[FlagOption] = [ + make_arg_simple(["-t", FileName("dest1.txt")]), + make_arg_simple(["-t", FileName("dest2.txt")]), + ] # illegal to have -t twice - operands: List[Operand] = [Operand("tomove1.txt"), - Operand("tomove2.txt"), - Operand("dest.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + operands: List[Operand] = [ + Operand("tomove1.txt"), + Operand("tomove2.txt"), + Operand("dest.txt"), + ] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is None diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_sort.py b/pash_annotations/annotation_generation/tests/test_ann_gen_sort.py index f42fae3..9ff44d3 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_sort.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_sort.py @@ -1,48 +1,86 @@ from typing import List, Optional from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import Flag, Option, FlagOption, Operand, FileName, ArgStringType -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output, make_stdin_with_access_stream_input -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from 
pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import Splitter, make_splitter_indiv_files, \ - make_splitter_round_robin, make_splitter_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import make_mapper_spec_seq -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers -from pash_annotations.annotation_generation.datatypes.parallelizability.TransformerFlagOptionList import\ - TransformerFlagOptionListFilter, TransformerFlagOptionListAdd, ChainTransformerFlagOptionList - - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.datatypes.basic_datatypes import ( + FlagOption, + Operand, + FileName, + Flag, +) +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, + make_stdin_with_access_stream_input, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, +) +from 
pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + make_splitter_consec_chunks, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_seq, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.transformer_flag_option_list import ( + TransformerFlagOptionListFilter, + TransformerFlagOptionListAdd, + ChainTransformerFlagOptionList, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator + cmd_name = "sort" # TODO: with -m, we could do a reduction tree + def test_sort_1() -> None: args: List[FlagOption] = [] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert 
len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 1 parallelizer1: Parallelizer = para_info.parallelizer_list[0] # only check splitter and actual mappers and aggregators @@ -50,14 +88,38 @@ def test_sort_1() -> None: # check that results of getting mapper and aggregator are fine goal_mapper_spec = make_mapper_spec_seq() # TODO: change to actual check whether it does what is is supposed to do - flag_option_list_to_keep = ["-b", "-d", "-f", "-g", "-i", "-M", "-h", "-n", "-r", "--sort", "-V", "-k", "-t"] - transformer_flag_option_list_filter: TransformerFlagOptionListFilter = \ + flag_option_list_to_keep = [ + "-b", + "-d", + "-f", + "-g", + "-i", + "-M", + "-h", + "-n", + "-r", + "--sort", + "-V", + "-k", + "-t", + ] + transformer_flag_option_list_filter: TransformerFlagOptionListFilter = ( TransformerFlagOptionListFilter(flag_option_list_to_keep) - transformer_flag_option_list_add: TransformerFlagOptionListAdd = TransformerFlagOptionListAdd([Flag("-m")]) - chain_transformer_flag_option_list: ChainTransformerFlagOptionList = \ - ChainTransformerFlagOptionList([transformer_flag_option_list_filter, transformer_flag_option_list_add]) - goal_aggregator_spec = make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( - 
flag_option_list_transformer=chain_transformer_flag_option_list, is_implemented=True) + ) + transformer_flag_option_list_add: TransformerFlagOptionListAdd = ( + TransformerFlagOptionListAdd([Flag("-m")]) + ) + chain_transformer_flag_option_list: ChainTransformerFlagOptionList = ( + ChainTransformerFlagOptionList( + [transformer_flag_option_list_filter, transformer_flag_option_list_add] + ) + ) + goal_aggregator_spec = ( + make_aggregator_spec_custom_2_ary_from_cmd_inv_with_transformers( + flag_option_list_transformer=chain_transformer_flag_option_list, + is_implemented=True, + ) + ) assert parallelizer1.get_mapper_spec() == goal_mapper_spec assert parallelizer1.get_aggregator_spec() == goal_aggregator_spec @@ -65,21 +127,37 @@ def test_sort_1() -> None: def test_sort_2() -> None: args: List[FlagOption] = [make_arg_simple(["-b"]), make_arg_simple(["-f"])] operands: List[Operand] = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 
assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 1 parallelizer1: Parallelizer = para_info.parallelizer_list[0] # only check splitter and actual mappers and aggregators @@ -87,61 +165,107 @@ def test_sort_2() -> None: # check that results of getting mapper and aggregator are fine # TODO: change to actual check whether it does what is is supposed to do: with flag option list + def test_sort_3() -> None: - args: List[FlagOption] = [make_arg_simple(["-s"]), - make_arg_simple(["-o", FileName("output.txt")])] + args: List[FlagOption] = [ + make_arg_simple(["-s"]), + make_arg_simple(["-o", FileName("output.txt")]), + ] operands: List[Operand] = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = 
io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) - assert para_info is not None and len(para_info.parallelizer_list) == 0 # because of stable sort + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) + assert ( + para_info is not None and len(para_info.parallelizer_list) == 0 + ) # because of stable sort + def test_sort_5() -> None: args: List[FlagOption] = [make_arg_simple(["-m"])] operands: List[Operand] = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert 
len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) - assert para_info is not None and len(para_info.parallelizer_list) == 0 # because of stable sort + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) + assert ( + para_info is not None and len(para_info.parallelizer_list) == 0 + ) # because of stable sort def test_sort_6() -> None: args: List[FlagOption] = [make_arg_simple(["--files0-from", FileName("input.txt")])] operands: List[Operand] = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert 
len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) - assert para_info is not None and len(para_info.parallelizer_list) == 0 # because of files0-from + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) + assert ( + para_info is not None and len(para_info.parallelizer_list) == 0 + ) # because of files0-from diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_tail.py b/pash_annotations/annotation_generation/tests/test_ann_gen_tail.py index a578d14..7eab2cf 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_tail.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_tail.py @@ -1,60 +1,93 @@ from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import 
ParallelizabilityInfo +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "tail" def test_tail_1() -> None: args: List[FlagOption] = [make_arg_simple(["-q"])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + 
cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # assert not io_info.multiple_inputs_possible # changes the result! -> different property needed ? TODO # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None def test_tail_2() -> None: args: List[FlagOption] = [make_arg_simple(["--version"])] - operands: List[Operand] = [Operand("in1.txt"), - Operand("-"), - Operand("in2.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in1.txt"), Operand("-"), Operand("in2.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - 
cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 3 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 assert cmd_inv_with_io.implicit_use_of_streaming_input is None - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # assert not io_info.multiple_inputs_possible # changes the result! -> different property needed ? TODO # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is None diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_tr.py b/pash_annotations/annotation_generation/tests/test_ann_gen_tr.py index de03108..a1cae66 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_tr.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_tr.py @@ -1,22 +1,41 @@ -from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import \ - make_stdin_with_access_stream_input, make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from 
pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import make_splitter_round_robin, \ - make_splitter_consec_chunks -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import make_mapper_spec_seq -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_adj_lines_merge, make_aggregator_spec_concatenate, make_aggregator_spec_adj_lines_func_from_string_representation - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration + +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdin_with_access_stream_input, + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + make_splitter_round_robin, + make_splitter_consec_chunks, +) +from 
pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_seq, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_adj_lines_merge, + make_aggregator_spec_concatenate, + make_aggregator_spec_adj_lines_func_from_string_representation, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "tr" @@ -26,50 +45,93 @@ def test_tr_1() -> None: args: List[FlagOption] = [make_arg_simple(["-c"]), make_arg_simple(["-s"])] operands: List[Operand] = [Operand("A-Za-z"), Operand(r"'\n'")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == 
make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] # check that specs for mapper and aggregator are fine assert parallelizer1.get_splitter() == make_splitter_consec_chunks() assert parallelizer1.get_mapper_spec() == make_mapper_spec_seq() - assert parallelizer1.get_aggregator_spec() == make_aggregator_spec_adj_lines_func_from_string_representation("PLACEHOLDER: remove first line if empty", False) + assert ( + parallelizer1.get_aggregator_spec() + == make_aggregator_spec_adj_lines_func_from_string_representation( + "PLACEHOLDER: remove first line if empty", False + ) + ) assert parallelizer2.get_splitter() == make_splitter_round_robin() assert parallelizer2.get_mapper_spec() == make_mapper_spec_seq() - assert parallelizer2.get_aggregator_spec() == make_aggregator_spec_adj_lines_func_from_string_representation("PLACEHOLDER: remove first line if empty", False) + assert ( + parallelizer2.get_aggregator_spec() + == make_aggregator_spec_adj_lines_func_from_string_representation( + "PLACEHOLDER: remove first line if empty", False + ) + ) + def test_tr_2() -> None: args: List[FlagOption] = [] operands: List[Operand] = [Operand("A-Z"), Operand("a-z")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = 
CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 2 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -85,21 +147,37 @@ def test_tr_2() -> None: def test_tr_3() -> None: args: List[FlagOption] = 
[make_arg_simple(["-d"])] operands: List[Operand] = [Operand("'[:punct:]'")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 
parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -115,21 +193,37 @@ def test_tr_3() -> None: def test_tr_4() -> None: args: List[FlagOption] = [make_arg_simple(["-d"])] operands: List[Operand] = [Operand(r"'\n'")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = 
AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -145,21 +239,37 @@ def test_tr_4() -> None: def test_tr_5() -> None: args: List[FlagOption] = [make_arg_simple(["-c"]), make_arg_simple(["-d"])] operands: List[Operand] = [Operand(r"'\n'")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + 
cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] @@ -175,21 +285,37 @@ def test_tr_5() -> None: def test_tr_6() -> None: args: List[FlagOption] = [make_arg_simple(["-c"]), make_arg_simple(["-d"])] operands: List[Operand] = [Operand("A-Z")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert 
len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] diff --git a/pash_annotations/annotation_generation/tests/test_ann_gen_uniq.py b/pash_annotations/annotation_generation/tests/test_ann_gen_uniq.py index 06d1895..42370c2 100644 --- a/pash_annotations/annotation_generation/tests/test_ann_gen_uniq.py +++ b/pash_annotations/annotation_generation/tests/test_ann_gen_uniq.py @@ -1,38 +1,61 @@ -from pash_annotations.util_flag_option import make_arg_simple from typing import List, Optional -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand -from pash_annotations.datatypes.BasicDatatypesWithIO import StdDescriptorWithIOInfo, make_stdin_with_access_stream_input, \ - make_stdout_with_access_output -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.CommandInvocationWithIO import CommandInvocationWithIO -from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from 
pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo - -from pash_annotations.annotation_generation.datatypes.parallelizability.Parallelizer import Parallelizer -from pash_annotations.annotation_generation.datatypes.parallelizability.Splitter import \ - make_splitter_consec_chunks, make_splitter_round_robin -from pash_annotations.annotation_generation.datatypes.parallelizability.MapperSpec import make_mapper_spec_seq -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorSpec import \ - make_aggregator_spec_adj_lines_seq, make_aggregator_spec_adj_lines_func_from_string_representation - -import pash_annotations.annotation_generation.AnnotationGeneration as AnnotationGeneration +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes_with_io import ( + make_stdin_with_access_stream_input, + make_stdout_with_access_output, +) +from pash_annotations.datatypes.command_invocation_initial import ( + CommandInvocationInitial, +) +from pash_annotations.datatypes.command_invocation_with_io import ( + CommandInvocationWithIO, +) +from pash_annotations.datatypes.command_invocation_prefix import CommandInvocationPrefix +from pash_annotations.annotation_generation.datatypes.input_output_info import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.parallelizability_info import ( + ParallelizabilityInfo, +) + +from pash_annotations.annotation_generation.datatypes.parallelizability.parallelizer import ( + Parallelizer, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.splitter import ( + make_splitter_consec_chunks, + make_splitter_round_robin, +) +from pash_annotations.annotation_generation.datatypes.parallelizability.mapper_spec import ( + make_mapper_spec_seq, +) +from 
pash_annotations.annotation_generation.datatypes.parallelizability.aggregator_spec import ( + make_aggregator_spec_adj_lines_seq, + make_aggregator_spec_adj_lines_func_from_string_representation, +) + +from pash_annotations.annotation_generation.annotation_generation import AnnotationGenerator cmd_name = "uniq" def test_uniq_1() -> None: args: List[FlagOption] = [make_arg_simple(["-D"])] - operands: List[Operand] = [Operand("in.txt"), - Operand("out.txt")] + operands: List[Operand] = [Operand("in.txt"), Operand("out.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) # cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 1 @@ -40,51 +63,86 @@ def test_uniq_1() -> None: assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and 
len(para_info.parallelizer_list) == 0 def test_uniq_2() -> None: args: List[FlagOption] = [make_arg_simple(["-c"])] operands: List[Operand] = [] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 0 - assert cmd_inv_with_io.implicit_use_of_streaming_input == make_stdin_with_access_stream_input() - assert cmd_inv_with_io.implicit_use_of_streaming_output == make_stdout_with_access_output() + assert ( + cmd_inv_with_io.implicit_use_of_streaming_input + == make_stdin_with_access_stream_input() + ) + assert ( + cmd_inv_with_io.implicit_use_of_streaming_output + == make_stdout_with_access_output() + ) # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) 
assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] # check that specs for mapper and aggregator are fine assert parallelizer1.get_splitter() == make_splitter_consec_chunks() assert parallelizer1.get_mapper_spec() == make_mapper_spec_seq() - assert parallelizer1.get_aggregator_spec() == make_aggregator_spec_adj_lines_func_from_string_representation('PLACEHOLDER:uniq_merge_count_uniq', is_implemented=False) + assert ( + parallelizer1.get_aggregator_spec() + == make_aggregator_spec_adj_lines_func_from_string_representation( + "PLACEHOLDER:uniq_merge_count_uniq", is_implemented=False + ) + ) assert parallelizer2.get_splitter() == make_splitter_round_robin() assert parallelizer2.get_mapper_spec() == make_mapper_spec_seq() - assert parallelizer2.get_aggregator_spec() == make_aggregator_spec_adj_lines_func_from_string_representation('PLACEHOLDER:uniq_merge_count_uniq', is_implemented=False) + assert ( + parallelizer2.get_aggregator_spec() + == make_aggregator_spec_adj_lines_func_from_string_representation( + "PLACEHOLDER:uniq_merge_count_uniq", is_implemented=False + ) + ) def test_uniq_3() -> None: args: List[FlagOption] = [] - operands: List[Operand] = [Operand("in.txt"), - Operand("out.txt")] - cmd_inv: CommandInvocationInitial = CommandInvocationInitial(cmd_name, flag_option_list=args, operand_list=operands) - cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix(cmd_inv.cmd_name, cmd_inv.flag_option_list, []) + operands: List[Operand] = [Operand("in.txt"), Operand("out.txt")] + cmd_inv: CommandInvocationInitial = CommandInvocationInitial( + cmd_name, flag_option_list=args, operand_list=operands + ) + cmd_inv_pref: CommandInvocationPrefix = CommandInvocationPrefix( + cmd_inv.cmd_name, cmd_inv.flag_option_list, [] + ) # IO Info - io_info: Optional[InputOutputInfo] = 
AnnotationGeneration.get_input_output_info_from_cmd_invocation(cmd_inv) + io_info: Optional[ + InputOutputInfo + ] = AnnotationGenerator().get_input_output_info_from_cmd_invocation(cmd_inv) assert io_info is not None - cmd_inv_with_io: CommandInvocationWithIO = io_info.apply_input_output_info_to_command_invocation(cmd_inv) + cmd_inv_with_io: CommandInvocationWithIO = ( + io_info.apply_input_output_info_to_command_invocation(cmd_inv) + ) assert len(cmd_inv_with_io.get_operands_with_config_input()) == 0 assert len(cmd_inv_with_io.get_operands_with_stream_input()) == 1 assert len(cmd_inv_with_io.get_operands_with_stream_output()) == 1 @@ -92,7 +150,9 @@ def test_uniq_3() -> None: assert cmd_inv_with_io.implicit_use_of_streaming_output is None # Parallelizability Info - para_info: Optional[ParallelizabilityInfo] = AnnotationGeneration.get_parallelizability_info_from_cmd_invocation(cmd_inv) + para_info: Optional[ + ParallelizabilityInfo + ] = AnnotationGenerator().get_parallelizability_info_from_cmd_invocation(cmd_inv) assert para_info is not None and len(para_info.parallelizer_list) == 2 parallelizer1: Parallelizer = para_info.parallelizer_list[0] parallelizer2: Parallelizer = para_info.parallelizer_list[1] diff --git a/pash_annotations/datatypes/Operand.py b/pash_annotations/datatypes/Operand.py deleted file mode 100644 index 303cd3d..0000000 --- a/pash_annotations/datatypes/Operand.py +++ /dev/null @@ -1 +0,0 @@ -from datatypes.BasicDatatypes import ArgStringType diff --git a/pash_annotations/datatypes/AccessKind.py b/pash_annotations/datatypes/access_kind.py similarity index 83% rename from pash_annotations/datatypes/AccessKind.py rename to pash_annotations/datatypes/access_kind.py index 30da27f..cdb2ce6 100644 --- a/pash_annotations/datatypes/AccessKind.py +++ b/pash_annotations/datatypes/access_kind.py @@ -5,17 +5,18 @@ class AccessKindEnum(Enum): CONFIG_INPUT = 0 STREAM_INPUT = 1 - CONTAINER_FOR_INPUT = 2 # e.g. 
sort with --files0-from -> does not work easily for task parallelization - EXCLUDES_FROM_INPUT = 3 # e.g. grep with exclude -> does not affect task parallelization + CONTAINER_FOR_INPUT = 2 # e.g. sort with --files0-from -> does not work easily for task parallelization + EXCLUDES_FROM_INPUT = ( + 3 # e.g. grep with exclude -> does not affect task parallelization + ) OTHER_INPUT = 4 STREAM_OUTPUT = 5 OTHER_OUTPUT = 6 class AccessKind: - def __init__(self, kind) -> None: - self.kind : AccessKindEnum = kind + self.kind: AccessKindEnum = kind def __repr__(self): return standard_repr(self) @@ -39,8 +40,13 @@ def is_other_input(self) -> bool: return self.kind == AccessKindEnum.OTHER_INPUT def is_any_input(self): - return self.is_config_input() or self.is_stream_input() or self.is_other_input() \ - or self.is_container_for_input() or self.is_excludes_from_input() + return ( + self.is_config_input() + or self.is_stream_input() + or self.is_other_input() + or self.is_container_for_input() + or self.is_excludes_from_input() + ) def is_stream_output(self): return self.kind == AccessKindEnum.STREAM_OUTPUT @@ -51,27 +57,35 @@ def is_other_output(self): def is_any_output(self): return self.is_stream_output() or self.is_other_output() + def make_config_input() -> AccessKind: return AccessKind(AccessKindEnum.CONFIG_INPUT) + def make_stream_input() -> AccessKind: return AccessKind(AccessKindEnum.STREAM_INPUT) + def make_container_for_input() -> AccessKind: return AccessKind(AccessKindEnum.CONTAINER_FOR_INPUT) + def make_excludes_from_input() -> AccessKind: return AccessKind(AccessKindEnum.EXCLUDES_FROM_INPUT) + def make_other_input() -> AccessKind: return AccessKind(AccessKindEnum.OTHER_INPUT) + def make_stream_output() -> AccessKind: return AccessKind(AccessKindEnum.STREAM_OUTPUT) + def make_other_output() -> AccessKind: return AccessKind(AccessKindEnum.OTHER_OUTPUT) + def get_access_from_string(value: str) -> AccessKind: if value == "CONFIG_INPUT": return make_config_input() diff 
--git a/pash_annotations/datatypes/BasicDatatypes.py b/pash_annotations/datatypes/basic_datatypes.py similarity index 85% rename from pash_annotations/datatypes/BasicDatatypes.py rename to pash_annotations/datatypes/basic_datatypes.py index 074822a..848484f 100644 --- a/pash_annotations/datatypes/BasicDatatypes.py +++ b/pash_annotations/datatypes/basic_datatypes.py @@ -6,9 +6,9 @@ from abc import ABC, abstractmethod + # note that we have individual classes since aliasing does not provide as much support class BaseClassForBasicDatatypes(ABC): - def __repr__(self) -> str: return standard_repr(self) @@ -19,8 +19,8 @@ def __eq__(self, other) -> bool: def get_name(self) -> str: pass -class FileName(BaseClassForBasicDatatypes): +class FileName(BaseClassForBasicDatatypes): def __init__(self, name: str) -> None: # name should be a string self.name = name @@ -28,13 +28,14 @@ def __init__(self, name: str) -> None: def get_name(self) -> str: return self.name + class StdDescriptorEnum(Enum): STDIN = 0 STDOUT = 1 STDERR = 2 -class StdDescriptor(BaseClassForBasicDatatypes): +class StdDescriptor(BaseClassForBasicDatatypes): def __init__(self, name: StdDescriptorEnum) -> None: # name should be a number self.name = name @@ -45,29 +46,34 @@ def get_name(self) -> str: def get_type(self) -> StdDescriptorEnum: return self.name + def get_stdin_fd() -> StdDescriptor: return StdDescriptor(StdDescriptorEnum.STDIN) + def get_stdout_fd() -> StdDescriptor: return StdDescriptor(StdDescriptorEnum.STDOUT) + def get_stderr_fd() -> StdDescriptor: return StdDescriptor(StdDescriptorEnum.STDERR) + FileNameOrStdDescriptor = Union[FileName, StdDescriptor] -class ArgStringType(BaseClassForBasicDatatypes): +class ArgStringType(BaseClassForBasicDatatypes): def __init__(self, name: str) -> None: self.name = name def get_name(self) -> str: return self.name + OptionArgPosConfigType = Union[ArgStringType, FileNameOrStdDescriptor] -class Flag(BaseClassForBasicDatatypes): +class 
Flag(BaseClassForBasicDatatypes): def __init__(self, name: str) -> None: self.flag_name = name @@ -76,7 +82,6 @@ def get_name(self) -> str: class Option(BaseClassForBasicDatatypes): - def __init__(self, name: str, option_arg: str) -> None: self.option_name = name self.option_arg: str = option_arg @@ -93,18 +98,19 @@ def get_arg(self) -> str: # def is_arg_of_type_filename_or_stddescriptor(self): # return isinstance(self.option_arg, FileName) or isinstance(self.option_arg, StdDescriptor) + FlagOption = Union[Flag, Option] # Note difference between Option argument and Operand after parsing: # for option arguments, we know which is a filename; for operands, we don't -class Operand(BaseClassForBasicDatatypes): +class Operand(BaseClassForBasicDatatypes): def __init__(self, name: str) -> None: self.name = name def __repr__(self) -> str: - return f'{self.name}' + return f"{self.name}" def get_name(self) -> str: return self.name @@ -122,48 +128,52 @@ def to_arg_string_type(self): class WhichClassForArg(Enum): - FILESTD = 'filestd' - ARGSTRING = 'argstring' - PLAINSTRING = 'str' + FILESTD = "filestd" + ARGSTRING = "argstring" + PLAINSTRING = "str" + # copied from ir_utils def format_arg_chars(arg_chars): chars = [format_arg_char(arg_char) for arg_char in arg_chars] return "".join(chars) + ## ## BIG TODO: Fix the formating of arg_chars bask to shell scripts and string. ## We need to do this the proper way using the parser. ## def format_arg_char(arg_char): key, val = get_kv(arg_char) - if (key == 'C'): + if key == "C": return str(chr(val)) - elif (key == 'B'): + elif key == "B": # The $() is just for illustration. This is backticks - return '$({})'.format(val) - elif (key == 'Q'): + return "$({})".format(val) + elif key == "Q": formated_val = format_arg_chars(val) return '"{}"'.format(formated_val) - elif (key == 'V'): - return '${{{}}}'.format(val[2]) - elif (key == 'E'): + elif key == "V": + return "${{{}}}".format(val[2]) + elif key == "E": ## TODO: This is not right. 
I think the main reason for the ## problems is the differences between bash and the posix ## standard. # log(" -- escape-debug -- ", val, chr(val)) - non_escape_chars = [92, # \ - 61, # = - 91, # [ - 93, # ] - 45, # - - 58, # : - 126,# ~ - 42] # * - if(val in non_escape_chars): - return '{}'.format(chr(val)) + non_escape_chars = [ + 92, # \ + 61, # = + 91, # [ + 93, # ] + 45, # - + 58, # : + 126, # ~ + 42, + ] # * + if val in non_escape_chars: + return "{}".format(chr(val)) else: - return '\{}'.format(chr(val)) + return "\{}".format(chr(val)) else: # log("Cannot format arg_char:", arg_char) ## TODO: Make this correct diff --git a/pash_annotations/datatypes/BasicDatatypesWithIO.py b/pash_annotations/datatypes/basic_datatypes_with_io.py similarity index 65% rename from pash_annotations/datatypes/BasicDatatypesWithIO.py rename to pash_annotations/datatypes/basic_datatypes_with_io.py index 948647e..06d999f 100644 --- a/pash_annotations/datatypes/BasicDatatypesWithIO.py +++ b/pash_annotations/datatypes/basic_datatypes_with_io.py @@ -1,12 +1,24 @@ from typing import Union -from pash_annotations.datatypes.BasicDatatypes import FileName, StdDescriptor, StdDescriptorEnum, Operand, \ - BaseClassForBasicDatatypes, ArgStringType, get_stdout_fd, get_stdin_fd -from pash_annotations.datatypes.AccessKind import AccessKind, make_stream_output, make_stream_input +from pash_annotations.datatypes.basic_datatypes import ( + FileName, + StdDescriptor, + StdDescriptorEnum, + Operand, + BaseClassForBasicDatatypes, + ArgStringType, + get_stdout_fd, + get_stdin_fd, +) +from pash_annotations.datatypes.access_kind import ( + AccessKind, + make_stream_output, + make_stream_input, +) + +from abc import ABC -from abc import ABC, abstractmethod class BaseClassForBasicDatatypesWithIOInfo(ABC): - def __init__(self, access: AccessKind): self.access = access @@ -21,56 +33,79 @@ def get_access(self) -> AccessKind: class FileNameWithIOInfo(FileName, BaseClassForBasicDatatypesWithIOInfo): - def 
__init__(self, name: str, access: AccessKind) -> None: FileName.__init__(self, name=name) BaseClassForBasicDatatypesWithIOInfo.__init__(self, access=access) -def get_from_original_filename_with_ioinfo(original: FileName, access: AccessKind) -> FileNameWithIOInfo: + +def get_from_original_filename_with_ioinfo( + original: FileName, access: AccessKind +) -> FileNameWithIOInfo: return FileNameWithIOInfo(original.get_name(), access) class StdDescriptorWithIOInfo(StdDescriptor, BaseClassForBasicDatatypesWithIOInfo): - def __init__(self, name: StdDescriptorEnum, access: AccessKind) -> None: StdDescriptor.__init__(self, name=name) BaseClassForBasicDatatypesWithIOInfo.__init__(self, access=access) -def get_from_original_stddescriptor_with_ioinfo(original: StdDescriptor, access: AccessKind) -> StdDescriptorWithIOInfo: + +def get_from_original_stddescriptor_with_ioinfo( + original: StdDescriptor, access: AccessKind +) -> StdDescriptorWithIOInfo: return StdDescriptorWithIOInfo(original.name, access) + def make_stdin_with_access_stream_input() -> StdDescriptorWithIOInfo: - return get_from_original_stddescriptor_with_ioinfo(get_stdin_fd(), make_stream_input()) + return get_from_original_stddescriptor_with_ioinfo( + get_stdin_fd(), make_stream_input() + ) + def make_stdout_with_access_output() -> StdDescriptorWithIOInfo: - return get_from_original_stddescriptor_with_ioinfo(get_stdout_fd(), make_stream_output()) + return get_from_original_stddescriptor_with_ioinfo( + get_stdout_fd(), make_stream_output() + ) + FileNameOrStdDescriptorWithIOInfo = Union[FileNameWithIOInfo, StdDescriptorWithIOInfo] + def add_access_to_stream_output(output_to): if isinstance(output_to, FileName): - assert(False) - output_to_with_access: FileNameOrStdDescriptorWithIOInfo = FileNameWithIOInfo.get_from_original_filename_with_ioinfo(output_to, - make_stream_output()) + assert False + output_to_with_access: FileNameOrStdDescriptorWithIOInfo = ( + FileNameWithIOInfo.get_from_original_filename_with_ioinfo( 
+ output_to, make_stream_output() + ) + ) elif isinstance(output_to, StdDescriptor): - assert(False) - output_to_with_access: FileNameOrStdDescriptorWithIOInfo = StdDescriptorWithIOInfo.get_from_original_stddescriptor_with_ioinfo( - output_to, - make_stream_output()) + assert False + output_to_with_access: FileNameOrStdDescriptorWithIOInfo = ( + StdDescriptorWithIOInfo.get_from_original_stddescriptor_with_ioinfo( + output_to, make_stream_output() + ) + ) else: raise Exception("neither FileName nor StdDescriptor") return output_to_with_access + def add_access_to_stream_input(input_from): if isinstance(input_from, FileName): - assert(False) - input_from_with_access: FileNameOrStdDescriptorWithIOInfo = FileNameWithIOInfo.get_from_original_filename_with_ioinfo(input_from, - make_stream_input()) + assert False + input_from_with_access: FileNameOrStdDescriptorWithIOInfo = ( + FileNameWithIOInfo.get_from_original_filename_with_ioinfo( + input_from, make_stream_input() + ) + ) elif isinstance(input_from, StdDescriptor): - assert(False) - input_from_with_access: FileNameOrStdDescriptorWithIOInfo = StdDescriptorWithIOInfo.get_from_original_stddescriptor_with_ioinfo( - input_from, - make_stream_input()) + assert False + input_from_with_access: FileNameOrStdDescriptorWithIOInfo = ( + StdDescriptorWithIOInfo.get_from_original_stddescriptor_with_ioinfo( + input_from, make_stream_input() + ) + ) else: raise Exception("neither FileName nor StdDescriptor") return input_from_with_access @@ -78,10 +113,15 @@ def add_access_to_stream_input(input_from): # only OptionWithIOInfo if argument needs it class OptionWithIO(BaseClassForBasicDatatypes): - - def __init__(self, name: str, option_arg: Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType]) -> None: - self.option_name : str = name - self.option_arg : Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType] = option_arg + def __init__( + self, + name: str, + option_arg: Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType], + ) -> 
None: + self.option_name: str = name + self.option_arg: Union[ + FileNameOrStdDescriptorWithIOInfo, ArgStringType + ] = option_arg def get_name(self) -> str: return self.option_name @@ -89,7 +129,9 @@ def get_name(self) -> str: def get_arg(self) -> Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType]: return self.option_arg - def get_arg_with_ioinfo(self) -> Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType]: + def get_arg_with_ioinfo( + self, + ) -> Union[FileNameOrStdDescriptorWithIOInfo, ArgStringType]: return self.option_arg # @staticmethod @@ -106,7 +148,6 @@ def get_arg_with_ioinfo(self) -> Union[FileNameOrStdDescriptorWithIOInfo, ArgStr # only OptionWithIO if argument needs it class OperandWithIO: - def __init__(self, name: FileNameOrStdDescriptorWithIOInfo) -> None: self.name = name @@ -115,7 +156,9 @@ def get_name(self) -> FileNameOrStdDescriptorWithIOInfo: @staticmethod def make_operand_a_filename_with_access(original: Operand, access: AccessKind): - filename_with_ioinfo = get_from_original_filename_with_ioinfo(FileName(original.get_name()), access) + filename_with_ioinfo = get_from_original_filename_with_ioinfo( + FileName(original.get_name()), access + ) return OperandWithIO(filename_with_ioinfo) # TODO: how to get proper type? 
diff --git a/pash_annotations/datatypes/BasicDatatypesWithIOVar.py b/pash_annotations/datatypes/basic_datatypes_with_iovar.py similarity index 69% rename from pash_annotations/datatypes/BasicDatatypesWithIOVar.py rename to pash_annotations/datatypes/basic_datatypes_with_iovar.py index fe85c1f..2d9ecec 100644 --- a/pash_annotations/datatypes/BasicDatatypesWithIOVar.py +++ b/pash_annotations/datatypes/basic_datatypes_with_iovar.py @@ -1,13 +1,13 @@ from typing import Union -from pash_annotations.datatypes.BasicDatatypes import ArgStringType +from pash_annotations.datatypes.basic_datatypes import ArgStringType IOVar = int -class OptionWithIOVar: +class OptionWithIOVar: def __init__(self, name: str, option_arg: Union[IOVar, ArgStringType]) -> None: - self.option_name : str = name - self.option_arg : Union[IOVar, ArgStringType] = option_arg + self.option_name: str = name + self.option_arg: Union[IOVar, ArgStringType] = option_arg def get_name(self) -> str: return self.option_name diff --git a/pash_annotations/datatypes/CommandInvocationInitial.py b/pash_annotations/datatypes/command_invocation_initial.py similarity index 65% rename from pash_annotations/datatypes/CommandInvocationInitial.py rename to pash_annotations/datatypes/command_invocation_initial.py index 2dbafa8..7aa0a20 100644 --- a/pash_annotations/datatypes/CommandInvocationInitial.py +++ b/pash_annotations/datatypes/command_invocation_initial.py @@ -1,11 +1,16 @@ from typing import List -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Operand +from pash_annotations.datatypes.basic_datatypes import FlagOption, Operand from pash_annotations.util_standard import standard_repr, standard_eq -class CommandInvocationInitial: - def __init__(self, cmd_name: str, flag_option_list: List[FlagOption], operand_list: List[Operand]) -> None: +class CommandInvocationInitial: + def __init__( + self, + cmd_name: str, + flag_option_list: List[FlagOption], + operand_list: List[Operand], + ) -> None: 
self.cmd_name: str = cmd_name self.flag_option_list: List[FlagOption] = flag_option_list self.operand_list: List[Operand] = operand_list diff --git a/pash_annotations/datatypes/CommandInvocationPrefix.py b/pash_annotations/datatypes/command_invocation_prefix.py similarity index 58% rename from pash_annotations/datatypes/CommandInvocationPrefix.py rename to pash_annotations/datatypes/command_invocation_prefix.py index cb7662b..8014fdd 100644 --- a/pash_annotations/datatypes/CommandInvocationPrefix.py +++ b/pash_annotations/datatypes/command_invocation_prefix.py @@ -1,11 +1,19 @@ from typing import List from pash_annotations.util_standard import standard_repr, standard_eq -from pash_annotations.datatypes.BasicDatatypes import FlagOption, OptionArgPosConfigType +from pash_annotations.datatypes.basic_datatypes import ( + FlagOption, + OptionArgPosConfigType, +) -class CommandInvocationPrefix: - def __init__(self, cmd_name: str, flag_option_list: List[FlagOption], positional_config_list: List[OptionArgPosConfigType]) -> None: +class CommandInvocationPrefix: + def __init__( + self, + cmd_name: str, + flag_option_list: List[FlagOption], + positional_config_list: List[OptionArgPosConfigType], + ) -> None: self.cmd_name = cmd_name self.flag_option_list = flag_option_list self.positional_config_list = positional_config_list diff --git a/pash_annotations/datatypes/CommandInvocationWithIO.py b/pash_annotations/datatypes/command_invocation_with_io.py similarity index 76% rename from pash_annotations/datatypes/CommandInvocationWithIO.py rename to pash_annotations/datatypes/command_invocation_with_io.py index b57a26f..182eede 100644 --- a/pash_annotations/datatypes/CommandInvocationWithIO.py +++ b/pash_annotations/datatypes/command_invocation_with_io.py @@ -1,24 +1,40 @@ from typing import List, Union, Optional, Tuple -from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType, FileName, StdDescriptor, FileNameOrStdDescriptor -from 
pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO, FileNameOrStdDescriptorWithIOInfo, FileNameWithIOInfo, StdDescriptorWithIOInfo +from pash_annotations.datatypes.basic_datatypes import ( + Flag, + ArgStringType, +) +from pash_annotations.datatypes.basic_datatypes_with_io import ( + OptionWithIO, + FileNameOrStdDescriptorWithIOInfo, + FileNameWithIOInfo, + StdDescriptorWithIOInfo, +) from pash_annotations.util_standard import standard_repr, standard_eq + class CommandInvocationWithIO: # TODO: fully substitute by ...Vars and delete this one - def __init__(self, - cmd_name: str, - flag_option_list: List[Union[Flag, OptionWithIO]], - operand_list: List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]], - implicit_use_of_streaming_input: Optional[FileNameOrStdDescriptorWithIOInfo], - implicit_use_of_streaming_output: Optional[FileNameOrStdDescriptorWithIOInfo], - ) -> None: + def __init__( + self, + cmd_name: str, + flag_option_list: List[Union[Flag, OptionWithIO]], + operand_list: List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]], + implicit_use_of_streaming_input: Optional[FileNameOrStdDescriptorWithIOInfo], + implicit_use_of_streaming_output: Optional[FileNameOrStdDescriptorWithIOInfo], + ) -> None: self.cmd_name: str = cmd_name self.flag_option_list: List[Union[Flag, OptionWithIO]] = flag_option_list - self.operand_list: List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]] = operand_list - self.implicit_use_of_streaming_input: Optional[FileNameOrStdDescriptorWithIOInfo] = implicit_use_of_streaming_input - self.implicit_use_of_streaming_output: Optional[FileNameOrStdDescriptorWithIOInfo] = implicit_use_of_streaming_output + self.operand_list: List[ + Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo] + ] = operand_list + self.implicit_use_of_streaming_input: Optional[ + FileNameOrStdDescriptorWithIOInfo + ] = implicit_use_of_streaming_input + self.implicit_use_of_streaming_output: Optional[ + 
FileNameOrStdDescriptorWithIOInfo + ] = implicit_use_of_streaming_output # map from variables to filenames def __repr__(self): @@ -27,7 +43,6 @@ def __repr__(self): def __eq__(self, other): return standard_eq(self, other) - # def substitute_inputs_and_outputs_in_cmd_invocation(self, # inputs_from: List[FileNameOrStdDescriptor], # outputs_to: List[FileNameOrStdDescriptor]) -> None: @@ -171,30 +186,80 @@ def __eq__(self, other): # return outputs # for test cases: - def get_operands_with_config_input(self) -> List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]]: - return [x for x in self.operand_list if - isinstance(x, ArgStringType) or - ((isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_config_input())] + def get_operands_with_config_input( + self, + ) -> List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]]: + return [ + x + for x in self.operand_list + if isinstance(x, ArgStringType) + or ( + ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_config_input() + ) + ] def get_operands_with_stream_input(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_stream_input()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_stream_input() + ] def get_operands_with_other_input(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_other_input()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_other_input() + ] - def get_operands_with_stream_output(self) -> 
List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_stream_output()] + def get_operands_with_stream_output( + self, + ) -> List[FileNameOrStdDescriptorWithIOInfo]: + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_stream_output() + ] def get_operands_with_other_output(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_other_output()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_other_output() + ] def get_options_with_other_output(self) -> List[OptionWithIO]: - only_options: List[OptionWithIO] = [x for x in self.flag_option_list if isinstance(x, OptionWithIO)] - return [x for x in only_options if - ((isinstance(x.option_arg, FileNameWithIOInfo) or isinstance(x.option_arg, StdDescriptorWithIOInfo))) - and x.option_arg.access.is_other_output()] + only_options: List[OptionWithIO] = [ + x for x in self.flag_option_list if isinstance(x, OptionWithIO) + ] + return [ + x + for x in only_options + if ( + ( + isinstance(x.option_arg, FileNameWithIOInfo) + or isinstance(x.option_arg, StdDescriptorWithIOInfo) + ) + ) + and x.option_arg.access.is_other_output() + ] diff --git a/pash_annotations/datatypes/CommandInvocationWithIOVars.py b/pash_annotations/datatypes/command_invocation_with_iovars.py similarity index 63% rename from pash_annotations/datatypes/CommandInvocationWithIOVars.py rename to pash_annotations/datatypes/command_invocation_with_iovars.py index b18b7ea..f255121 100644 --- a/pash_annotations/datatypes/CommandInvocationWithIOVars.py +++ 
b/pash_annotations/datatypes/command_invocation_with_iovars.py @@ -1,32 +1,52 @@ from copy import deepcopy from typing import List, Union, Optional, Dict -from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType, FileNameOrStdDescriptor -from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO, FileNameOrStdDescriptorWithIOInfo, FileNameWithIOInfo, \ - StdDescriptorWithIOInfo -from pash_annotations.datatypes.AccessKind import make_stream_input, make_stream_output, AccessKind -from pash_annotations.annotation_generation.datatypes.Inputs import Inputs, InputsEnum -from pash_annotations.datatypes.BasicDatatypesWithIOVar import OptionWithIOVar, IOVar +from pash_annotations.datatypes.basic_datatypes import ( + Flag, + ArgStringType, +) +from pash_annotations.datatypes.basic_datatypes_with_io import ( + OptionWithIO, + FileNameOrStdDescriptorWithIOInfo, + FileNameWithIOInfo, + StdDescriptorWithIOInfo, +) +from pash_annotations.datatypes.access_kind import ( + make_stream_input, + make_stream_output, + AccessKind, +) +from pash_annotations.annotation_generation.datatypes.inputs import Inputs, InputsEnum +from pash_annotations.datatypes.basic_datatypes_with_iovar import ( + OptionWithIOVar, + IOVar, +) from pash_annotations.util_standard import standard_repr, standard_eq class CommandInvocationWithIOVars: - # TODO: get_access() will not work here anymore, use access_map - def __init__(self, - cmd_name: str, - flag_option_list: List[Union[Flag, OptionWithIOVar]], - operand_list: List[Union[ArgStringType, IOVar]], - implicit_use_of_streaming_input: Optional[IOVar], - implicit_use_of_streaming_output: Optional[IOVar], - access_map: Dict[IOVar, AccessKind] - ) -> None: + def __init__( + self, + cmd_name: str, + flag_option_list: List[Union[Flag, OptionWithIOVar]], + operand_list: List[Union[ArgStringType, IOVar]], + implicit_use_of_streaming_input: Optional[IOVar], + implicit_use_of_streaming_output: Optional[IOVar], + access_map: Dict[IOVar, 
AccessKind], + ) -> None: self.cmd_name: str = cmd_name - self.flag_option_list: List[Union[Flag, OptionWithIOVar]] = deepcopy(flag_option_list) + self.flag_option_list: List[Union[Flag, OptionWithIOVar]] = deepcopy( + flag_option_list + ) self.operand_list: List[Union[ArgStringType, IOVar]] = deepcopy(operand_list) - self.implicit_use_of_streaming_input: Optional[IOVar] = deepcopy(implicit_use_of_streaming_input) - self.implicit_use_of_streaming_output: Optional[IOVar] = deepcopy(implicit_use_of_streaming_output) + self.implicit_use_of_streaming_input: Optional[IOVar] = deepcopy( + implicit_use_of_streaming_input + ) + self.implicit_use_of_streaming_output: Optional[IOVar] = deepcopy( + implicit_use_of_streaming_output + ) self.access_map = deepcopy(access_map) def __repr__(self): @@ -35,7 +55,9 @@ def __repr__(self): def __eq__(self, other): return standard_eq(self, other) - def is_aggregator_concatenate(self): # needed since isinstance(_, Aggregator) does not work + def is_aggregator_concatenate( + self, + ): # needed since isinstance(_, Aggregator) does not work return False # problematic regarding typing so removed @@ -48,10 +70,9 @@ def is_aggregator_concatenate(self): # needed since isinstance(_, Aggregator) do # implicit_use_of_streaming_output=cmd_inv_with_io.implicit_use_of_streaming_output, # access_map=access_map) - - def substitute_inputs_and_outputs_in_cmd_invocation(self, - inputs_from: List[IOVar], - outputs_to: List[IOVar]) -> None: + def substitute_inputs_and_outputs_in_cmd_invocation( + self, inputs_from: List[IOVar], outputs_to: List[IOVar] + ) -> None: self.substitute_inputs_in_cmd_invocation(inputs_from) self.substitute_outputs_in_cmd_invocation(outputs_to) @@ -62,6 +83,7 @@ def function_to_apply_to_vars(var): return self.replace_var_consistently(var, new_var) else: return var + self.map_var(function_to_apply_to_vars) assert len(inputs_from) == 0 @@ -72,44 +94,57 @@ def function_to_apply_to_vars(var): return self.replace_var_consistently(var, 
new_var) else: return var + self.map_var(function_to_apply_to_vars) assert len(outputs_to) == 0 def generate_inputs(self): streaming_inputs = [] + def function_to_apply(el): if self.access_map[el].is_stream_input(): streaming_inputs.append(el) return el + self.map_var(function_to_apply) # ASSUMPTION: no configuration inputs, no fallback (no streaming inputs) return Inputs(InputsEnum.STREAMING, ([], streaming_inputs)) def generate_outputs(self): outputs = [] + def function_to_apply(el): if self.access_map[el].is_any_output(): outputs.append(el) return el + self.map_var(function_to_apply) return outputs def has_other_outputs(self): outputs = [] + def function_to_apply(el): if self.access_map[el].is_other_output(): outputs.append(el) return el + self.map_var(function_to_apply) return len(outputs) > 0 def replace_var_consistently(self, from_var, to_var): - assert(from_var in self.access_map) # if this is not true, something went wrong before + assert ( + from_var in self.access_map + ) # if this is not true, something went wrong before self.access_map[to_var] = self.access_map.pop(from_var) return to_var def replace_var(self, from_var, to_var): - function_to_apply_to_var = lambda el: self.replace_var_consistently(from_var, to_var) if el == from_var else el + function_to_apply_to_var = ( + lambda el: self.replace_var_consistently(from_var, to_var) + if el == from_var + else el + ) self.map_var(function_to_apply_to_var) def flat_map_non_names_aux_flag_option_list(self, function_to_apply): @@ -118,7 +153,9 @@ def flat_map_non_names_aux_flag_option_list(self, function_to_apply): flagoption = self.flag_option_list[i] if isinstance(flagoption, OptionWithIO): [new_option_arg] = function_to_apply(flagoption.option_arg) - new_flag_option_list.append(OptionWithIO(flagoption.option_name, new_option_arg)) + new_flag_option_list.append( + OptionWithIO(flagoption.option_name, new_option_arg) + ) else: new_flag_option_list.append(flagoption) self.flag_option_list = 
new_flag_option_list @@ -154,7 +191,9 @@ def flat_map_anything_non_names(self, function_to_apply): self.flat_map_non_names_aux_implicit_streaming_output(function_to_apply) def map_var(self, function_to_apply_to_vars): - function_to_apply_to_anything = lambda el: [function_to_apply_to_vars(el)] if isinstance(el, int) else [el] + function_to_apply_to_anything = ( + lambda el: [function_to_apply_to_vars(el)] if isinstance(el, int) else [el] + ) self.flat_map_anything_non_names(function_to_apply_to_anything) # TODO: move to util-file command-invocation helpers @@ -168,17 +207,19 @@ def make_cat_command_invocation_with_io_vars(input_ids, output_id): operand_list=input_ids, implicit_use_of_streaming_input=None, implicit_use_of_streaming_output=output_id, - access_map=access_map) + access_map=access_map, + ) return cmd_inv_with_io_vars def remove_streaming_inputs(self): # TODO: check whether this removes options with streaming input def function_to_apply(el): - if isinstance(el, int) and self.access_map[el].is_stream_input(): - self.access_map.pop(el) - return [] - else: - return [el] + if isinstance(el, int) and self.access_map[el].is_stream_input(): + self.access_map.pop(el) + return [] + else: + return [el] + self.flat_map_anything_non_names(function_to_apply) def remove_streaming_outputs(self): @@ -189,34 +230,84 @@ def function_to_apply(el): return [] else: return [el] - self.flat_map_anything_non_names(function_to_apply) + self.flat_map_anything_non_names(function_to_apply) # for test cases: - def get_operands_with_config_input(self) -> List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]]: - return [x for x in self.operand_list if - isinstance(x, ArgStringType) or - ((isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_config_input())] + def get_operands_with_config_input( + self, + ) -> List[Union[ArgStringType, FileNameOrStdDescriptorWithIOInfo]]: + return [ + x + for x in self.operand_list + if isinstance(x, 
ArgStringType) + or ( + ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_config_input() + ) + ] def get_operands_with_stream_input(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_stream_input()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_stream_input() + ] def get_operands_with_other_input(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_other_input()] - - def get_operands_with_stream_output(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_stream_output()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_other_input() + ] + + def get_operands_with_stream_output( + self, + ) -> List[FileNameOrStdDescriptorWithIOInfo]: + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_stream_output() + ] def get_operands_with_other_output(self) -> List[FileNameOrStdDescriptorWithIOInfo]: - return [x for x in self.operand_list if (isinstance(x, FileNameWithIOInfo) or isinstance(x, StdDescriptorWithIOInfo)) - and x.access.is_other_output()] + return [ + x + for x in self.operand_list + if ( + isinstance(x, FileNameWithIOInfo) + or isinstance(x, StdDescriptorWithIOInfo) + ) + and x.access.is_other_output() + ] def get_options_with_other_output(self) -> List[OptionWithIO]: - only_options: List[OptionWithIO] = [x 
for x in self.flag_option_list if isinstance(x, OptionWithIO)] - return [x for x in only_options if - ((isinstance(x.option_arg, FileNameWithIOInfo) or isinstance(x.option_arg, StdDescriptorWithIOInfo))) - and x.option_arg.access.is_other_output()] + only_options: List[OptionWithIO] = [ + x for x in self.flag_option_list if isinstance(x, OptionWithIO) + ] + return [ + x + for x in only_options + if ( + ( + isinstance(x.option_arg, FileNameWithIOInfo) + or isinstance(x.option_arg, StdDescriptorWithIOInfo) + ) + ) + and x.option_arg.access.is_other_output() + ] diff --git a/pash_annotations/datatypes/operand.py b/pash_annotations/datatypes/operand.py new file mode 100644 index 0000000..9faff88 --- /dev/null +++ b/pash_annotations/datatypes/operand.py @@ -0,0 +1 @@ +from pash_annotations.datatypes.basic_datatypes import ArgStringType diff --git a/pash_annotations/parser/parser.py b/pash_annotations/parser/parser.py index 0a177ad..6b5fcb1 100644 --- a/pash_annotations/parser/parser.py +++ b/pash_annotations/parser/parser.py @@ -1,15 +1,14 @@ from typing import Set, Literal, List, Dict import shlex -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Flag, Option, Operand, FileName, ArgStringType -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial +from pash_annotations.datatypes.basic_datatypes import FlagOption, Flag, Option, Operand +from pash_annotations.datatypes.command_invocation_initial import CommandInvocationInitial from pash_annotations.parser.util_parser import get_json_data def parse(command) -> CommandInvocationInitial: - # split all terms (command, flags, options, arguments, operands) - parsed_elements_list : list[str] = shlex.split(command) + parsed_elements_list: list[str] = shlex.split(command) cmd_name: str = parsed_elements_list[0] json_data = get_json_data(cmd_name) @@ -18,7 +17,9 @@ def parse(command) -> CommandInvocationInitial: set_of_all_flags: Set[str] = get_set_of_all_flags(json_data) 
dict_flag_to_primary_repr: Dict[str, str] = get_dict_flag_to_primary_repr(json_data) set_of_all_options: Set[str] = get_set_of_all_options(json_data) - dict_option_to_primary_repr: Dict[str, str] = get_dict_option_to_primary_repr(json_data) + dict_option_to_primary_repr: Dict[str, str] = get_dict_option_to_primary_repr( + json_data + ) # dict_option_to_class_for_arg: Dict[str, WhichClassForArg] = get_dict_option_to_class_for_arg(json_data) # parse list of command invocation terms @@ -27,18 +28,24 @@ def parse(command) -> CommandInvocationInitial: while i < len(parsed_elements_list): potential_flag_or_option = parsed_elements_list[i] if potential_flag_or_option in set_of_all_flags: - flag_name_as_string: str = dict_flag_to_primary_repr.get(potential_flag_or_option, potential_flag_or_option) + flag_name_as_string: str = dict_flag_to_primary_repr.get( + potential_flag_or_option, potential_flag_or_option + ) flag: Flag = Flag(flag_name_as_string) flag_option_list.append(flag) - elif (potential_flag_or_option in set_of_all_options) and ((i+1) < len(parsed_elements_list)): - option_name_as_string: str = dict_option_to_primary_repr.get(potential_flag_or_option, potential_flag_or_option) - option_arg_as_string: str = parsed_elements_list[i+1] + elif (potential_flag_or_option in set_of_all_options) and ( + (i + 1) < len(parsed_elements_list) + ): + option_name_as_string: str = dict_option_to_primary_repr.get( + potential_flag_or_option, potential_flag_or_option + ) + option_arg_as_string: str = parsed_elements_list[i + 1] option = Option(option_name_as_string, option_arg_as_string) flag_option_list.append(option) i += 1 # since we consumed another term for the argument elif are_all_individually_flags(potential_flag_or_option, set_of_all_flags): for split_el in list(potential_flag_or_option[1:]): - flag: Flag = Flag(f'-{split_el}') + flag: Flag = Flag(f"-{split_el}") flag_option_list.append(flag) else: break # next one is Operand, and we keep these in separate list @@ -65,6 
+72,7 @@ def parse(command) -> CommandInvocationInitial: # return Option(option_name_as_string, option_arg) # + def get_set_of_all_flags(json_data) -> Set[str]: return get_set_of_all("flag", json_data) @@ -72,12 +80,16 @@ def get_set_of_all_flags(json_data) -> Set[str]: def get_set_of_all_options(json_data) -> Set[str]: set_of_all: set[str] = set() for list_of_flags_or_options in json_data["option"]: - for flag_or_option in list_of_flags_or_options[:-1]: # off by 1 due to what the argument is + for flag_or_option in list_of_flags_or_options[ + :-1 + ]: # off by 1 due to what the argument is set_of_all.add(flag_or_option) return set_of_all -def get_set_of_all(flag_or_option_str: Literal["flag", "option"], json_data) -> Set[str]: +def get_set_of_all( + flag_or_option_str: Literal["flag", "option"], json_data +) -> Set[str]: set_of_all: set[str] = set() for list_of_flags_or_options in json_data[flag_or_option_str]: for flag_or_option in list_of_flags_or_options: @@ -89,16 +101,22 @@ def get_dict_flag_to_primary_repr(json_data): dict_flag_to_primary_repr: Dict[str, str] = dict() for list_of_equiv_flag_repr in json_data["flag"]: for i in range(1, len(list_of_equiv_flag_repr)): - dict_flag_to_primary_repr[list_of_equiv_flag_repr[i]] = list_of_equiv_flag_repr[0] + dict_flag_to_primary_repr[ + list_of_equiv_flag_repr[i] + ] = list_of_equiv_flag_repr[0] return dict_flag_to_primary_repr + def get_dict_option_to_primary_repr(json_data): dict_option_to_primary_repr: Dict[str, str] = dict() for list_of_equiv_flag_repr in json_data["option"]: - for i in range(1, len(list_of_equiv_flag_repr) - 1): # last one contains type - dict_option_to_primary_repr[list_of_equiv_flag_repr[i]] = list_of_equiv_flag_repr[0] + for i in range(1, len(list_of_equiv_flag_repr) - 1): # last one contains type + dict_option_to_primary_repr[ + list_of_equiv_flag_repr[i] + ] = list_of_equiv_flag_repr[0] return dict_option_to_primary_repr + # moved ot IOInfoGenerator # def 
get_dict_option_to_class_for_arg(json_data) -> Dict[str, WhichClassForArg]: # dict_option_to_class_for_arg: Dict[str, WhichClassForArg] = dict() @@ -113,10 +131,16 @@ def get_dict_option_to_primary_repr(json_data): # dict_option_to_class_for_arg[option_name] = WhichClassForArg.ARGSTRING # return dict_option_to_class_for_arg + def are_all_individually_flags(potential_flag_or_option, set_of_all_flags): - if potential_flag_or_option[0] != '-' or potential_flag_or_option == '-': + if potential_flag_or_option[0] != "-" or potential_flag_or_option == "-": return False - return all(f'-{split_el}' in set_of_all_flags for split_el in list(potential_flag_or_option[1:])) + return all( + f"-{split_el}" in set_of_all_flags + for split_el in list(potential_flag_or_option[1:]) + ) + + # # class WhichClassForArg(Enum): # FILENAME = 'filename' diff --git a/pash_annotations/parser/tests/test_parser_cat.py b/pash_annotations/parser/tests/test_parser_cat.py index f5544ba..f85f077 100644 --- a/pash_annotations/parser/tests/test_parser_cat.py +++ b/pash_annotations/parser/tests/test_parser_cat.py @@ -1,16 +1,14 @@ -from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import Operand -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import Operand +from pash_annotations.datatypes.command_invocation_initial import CommandInvocationInitial from pash_annotations.parser.parser import parse def test_cat_1(): parser_result = parse("cat -b -e in1.txt in2.txt") - args = [make_arg_simple(["-b"]), - make_arg_simple(["-e"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] + args = [make_arg_simple(["-b"]), make_arg_simple(["-e"])] + operands = [Operand("in1.txt"), Operand("in2.txt")] expected_result = CommandInvocationInitial("cat", args, operands) assert expected_result == 
parser_result @@ -21,8 +19,7 @@ def test_cat_2(): parser_result = parse("cat -be in1.txt in2.txt") args = [make_arg_simple(["-b"]), make_arg_simple(["-e"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] + operands = [Operand("in1.txt"), Operand("in2.txt")] expected_result = CommandInvocationInitial("cat", args, operands) assert expected_result == parser_result diff --git a/pash_annotations/parser/tests/test_parser_grep.py b/pash_annotations/parser/tests/test_parser_grep.py index 8fec79d..bb7b0a7 100644 --- a/pash_annotations/parser/tests/test_parser_grep.py +++ b/pash_annotations/parser/tests/test_parser_grep.py @@ -1,15 +1,17 @@ -from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import ArgStringType, Operand -from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import Operand +from pash_annotations.datatypes.command_invocation_initial import CommandInvocationInitial from pash_annotations.parser.parser import parse def test_grep_1(): parser_result = parse(r"grep -e '^\s*def' -m 3 -n test.py") - args = [make_arg_simple(["-e", r"^\s*def"]), - make_arg_simple(["-m", "3"]), - make_arg_simple(["-n"])] + args = [ + make_arg_simple(["-e", r"^\s*def"]), + make_arg_simple(["-m", "3"]), + make_arg_simple(["-n"]), + ] operands = [Operand("test.py")] expected_result = CommandInvocationInitial("grep", args, operands) diff --git a/pash_annotations/parser/tests/test_parser_sort.py b/pash_annotations/parser/tests/test_parser_sort.py index c152cb5..e73e6ac 100644 --- a/pash_annotations/parser/tests/test_parser_sort.py +++ b/pash_annotations/parser/tests/test_parser_sort.py @@ -1,6 +1,6 @@ -from pash_annotations.util_flag_option import make_arg_simple -from pash_annotations.datatypes.BasicDatatypes import Operand, FileName -from 
pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial +from pash_annotations.util_flag_option import make_arg_simple +from pash_annotations.datatypes.basic_datatypes import Operand +from pash_annotations.datatypes.command_invocation_initial import CommandInvocationInitial from pash_annotations.parser.parser import parse @@ -8,8 +8,7 @@ def test_sort_1(): parser_result = parse("sort in1.txt in2.txt") args = [] - operands = [Operand("in1.txt"), - Operand("in2.txt")] + operands = [Operand("in1.txt"), Operand("in2.txt")] expected_result = CommandInvocationInitial("sort", args, operands) assert expected_result == parser_result @@ -19,19 +18,18 @@ def test_sort_2(): parser_result = parse("sort -b -o result.txt in1.txt in2.txt") args = [make_arg_simple(["-b"]), make_arg_simple(["-o", "result.txt"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] + operands = [Operand("in1.txt"), Operand("in2.txt")] expected_result = CommandInvocationInitial("sort", args, operands) assert expected_result == parser_result + def test_sort_3(): # this tests whether options will be mapped to their primary representation parser_result = parse("sort -b --output result.txt in1.txt in2.txt") args = [make_arg_simple(["-b"]), make_arg_simple(["-o", "result.txt"])] - operands = [Operand("in1.txt"), - Operand("in2.txt")] + operands = [Operand("in1.txt"), Operand("in2.txt")] expected_result = CommandInvocationInitial("sort", args, operands) assert expected_result == parser_result diff --git a/pash_annotations/parser/util_parser.py b/pash_annotations/parser/util_parser.py index 4cfd818..35cc374 100644 --- a/pash_annotations/parser/util_parser.py +++ b/pash_annotations/parser/util_parser.py @@ -1,17 +1,22 @@ import json -import os import pkgutil + def get_json_data(cmd_name): - command_json_fn = f'{cmd_name}.json' + command_json_fn = f"{cmd_name}.json" # get man page data for command as dict try: - json_data_bytes = pkgutil.get_data(__name__, 
'command_flag_option_info/data/' + command_json_fn) + json_data_bytes = pkgutil.get_data( + __name__, "command_flag_option_info/data/" + command_json_fn + ) except FileNotFoundError: try: - json_data_bytes = pkgutil.get_data(__name__, 'command_flag_option_info/data/_default_data_for_commands.json') + json_data_bytes = pkgutil.get_data( + __name__, + "command_flag_option_info/data/_default_data_for_commands.json", + ) except FileNotFoundError: - raise Exception(f'json-File for default values not found.') + raise Exception(f"json-File for default values not found.") json_data = json.loads(json_data_bytes) return json_data @@ -30,4 +35,3 @@ def get_json_data(cmd_name): # except FileNotFoundError: # raise Exception(f'json-File for default values not found.') # return json_data - diff --git a/pash_annotations/rename_files.sh b/pash_annotations/rename_files.sh new file mode 100755 index 0000000..6abb97e --- /dev/null +++ b/pash_annotations/rename_files.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +directory="/home/castlehoney/repos/research/annotations/pash_annotations/annotation_generation/annotation_generators" + +for filename in $directory/*; do + if [ -f "$filename" ]; then + # Get the file name without the path + base=$(basename "$filename") + + # Convert camelCase to snake_case + snake_case=$(echo "$base" | sed 's/\([a-z]\)\([A-Z]\)/\1_\2/g' | tr '[:upper:]' '[:lower:]') + + # Construct the new file path + new_path="$directory/$snake_case" + + # Rename the file + mv "$filename" "$new_path" + + echo "Renamed: $base -> $snake_case" + fi +done + diff --git a/pash_annotations/util.py b/pash_annotations/util.py index 554b76d..899673a 100644 --- a/pash_annotations/util.py +++ b/pash_annotations/util.py @@ -1,19 +1,24 @@ -from pash_annotations.datatypes.BasicDatatypes import * +from pash_annotations.datatypes.basic_datatypes import * from typing import Any, List, Optional, Union, TypeVar TType = TypeVar("TType") + def foldl(func, acc0, some_list): acc = acc0 for el in some_list: - 
acc = func(el, acc) + acc = func(el, acc) return acc def list_deduplication(list_to_dedup: List[TType]) -> List[TType]: deduplicated_input_list = list() # side-effectful comprehension - [deduplicated_input_list.append(item) for item in list_to_dedup if item not in deduplicated_input_list] + [ + deduplicated_input_list.append(item) + for item in list_to_dedup + if item not in deduplicated_input_list + ] return deduplicated_input_list @@ -30,27 +35,36 @@ def compute_actual_el_for_output(output_el: str) -> FileNameOrStdDescriptor: else: return FileName(output_el) -def return_empty_flag_option_list_if_none_else_itself(arg: Optional[List[FlagOption]]) -> List[FlagOption]: + +def return_empty_flag_option_list_if_none_else_itself( + arg: Optional[List[FlagOption]], +) -> List[FlagOption]: if arg is None: return [] else: return arg -def return_empty_pos_config_list_if_none_else_itself(arg: Optional[List[OptionArgPosConfigType]]) -> List[OptionArgPosConfigType]: + +def return_empty_pos_config_list_if_none_else_itself( + arg: Optional[List[OptionArgPosConfigType]], +) -> List[OptionArgPosConfigType]: if arg is None: return [] else: return arg -def return_empty_list_if_none_else_itself(arg: Optional[TType]) -> Union[TType, List[Any]]: #list always empty + +def return_empty_list_if_none_else_itself( + arg: Optional[TType], +) -> Union[TType, List[Any]]: # list always empty if arg is None: return [] else: return arg + def return_default_if_none_else_itself(arg: Optional[TType], default: TType) -> TType: if arg is None: return default else: return arg - diff --git a/pash_annotations/util_flag_option.py b/pash_annotations/util_flag_option.py index cc7d2e8..5002dbe 100644 --- a/pash_annotations/util_flag_option.py +++ b/pash_annotations/util_flag_option.py @@ -1,4 +1,5 @@ -from pash_annotations.datatypes.BasicDatatypes import FlagOption, Option, Flag +from pash_annotations.datatypes.basic_datatypes import FlagOption, Option, Flag + def make_arg_simple(arg: list) -> FlagOption: if 
len(arg) == 1: