diff --git a/schematic/help.py b/schematic/help.py
index 65c030ac8..34b3a981b 100644
--- a/schematic/help.py
+++ b/schematic/help.py
@@ -219,6 +219,13 @@
         "output_jsonld": (
             "Path to where the generated JSON-LD file needs to be outputted."
         ),
+        "output_type": (
+            "Output format to export the schema. "
+            "Options are 'jsonld', 'graph', 'all'. Default is 'jsonld'. "
+            "'jsonld' will output the schema as a JSON-LD file. "
+            "'graph' will output a networkx graph object of the schema as a pickle file. "
+            "'all' will output both the JSON-LD file and the graph object."
+        ),
         "data_model_labels": DATA_MODEL_LABELS_HELP,
     }
 }
diff --git a/schematic/manifest/generator.py b/schematic/manifest/generator.py
index 69b86e136..d967930da 100644
--- a/schematic/manifest/generator.py
+++ b/schematic/manifest/generator.py
@@ -32,6 +32,7 @@
     DisplayLabelType,
     extract_component_validation_rules,
 )
+from schematic.utils.io_utils import read_pickle
 from schematic.utils.validate_utils import rule_in_rule_list

 logger = logging.getLogger(__name__)
@@ -1675,11 +1676,15 @@ def create_manifests(
         title: Optional[str] = None,
         strict: Optional[bool] = True,
         use_annotations: Optional[bool] = False,
+        graph_data_model: Optional[nx.MultiDiGraph] = None,
+        data_model_graph_pickle: Optional[str] = None,
     ) -> Union[List[str], List[pd.DataFrame]]:
         """Create multiple manifests

         Args:
             path_to_data_model (str): str path to data model
+            data_model_graph_pickle (str, optional): path to a pickled networkx MultiDiGraph object. Defaults to None.
+            graph_data_model (nx.MultiDiGraph, optional): A networkx MultiDiGraph object. Defaults to None.
             data_types (list): a list of data types
             access_token (str, optional): synapse access token. Required when getting an existing manifest. Defaults to None.
             dataset_ids (list, optional): a list of dataset ids when generating an existing manifest. Defaults to None.
@@ -1737,16 +1742,25 @@ def create_manifests(
                 "Please check your submission and try again."
             )

-        data_model_parser = DataModelParser(path_to_data_model=path_to_data_model)
+        if graph_data_model is None:
+            if data_model_graph_pickle:
+                # Note: the pickled graph is loaded fully into memory; very large graphs may not fit.
+                graph_data_model = read_pickle(data_model_graph_pickle)
+            else:
+                data_model_parser = DataModelParser(
+                    path_to_data_model=path_to_data_model
+                )

-        # Parse Model
-        parsed_data_model = data_model_parser.parse_model()
+                # Parse Model
+                parsed_data_model = data_model_parser.parse_model()

-        # Instantiate DataModelGraph
-        data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
+                # Instantiate DataModelGraph
+                data_model_grapher = DataModelGraph(
+                    parsed_data_model, data_model_labels
+                )

-        # Generate graph
-        graph_data_model = data_model_grapher.graph
+                # Generate graph
+                graph_data_model = data_model_grapher.graph

         # Gather all returned result urls
         all_results = []
diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py
index 1e44c13e0..f7669e835 100644
--- a/schematic/models/metadata.py
+++ b/schematic/models/metadata.py
@@ -19,6 +19,7 @@
 # we shouldn't need to expose Synapse functionality explicitly
 from schematic.store.synapse import SynapseStorage
 from schematic.utils.df_utils import load_df
+from schematic.utils.io_utils import read_pickle

 logger = logging.getLogger(__name__)
@@ -41,12 +42,14 @@ def __init__(
         inputMModelLocation: str,
         inputMModelLocationType: str,
         data_model_labels: str,
+        data_model_graph_pickle: Optional[str] = None,
     ) -> None:
         """Instantiates a MetadataModel object.

         Args:
             inputMModelLocation: local path, uri, synapse entity id (e.g. gs://, syn123, /User/x/…); present location
             inputMModelLocationType: specifier to indicate where the metadata model resource can be found (e.g. 'local' if file/JSON-LD is on local machine)
+            data_model_graph_pickle: filepath to a data model graph stored as a pickle file. Defaults to None.
         """
         # extract extension of 'inputMModelLocation'
         # ensure that it is necessarily pointing to a '.jsonld' file
@@ -59,17 +62,24 @@ def __init__(
         self.inputMModelLocation = inputMModelLocation
         self.path_to_json_ld = inputMModelLocation

-        data_model_parser = DataModelParser(path_to_data_model=self.inputMModelLocation)
-        # Parse Model
-        parsed_data_model = data_model_parser.parse_model()
+        # Use the graph, if provided. Otherwise parse the data model for the graph.
+        if data_model_graph_pickle:
+            self.graph_data_model = read_pickle(data_model_graph_pickle)
+            self.dmge = DataModelGraphExplorer(self.graph_data_model)
+        else:
+            data_model_parser = DataModelParser(
+                path_to_data_model=self.inputMModelLocation
+            )
+            # Parse Model
+            parsed_data_model = data_model_parser.parse_model()

-        # Instantiate DataModelGraph
-        data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
+            # Instantiate DataModelGraph
+            data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

-        # Generate graph
-        self.graph_data_model = data_model_grapher.graph
+            # Generate graph
+            self.graph_data_model = data_model_grapher.graph

-        self.dmge = DataModelGraphExplorer(self.graph_data_model)
+            self.dmge = DataModelGraphExplorer(self.graph_data_model)

         # check if the type of MModel file is "local"
         # currently, the application only supports reading from local JSON-LD files
diff --git a/schematic/schemas/commands.py b/schematic/schemas/commands.py
index 5b143f640..84d4557ec 100644
--- a/schematic/schemas/commands.py
+++ b/schematic/schemas/commands.py
@@ -3,7 +3,7 @@
 import logging
 import time
 import re
-from typing import get_args, Optional, Any
+from typing import get_args, Optional, Any, Literal

 import click
 import click_log  # type: ignore
@@ -17,7 +17,7 @@
 from schematic.utils.schema_utils import DisplayLabelType
 from schematic.utils.cli_utils import query_dict
-from schematic.utils.schema_utils import export_schema
+from schematic.utils.schema_utils import export_schema, export_graph
 from schematic.help import schema_commands

 logger = logging.getLogger("schematic")
@@ -59,9 +59,21 @@ def schema() -> None:  # use as `schematic model ...`
     metavar="",
     help=query_dict(schema_commands, ("schema", "convert", "output_jsonld")),
 )
+@click.option("--output_path", help="Alias for --output_jsonld")
+@click.option(
+    "--output_type",
+    "-ot",
+    type=click.Choice(["jsonld", "graph", "all"], case_sensitive=False),
+    default="jsonld",
+    help=query_dict(schema_commands, ("schema", "convert", "output_type")),
+)
 def convert(
-    schema: Any, data_model_labels: DisplayLabelType, output_jsonld: Optional[str]
-) -> None:
+    schema: Any,
+    data_model_labels: DisplayLabelType,
+    output_jsonld: Optional[str],
+    output_type: Optional[Literal["jsonld", "graph", "all"]],
+    output_path: Optional[str],
+) -> int:
     """
     Running CLI to convert a data model specification in CSV format to a data model in JSON-LD format.
@@ -80,7 +92,7 @@ def convert(
     data_model_parser = DataModelParser(schema)

     # Parse Model
-    logger.info("Parsing data model.")
+    click.echo("Parsing data model.")
     parsed_data_model = data_model_parser.parse_model()

     # Convert parsed model to graph
@@ -88,11 +100,11 @@ def convert(
     data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

     # Generate graphschema
-    logger.info("Generating data model graph.")
+    click.echo("Generating data model graph.")
     graph_data_model = data_model_grapher.graph

     # Validate generated data model.
-    logger.info("Validating the data model internally.")
+    click.echo("Validating the data model internally.")
     data_model_validator = DataModelValidator(graph=graph_data_model)
     data_model_errors, data_model_warnings = data_model_validator.run_checks()
@@ -114,22 +126,32 @@ def convert(
         for warning in war:
             logger.warning(warning)

-    logger.info("Converting data model to JSON-LD")
+    if output_path:
+        output_jsonld = output_path
+
+    if output_jsonld is None:
+        output_file_no_ext = re.sub("[.](jsonld|csv|pickle)$", "", schema)
+    else:
+        output_file_no_ext = re.sub("[.](jsonld|csv|pickle)$", "", output_jsonld)
+
+    click.echo(
+        "By default, the JSON-LD output will be stored alongside the first "
+        f"input CSV or JSON-LD file. In this case, it will appear here: '{output_file_no_ext}.jsonld'. "
+        "You can use the `--output_jsonld` argument to specify another file path."
+    )
+
+    if output_type in ["graph", "all"]:
+        output_graph = output_file_no_ext + ".pickle"
+        click.echo(f"Saving data model graph to '{output_graph}'.")
+        export_graph(graph_data_model, output_graph)
+        if output_type == "graph":
+            return 0
+
+    click.echo("Converting data model to JSON-LD")
     jsonld_data_model = convert_graph_to_jsonld(graph=graph_data_model)

     # output JSON-LD file alongside CSV file by default, get path.
-    if output_jsonld is None:
-        if not ".jsonld" in schema:
-            csv_no_ext = re.sub("[.]csv$", "", schema)
-            output_jsonld = csv_no_ext + ".jsonld"
-        else:
-            output_jsonld = schema
-
-        logger.info(
-            "By default, the JSON-LD output will be stored alongside the first "
-            f"input CSV or JSON-LD file. In this case, it will appear here: '{output_jsonld}'. "
-            "You can use the `--output_jsonld` argument to specify another file path."
-        )
+    output_jsonld = output_file_no_ext + ".jsonld"

     # saving updated schema.org schema
     try:
@@ -137,13 +159,11 @@ def convert(
         click.echo(
             f"The Data Model was created and saved to '{output_jsonld}' location."
         )
-    except:  # pylint: disable=bare-except
-        click.echo(
-            (
-                f"The Data Model could not be created by using '{output_jsonld}' location. "
-                "Please check your file path again"
-            )
-        )
+    except Exception as exc:
+        raise ValueError(
+            f"The Data Model could not be saved to '{output_jsonld}'. "
+            "Please check your file path and try again."
+        ) from exc

     # get the end time
     end_time = time.time()

     # get the execution time
     elapsed_time = time.strftime("%M:%S", time.gmtime(end_time - start_time))
     click.echo(f"Execution time: {elapsed_time} (M:S)")
+    return 0
diff --git a/schematic/utils/io_utils.py b/schematic/utils/io_utils.py
index a0bb9d241..0fec4542a 100644
--- a/schematic/utils/io_utils.py
+++ b/schematic/utils/io_utils.py
@@ -5,6 +5,7 @@
 import time
 import urllib.request
 from typing import Any
+import pickle


 def load_json(file_path: str) -> Any:
@@ -49,6 +50,17 @@ def load_schemaorg() -> Any:
     return load_json(schema_org_path)


+def read_pickle(file_path: str) -> Any:
+    """Read a pickle file, raising an error if the path does not exist or is not a .pkl or .pickle file. Note that unpickling can execute arbitrary code, so only load trusted files."""
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+    if not file_path.lower().endswith((".pkl", ".pickle")):
+        raise ValueError(f"File is not a pickle file: {file_path}")
+    with open(file_path, "rb") as fle:
+        data = pickle.load(fle)
+    return data
+
+
 def cleanup_temporary_storage(
     temporary_storage_directory: str, time_delta_seconds: int
 ) -> None:
diff --git a/schematic/utils/schema_utils.py b/schematic/utils/schema_utils.py
index bbf34940f..a0b260835 100644
--- a/schematic/utils/schema_utils.py
+++ b/schematic/utils/schema_utils.py
@@ -7,6 +7,7 @@
 import os
 import string
 from typing import Literal, Union, Optional
+import pickle

 import inflection
@@ -500,3 +501,20 @@ def get_json_schema_log_file_path(data_model_path: str, source_node: str) -> str:
         prefix = prefix_root
     json_schema_log_file_path = f"{prefix}.{source_node}.schema.json"
     return json_schema_log_file_path
+
+
+def export_graph(schema: dict, file_path: str) -> None:
+    """Write a data model graph to a pickle file.
+    Args:
+        schema, dict: A data model graph to export
+        file_path, str: File to create
+    """
+    try:
+        with open(file_path, "wb") as file:
+            pickle.dump(schema, file)
+        logger.info(f"The graph was created and saved to '{file_path}'.")
+    except Exception as error:
+        logger.exception(
+            f"The graph failed to save to '{file_path}'. Please check your file path again."
+        )
+        raise error
diff --git a/schematic/visualization/attributes_explorer.py b/schematic/visualization/attributes_explorer.py
index 668ea1374..90f335392 100644
--- a/schematic/visualization/attributes_explorer.py
+++ b/schematic/visualization/attributes_explorer.py
@@ -6,12 +6,13 @@
 import numpy as np
 import pandas as pd
+import networkx as nx  # type: ignore

 from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
 from schematic.schemas.data_model_json_schema import DataModelJSONSchema
-from schematic.schemas.data_model_parser import DataModelParser
-from schematic.utils.io_utils import load_json
 from schematic.utils.schema_utils import DisplayLabelType
+from schematic.utils.io_utils import load_json, read_pickle
+from schematic.schemas.data_model_parser import DataModelParser

 logger = logging.getLogger(__name__)
@@ -23,17 +24,28 @@ class AttributesExplorer:
     def __init__(
         self,
         path_to_jsonld: str,
-        data_model_labels: DisplayLabelType,
+        data_model_labels: DisplayLabelType = "class_label",
         data_model_grapher: Optional[DataModelGraph] = None,
         data_model_graph_explorer: Optional[DataModelGraphExplorer] = None,
         parsed_data_model: Optional[dict] = None,
+        graph_data_model: Optional[nx.MultiDiGraph] = None,
+        data_model_graph_pickle: Optional[str] = None,
     ) -> None:
         self.path_to_jsonld = path_to_jsonld
         self.jsonld = load_json(self.path_to_jsonld)

+        if graph_data_model is not None:
+            self.graph_data_model = graph_data_model
+        elif data_model_graph_pickle is not None:
+            data_model_graph = read_pickle(data_model_graph_pickle)
+            if not isinstance(data_model_graph, nx.MultiDiGraph):
+                raise ValueError(
+                    "The data model graph must be a networkx MultiDiGraph object."
+                )
+            self.graph_data_model = data_model_graph
+
         # Parse Model
-        if not parsed_data_model:
+        if parsed_data_model is None:
             data_model_parser = DataModelParser(
                 path_to_data_model=self.path_to_jsonld,
             )
@@ -43,15 +55,14 @@ def __init__(
         if not data_model_grapher:
             assert parsed_data_model is not None
             data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
-
-        # Generate graph
-        self.graph_data_model = data_model_grapher.graph
+            # Generate graph
+            self.graph_data_model = data_model_grapher.graph

         # Instantiate Data Model Graph Explorer
-        if not data_model_graph_explorer:
-            self.dmge = DataModelGraphExplorer(self.graph_data_model)
-        else:
+        if data_model_graph_explorer is not None:
             self.dmge = data_model_graph_explorer
+        else:
+            self.dmge = DataModelGraphExplorer(self.graph_data_model)

         # Instantiate Data Model Json Schema
         self.data_model_js = DataModelJSONSchema(
diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py
index 95eac7e75..7b86ae4a1 100644
--- a/schematic/visualization/tangled_tree.py
+++ b/schematic/visualization/tangled_tree.py
@@ -16,9 +16,9 @@
 from networkx.classes.reportviews import EdgeDataView, NodeView  # type: ignore
 from typing_extensions import assert_never

-from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
 from schematic.schemas.data_model_parser import DataModelParser
-from schematic.utils.io_utils import load_json
+from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
+from schematic.utils.io_utils import load_json, read_pickle
 from schematic.utils.schema_utils import DisplayLabelType
 from schematic.visualization.attributes_explorer import AttributesExplorer
@@ -43,14 +43,15 @@ class Node(TypedDict):
     children: list[str]


-class TangledTree:  # pylint: disable=too-many-instance-attributes
+class TangledTree:  # pylint: disable=too-many-instance-attributes, too-many-arguments
     """Tangled tree class"""

     def __init__(
         self,
         path_to_json_ld: str,
         figure_type: FigureType,
-        data_model_labels: DisplayLabelType,
+        data_model_labels: DisplayLabelType = "class_label",
+        data_model_graph_pickle: Optional[str] = None,
     ) -> None:
         # Load jsonld
         self.path_to_json_ld = path_to_json_ld
@@ -59,19 +60,20 @@ def __init__(
         # Parse schema name
         self.schema_name = path.basename(self.path_to_json_ld).split(".model.jsonld")[0]

-        # Instantiate Data Model Parser
-        data_model_parser = DataModelParser(
-            path_to_data_model=self.path_to_json_ld,
-        )
-
-        # Parse Model
-        parsed_data_model = data_model_parser.parse_model()
+        parsed_data_model = None

-        # Instantiate DataModelGraph
-        data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
+        # Instantiate Data Model Parser and generate graph
+        if data_model_graph_pickle is None:
+            data_model_parser = DataModelParser(
+                path_to_data_model=self.path_to_json_ld,
+            )
+            parsed_data_model = data_model_parser.parse_model()
+            data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
+            self.graph_data_model = data_model_grapher.graph

-        # Generate graph
-        self.graph_data_model = data_model_grapher.graph
+        else:
+            self.graph_data_model = read_pickle(data_model_graph_pickle)
+            # Pass the loaded graph through so AttributesExplorer does not regenerate it
+            data_model_grapher = self.graph_data_model

         # Instantiate Data Model Graph Explorer
         self.dmge = DataModelGraphExplorer(self.graph_data_model)
@@ -91,6 +93,8 @@ def __init__(
             data_model_grapher=data_model_grapher,
             data_model_graph_explorer=self.dmge,
             parsed_data_model=parsed_data_model,
+            graph_data_model=self.graph_data_model,
+            data_model_graph_pickle=data_model_graph_pickle,
         )

         # Create output paths.
diff --git a/schematic_api/api/openapi/api.yaml b/schematic_api/api/openapi/api.yaml
index 8689a8ae2..52c80d7d5 100644
--- a/schematic_api/api/openapi/api.yaml
+++ b/schematic_api/api/openapi/api.yaml
@@ -110,6 +110,15 @@ paths:
             class_label, default, use standard class or property label. Do not change from default unless there is a real need,
             using 'display_label' can have consequences if not used properly.
         required: false
+      - in: query
+        name: graph_url
+        schema:
+          type: string
+          nullable: true
+        description: URL to a pickled networkx graph of the data model
+        example: >-
+          url_to_graph_pickle_in_github
+        required: false
       operationId: schematic_api.api.routes.get_manifest_route
       responses:
         "200":
diff --git a/schematic_api/api/routes.py b/schematic_api/api/routes.py
index 0cda1f4ac..a7fc78d47 100644
--- a/schematic_api/api/routes.py
+++ b/schematic_api/api/routes.py
@@ -25,6 +25,7 @@
     DisplayLabelType,
     get_property_label_from_display_name,
 )
+from schematic.utils.io_utils import read_pickle
 from schematic.visualization.attributes_explorer import AttributesExplorer
 from schematic.visualization.tangled_tree import TangledTree
@@ -213,6 +214,19 @@ def initalize_metadata_model(schema_url, data_model_labels):
     return metadata_model


+def get_temp_file(url: str, suffix: str) -> str:
+    """
+    Retrieve a file via URL and store it in a temporary location
+    :param url str: URL to the file
+    :param suffix str: Suffix of the file
+    :return: Path to the temporary file
+    """
+    with urllib.request.urlopen(url) as response:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+            shutil.copyfileobj(response, tmp_file)
+            return tmp_file.name
+
+
 def get_temp_jsonld(schema_url):
     # retrieve a JSON-LD via URL and store it in a temporary location
     with urllib.request.urlopen(schema_url) as response:
@@ -237,7 +251,6 @@ def get_temp_csv(schema_url):
     ) as tmp_file:
         shutil.copyfileobj(response, tmp_file)

-    # get path to temporary csv file
     return tmp_file.name


@@ -246,12 +259,14 @@ def get_temp_model_path(schema_url):
     # Get model type:
     model_extension = pathlib.Path(schema_url).suffix.replace(".", "").upper()
     if model_extension == "CSV":
-        temp_path = get_temp_csv(schema_url)
+        temp_path = get_temp_file(schema_url, ".model.csv")
     elif model_extension == "JSONLD":
-        temp_path = get_temp_jsonld(schema_url)
+        temp_path = get_temp_file(schema_url, ".model.jsonld")
+    elif model_extension == "PICKLE":
+        temp_path = get_temp_file(schema_url, ".model.pickle")
     else:
         raise ValueError(
-            "Did not provide a valid model type CSV or JSONLD, please check submission and try again."
+            "Did not provide a valid model type (CSV, JSONLD, or PICKLE), please check your submission and try again."
         )
     return temp_path
@@ -267,6 +282,7 @@ def get_manifest_route(
     strict_validation: bool = True,
     data_model_labels: DisplayLabelType = "class_label",
     data_type: str = None,
+    graph_url: str = None,
 ):
     """Get the immediate dependencies that are related to a given source node.
     Args:
         schema_url: link to data model in json ld format
         title: title of a given manifest.
         dataset_id: Synapse ID of the "dataset" entity on Synapse (for a given center/project).
         data_type: data model components.
         output_format: contains three option: "excel", "google_sheet", and "dataframe". if set to "excel", return an excel spreadsheet
         use_annotations: Whether to use existing annotations during manifest generation
         asset_view: ID of view listing all project data assets. For example, for Synapse this would be the Synapse ID of the fileview listing all data assets for a given project.
         strict: bool, strictness with which to apply validation rules to google sheets.
+        graph_url: str, URL to a pickled graph object.
     Returns:
         Googlesheet URL (if sheet_url is True), or pandas dataframe (if sheet_url is False).
""" @@ -286,6 +303,11 @@ def get_manifest_route( config_handler(asset_view=asset_view) + graph_data_model = None + if graph_url is not None: + graph_path = get_temp_model_path(graph_url) + graph_data_model = read_pickle(graph_path) + all_results = ManifestGenerator.create_manifests( path_to_data_model=schema_url, output_format=output_format, @@ -296,6 +318,7 @@ def get_manifest_route( strict=strict_validation, use_annotations=use_annotations, data_model_labels=data_model_labels, + graph_data_model=graph_data_model, ) # return an excel file if output_format is set to "excel" diff --git a/tests/data/example.display.label.model.pickle b/tests/data/example.display.label.model.pickle new file mode 100644 index 000000000..45f3957ee Binary files /dev/null and b/tests/data/example.display.label.model.pickle differ diff --git a/tests/data/example.model.pickle b/tests/data/example.model.pickle new file mode 100644 index 000000000..116e0007f Binary files /dev/null and b/tests/data/example.model.pickle differ diff --git a/tests/data/htan.model.pickle b/tests/data/htan.model.pickle new file mode 100644 index 000000000..c58efd025 Binary files /dev/null and b/tests/data/htan.model.pickle differ diff --git a/tests/test_cli.py b/tests/test_cli.py index a6e4e8ef7..7eb5285ac 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,7 @@ import os + +import pickle +import json from unittest.mock import patch import pytest @@ -45,18 +48,76 @@ def assert_expected_file(self, result, output_path): except: pass - def test_schema_convert_cli(self, runner, helpers): - data_model_csv_path = helpers.get_data_path("example.model.csv") + @pytest.mark.parametrize( + "output_path", + [ + # Test case 1: pickle file passed to output_path + "tests/data/example.model.pickle", + # Test case 2: jsonld file passed to output_path + "tests/data/example.model.jsonld", + ], + ids=["output_path_pickle", "output_path_jsonld"], + ) + @pytest.mark.parametrize( + "output_type", + [ + # Test case 1: jsonld passed to output_type + "jsonld", + # Test case 2: graph passed to output_type + "graph", + # Test case 3: both jsonld and graph are created + "all", + ], + ids=["output_type_jsonld", "output_type_graph", "output_type_all"], + ) + def test_schema_convert_cli(self, runner, output_path, output_type): + model = "tests/data/example.model.csv" + label_type = "class_label" + expected = 0 + + result_one = runner.invoke(schema, ["convert", model]) - output_path = helpers.get_data_path("example.model.jsonld") + assert result_one.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) - label_type = "class_label" + result_two = runner.invoke( + schema, ["convert", model, "--output_path", output_path] + ) + + assert result_two.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) + + result_three = runner.invoke( + schema, ["convert", model, "--output_type", output_type] + ) + + assert result_three.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) + + result_four = runner.invoke( + schema, + [ + "convert", + model, + "--output_type", + output_type, + "--output_jsonld", + output_path, + ], + ) + + assert result_four.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) result = runner.invoke( schema, [ "convert", - data_model_csv_path, + model, "--output_jsonld", output_path, "--data_model_labels", @@ -64,13 
+125,33 @@ def test_schema_convert_cli(self, runner, helpers): ], ) - assert result.exit_code == 0 + assert result.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) - expected_substr = ( - "The Data Model was created and saved to " f"'{output_path}' location." + result_five = runner.invoke( + schema, + [ + "convert", + model, + "--output_jsonld", + "tests/data/example.model.pickle", + "--output_path", + "tests/data/example.model.pickle", + ], ) - assert expected_substr in result.output + assert result_five.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) + + result_six = runner.invoke( + schema, ["convert", model, "--output_jsonld", "", "--output_path", ""] + ) + + assert result_six.exit_code == expected + # check output_path file is created then remove it + assert os.path.exists(output_path) # get manifest by default # by default this should download the manifest as a CSV file diff --git a/tests/test_manifest.py b/tests/test_manifest.py index ade80fbe9..cdf5c5e88 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -759,6 +759,7 @@ def test_create_manifests( output_format="google_sheet", use_annotations=False, data_model_labels="class_label", + data_model_graph_pickle=helpers.get_data_path("example.model.pickle"), ) assert all_results == expected_result diff --git a/tests/test_metadata.py b/tests/test_metadata.py index fca1d3db5..9e8b331cc 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -20,6 +20,22 @@ def metadata_model(helpers, data_model_labels): inputMModelLocation=helpers.get_data_path("example.model.jsonld"), data_model_labels=data_model_labels, inputMModelLocationType="local", + data_model_graph_pickle=helpers.get_data_path("example.model.pickle"), + ) + + return metadata_model + + +def metadata_model_display(helpers, data_model_labels): + # Test which labels are displayed to the user. 
+    # Can be either 'display_label' or 'class_label'
+    metadata_model = MetadataModel(
+        inputMModelLocation=helpers.get_data_path("example.model.jsonld"),
+        data_model_labels=data_model_labels,
+        inputMModelLocationType="local",
+        data_model_graph_pickle=helpers.get_data_path(
+            "example.display.label.model.pickle"
+        ),
     )

     return metadata_model
@@ -34,7 +50,11 @@ class TestMetadataModel:
     )
     def test_get_component_requirements(self, helpers, as_graph, data_model_labels):
         # Instantiate MetadataModel
-        meta_data_model = metadata_model(helpers, data_model_labels)
+        # Set up a metadata model with the given data model labels
+        if data_model_labels == "class_label":
+            meta_data_model = metadata_model(helpers, data_model_labels)
+        else:
+            meta_data_model = metadata_model_display(helpers, data_model_labels)

         if data_model_labels == "display_label":
             source_component = "BulkRNAseqAssay"
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5883ef4af..056507e60 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -28,6 +28,7 @@
 from schematic.schemas.data_model_parser import DataModelParser
 from schematic.utils import cli_utils, df_utils, general, io_utils, validate_utils
 from schematic.utils.df_utils import load_df
+import pickle
 from schematic.utils.general import (
     calculate_datetime,
     check_synapse_cache_size,
@@ -49,6 +50,10 @@
     get_stripped_label,
     parse_single_set_validation_rules,
     parse_validation_rules,
+    extract_component_validation_rules,
+    check_for_duplicate_components,
+    get_json_schema_log_file_path,
+    export_graph,
     strip_context,
 )
@@ -462,6 +467,19 @@ def test_load_schema_org(self):
         actual_graph_keys = len(schema_org_schema["@graph"])
         assert expected_graph_keys == actual_graph_keys

+    def test_read_pickle(self):
+        # Test that the function can read a pickle file
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pickle_file = tmpdir + "/test.pkl"
+            with open(pickle_file, "wb") as f:
+                pickle.dump({"foo": "bar"}, f)
+            assert io_utils.read_pickle(pickle_file) == {"foo": "bar"}
+
+    def test_read_pickle_invalid_file(self):
+        # Test that the function raises an error when trying to read an invalid file
+        with pytest.raises(FileNotFoundError):
+            io_utils.read_pickle("invalid_file.pkl")
+

 class TestDfUtils:
     @pytest.mark.parametrize(
@@ -1004,6 +1022,51 @@ def test_get_label_from_display_name(self, test_dn: str, data_model_labels: str)
             return
         return

+
+class TestExportGraph:
+    def test_export_graph_success(self, tmp_path):
+        # Create a temporary file path
+        file_path = tmp_path / "graph.pickle"
+
+        # Define a sample schema
+        schema = {
+            "node1": {"edges": ["node2", "node3"]},
+            "node2": {"edges": []},
+            "node3": {"edges": []},
+        }
+
+        # Call the export_graph function
+        export_graph(schema, str(file_path))
+
+        # Check if the file exists
+        assert file_path.exists()
+
+        # Load the saved schema from the file
+        with open(file_path, "rb") as file:
+            saved_schema = pickle.load(file)
+
+        # Check if the saved schema is equal to the original schema
+        assert saved_schema == schema
+
+    def test_export_graph_failure(self, tmp_path, caplog):
+        # Define a sample schema
+        schema = {
+            "node1": {"edges": ["node2", "node3"]},
+            "node2": {"edges": []},
+            "node3": {"edges": []},
+        }
+
+        # Set the file path to a non-existent directory
+        invalid_file_path = str(tmp_path / "non_existent_directory" / "graph.pickle")
+
+        # Call the export_graph function with an invalid file path
+        # and catch the exception
+        with pytest.raises(FileNotFoundError):
+            export_graph(schema, invalid_file_path)
+
 @pytest.mark.parametrize(
     "data_model", list(DATA_MODEL_DICT.keys()), ids=list(DATA_MODEL_DICT.values())
 )
diff --git a/tests/test_viz.py b/tests/test_viz.py
index b94d79688..fb10cf49d 100644
--- a/tests/test_viz.py
+++ b/tests/test_viz.py
@@ -13,30 +13,67 @@
 logger = logging.getLogger(__name__)


-@pytest.fixture
-def attributes_explorer(helpers):
+@pytest.fixture(
+    params=[
+        # Test case 1: pass jsonld and pickle file
+        ("example.model.jsonld", "example.model.pickle"),
+        # Test case 2: only pass jsonld file
+        ("example.model.jsonld", ""),
+        # Test case 3: don't provide jsonld or pickle file; should fail
+        pytest.param(("", ""), marks=pytest.mark.xfail),
+        # Test case 4: pass only pickle file; should fail
+        pytest.param(("", "example.model.pickle"), marks=pytest.mark.xfail),
+    ]
+)
+def attributes_explorer(request, helpers):
     # Get JSONLD file path
-    path_to_jsonld = helpers.get_data_path("example.model.jsonld")
+    param1, param2 = request.param
+    path_to_jsonld = helpers.get_data_path(param1)
+    path_to_graph = helpers.get_data_path(param2)

     # Initialize TangledTree
-    attributes_explorer = AttributesExplorer(
-        path_to_jsonld,
-        data_model_labels="class_label",
-    )
+    if param2 != "":
+        attributes_explorer = AttributesExplorer(
+            path_to_jsonld,
+            data_model_graph_pickle=path_to_graph,
+            data_model_labels="class_label",
+        )
+    else:
+        attributes_explorer = AttributesExplorer(
+            path_to_jsonld,
+            data_model_labels="class_label",
+        )
     yield attributes_explorer


-@pytest.fixture
-def tangled_tree(helpers):
+@pytest.fixture(
+    params=[
+        ("example.model.jsonld", "example.model.pickle"),
+        ("example.model.jsonld", ""),
+        pytest.param(("", ""), marks=pytest.mark.xfail),
+        pytest.param(("", "example.model.pickle"), marks=pytest.mark.xfail),
+    ]
+)
+def tangled_tree(helpers, request):
     figure_type = "component"

     # Get JSONLD file path
-    path_to_jsonld = helpers.get_data_path("example.model.jsonld")
+    param1, param2 = request.param
+    path_to_jsonld = helpers.get_data_path(param1)
+    path_to_graph = helpers.get_data_path(param2)

     # Initialize TangledTree
-    tangled_tree = TangledTree(
-        path_to_jsonld, figure_type, data_model_labels="class_label"
-    )
+    if param2 == "":
+        tangled_tree = TangledTree(
+            path_to_jsonld, figure_type, data_model_labels="class_label"
+        )
+    else:
+        tangled_tree = TangledTree(
+            path_to_jsonld,
+            figure_type,
+            data_model_labels="class_label",
+            data_model_graph_pickle=path_to_graph,
+        )
    yield tangled_tree
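
For reviewers, a minimal sketch of the round-trip this patch enables (file paths are illustrative, and the CLI invocation assumes schematic's usual `schematic schema convert` entry point; `example.model.copy.pickle` is a hypothetical output name):

```python
# First produce both outputs from a CSV model, e.g.:
#   schematic schema convert tests/data/example.model.csv --output_type all
# which should write example.model.jsonld and example.model.pickle next to the CSV.

import networkx as nx

from schematic.models.metadata import MetadataModel
from schematic.utils.io_utils import read_pickle
from schematic.utils.schema_utils import export_graph

# Load the graph written by `--output_type graph` / `all`; read_pickle enforces
# the .pkl/.pickle extension and raises FileNotFoundError on a bad path.
graph = read_pickle("tests/data/example.model.pickle")
assert isinstance(graph, nx.MultiDiGraph)

# Skip re-parsing the JSON-LD by handing the pickled graph to MetadataModel.
metadata_model = MetadataModel(
    inputMModelLocation="tests/data/example.model.jsonld",
    inputMModelLocationType="local",
    data_model_labels="class_label",
    data_model_graph_pickle="tests/data/example.model.pickle",
)

# Re-export the loaded graph (what `--output_type graph` does under the hood).
export_graph(graph, "tests/data/example.model.copy.pickle")  # hypothetical path
```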