From c40b821f9b2ee22cbf39ecf11232ce160d09f17a Mon Sep 17 00:00:00 2001 From: james hadfield Date: Mon, 6 May 2024 12:59:11 +1200 Subject: [PATCH] [export] refactor to always use a list of trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always use a list of trees, even if it's a one-item list. This simplifies internal logic by avoiding the need to check data type on every reference. Requested via code review¹ ¹ --- augur/export_v2.py | 54 ++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/augur/export_v2.py b/augur/export_v2.py index fbfd8e5c4..9fad7cd98 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -473,7 +473,7 @@ def _get_colorings(): data_json['meta']["colorings"] = colorings -def set_geo_resolutions(data_json, config, command_line_traits, lat_long_mapping, node_attrs): +def set_geo_resolutions(trees, meta_json, config, command_line_traits, lat_long_mapping, node_attrs): """ appropriately combine provided geo resolutions from command line & config files and associate with lat/longs. @@ -535,15 +535,11 @@ def _transfer_geo_data(node): else: warn("Geo resolution \"{}\" had no demes with supplied lat/longs and will be excluded from the exported \"geo_resolutions\".".format(trait_info["key"])) - if isinstance(data_json['tree'], list): - for subtree in data_json['tree']: - _transfer_geo_data(subtree) - else: - _transfer_geo_data(data_json['tree']) - + for subtree in trees: + _transfer_geo_data(subtree) if geo_resolutions: - data_json['meta']["geo_resolutions"] = geo_resolutions + meta_json["geo_resolutions"] = geo_resolutions def set_annotations(data_json, node_data): @@ -743,9 +739,9 @@ def node_to_author_tuple(data): return node_author_info -def set_branch_attrs_on_tree(data_json, branch_attrs): +def set_branch_attrs_on_tree(trees, branch_attrs): """ - Shifts the provided `branch_attrs` onto the (auspice) `data_json`. + Shifts the provided `branch_attrs` onto the provided tree Currently all data is transferred, there is no way for (e.g.) the set of exported labels to be restricted by the user in a config. """ @@ -755,14 +751,11 @@ def _recursively_set_data(node): for child in node.get("children", []): _recursively_set_data(child) - if isinstance(data_json["tree"], list): - for subtree in data_json['tree']: - _recursively_set_data(subtree) - else: - _recursively_set_data(data_json["tree"]) + for subtree in trees: + _recursively_set_data(subtree) -def set_node_attrs_on_tree(data_json, node_attrs, additional_metadata_columns): +def set_node_attrs_on_tree(trees, meta_json, node_attrs, additional_metadata_columns): ''' Assign desired colorings, metadata etc to the `node_attrs` of nodes in the tree @@ -812,10 +805,10 @@ def _transfer_url_accession(node, raw_data): def _transfer_colorings_filters(node, raw_data): trait_keys = set() # order we add to the node_attrs is not important for auspice - if "colorings" in data_json["meta"]: - trait_keys = trait_keys.union([t["key"] for t in data_json["meta"]["colorings"]]) - if "filters" in data_json["meta"]: - trait_keys = trait_keys.union(data_json["meta"]["filters"]) + if "colorings" in meta_json: + trait_keys = trait_keys.union([t["key"] for t in meta_json["colorings"]]) + if "filters" in meta_json: + trait_keys = trait_keys.union(meta_json["filters"]) exclude_list = ["gt", "num_date", "author"] # exclude special cases already taken care of trait_keys = trait_keys.difference(exclude_list) for key in trait_keys: @@ -848,11 +841,8 @@ def _recursively_set_data(node): for child in node.get("children", []): _recursively_set_data(child) - if isinstance(data_json["tree"], list): - for subtree in data_json['tree']: - _recursively_set_data(subtree) - else: - _recursively_set_data(data_json["tree"]) + for subtree in trees: + _recursively_set_data(subtree) def node_data_prop_is_normal_trait(name): # those traits / keys / attrs which are not "special" and can be exported @@ -1235,16 +1225,18 @@ def run(args): sys.exit(2) set_filters(data_json, config) - # set tree structure - trees_json = [convert_tree_to_json_structure(tree.root, node_attrs, node_div(tree, node_attrs)) for tree in trees] - data_json["tree"] = trees_json[0] if len(trees_json)==1 else trees_json - set_node_attrs_on_tree(data_json, node_attrs, additional_metadata_columns) - set_branch_attrs_on_tree(data_json, branch_attrs) + # maintain a list of (sub)trees so that we are always dealing with a list even in the (common) case of a single tree + trees = [convert_tree_to_json_structure(tree.root, node_attrs, node_div(tree, node_attrs)) for tree in trees] + set_node_attrs_on_tree(trees, data_json['meta'], node_attrs, additional_metadata_columns) + set_branch_attrs_on_tree(trees, branch_attrs) - set_geo_resolutions(data_json, config, args.geo_resolutions, read_lat_longs(args.lat_longs), node_attrs) + set_geo_resolutions(trees, data_json['meta'], config, args.geo_resolutions, read_lat_longs(args.lat_longs), node_attrs) set_panels(data_json, config, args.panels) set_data_provenance(data_json, config) + # If we have a single tree then we set this directly as .tree + data_json['tree'] = trees[0] if len(trees)==1 else trees + # pass through any extensions block in the auspice config JSON without any changes / checking if config.get("extensions"): data_json["meta"]["extensions"] = config["extensions"]