From 30a963ac645b6eab99bc05b74357d8d38ee41d0d Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:01:09 +0100 Subject: [PATCH 001/265] new imports --- anvio/panops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/anvio/panops.py b/anvio/panops.py index e622bca3de..6f93de4dbd 100644 --- a/anvio/panops.py +++ b/anvio/panops.py @@ -10,7 +10,10 @@ import json import math import copy +import argparse import pandas as pd +import networkx as nx + from itertools import chain # multiprocess is a fork of multiprocessing that uses the dill serializer instead of pickle From 8064f7edce519897ea4935ab65cf5468b4067aff Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:02:01 +0100 Subject: [PATCH 002/265] new pangraph class --- anvio/panops.py | 941 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 941 insertions(+) diff --git a/anvio/panops.py b/anvio/panops.py index 6f93de4dbd..4d4854b7a3 100644 --- a/anvio/panops.py +++ b/anvio/panops.py @@ -60,6 +60,7 @@ additional_param_sets_for_sequence_search = {'diamond' : '--masking 0', 'ncbi_blast': ''} + class Pangenome(object): def __init__(self, args=None, run=run, progress=progress): self.args = args @@ -1037,3 +1038,943 @@ def process(self): self.run.quit() + + +class Pangraph(): + + def __init__(self, args, run=run, progress=progress): + self.args = args + self.run = run + self.progress = progress + + A = lambda x: args.__dict__[x] if x in args.__dict__ else None + self.pan_db = A('pan_db') + self.external_genomes = A('external_genomes') + self.genomes_storage_db = A('genomes_storage') + self.max_edge_length_filter = A('max_edge_length_filter') + self.gene_cluster_grouping_threshold = A('gene_cluster_grouping_threshold') + self.debug = anvio.DEBUG + + # this is the dictionary that wil keep all data that is going to be loaded + # from anvi'o artifacts + self.gene_synteny_data_dict = {} + self.genome_coloring = {} + self.genome_size = [] + + self.initial_graph = 
nx.DiGraph() + self.pangenome_graph = nx.DiGraph() + self.edmonds_graph = nx.DiGraph() + self.ancest = nx.DiGraph() + + self.leaf_path = [] + self.grouping = {} + + self.global_y = 0 + self.global_x = 1 + self.k = 0 + self.genome_gc_occurence = {} + self.ghost = 0 + self.debug = False + + self.position = {} + self.x_list = [] + self.path = {} + self.edges = [] + + + def sanity_check(self): + pass + + + def process(self): + """Primary driver function for the class""" + + # sanity check EVERYTHING + self.sanity_check() + + # populate self.gene_synteny_data_dict + self.get_gene_synteny_data_dict() + + # contextualize paralogs + self.contextualize_paralogs() + + # build graph + self.build_graph() + + # reconnect open leaves in the graph to generate + # a flow network from left to right + self.run_tree_to_flow_network_algorithm() + + # process edges and nodes to extract unique paths + # from the nework + self.calculate_component_paths() + + # run Alex's layout algorithm + self.run_synteny_layout_algorithm() + + # condense gene clusters into groups + self.condense_gene_clusters_into_groups() + + # store network in the database + self.store_network() + + + def get_gene_synteny_data_dict(self): + """A function to roduce a comprehensive data structure from anvi'o artifacts for + downstream analyses. 
+ """ + self.run.warning(None, header="Loading data from database", lc="green") + + pan_db = dbops.PanSuperclass(self.args, r=terminal.Run(verbose=False), p=terminal.Progress(verbose=False)) + + pan_db.init_gene_clusters() + pan_db.init_gene_clusters_functions_summary_dict() + gene_cluster_dict = pan_db.gene_callers_id_to_gene_cluster + + external_genomes = pd.read_csv(self.external_genomes, header=0, sep="\t", names=["name","contigs_db_path"]) + external_genomes.set_index("name", inplace=True) + + for genome, contigs_db_path in external_genomes.iterrows(): + + if genome not in self.genome_coloring.keys(): + self.genome_coloring[genome] = "on" + + if self.genome_coloring[genome] != "off": + args = argparse.Namespace(contigs_db=contigs_db_path.item()) + contigs_db = dbops.ContigsSuperclass(args, r=terminal.Run(verbose=False), p=terminal.Progress(verbose=False)) + + caller_id_cluster = gene_cluster_dict[genome] + caller_id_cluster_df = pd.DataFrame.from_dict(caller_id_cluster, orient="index", columns=["gene_cluster_name"]).rename_axis("gene_caller_id").reset_index() + caller_id_cluster_df["gene_cluster_id"] = "" + caller_id_cluster_df["max_paralog"] = 0 + caller_id_cluster_df["draw"] = self.genome_coloring[genome] + + contigs_db.init_functions() + gene_function_calls_df = pd.DataFrame.from_dict(contigs_db.gene_function_calls_dict, orient="index", columns=["COG20_PATHWAY", "KEGG_Class", "Transfer_RNAs", "KOfam", "KEGG_Module", "COG20_CATEGORY", "COG20_FUNCTION"]).rename_axis("gene_caller_id").reset_index() + + all_gene_calls = caller_id_cluster_df['gene_caller_id'].values.tolist() + genes_in_contigs_df = pd.DataFrame.from_dict(contigs_db.get_sequences_for_gene_callers_ids(all_gene_calls, include_aa_sequences=True, simple_headers=True)[1], orient="index", columns=["contig", "start", "stop", "direction", "partial", "call_type", "source", "version", "sequence", "length", "rev_compd", "aa_sequence", "header"]).rename_axis("gene_caller_id").reset_index() + + 
joined_contigs_df = caller_id_cluster_df.merge(genes_in_contigs_df, on="gene_caller_id", how="left").merge(gene_function_calls_df, on="gene_caller_id", how="left") + joined_contigs_df.sort_values(["contig", "start", "stop"], axis=0, ascending=True, inplace=True) + joined_contigs_df.set_index(["contig", "gene_caller_id"], inplace=True) + + self.gene_synteny_data_dict[genome] = joined_contigs_df.fillna("None").groupby(level=0).apply(lambda df: df.xs(df.name).to_dict("index")).to_dict() + + self.run.info_single(f"{contigs_db_path.item().split('/')[-1]}") + + self.run.info_single("Done") + + + def contextualize_paralogs(self): + """A function that resolves the graph context of paralogs based on gene synteny information across genomes""" + self.run.warning(None, header="Select paralog context", lc="green") + + unresolved = True + solved = set() + + while unresolved: + + unresolved = False + drop = set() + + for genome in self.gene_synteny_data_dict.keys(): + for contig in self.gene_synteny_data_dict[genome].keys(): + genome_gc_order = [(self.gene_synteny_data_dict[genome][contig][gene_call]["gene_cluster_name"], gene_call) for gene_call in self.gene_synteny_data_dict[genome][contig].keys()] + + for i in range(0, len(genome_gc_order)): + start = (i - self.k) if (i - self.k) >= 0 else 0 + stop = (i + self.k + 1) if (i + self.k + 1) <= len(genome_gc_order) else len(genome_gc_order) + entry = [item[0] for item in genome_gc_order[start:stop]] + gene_call = genome_gc_order[i][1] + name = genome_gc_order[i][0] + + if len(entry) == 1 + (2 * self.k): + gc_k = tuple(entry) + elif start == 0: + gc_k = tuple([""] * (1 + 2 * self.k - len(entry)) + entry) + else: + gc_k = tuple(entry + [""] * (1 + 2 * self.k - len(entry))) + + gc = gc_k[int(len(gc_k) / 2)] + if gc not in solved: + + if gc_k not in self.genome_gc_occurence.keys() and gc_k[::-1] not in self.genome_gc_occurence.keys(): + self.genome_gc_occurence[gc_k] = {genome: 1} + + elif gc_k in self.genome_gc_occurence.keys(): + if 
genome not in self.genome_gc_occurence[gc_k].keys(): + self.genome_gc_occurence[gc_k][genome] = 1 + else: + self.genome_gc_occurence[gc_k][genome] += 1 + + else: + if genome not in self.genome_gc_occurence[gc_k[::-1]].keys(): + self.genome_gc_occurence[gc_k[::-1]][genome] = 1 + else: + self.genome_gc_occurence[gc_k[::-1]][genome] += 1 + + for gc, genome_gc_frequency in self.genome_gc_occurence.items(): + if max(genome_gc_frequency.values()) > 1: + unresolved = True + drop.add(gc[int(len(gc)/2)]) + + self.run.info_single(f"Iteration #{str(self.k)}: {pp(len(self.genome_gc_occurence))} GCs containing {len(drop)} paralogs") + + if self.k == 0: + self.paralog_dict = copy.deepcopy(self.genome_gc_occurence) + + if unresolved: + keys = list(self.genome_gc_occurence.keys()) + for gc in keys: + if gc[int(len(gc)/2)] in drop: + self.genome_gc_occurence.pop(gc) + + solved = set([gc[int(len(gc)/2)] for gc in self.genome_gc_occurence.keys()]) + self.k += 1 + + for genome in self.gene_synteny_data_dict.keys(): + + for contig in self.gene_synteny_data_dict[genome].keys(): + genome_gc_order = [(self.gene_synteny_data_dict[genome][contig][gene_call]["gene_cluster_name"], gene_call) for gene_call in self.gene_synteny_data_dict[genome][contig].keys()] + + for i in range(0, len(genome_gc_order)): + start = i-self.k if i-self.k >= 0 else 0 + stop = i+self.k+1 if i+self.k+1 <= len(genome_gc_order) else len(genome_gc_order) + entry = [item[0] for item in genome_gc_order[start:stop]] + gene_call = genome_gc_order[i][1] + name = genome_gc_order[i][0] + + if len(entry) == 1+2*self.k: + gc_k = tuple(entry) + elif start == 0: + gc_k = tuple([""] * (1+2*self.k - len(entry)) + entry) + else: + gc_k = tuple(entry + [""] * (1+2*self.k - len(entry))) + + for j in range(0, self.k+1): + + gc_group = gc_k[int(len(gc_k)/2)-j:int(len(gc_k)/2)+j+1] + + if gc_group in self.genome_gc_occurence.keys(): + + self.gene_synteny_data_dict[genome][contig][gene_call]["gene_cluster_id"] = ','.join(gc_group) + 
self.gene_synteny_data_dict[genome][contig][gene_call]["max_paralog"] = self.paralog_dict[tuple([name])][genome] + break + + elif gc_group[::-1] in self.genome_gc_occurence.keys(): + + self.gene_synteny_data_dict[genome][contig][gene_call]["gene_cluster_id"] = ','.join(gc_group[::-1]) + self.gene_synteny_data_dict[genome][contig][gene_call]["max_paralog"] = self.paralog_dict[tuple([name])][genome] + break + + else: + pass + + self.run.info_single("Done") + + + # ANCHOR Node adding + def add_node_to_graph(self, gene_cluster, name, info): + + if not self.initial_graph.has_node(gene_cluster): + self.initial_graph.add_node( + gene_cluster, + name=name, + pos=(0, 0), + weight=1, + genome=info + ) + + else: + self.initial_graph.nodes[gene_cluster]['weight'] += 1 + self.initial_graph.nodes[gene_cluster]['genome'].update(info) + + + # ANCHOR Edge adding + def add_edge_to_graph(self, gene_cluster_i, gene_cluster_j, info): + + draw = {genome: {y: info[genome][y] for y in info[genome].keys() if y == 'draw'} for genome in info.keys()} + + if not self.initial_graph.has_edge(*(gene_cluster_i, gene_cluster_j)): + self.initial_graph.add_edge( + *(gene_cluster_i, gene_cluster_j), + weight=1, + genome=draw, + bended=[], + direction='R' + ) + + else: + self.initial_graph[gene_cluster_i][gene_cluster_j]['weight'] += 1 + self.initial_graph[gene_cluster_i][gene_cluster_j]['genome'].update(draw) + + # TODO Should reverse genes also be connected in reverse? 
+ def build_graph(self): + """FIXME""" + + self.run.warning(None, header="Building directed gene cluster graph G", lc="green") + + for genome in self.gene_synteny_data_dict.keys(): + self.genome_size.append(genome) + + for contig in self.gene_synteny_data_dict[genome].keys(): + + gene_cluster_kmer = [] + for gene_call in self.gene_synteny_data_dict[genome][contig].keys(): + + gene_cluster_kmer.append((self.gene_synteny_data_dict[genome][contig][gene_call]['gene_cluster_id'], self.gene_synteny_data_dict[genome][contig][gene_call]['gene_cluster_name'], {genome: {'contig':contig, 'gene_call':gene_call, **self.gene_synteny_data_dict[genome][contig][gene_call]}})) + + if len(gene_cluster_kmer) > 1: + gene_cluster_pairs = map(tuple, zip(gene_cluster_kmer, gene_cluster_kmer[1:])) + first_pair = next(gene_cluster_pairs) + + self.add_node_to_graph(first_pair[0][0], first_pair[0][1], first_pair[0][2]) + self.add_node_to_graph(first_pair[1][0], first_pair[1][1], first_pair[1][2]) + self.add_edge_to_graph(first_pair[0][0], first_pair[1][0], first_pair[1][2]) + + for gene_cluster_pair in gene_cluster_pairs: + + self.add_node_to_graph(gene_cluster_pair[1][0], gene_cluster_pair[1][1], gene_cluster_pair[1][2]) + self.add_edge_to_graph(gene_cluster_pair[0][0], gene_cluster_pair[1][0], gene_cluster_pair[1][2]) + + + self.run.info_single(f"Adding {pp(len(self.initial_graph.nodes()))} nodes and {pp(len(self.initial_graph.edges()))} edges to G") + connectivity = nx.is_connected(self.initial_graph.to_undirected()) + self.run.info_single(f"Connectivity is {connectivity}") + + if connectivity == False: + self.pangenome_graph = nx.DiGraph(self.initial_graph.subgraph(max(nx.weakly_connected_components(self.initial_graph), key=len))) + self.run.info_single(f"Keeping Subgraph with {pp(len(self.pangenome_graph.nodes()))} nodes and {pp(len(self.pangenome_graph.edges()))} edges") + + connectivity = nx.is_connected(self.pangenome_graph.to_undirected()) + self.run.info_single(f"Connectivity is 
{connectivity}") + else: + self.pangenome_graph = nx.DiGraph(self.initial_graph) + + self.run.info_single("Done") + + self.run.warning(None, header="Building maximum branching graph M of G", lc="green") + + self.pangenome_graph.add_node( + 'start', + name='start', + pos=(0, 0), + weight=len(self.genome_size), + genome={genome: {'draw': 'on'} for genome in self.genome_size} + ) + + add_start = [] + for node in self.pangenome_graph.nodes(): + if len(list(self.pangenome_graph.predecessors(node))) == 0: + add_start.append(node) + + for u in add_start: + self.pangenome_graph.add_edge( + *('start', u), + genome={genome: {'draw': 'on'} for genome in self.genome_size}, + weight=len(self.genome_size), + bended=[], + direction='R' + ) + + # ANCHOR Edmonds Algorithm + self.edmonds_graph = nx.algorithms.tree.branchings.maximum_spanning_arborescence(self.pangenome_graph, attr="weight") + nx.set_edge_attributes(self.edmonds_graph, {(i, j): d for i, j, d in self.pangenome_graph.edges(data=True) if (i, j) in self.edmonds_graph.edges()}) + nx.set_node_attributes(self.edmonds_graph, {k: d for k, d in self.pangenome_graph.nodes(data=True) if k in self.edmonds_graph.nodes()}) + + self.run.info_single(f"Removing {pp(len(self.pangenome_graph.edges()) - len(self.edmonds_graph.edges()))} edges from G to create M") + self.run.info_single("Done") + + + def run_tree_to_flow_network_algorithm(self): + + self.run.warning(None, header="Building flow network F from M and G", lc="green") + + edmonds_graph_removed_edges = [(i, j) for i,j in self.pangenome_graph.edges() if (i, j) not in list(self.edmonds_graph.edges())] + edmonds_graph_edges = list(self.pangenome_graph.edges()) + edmonds_graph_added_edges = [] + + edmonds_graph_paths = {} + edmonds_graph_distances = {} + edmonds_graph_nodes = [node for node in self.edmonds_graph.nodes() if node != 'start'] + edmonds_graph_predecessors = {} + + pangenome_graph_successors = {} + pangenome_graph_predecessors = {} + + self.progress.new("Calculating 
node distances") + for i, node in enumerate(edmonds_graph_nodes): + + node_path = nx.shortest_path(G=self.edmonds_graph, source='start', target=node, weight='weight') + edmonds_graph_paths[node] = node_path + + # WRONG? + # edmonds_graph_distances[node] = nx.path_weight(G=self.edmonds_graph, path=node_path, weight='weight') / len(node_path) + edmonds_graph_distances[node] = nx.path_weight(G=self.edmonds_graph, path=node_path, weight='weight') + edmonds_graph_predecessors[node] = list(self.edmonds_graph.predecessors(node))[0] + + pangenome_graph_successors[node] = list(self.pangenome_graph.successors(node)) + pangenome_graph_predecessors[node] = list(self.pangenome_graph.predecessors(node)) + + self.progress.update(f"{str(i).rjust(len(str(len(edmonds_graph_nodes))), ' ')} / {len(edmonds_graph_nodes)}") + + self.progress.end() + + edmonds_graph_leaves = [x for x in self.edmonds_graph.nodes() if self.edmonds_graph.out_degree(x) == 0] + self.run.info_single(f"Current iteration number of leaves {len(edmonds_graph_leaves)}") + + end_successors = max([(edmonds_graph_distances[node], node) for node in edmonds_graph_nodes if self.edmonds_graph.out_degree(node) > 1])[1] + end_branch = self.edmonds_graph.subgraph(nx.dfs_tree(self.edmonds_graph, source=end_successors, depth_limit=None).nodes()) + end_branch_leaves = [x for x in end_branch.nodes() if end_branch.out_degree(x) == 0] + edmonds_graph_end = max([(edmonds_graph_distances[leaf], leaf) for leaf in end_branch_leaves])[1] + + self.edmonds_graph.add_node( + 'stop', + name='stop', + pos=(0, 0), + weight=len(self.genome_size), + genome={genome: {'draw': 'on'} for genome in self.genome_size} + ) + + self.edmonds_graph.add_edge( + *(edmonds_graph_end, 'stop'), + genome={genome: {'draw': 'on'} for genome in self.genome_size}, + weight=len(self.genome_size), + bended=[], + direction='R' + ) + + edmonds_graph_closed = set([edmonds_graph_end]) + edmonds_graph_open = set() + edmonds_graph_all = set(edmonds_graph_nodes) + + 
current_node = edmonds_graph_end + + while edmonds_graph_closed.union(edmonds_graph_open) != edmonds_graph_all: + + current_root = edmonds_graph_predecessors[current_node] + + successors_branch_leaves = [] + blocked_branch_nodes = [] + # blocked_branch_paths = [] + + # TODO Decide by connectability of the leaves instead of weight to pick the first leave to connect + for successors in self.edmonds_graph.successors(current_root): + if successors not in edmonds_graph_closed and successors not in edmonds_graph_open and successors != current_node: + successors_branch = self.edmonds_graph.subgraph(nx.dfs_tree(self.edmonds_graph, source=successors, depth_limit=None).nodes()) + successors_branch_leaves += [x for x in successors_branch.nodes() if successors_branch.out_degree(x) == 0] + + elif successors in edmonds_graph_open: + blocked_branch = self.edmonds_graph.subgraph(nx.dfs_tree(self.edmonds_graph, source=successors, depth_limit=None).nodes()) + # blocked_branch_paths += [nx.shortest_path(G=self.edmonds_graph, source=current_root, target=x, weight='weight') for x in blocked_branch.nodes() if blocked_branch.out_degree(x) == 0] + blocked_branch_nodes += blocked_branch + + if not successors_branch_leaves: + current_node = current_root + else: + current_node = max([(edmonds_graph_distances[leaf], leaf) for leaf in successors_branch_leaves])[1] + + connected = False + for node_successor in pangenome_graph_successors[current_node]: + if (current_node, node_successor) in edmonds_graph_removed_edges and node_successor not in blocked_branch_nodes: + pangenome_graph_edge_data = self.pangenome_graph.get_edge_data(current_node, node_successor) + + if node_successor in edmonds_graph_closed: + edmonds_graph_added_edges.append((current_node, node_successor, pangenome_graph_edge_data)) + connected = True + + # elif node_successor in edmonds_graph_open: + # edmonds_graph_added_edges.append((current_node, node_successor, pangenome_graph_edge_data)) + + elif (current_node, 
node_successor) in edmonds_graph_edges and node_successor in edmonds_graph_closed: + connected = True + + if connected == False and len(list(self.pangenome_graph.successors(current_node))) == 0: + pangenome_graph_edge_data = { + 'genome':{genome: {'draw': 'on'} for genome in self.genome_size}, + 'weight':len(self.genome_size), + 'bended': [], + 'direction': 'R' + } + edmonds_graph_added_edges.append((current_node, 'stop', pangenome_graph_edge_data)) + connected = True + + if connected == True: + edmonds_graph_closed.add(current_node) + for node_predecessor in pangenome_graph_predecessors[current_node]: + if node_predecessor not in blocked_branch_nodes: + if (node_predecessor, current_node) in edmonds_graph_removed_edges: + if node_predecessor in edmonds_graph_open: + pangenome_graph_edge_data = {y:z if y != 'direction' else 'L' for y,z in self.pangenome_graph.get_edge_data(node_predecessor, current_node).items()} + edmonds_graph_added_edges.append((current_node, node_predecessor, pangenome_graph_edge_data)) + + # TODO Blue Part in the picture + # if blocked_branch_paths: + # for blocked_branch_path in blocked_branch_paths: + # for blocked_branch_node in blocked_branch_path: + # for node_successor in pangenome_graph_successors[blocked_branch_node]: + # if (blocked_branch_node, node_successor) in edmonds_graph_removed_edges and node_successor == current_node: + + # pangenome_graph_edge_data = self.pangenome_graph.get_edge_data(blocked_branch_node, node_successor) + # edmonds_graph_added_edges.append((blocked_branch_node, node_successor, pangenome_graph_edge_data)) + + # sub_path = blocked_branch_path[:blocked_branch_path.index(blocked_branch_node)] + # for sub_path_node in sub_path: + # if sub_path_node not in edmonds_graph_closed: + # edmonds_graph_closed.add(sub_path_node) + + # if sub_path_node in edmonds_graph_open: + # edmonds_graph_open.remove(sub_path_node) + + else: + edmonds_graph_open.add(current_node) + + + 
self.edmonds_graph.add_edges_from(edmonds_graph_added_edges) + edmonds_graph_remaining_leaves = [x for x in self.edmonds_graph.nodes() if self.edmonds_graph.out_degree(x) == 0 and x != edmonds_graph_end] + + self.run.info_single(f"Current iteration number of leaves {len(edmonds_graph_remaining_leaves)}") + self.run.info_single(f"Current iteration acyclic nature {nx.is_directed_acyclic_graph(self.edmonds_graph)}") + + for curr_node in edmonds_graph_open: + predecessor = edmonds_graph_predecessors[curr_node] + edmonds_graph_edge_data = {y:z if y != 'direction' else 'L' for y,z in self.edmonds_graph.get_edge_data(predecessor, curr_node).items()} + self.edmonds_graph.remove_edge(predecessor, curr_node) + self.edmonds_graph.add_edge(curr_node, predecessor, **edmonds_graph_edge_data) + + edmonds_graph_remaining_leaves = [x for x in self.edmonds_graph.nodes() if self.edmonds_graph.out_degree(x) == 0 and x != edmonds_graph_end] + + self.run.info_single(f"Current iteration number of leaves {len(edmonds_graph_remaining_leaves)}") + self.run.info_single(f"Current iteration acyclic nature {nx.is_directed_acyclic_graph(self.edmonds_graph)}") + + for x, generation in enumerate(nx.topological_generations(self.edmonds_graph)): + nodes = {} + self.x_list.append(generation) + for node in generation: + self.position[node] = (x, 0) + node_list = node.split(',') + + if node_list[int(len(node_list)/2)] in nodes.keys(): + + found = False + for contractor in nodes[node_list[int(len(node_list)/2)]]: + + intersection = set(self.edmonds_graph.nodes()[contractor]['genome'].keys()).intersection(set(self.edmonds_graph.nodes()[node]['genome'].keys())) + if not intersection: + + self.edmonds_graph.nodes()[contractor]['weight'] += self.edmonds_graph.nodes()[node]['weight'] + self.edmonds_graph.nodes()[contractor]['genome'].update(self.edmonds_graph.nodes()[node]['genome']) + + nx.contracted_nodes(self.edmonds_graph, contractor, node, copy=False) + found = True + break + + if found == False: + 
nodes[node_list[int(len(node_list)/2)]] += [node] + + else: + nodes[node_list[int(len(node_list)/2)]] = [node] + + edmonds_graph_edges = list(self.edmonds_graph.edges()) + for i, j in edmonds_graph_edges: + if abs(self.position[j][0] - self.position[i][0]) > self.max_edge_length_filter and self.max_edge_length_filter != -1: + self.edmonds_graph.remove_edge(i, j) + + # TODO Speed up component path finding (multithreading) + def calculate_component_paths(self): + + self.run.warning(None, header="Extracting component paths from F", lc="green") + + self.progress.new("Solving Path") + + edmonds_graph_edges = list(self.edmonds_graph.edges()) + number = len(str(len(edmonds_graph_edges))) + + j = 0 + for i, (node_i, node_j) in enumerate(edmonds_graph_edges): + + self.progress.update(f"{str(i).rjust(number, ' ')} / {len(edmonds_graph_edges)}") + + if nx.has_path(self.edmonds_graph, 'start', node_i) and nx.has_path(self.edmonds_graph, node_j, 'stop'): + + path_leaf = nx.shortest_path(self.edmonds_graph, 'start', node_i, method='bellman-ford') + path_succ = nx.shortest_path(self.edmonds_graph, node_j, 'stop', method='bellman-ford') + full_path = path_leaf + path_succ + + value = nx.path_weight(self.edmonds_graph, full_path, 'weight')/len(full_path) + + self.leaf_path.append((value, full_path)) + + else: + j += 1 + + self.progress.end() + + self.run.info_single(f"Removed {j} unsolvable nodes.") + self.run.info_single("Done.") + + # ANCHOR Longest path calculation + # NOTE changed from https://stackoverflow.com/questions/25589633/how-to-find-the-longest-path-with-python-networkx + def inverse_weight(self, graph, weight='weight'): + copy_graph = graph.copy() + for n, m, w in copy_graph.edges(data='weight'): + copy_graph[n][m][weight] = w * -1 + return copy_graph + + def longest_path(self, graph, s, t, weight='weight'): + i_w_graph = self.inverse_weight(graph, weight) + # changed path = nx.dijkstra_path(i_w_graph, s, t) to the next line + # this solves a problem the negative 
weights. Using Dijkstra is not + # possible here. Instead bellman ford is the way to go. + # Seems to even work with inf weight! + path = nx.shortest_path(i_w_graph, s, t, method='bellman-ford') + return path + + # ANCHOR Sub path calculation script + def calculate_unknown_edges(self, path, known): + + ancest_nodes = list(self.ancest.nodes()) + + unknown_edges = [] + sub_edges = [] + + for k, o in map(tuple, zip(path, path[1:])): + if not (k, o) in known: + + if k in ancest_nodes and o in ancest_nodes: + if sub_edges: + unknown_edges.append(sub_edges) + sub_edges = [] + + unknown_edges.append([(k, o)]) + + elif o in ancest_nodes: + sub_edges.append((k, o)) + unknown_edges.append(sub_edges) + sub_edges = [] + + else: + sub_edges.append((k, o)) + + else: + if sub_edges: + unknown_edges.append(sub_edges) + sub_edges = [] + + if sub_edges: + unknown_edges.append(sub_edges) + sub_edges = [] + + return(unknown_edges) + + # ANCHOR Main position calculation + # TODO Recalculate Topo Coordinated + # It is possible that the topological x positions have to be recalculated here as + # change of the graph can happen after the first topological sorting by removing / adding + # more edges. 
+ def run_synteny_layout_algorithm(self): + + self.run.warning(None, header="Calculating graph P node positions", lc="green") + + self.ancest.add_edge("start", "stop", weight=1) + known = set([('start', 'stop')]) + + paths = [value for value in sorted(self.leaf_path, key=lambda x: x[0], reverse=True)] + self.progress.new("Running path") + + number = len(str(len(paths))) + for i, (_, path) in enumerate(paths): + self.progress.update(f"{str(i).rjust(number, ' ')} / {len(paths)}") + + try: + unknown_edges = self.calculate_unknown_edges(path, known) + + for sub_edges in unknown_edges: + + known.update(sub_edges) + + node_start = sub_edges[0][0] + node_stop = sub_edges[-1][1] + + sub_path = path[path.index(node_start): path.index(node_stop)+1] + + node_start_x, node_start_y = self.position[node_start] + node_stop_x, node_stop_y = self.position[node_stop] + + for z in range(node_start_x, node_stop_x+1): + if sub_path[z-node_start_x] not in self.x_list[z]: + + sub_path = sub_path[:z-node_start_x] + ["Ghost_" + str(self.ghost)] + sub_path[z-node_start_x:] + + self.x_list[z].append("Ghost_" + str(self.ghost)) + + self.ghost += 1 + + curr_path = [] + for s in sub_path: + if not s.startswith('Ghost_'): + if len(curr_path) > 1: + curr_path.append(s) + self.edges.append(curr_path) + curr_path = [s] + else: + curr_path.append(s) + + sub_path = sub_path[1:-1] + + next_y = self.y_shifting(sub_path, node_start_x, node_stop_x, node_start_y, node_stop_y) + + self.add_new_edges(sub_path, next_y, node_start, node_stop, node_start_x, node_stop_x) + + except Exception as error: + self.debug = True + self.run.info_single(f'Error: Message: {str(error)}') + self.run.info_single('Starting debug mode') + break + + self.progress.end() + + if self.debug == False: + self.run.info_single('Sanity check: No errors reported') + self.run.info_single('Done') + + nx.set_edge_attributes(self.ancest, {(i, j): d for i, j, d in self.edmonds_graph.edges(data=True)}) + + for edge in self.edges: + 
self.ancest.add_edge(edge[0], edge[-1], **self.edmonds_graph[edge[0]][edge[-1]]) + self.ancest[edge[0]][edge[-1]]['bended'] = [self.position[p] for p in edge[1:-1]] + self.ancest.remove_nodes_from(edge[1:-1]) + + nx.set_node_attributes(self.ancest, {k: d for k, d in self.edmonds_graph.nodes(data=True)}) + + for node in self.ancest.nodes(): + self.ancest.nodes[node]['pos'] = self.position[node] + + self.ancest.remove_edge('start', 'stop') + + self.run.info_single(f"Final graph {len(self.ancest.nodes())} nodes and {len(self.ancest.edges())} edges.") + + # ANCHOR Gene Cluster grouping + # TODO Degree is calculated by pangenome graph not edmonds graph probably not a bad idea due + # to easier adding of additional edges. + def condense_gene_clusters_into_groups(self): + + self.run.warning(None, header="Grouping GCs to gene cluster groups (GCGs)", lc="green") + + if self.gene_cluster_grouping_threshold == -1: + self.run.info_single("Setting algorithm to 'no grouping'") + else: + self.run.info_single(f"Setting algorithm to 'Grouping single connected chains size > {str(self.gene_cluster_grouping_threshold)}'") + + dfs_list = list(nx.dfs_edges(self.ancest, source='start')) + + group = 0 + degree = dict(self.ancest.degree()) + groups = {} + groups_rev = {} + + for node_v, node_w in dfs_list: + + if node_v != 'start' and node_w != 'stop' and degree[node_v] == 2 and degree[node_w] == 2 and set(self.ancest.nodes[node_v]['genome'].keys()) == set(self.ancest.nodes[node_w]['genome'].keys()): + + if node_v not in groups_rev.keys(): + group_name = 'GCG_' + str(group).zfill(8) + groups[group_name] = [node_v, node_w] + groups_rev[node_v] = group_name + groups_rev[node_w] = group_name + group += 1 + + else: + group_name = groups_rev[node_v] + groups[group_name] += [node_w] + groups_rev[node_w] = group_name + + for label, condense_nodes in groups.items(): + + if len(condense_nodes) >= self.gene_cluster_grouping_threshold and self.gene_cluster_grouping_threshold != -1: + 
self.grouping[label] = condense_nodes + + + self.run.info_single(f"{str(len(list(self.edmonds_graph.nodes())) - group)} GCs and {str(group)} GCGs") + self.run.info_single("Done") + + # ANCHOR y-shifting script + # TODO Wrong hierarchy bug: + # Sometimes very small branches are included on top of way longer and higher weighted ones I'm currently + # not completely sure why this occures and have to solve it. + def y_shifting(self, sub_path, node_start_x, node_stop_x, node_start_y, node_stop_y): + current_start_x = node_start_x + 1 + current_stop_x = node_stop_x - 1 + current_path_length = (current_stop_x - current_start_x) - 1 + current_y = max(node_start_y, node_stop_y) + 1 + + next_y = -1 + + increase_layer = [] + + while current_y <= self.global_y + 1: + + node = '' + current_layer_start_x = self.global_x + current_layer_stop_x = 0 + layer_branches = [] + sub_branch = [] + layer_size = 0 + + z = current_start_x + while z <= current_stop_x: + for check in self.x_list[z]: + if check not in sub_path and self.position[check] == (z, current_y): + node = check + + if not sub_branch or node not in sub_branch: + + sub_branch = self.path[node] + + if (current_y, sub_branch) not in layer_branches: + layer_branches.append((current_y, sub_branch)) + + sub_branch_start_x, _ = self.position[sub_branch[0]] + sub_branch_stop_x, _ = self.position[sub_branch[-1]] + + layer_size += sub_branch_stop_x - sub_branch_start_x + 1 + + current_layer_start_x = sub_branch_start_x if sub_branch_start_x < current_layer_start_x else current_layer_start_x + current_layer_stop_x = sub_branch_stop_x if sub_branch_stop_x > current_layer_stop_x else current_layer_stop_x + + current_start_x = sub_branch_start_x if sub_branch_start_x < current_start_x else current_start_x + current_stop_x = sub_branch_stop_x if sub_branch_stop_x > current_stop_x else current_stop_x + + z = current_layer_stop_x + + z += 1 + + if layer_size > current_path_length: + + if next_y == -1: + next_y = current_y + + if next_y != 
-1: + increase_layer.extend(layer_branches) + + if not sub_branch: + if current_y != max(node_start_y, node_stop_y): + break + else: + current_y += 1 + else: + current_y += 1 + + for _, layer_branch in sorted(increase_layer, reverse=True): + for node_branch in layer_branch: + node_branch_x = self.position[node_branch][0] + node_branch_y = self.position[node_branch][1] + + self.position[node_branch] = (node_branch_x, node_branch_y + 1) + + if next_y == -1: + next_y = current_y + + self.global_y = current_y if current_y > self.global_y else self.global_y + self.global_x = self.position["stop"][0] if self.position["stop"][0] > self.global_x else self.global_x + + return(next_y) + + # ANCHOR adding new nodes and edges + # TODO take a closer look at the len(sub_path) == 0 situation for now added if sub_path + # keep in mind this can become a error later + def add_new_edges(self, sub_path, next_y, node_start, node_stop, node_start_x, node_stop_x): + if sub_path: + if sub_path[0].startswith('Ghost_'): + self.ancest.add_edge(node_start, sub_path[0], weight=0) + + else: + self.ancest.add_edge(node_start, sub_path[0], weight=1) + + for i, new in enumerate(sub_path, 1): + self.path[new] = sub_path + + if new == sub_path[-1]: + self.position[new] = (node_stop_x - 1, next_y) + else: + self.position[new] = (node_start_x + i, next_y) + + if new != sub_path[-1]: + if sub_path[i].startswith('Ghost_') or new.startswith('Ghost_') or (sub_path[i].startswith('Ghost_') and new.startswith('Ghost_')): + self.ancest.add_edge(new, sub_path[i], weight=0) + else: + self.ancest.add_edge(new, sub_path[i], weight=1) + + if sub_path[-1].startswith('Ghost_'): + self.ancest.add_edge(sub_path[-1], node_stop, weight=0) + else: + self.ancest.add_edge(sub_path[-1], node_stop, weight=1) + + # ANCHOR Converting network to JSON data + # TODO rework that section for better debugging and add more features as an example fuse start and top + # together or remove both so the graph becomes a REAL circle. 
# ANCHOR Converting network to JSON data
# TODO rework that section for better debugging and add more features, for example fuse start and stop
# together or remove both so the graph becomes a REAL circle. Aside from that there is a bug in the remove
# edges section for (k,o) in circular edges and for (k,o) in pangenome edges. Change and test while reworking.
def get_json_dict_for_graph(self):
    """Serialize the layouted graph (`self.ancest`) into a JSON-compatible dictionary.

    Returns
    =======
    jsondata : dict
        `jsondata['infos']` holds layout extents (`meta`), `self.genome_size` (`genomes`),
        node/edge/instance counts of `self.pangenome_graph` (`original`) and of
        `self.ancest` (`visualization`, `grouped`), the graph attributes, the directed
        flag, and `self.grouping` (`groups`).
        `jsondata['elements']['nodes']` maps `str(node)` to name, weight, genome and
        x/y position (taken from the node attribute `pos`).
        `jsondata['elements']['edges']` maps `E_<8-digit running index>` to source,
        target, genome, weight, direction and `bended` (a list of `{'x', 'y'}` dicts,
        or an empty string when the edge has no bend points).

    An "instance" is one genome entry in a node's `genome` attribute. The share of
    instances kept in `self.ancest` is reported through `self.run.info_single`.
    """
    self.run.warning(None, header="Exporting network to JSON", lc="green")

    instances_pangenome_graph = sum(len(attr['genome']) for _, attr in self.pangenome_graph.nodes(data=True))
    instances_ancest_graph = sum(len(attr['genome']) for _, attr in self.ancest.nodes(data=True))

    # an empty pangenome graph must not abort the export with a ZeroDivisionError
    if instances_pangenome_graph:
        recovered = instances_ancest_graph / instances_pangenome_graph
    else:
        recovered = 0.0

    # the message ends with '%', so report an actual percentage rather than a 0-1 ratio
    self.run.info_single(f"Total fraction of recovered genecall information {round(recovered * 100, 3)}%")

    jsondata = {}

    jsondata["infos"] = {'meta': {'global_x': self.global_x,
                                  'global_y': self.global_y},
                         'genomes': self.genome_size,
                         'original': {'nodes': len(self.pangenome_graph.nodes()),
                                      'edges': len(self.pangenome_graph.edges()),
                                      'instances': instances_pangenome_graph},
                         'visualization': {'nodes': len(self.ancest.nodes()),
                                           'edges': len(self.ancest.edges()),
                                           'instances': instances_ancest_graph},
                         'data': list(self.ancest.graph.items()),
                         'directed': self.ancest.is_directed(),
                         'groups': self.grouping}

    jsondata['infos']['grouped'] = {'nodes': len(self.ancest.nodes()), 'edges': len(self.ancest.edges())}

    jsondata["elements"] = {"nodes": {}, "edges": {}}

    for node, attr in self.ancest.nodes(data=True):
        jsondata["elements"]["nodes"][str(node)] = {
            "name": attr["name"],
            "weight": attr["weight"],
            "genome": attr["genome"],
            "position": {
                'x': attr['pos'][0],
                'y': attr['pos'][1]
            }}

    for index, (source, target, attr) in enumerate(self.ancest.edges(data=True)):
        jsondata["elements"]["edges"]['E_' + str(index).zfill(8)] = {
            "source": source,
            "target": target,
            "genome": attr["genome"],
            "weight": attr["weight"],
            "direction": attr["direction"],
            # the front end checks for an empty string to detect straight edges
            "bended": [{'x': x, 'y': y} for x, y in attr["bended"]] if len(attr["bended"]) != 0 else ""
        }

    self.run.info_single("Done.")

    return(jsondata)
return(jsondata) + + def store_network(self): + # FIXME: This will store things in the pan-db: + print(json.dumps(self.get_json_dict_for_graph())) + + def get_genome_gc_fused(self): + return(self.genome_gc_fused) From f3198df056c41b3d6b0d0dad794cfff9d29c551c Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:02:28 +0100 Subject: [PATCH 003/265] draft page for pangraph --- anvio/data/interactive/pangraph.html | 709 +++++++++++++++++++++++++++ 1 file changed, 709 insertions(+) create mode 100644 anvio/data/interactive/pangraph.html diff --git a/anvio/data/interactive/pangraph.html b/anvio/data/interactive/pangraph.html new file mode 100644 index 0000000000..387ae29b92 --- /dev/null +++ b/anvio/data/interactive/pangraph.html @@ -0,0 +1,709 @@ + + + + + + + Gene Cluster Network + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + +
+ + +
+
+ +
+ +
+ +
+
+ + + + + + + + + +
+ + + +
+ + + From f25ac58d1bcd947ef3e26d3654bb5affe767e4b2 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:02:57 +0100 Subject: [PATCH 004/265] draft js pangraph --- anvio/data/interactive/js/pangraph.js | 1939 +++++++++++++++++++++++++ 1 file changed, 1939 insertions(+) create mode 100644 anvio/data/interactive/js/pangraph.js diff --git a/anvio/data/interactive/js/pangraph.js b/anvio/data/interactive/js/pangraph.js new file mode 100644 index 0000000000..ed90cbc3c1 --- /dev/null +++ b/anvio/data/interactive/js/pangraph.js @@ -0,0 +1,1939 @@ +//ANCHOR - Constants +const mapAS = { + 'A': 'A', 'R': 'R', + 'N': 'N', 'D': 'D', + 'C': 'C', 'Q': 'Q', + 'E': 'E', 'G': 'G', + 'H': 'H', 'I': 'I', + 'L': 'L', 'K': 'K', + 'M': 'M', 'F': 'F', + 'P': 'P', 'S': 'S', + 'T': 'T', 'W': 'W', + 'Y': 'Y', 'V': 'V', + '-': '-' +}; +const notation = ["COG20_PATHWAY", "KEGG_Class", "Transfer_RNAs", "KOfam", "KEGG_Module", "COG20_CATEGORY", "COG20_FUNCTION"]; + +for (var source of notation) { + + $('#functiondiv').append( + $('
') + ) + ).append( + $('
//ANCHOR - Fetch GC consensus functions
/**
 * Build the per-source consensus annotation of a gene cluster.
 *
 * @param {Object|string} info - maps genome name -> { <source>: [accession, function, ...] | 'None' };
 *                               callers pass '' when no annotation is available.
 * @returns {Object} maps every entry of the global `notation` list to
 *                   [accession mode, accession share, function mode, function share]
 *                   as returned by modeString(); all four are '' when `info` is ''.
 */
function fetchinfo(info) {
  var consensus = {};

  for (var source of notation) {
    var accessions = [];
    var functions = [];

    // defaults used when there is nothing to summarise
    var accessionMode = ['', ''];
    var functionMode = ['', ''];

    if (info != '') {
      for (var genome of Object.keys(info)) {
        var annotation = info[genome][source];

        // genomes without a hit for this source still take part in the vote
        if (annotation === 'None' || annotation === undefined) {
          annotation = ['None', 'None', 'None'];
        }

        accessions.push(annotation[0]);
        functions.push(annotation[1]);
      }

      accessionMode = modeString(accessions);
      functionMode = modeString(functions);
    }

    consensus[source] = [accessionMode[0], accessionMode[1], functionMode[0], functionMode[1]];
  }

  return consensus;
}
').append( + $('
').append( + $('Name') + ) + ).append( + $('
').append( + drop + ) + ).append( + $('
').append( + $('Group') + ) + ).append( + $('
').append( + group + ) + ).append( + $('
').append( + $('Genomes') + ) + ).append( + $('
').append( + genomes + ) + ).append( + $('
').append( + $('Position') + ) + ).append( + $('
').append( + position + ) + ) + ) + + var node = $('
').append( + $('
').append( + $('Source') + ) + ).append( + $('
').append( + $('
').append( + $('
').append( + $('Accession') + ) + ).append( + $('
').append( + $('Function') + ) + ) + ) + ).append( + $('
').append( + $('Value') + ) + ) + + var d = fetchinfo(info) + for (var [source, value] of Object.entries(d)) { + + var [id_maxEl, id_maxCount, func_maxEl, func_maxCount] = value + + node.append( + $('
').append( + source + ) + ) + + var acc = $('
') + var list_id_maxEl = id_maxEl.split('!!!') + var list_func_maxEl = func_maxEl.split('!!!') + + for (var i = 0; i < list_id_maxEl.length; i++) { + acc.append( + $('
').append( + list_id_maxEl[i] + ) + ) + acc.append( + $('
').append( + list_func_maxEl[i] + ) + ) + } + + node.append( + $('
', { + class: 'col-9' + }).append(acc) + ) + + node.append( + $('
', { + class: 'col-1 text-end' + }).append(func_maxCount === '' ? '' : func_maxCount.toFixed(3)) + ) + + } + + body.append( + $('
//ANCHOR - Fetch genecall functions
/**
 * Collect the annotation of a single gene call for every source in the global `notation` list.
 *
 * @param {Object} info - gene call entry; info[<source>] is [accession, function, e-value] or 'None'.
 * @returns {Object} maps source -> [accession, function, e-value]; sources without an
 *                   annotation yield ['None', 'None', 0].
 */
function fetchgenome(info) {

  var d = {};

  for (var source of notation) {
    var entry = info[source];

    // A source may be 'None' or missing altogether (fetchinfo treats both the same way);
    // indexing an absent entry used to throw a TypeError.
    if (entry == 'None' || entry == null) {
      d[source] = ['None', 'None', 0];
    } else {
      d[source] = [entry[0], entry[1], entry[2]];
    }
  }

  return d;
}
').append( + $('
').append( + $('Name') + ) + ).append( + $('
').append( + name + ) + ).append( + $('
').append( + $('Genecall') + ) + ).append( + $('
').append( + call + ) + ).append( + $('
').append( + $('Genome') + ) + ).append( + $('
').append( + genome + ) + ).append( + $('
').append( + $('Length') + ) + ).append( + $('
').append( + length + ) + ).append( + $('
').append( + $('Direction') + ) + ).append( + $('
').append( + direction + ) + ).append( + $('
').append( + $('Paralog') + ) + ).append( + $('
').append( + paralog + ) + ).append( + $('
').append( + $('Partial') + ) + ).append( + $('
').append( + partial + ) + ) + ) + + var node = $('
').append( + $('
').append( + $('Source') + ) + ).append( + $('
').append( + $('
').append( + $('
').append( + $('Accession') + ) + ).append( + $('
').append( + $('Function') + ) + ) + ) + ).append( + $('
').append( + $('e-value') + ) + ) + + var d = fetchgenome(info) + for (var [source, value] of Object.entries(d)) { + + var [func_id, func, evalue] = value + + node.append( + $('
').append( + source + ) + ) + + var acc = $('
') + var list_func_id = func_id.split('!!!') + var list_func = func.split('!!!') + + for (var i = 0; i < list_func_id.length; i++) { + acc.append( + $('
').append( + list_func_id[i] + ) + ) + acc.append( + $('
').append( + list_func[i] + ) + ) + } + + node.append( + $('
', { + class: 'col-9' + }).append(acc) + ) + + node.append( + $('
', { + class: 'col-1 text-end' + }).append(evalue === '' ? '' : evalue.toFixed(3)) + ) + + } + + bodygenome.append( + $('
//ANCHOR - Fetch GC alignment
/**
 * POST the requested gene calls to the "/alignment" endpoint.
 *
 * @param {Object} alignment - request payload, sent as a JSON body.
 * @returns {jqXHR} the pending jQuery request; callers await it to obtain the parsed JSON reply.
 */
function fetchalignment(alignment) {

  var request = {
    url: "/alignment",
    type: "POST",
    data: JSON.stringify(alignment),
    contentType: "application/json",
    dataType: "json"
  };

  return $.ajax(request);
}
') + divid.append( + $('
').append( + $('').append('Genome') + ) + ).append( + $('
').append( + $('').append('Genecall') + ) + ) + ) + + var divalign = $('
') + + var d = await fetchalignment(alignment) + + var size = Object.entries(d)[0][1][1].length; + var numbers = ''; + var i = 1 + + while (i < size) { + var j = i.toString().length + if (i+50 <= size){ + var k = 50; + } else { + var k = 50 - ((i+50) - size) + } + if (j > k){ + numbers += '-'.repeat(k) + } else { + numbers += String(i).padEnd(k, '-'); + } + i += k + }; + numbers += '-'; + + divalign.append( + $('
').append( + $('
').append( + $('').append( + numbers + ) + ) + ) + ) + + for (var [genome, value] of Object.entries(d)) { + var colored = value[1].replace(/A|R|N|D|C|Q|E|G|H|I|L|K|M|F|P|S|T|W|Y|V|-/gi, function(matched){return mapAS[matched];}); + + divid.append( + $('
').append( + genome + ) + ).append( + $('
').append( + $('').append( + value[0] + ) + ) + ) + ) + + divalign.append( + $('
').append( + $('
').append( + $('').append( + colored + ) + ) + ) + ) + } + + + var div = $('
//ANCHOR - Color node and add/remove from bin
/**
 * Toggle the membership of an SVG element in bin `binid` and recolor it.
 *
 * @param {Element} e     - clicked SVG element; its id is the node or group id and its
 *                          class attribute is expected to be 'node' or 'group'.
 * @param {Object} data   - graph JSON (reads data.infos.genomes, data.infos.groups, data.elements.nodes).
 * @param {string} binid  - id of the target bin; inputs `<binid>color` and `<binid>value` must exist.
 * @param {Object} bins   - maps bin id -> array of element ids; modified in place.
 * @returns {Object} the updated `bins` object.
 */
function marknode(e, data, binid, bins){

  var bincolor = document.getElementById(binid + 'color').value
  var id = e.id;
  var current = ''

  // find the bin (if any) that already contains this element
  var binkeys = Object.keys(bins)
  for (var key of binkeys) {
    if (bins[key].includes(id)) {
      current = key
      break;
    }
  }

  if (e.getAttribute('class') == 'group') {

    var node_color = $('#groups')[0].value;
    var genome_size = data["infos"]["genomes"].length;

    // a group is shaded according to the genome count of its first member
    var group = data['infos']['groups'][id]
    var node_name = group[0]

    var node = data['elements']['nodes'][node_name];
    var genome = Object.keys(node['genome']).length;


  } else if (e.getAttribute('class') == 'node') {

    var node_color = $('#nodes')[0].value;
    var genome_size = data["infos"]["genomes"].length;

    var node = data['elements']['nodes'][id];
    var genome = Object.keys(node['genome']).length;

  }
  // NOTE(review): for any other class, node_color / genome / genome_size stay undefined
  // and the fill computed below is not a valid color - confirm callers never pass such elements.

  if (current === binid) {

    // already in the selected bin: remove it and restore the default color
    e.setAttribute("fill", lighter_color('#ffffff', node_color, genome / genome_size))
    bins[binid] = bins[binid].filter(item => item !== id)
    $('#' + binid + 'value')[0].value = bins[binid].length

  } else if (current === '') {

    // not binned yet: add it to the selected bin
    e.setAttribute("fill", lighter_color('#ffffff', bincolor, genome / genome_size))
    bins[binid].push(id)
    $('#' + binid + 'value')[0].value = bins[binid].length

  } else {

    // member of another bin: move it over and refresh both counters
    e.setAttribute("fill", lighter_color('#ffffff', bincolor, genome / genome_size))
    bins[current] = bins[current].filter(item => item !== id)
    bins[binid].push(id)
    $('#' + binid + 'value')[0].value = bins[binid].length
    $('#' + current + 'value')[0].value = bins[current].length

  }

  return bins
}
+ ) + + var dropitem = $('') + var grouplist = data['infos']['groups'][id] + + for (var listitem of grouplist) { + dropitem.append( + $('
  • ').append( + $('' + data['elements']['nodes'][listitem]['name'] + '') + ) + ) + } + drop.append(dropitem) + + var position = '' + var genomes = '' + var group = id + var info = '' + var alignment = {} + + } else { + + var drop = $('
    ').append( + data['elements']['nodes'][id]['name'] + ) + var position = data['elements']['nodes'][id]['position']['x'] + "/" + (data["infos"]["meta"]["global_x"] - 1); + var genomes = Object.keys(data['elements']['nodes'][id]['genome']).length + "/" + (data['infos']['genomes'].length); + var group = 'None' + var info = data['elements']['nodes'][id]['genome'] + + var alignment = {} + + if (id != 'start' && id != 'stop') { + for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { + alignment[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] + } + } + } + + $('#InfoModalBody').empty() + var bodyinfo = $('
    ') + $('#InfoModalBody').append( + bodyinfo + ) + appendinfo(bodyinfo, '', drop, position, genomes, group, info) + + $('#AlignmentModalBody').empty() + var bodyalign = $('
    ') + $('#AlignmentModalBody').append( + bodyalign + ) + appendalignment(bodyalign, alignment) + + $('#GenomeModalBody').empty() + + $('#InfoModal').modal('show'); +} + +//ANCHOR - Degree to rad calculation +function deg2rad(degrees) +{ + return degrees * Math.PI/180; +} + +//ANCHOR - General download function +function downloadBlob(blob, name) { + + var blobUrl = URL.createObjectURL(blob); + var link = document.createElement("a"); + + link.href = blobUrl; + link.download = name; + + document.body.appendChild(link); + + link.dispatchEvent( + new MouseEvent('click', { + bubbles: true, + cancelable: true, + view: window + }) + ); + + document.body.removeChild(link); +} + +// NOTE - From https://coderwall.com/p/z8uxzw/javascript-color-blender +function int_to_hex(num) { + var hex = Math.round(num).toString(16); + if (hex.length == 1) + hex = '0' + hex; + return hex; +} + +//ANCHOR - Function to mix two colors +function lighter_color(color1, color2, percentage, threshold=0.25) { + + percentage = threshold + (1 - threshold) * percentage + + color1 = color1.substring(1); + color2 = color2.substring(1); + + color1 = [parseInt(color1[0] + color1[1], 16), parseInt(color1[2] + color1[3], 16), parseInt(color1[4] + color1[5], 16)]; + color2 = [parseInt(color2[0] + color2[1], 16), parseInt(color2[2] + color2[3], 16), parseInt(color2[4] + color2[5], 16)]; + + var color3 = [ + (1 - percentage) * color1[0] + percentage * color2[0], + (1 - percentage) * color1[1] + percentage * color2[1], + (1 - percentage) * color1[2] + percentage * color2[2] + ]; + + color3 = '#' + int_to_hex(color3[0]) + int_to_hex(color3[1]) + int_to_hex(color3[2]); + + return color3 +} + +//ANCHOR - Transformation function +function transform(x, y, node_distance_y, radius, theta) { + var circle_x = ((y * node_distance_y) + radius) * Math.sin(deg2rad(theta * x)) + var circle_y = ((y * node_distance_y) + radius) * Math.cos(deg2rad(theta * x)) + return [circle_x, circle_y] +} + +function pickcolor 
(edgecoloring, genomes) { + + var array = [] + + for (var name of genomes) { + array.push(edgecoloring[name]) + var sortedArray = array.sort(function(a, b) { + return a[0] - b[0]; + }); + } + + return sortedArray[0][1] +} + +//ANCHOR - All in one SVG creation function +async function generate_svg(body, data) { + + var edgecoloring = {} + + $("#genomecolors :input[type='color']").each((index, element) => { + edgecoloring[element.id] = [index, element.value] + }) + + var node_size = parseInt($('#size')[0].value); + var node_thickness = parseInt($('#circ')[0].value); + var node_distance_x = parseInt($('#distx')[0].value) + node_size + node_thickness; + var node_distance_y = parseInt($('#disty')[0].value) + node_size; + var global_x = data["infos"]["meta"]["global_x"]; + var genome_size = data["infos"]["genomes"].length; + var global_y = data["infos"]["meta"]["global_y"]; + var group_color = $('#groups')[0].value; + var node_color = $('#nodes')[0].value; + var startstop_color = $('#startstop')[0].value; + var theta = 360 / global_x + var radius = 0.5 * (node_distance_x / Math.sin(deg2rad(theta * (1/2)))) + if (radius < 1000) { + radius = 1000 + } + var size = (global_y * node_distance_y + radius); + + var svg = $(''); + + // svg.append( + // $('').append( + // $('' + pass_project_name + ' PanGraph') + // ).append( + // $('Preserved nodes: ' + (data['infos']['visualization']['instances'] / data['infos']['original']['instances'] *100).toFixed(3) + '%') + // ) + // ) + + var arrow_start = 0 + var arrow_stop = global_x + var pointer_length = 2 + var pointer_height = 4 + var arow_thickness = 2 + var arrow_y_offset = 21 + var steps = 50 + + var [a_x, a_y] = transform(arrow_start, - arrow_y_offset + arow_thickness / 2, node_distance_y, radius, theta) + var [b_x, b_y] = transform(arrow_start, - arrow_y_offset - arow_thickness / 2, node_distance_y, radius, theta) + var [c_x, c_y] = transform(arrow_stop - pointer_length, - arrow_y_offset + arow_thickness / 2, node_distance_y, 
radius, theta) + var [d_x, d_y] = transform(arrow_stop - pointer_length, - arrow_y_offset - arow_thickness / 2, node_distance_y, radius, theta) + var [e_x, e_y] = transform(arrow_stop - pointer_length, - arrow_y_offset + pointer_height / 2, node_distance_y, radius, theta) + var [f_x, f_y] = transform(arrow_stop - pointer_length, - arrow_y_offset - pointer_height / 2, node_distance_y, radius, theta) + var [g_x, g_y] = transform(arrow_stop, - arrow_y_offset, node_distance_y, radius, theta) + + svg.append( + $('') + ) + + var l = 1 + while (l < global_x) { + + if (l+steps <= global_x){ + var k = steps; + } else { + var k = steps - ((l+steps) - global_x); + } + var [l_x, l_y] = transform(l, - arrow_y_offset, node_distance_y, radius, theta) + var rotate = theta * l + if (rotate >= 90 && rotate <= 180) { + rotate += 180; + } else if (rotate >= 180 && rotate <= 270) { + rotate -= 180; + } + svg.append( + $('' + l + '') + ) + l += k + }; + + var groups = data['infos']['groups'] + var edges = data['elements']['edges'] + var nodes = data['elements']['nodes'] + + for(var i in edges) { + + var edge = data['elements']['edges'][i]; + var genome = Object.keys(edge['genome']).length; + + if ($('#coloredge').prop('checked') == true) { + var color = pickcolor (edgecoloring, Object.keys(edge['genome'])) + var draw = lighter_color('#ffffff', color, genome / genome_size); + } else { + var draw = lighter_color('#ffffff', '#000000', genome / genome_size); + } + + if (edge['direction'] == 'L') { + var stroke = ' stroke-dasharray="5,5" ' + } else { + var stroke = '' + } + + var source = edge['source'] + var target = edge['target'] + + if (edge['bended'] == ""){ + + var [circle_i_x, circle_i_y] = transform(nodes[source]['position']['x'], nodes[source]['position']['y'], node_distance_y, radius, theta); + var [circle_j_x, circle_j_y] = transform(nodes[target]['position']['x'], nodes[target]['position']['y'], node_distance_y, radius, theta); + + svg.append( + $('') + ) + + } else { + + var 
[circle_i_x, circle_i_y] = transform(nodes[source]['position']['x'], nodes[source]['position']['y'], node_distance_y, radius, theta); + var [circle_j_x, circle_j_y] = transform(nodes[target]['position']['x'], nodes[target]['position']['y'], node_distance_y, radius, theta); + var circle_svg = '' + + svg.append( + $(circle_svg) + ) + } + }; + + var group_nodes = []; + for(var g in groups) { + var group = data['infos']['groups'][g] + group_nodes = group_nodes.concat(group) + }; + + var shannon = {} + for(var k in nodes) { + + var node = data['elements']['nodes'][k]; + var pos_x = node['position']['x'] + var pos_y = node['position']['y'] + var genome = Object.keys(node['genome']).length; + var name = node['name'] + + if (pos_x in shannon){ + if (name in shannon[pos_x]) { + shannon[pos_x][name] += genome + } else { + shannon[pos_x][name] = genome + } + } else { + shannon[pos_x] = {} + shannon[pos_x][name] = genome + } + + if (!group_nodes.includes(k)) { + var node_class = 'class="node' + } else { + var node_class = 'stroke-opacity="0" fill-opacity="0" class="pseudo' + } + + if ($('#colorgenecluster').prop('checked') == true) { + var color = pickcolor (edgecoloring, Object.keys(node['genome'])) + var draw = lighter_color('#ffffff', color, genome / genome_size); + } else { + var draw = lighter_color('#ffffff', '#000000', genome / genome_size); + } + + var [circle_x, circle_y] = transform(pos_x, pos_y, node_distance_y, radius, theta); + + if (k == 'stop' || k == 'start'){ + svg.append( + $('') + ) + } else { + svg.append( + $('') + ) + } + }; + + for(var l in groups) { + var group = data['infos']['groups'][l] + + var group_length = group.length + var left_node_name = group[0] + var right_node_name = group[group_length-1] + + var left_node = data['elements']['nodes'][left_node_name]; + var right_node = data['elements']['nodes'][right_node_name]; + + var genome = Object.keys(left_node['genome']).length; + + if ($('#colorgenecluster').prop('checked') == true) { + var color = 
pickcolor (edgecoloring, Object.keys(left_node['genome'])) + var draw = lighter_color('#ffffff', color, genome / genome_size); + } else { + var draw = lighter_color('#ffffff', '#000000', genome / genome_size); + } + + var left_pos_x = left_node['position']['x'] + var left_pos_y = left_node['position']['y'] + + var right_pos_x = right_node['position']['x'] + var right_pos_y = right_node['position']['y'] + + var [circle_t_x, circle_t_y] = transform(left_pos_x, left_pos_y, node_distance_y, radius + 10, theta); + var [circle_u_x, circle_u_y] = transform(left_pos_x, left_pos_y, node_distance_y, radius - 10, theta); + var [circle_v_x, circle_v_y] = transform(right_pos_x, right_pos_y, node_distance_y, radius + 10, theta); + var [circle_w_x, circle_w_y] = transform(right_pos_x, right_pos_y, node_distance_y, radius - 10, theta); + + svg.append( + $('') + ) + }; + + var entropy = {} + for (var position of Object.keys(shannon)) { + + var res = 0 + var sum = Object.values(shannon[position]).reduce((partialSum, a) => partialSum + a, 0); + for (var genome of Object.keys(shannon[position])) { + var ref_freq = shannon[position][genome] / sum + + res += -(ref_freq * Math.log2(ref_freq)) + } + + entropy[position] = res + } + + var window = 2 + var mean_entropy = {} + for (let i = 0; i < Object.keys(entropy).length; i += 1) { + if (i in entropy){ + var values = [] + values.push(entropy[i]) + + for (let j = 1; j <= window; j += 1) { + if (i-j in entropy) {values.push(entropy[i-j])} + if (i+j in entropy) {values.push(entropy[i+j])} + } + + mean_entropy[i] = values.reduce((partialSum, a) => partialSum + a, 0) / values.length + } + } + + var max = Math.max(...Object.values(mean_entropy)) + + for (var key of Object.keys(mean_entropy)) { + + var [a_x, a_y] = transform(parseInt(key)-0.5, -2, node_distance_y, radius, theta) + var [b_x, b_y] = transform(parseInt(key)+0.5, -2, node_distance_y, radius, theta) + var [c_x, c_y] = transform(parseInt(key)-0.5, -14, node_distance_y, radius, theta) + 
//ANCHOR - Check node
/**
 * Decide whether `node` passes the active search filters.
 *
 * @param {boolean} searchpos      - when true, the node's x position (as text) must be listed in `positions`.
 * @param {string[]} positions     - accepted x positions, as strings.
 * @param {Object} node            - graph node (reads node.position.x and node.genome).
 * @param {string} mingenomes      - exclusive lower bound on the number of genomes, '-1' disables it.
 * @param {string} maxgenomes      - exclusive upper bound on the number of genomes, '-1' disables it.
 * @param {string} minposition     - exclusive lower bound on the x position, '-1' disables it.
 * @param {string} maxposition     - exclusive upper bound on the x position, '-1' disables it.
 * @param {Object} searchfunction  - maps annotation source -> substring the consensus function must contain.
 * @param {*} expressiondrop       - unused, kept for call compatibility.
 * @param {*} expressioncomparison - unused, kept for call compatibility.
 * @returns {boolean} true when the node satisfies every active filter.
 */
function checknode(searchpos, positions, node, mingenomes, maxgenomes, minposition, maxposition, searchfunction, expressiondrop, expressioncomparison) {

  // The bounds come straight from text inputs. They used to be spliced into eval(),
  // which executed whatever the user typed; parse them as plain numbers instead.
  // '-1', blank or non-numeric input means "no bound".
  function bound(value) {
    if (value == '-1' || value == null || String(value).trim() === '') {
      return null
    }
    var number = Number(value)
    return Number.isFinite(number) ? number : null
  }

  var genomes = Object.keys(node['genome']).length
  var x = node['position']['x']

  if (searchpos == true && !positions.includes(x.toString())) {
    return false
  }

  var min_genomes = bound(mingenomes)
  if (min_genomes !== null && genomes <= min_genomes) {
    return false
  }

  var max_genomes = bound(maxgenomes)
  if (max_genomes !== null && genomes >= max_genomes) {
    return false
  }

  var min_position = bound(minposition)
  if (min_position !== null && x <= min_position) {
    return false
  }

  var max_position = bound(maxposition)
  if (max_position !== null && x >= max_position) {
    return false
  }

  // only compute the consensus annotation when a function filter is actually set
  var sources = Object.keys(searchfunction)
  if (sources.length === 0) {
    return true
  }

  var d = fetchinfo(node['genome'])
  for (var source of sources) {
    // index 2 of a fetchinfo entry is the consensus function text
    var consensus = d[source]
    if (consensus === undefined || !consensus[2].includes(searchfunction[source])) {
      return false
    }
  }

  return true
}
+ new Date().getTime(), function(data) { + + $('#redraw').on('click', function() { + + $(document).off().find("*").off(); + main() + + }) + + $('#settings').on('click', function() { + + var data = new Object; + + data['conntr'] = $('#conntr')[0].value + data['condtr'] = $('#condtr')[0].value + data['maxlength'] = $('#maxlength')[0].value + + $("#genomecolors :input[type='checkbox']").each((index, element) => { + + var genome = $(element).attr('name') + if ($(element).prop('checked') == true){ + data[genome] = 'on' + } else { + data[genome] = 'off' + } + }) + + $.ajax({ + url: "/settings", + type: "POST", + async: false, + data: JSON.stringify(data), + contentType: "application/json", + dataType: "json" + }); + + $(document).off().find("*").off(); + main() + }) + + var body = $('#svgbox') + body.empty() + generate_svg(body, data); + + window.zoomSVG = svgPanZoom('#result', { + zoomEnabled: true, + panEnabled: false, + controlIconsEnabled: false, + minZoom: 0.1, + maxZoom: 100 + }); + + //ANCHOR - Zoom pan functions + $('#plus').on('click', function() { + window.zoomSVG.zoomIn(); + }) + + $('#minus').on('click', function() { + window.zoomSVG.zoomOut(); + }) + + $('#fit').on('click', function() { + window.zoomSVG.resize(); + window.zoomSVG.fit(); + window.zoomSVG.center(); + }) + + //ANCHOR - Main panel response functions + if ($('#conntr')[0].value == -1){ + // $('#customRange1').prop('disabled', true); + $('#flexconntr').prop('checked', false); + } + + if ($('#condtr')[0].value == -1){ + // $('#customRange2').prop('disabled', true); + $('#flexcondtr').prop('checked', false); + } + + if ($('#maxlength')[0].value == -1){ + // $('#customRange3').prop('disabled', true); + $('#flexmaxlength').prop('checked', false); + } + + if ($('#groupcompress')[0].value == -1){ + $('#flexgroupcompress').prop('checked', false); + } + + $('#flexconntr').change(function() { + if ($(this).prop('checked') == true){ + $('#conntr')[0].value = 0; + // $('#customRange1')[0].value = 0; + // 
$('#customRange1').prop('disabled', false); + } else { + $('#conntr')[0].value = -1; + // $('#customRange1')[0].value = 0; + // $('#customRange1').prop('disabled', true); + } + }) + + $('#flexcondtr').change(function() { + if ($(this).prop('checked') == true){ + $('#condtr')[0].value = 2; + // $('#customRange2')[0].value = 2; + // $('#customRange2').prop('disabled', false); + } else { + $('#condtr')[0].value = -1; + // $('#customRange2')[0].value = 2; + // $('#customRange2').prop('disabled', true); + } + }) + + $('#flexmaxlength').change(function() { + if ($(this).prop('checked') == true){ + $('#maxlength')[0].value = 1; + // $('#customRange3')[0].value = 1; + // $('#customRange3').prop('disabled', false); + } else { + $('#maxlength')[0].value = -1; + // $('#customRange3')[0].value = 1; + // $('#customRange3').prop('disabled', true); + } + }) + + $('#flexgroupcompress').change(function() { + if ($(this).prop('checked') == true){ + $('#groupcompress')[0].value = 0; + $('#customRange4')[0].value = 0; + $('#customRange4').prop('disabled', false); + } else { + $('#groupcompress')[0].value = -1; + $('#customRange4')[0].value = 0; + $('#customRange4').prop('disabled', true); + } + }) + + $(function () { + $(".grid").sortable({ + tolerance: 'pointer', + revert: 'invalid', + handle: ".user-handle", + forceHelperSize: true + }); + }); + + //ANCHOR - Choose genecall + $(document).on("click", ".genome", function() { + + var id = $('#name').attr('name') + var call = this.innerText; + var genome = this.parentNode.parentNode.firstChild.innerText; + + var name = data['elements']['nodes'][id]['name']; + var length = data['elements']['nodes'][id]['genome'][genome]['length']; + var direction = data['elements']['nodes'][id]['genome'][genome]['direction']; + var paralog = data['elements']['nodes'][id]['genome'][genome]['max_paralog']; + var partial = data['elements']['nodes'][id]['genome'][genome]['partial']; + + var info = data['elements']['nodes'][id]['genome'][genome]; + + 
$('#GenomeModalBody').empty() + var bodygenome = $('
    ') + $('#GenomeModalBody').append( + bodygenome + ) + appendgenome(bodygenome, name, call, genome, length, direction, paralog, partial, info) + + $('#AlignmentModal').modal('hide'); + $('#GenomeModal').modal('show'); + }); + + $(document).on("click", ".choice li a", function() { + var name = $(this).attr('name') + var dropitem = $(this).parent().parent().parent().children(":first") + + dropitem[0].name = name + dropitem.empty() + dropitem.append( + $('').append( + name + ) + ) + }) + + //ANCHOR - Bin dropdown choice function + $(document).on("click", ".binremove", function() { + + var id = $(this).attr('name') + var binid = $(this).attr('bin') + + var name = document.getElementById(id); + bins = marknode(name, data, binid, bins); + + $(this).parent().parent().parent().remove() + + }) + + //ANCHOR - Change GC in group window + $(document).on("click", ".gcchoice li a", function() { + + var id = $(this).attr('name') + var drop = $('#drop') + var group = drop.attr('name') + var name = data['elements']['nodes'][id]['name'] + + var dropitem = $('#name') + dropitem[0].name = id + dropitem.empty() + dropitem.append( + $('').append( + name + ) + ) + + var position = data['elements']['nodes'][id]['position']['x'] + "/" + (data["infos"]["meta"]["global_x"] - 1); + var genomes = Object.keys(data['elements']['nodes'][id]['genome']).length + "/" + (data['infos']['genomes'].length); + var info = data['elements']['nodes'][id]['genome']; + + $('#InfoModalBody').empty() + var bodyinfo = $('
    ') + $('#InfoModalBody').append( + bodyinfo + ) + appendinfo(bodyinfo, '', drop, position, genomes, group, info) + + var alignment = {} + + if (id != 'start' && id != 'stop') { + for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { + alignment[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] + } + } + + $('#AlignmentModalBody').empty() + var bodyalign = $('
    ') + $('#AlignmentModalBody').append( + bodyalign + ) + appendalignment(bodyalign, alignment) + + + }); + + //ANCHOR - Bin dropdown choice function + $(document).on("click", ".binchoice li a", function() { + + var id = $(this).attr('name') + var drop = $(this).parent().parent().parent() + var group = drop.attr('name') + var name = data['elements']['nodes'][id]['name'] + + var dropitem = $('#' + group + 'name') + dropitem[0].name = id + dropitem.empty() + dropitem.append( + $('').append( + name + ) + ) + + var position = data['elements']['nodes'][id]['position']['x'] + "/" + (data["infos"]["meta"]["global_x"] - 1); + var genomes = Object.keys(data['elements']['nodes'][id]['genome']).length + "/" + (data['infos']['genomes'].length); + var info = data['elements']['nodes'][id]['genome']; + + var body = $('#' + group + 'div') + body.empty() + + appendinfo(body, group, drop, position, genomes, group, info) + }) + + //ANCHOR - Bin creation + var bins = {"bin1": []} + var binnum = 1 + + //ANCHOR - Add bin + $('#binadd').on('click', function() { + binnum += 1 + + $('#bingrid').append( + $('
    ').append( + $('
    ').append( + $('
    ').append( + $('') + ) + ).append( + $('
    ').append( + $('') + ) + ).append( + $('
    ').append( + $('') + ) + ).append( + $('
    ').append( + $('') + ) + ) + ) + ) + + bins['bin' + binnum] = [] + }) + + //ANCHOR - Remove bin + $('#binremove').on('click', function() { + + var selection = document.querySelector('input[name="binradio"]:checked') + + if (selection !== null) { + var binid = selection.value + + for (var node of bins[binid]) { + var name = document.getElementById(node); + bins = marknode(name, data, binid, bins); + } + + $("#" + binid).remove(); + delete bins[binid] + var nextbin = document.querySelector('input[name="binradio"]') + + if (nextbin) { + nextbin.checked = true; + } else { + $('#binadd').click(); + } + } + }) + + //ANCHOR - Change bin + $(document).on("change", ".colorchange", function() { + + var binid = this.name + var nodes = bins[binid] + + for (var node of nodes) { + + bins[binid] = bins[binid].filter(item => item !== node) + var name = document.getElementById(node); + bins = marknode(name, data, binid, bins); + + } + + }); + + //ANCHOR - Info bin + $('#bininfo').on('click', function() { + + var selection = document.querySelector('input[name="binradio"]:checked') + + // if (selection !== null) { + var binid = selection.value + var appendlist = [] + + $('#BinModalBody').empty() + for (var id of bins[binid]) { + var element = document.getElementById(id); + if (element.getAttribute('class') == 'group') { + + var drop = $('') + + drop.append( + $('').append( + $('').append('Choose GC') + ) + ) + + var dropitem = $('') + var grouplist = data['infos']['groups'][id] + + for (var listitem of grouplist) { + dropitem.append( + $('
  • ').append( + $('' + data['elements']['nodes'][listitem]['name'] + '') + ) + ) + } + drop.append(dropitem) + + var position = '' + var genomes = '' + var group = id + var info = '' + + appendlist.push([id, drop, position, genomes, group, info]) + } else { + + var drop = $('
    ').append( + data['elements']['nodes'][id]['name'] + ) + var position = data['elements']['nodes'][id]['position']['x'] + "/" + (data["infos"]["meta"]["global_x"] - 1); + var genomes = Object.keys(data['elements']['nodes'][id]['genome']).length + "/" + (data['infos']['genomes'].length); + var group = 'None' + var info = data['elements']['nodes'][id]['genome'] + appendlist.push([id, drop, position, genomes, group, info]) + } + } + + for (var [id, drop, position, genomes, group, info] of appendlist) { + + var body = $('
    ') + + $('#BinModalBody').append( + $('
    ').append( + $('
    ').append( + $('
    ').append( + $('') + ) + ) + ).append( + body + ) + ) + + appendinfo(body, id, drop, position, genomes, group, info) + } + + $('#BinModal').modal('show'); + // } + }) + + $('#InfoDownload').on('click', async function() { + + var id = $('#name').attr('name') + var name = $('#name')[0].innerText; + + if (id != 'Choose GC' && id != 'start' && id != 'stop'){ + + var group = $('#group')[0].innerText; + var genomes = $('#genomes')[0].innerText; + var position = $('#position')[0].innerText; + + var csv = "Name\t" + name + "\nGroup\t" + group + "\nGenomes\t" + genomes + "\nPosition\t" + position + "\nSource\tAccession\tFunction\tConfidence"; + + var func = fetchinfo(data['elements']['nodes'][id]['genome']); + + for (var [key, value] of Object.entries(func)) { + csv += "\n" + key + "\t" + value[0] + "\t" + value[1] + "\t" + value[2]; + }; + + var blob = new Blob([csv]); + downloadBlob(blob, name + ".csv"); + } + }); + + $('#GenomeDownload').on('click', async function() { + + + var id = $('#name').attr('name') + var name = $('#name')[0].innerText; + + if (id != 'Choose GC' && id != 'start' && id != 'stop'){ + + var genome = $('#genome')[0].innerText; + var genecall = $('#genecall')[0].innerText; + var length = $('#length')[0].innerText; + var partial = $('#partial')[0].innerText; + var paralog = $('#paralog')[0].innerText; + var direction = $('#direction')[0].innerText; + + var csv = "Genome\t" + genome + "\nGenecall\t" + genecall + "\nLength\t" + length + "\Partial\t" + partial + "\ParalogS\t" + paralog + "\Direction\t" + direction + "\nSource\tAccession\tFunction\te-Value"; + + var func = fetchgenome(data['elements']['nodes'][id]['genome'][genome]); + + for (var [key, value] of Object.entries(func)) { + csv += "\n" + key + "\t" + value[0] + "\t" + value[1] + "\t" + value[2]; + }; + + var blob = new Blob([csv]); + downloadBlob(blob, name + ".csv"); + } + }); + + $('#AlignmentDownload').on('click', async function() { + + var id = $('#name').attr('name') + var name = 
$('#name')[0].innerText; + + if (id != 'Choose GC' && id != 'start' && id != 'stop'){ + + var al = {} + for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { + al[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] + } + + var align = await fetchalignment(al); + var csv = ""; + + for (var [genome, value] of Object.entries(align)) { + + csv += ">" + name + "|Genome:" + genome +"|Genecall:" + value[0] + "\n"; + csv += value[1].match(/.{1,60}/g).join("\r\n") + "\n"; + + } + + var blob = new Blob([csv]); + downloadBlob(blob, name + ".fa"); + } + }); + + $('#jsonDownload').on('click', async function() { + var csv = JSON.stringify(data); + var blob = new Blob([csv]); + downloadBlob(blob, pass_project_name + ".json"); + }); + + $('#svgDownload').on('click', async function() { + var blob = new Blob([$('#svgbox')[0].innerHTML]); + downloadBlob(blob, pass_project_name + ".svg"); + }); + + $('#searchadd').on('click', function() { + + var selection = document.querySelector('input[name="binradio"]:checked') + var binid = selection.value + + for (var [id, members] of Object.entries(searched)) { + if (!(id in bins[binid])) { + + var e = document.getElementById(id); + bins = marknode(e, data, binid, bins); + + } + } + + }) + + $('#searchremove').on('click', function() { + + var selection = document.querySelector('input[name="binradio"]:checked') + var binid = selection.value + + for (var [id, members] of Object.entries(searched)) { + if (id in bins[binid]) { + + var e = document.getElementById(id); + bins = marknode(e, data, binid, bins); + + } + } + }) + + $('#searcherase').on('click', function() { + + for (var [id, members] of Object.entries(searched)) { + for (var mem of members) { + var xpos = data['elements']['nodes'][mem]['position']['x'] + var e = document.getElementById(xpos); + e.setAttribute("fill", "white") + } + } + + searched = {}; + }) + + $('#searchcolor').on('click', function() { + + 
for (var [id, members] of Object.entries(searched)) { + for (var mem of members) { + var xpos = data['elements']['nodes'][mem]['position']['x'] + var e = document.getElementById(xpos); + e.setAttribute("fill", "#ff0000") + } + } + }) + + var searched = {} + $('#search').on('click', function() { + + var mingenomes = ($("#mingenomes").prop('checked') == true && !isNaN($("#mingenomestext")[0].value)) ? $("#mingenomestext")[0].value : '-1' + var maxgenomes = ($("#maxgenomes").prop('checked') == true && !isNaN($("#maxgenomestext")[0].value)) ? $("#maxgenomestext")[0].value : '-1' + var minentropy = ($("#minentropy").prop('checked') == true && !isNaN($("#minentropytext")[0].value)) ? $("#minentropytext")[0].value : '-1' + var maxentropy = ($("#maxentropy").prop('checked') == true && !isNaN($("#maxentropytext")[0].value)) ? $("#maxentropytext")[0].value : '-1' + var minposition = ($("#minposition").prop('checked') == true && !isNaN($("#minpositiontext")[0].value)) ? $("#minpositiontext")[0].value : '-1' + var maxposition = ($("#maxposition").prop('checked') == true && !isNaN($("#maxpositiontext")[0].value)) ? 
$("#maxpositiontext")[0].value : '-1' + var searchfunction = {} + var expressioncomparison = '' + + var expressiondrop = $('#expressiondrop').attr('name') + var expressionrel = $('#expressionrel').attr('name') + var expressiontext = $('#expressiontext')[0].value + + if (expressionrel != "Choose operator" && expressiontext != '') { + if (expressionrel == '=') { + if (!isNaN(expressiontext)) { + expressioncomparison = '== ' + expressiontext + } else { + expressioncomparison = '== "' + expressiontext + '"' + } + } else if (expressionrel == '\u{2260}') { + if (!isNaN(expressiontext)) { + expressioncomparison = '!= ' + expressiontext + } else { + expressioncomparison = '!= "' + expressiontext + '"' + } + } else if (expressionrel == '\u{2264}' && !isNaN(expressiontext)) { + expressioncomparison = '<= ' + expressiontext + } else if (expressionrel == '\u{2265}' && !isNaN(expressiontext)) { + expressioncomparison = '>= ' + expressiontext + } else if (expressionrel == '\u{003C}' && !isNaN(expressiontext)) { + expressioncomparison = '< ' + expressiontext + } else if (expressionrel == '\u{003E}' && !isNaN(expressiontext)) { + expressioncomparison = '> ' + expressiontext + } else if (expressionrel == '\u{25C2}\u{25AA}\u{25B8}') { + expressioncomparison = '.includes("' + expressiontext + '")' + } else if (expressionrel == '\u{25C2}\u{25AA}') { + expressioncomparison = '.endsWith("' + expressiontext + '")' + } else if (expressionrel == '\u{25AA}\u{25B8}') { + expressioncomparison = '.startsWith("' + expressiontext + '")' + } + } + + for (var source of notation) { + if ($("#" + source).prop('checked') == true) { + searchfunction[source] = $('#functiontext')[0].value + } + } + + var positions = [] + var searchpos = false + + if ((expressiondrop == "Entropy" && expressionrel != "Choose operator") || minentropy != '-1' || maxentropy != '-1') { + var searchpos = true + var entropy = document.querySelectorAll(".entropy") + for (var en of entropy) { + + var append = true + var value = 
en.getAttribute("name") + + if (minentropy != '-1'){ + if (eval(value + '<' + minentropy)){ + append = false + } + } + + if (maxentropy != '-1'){ + if (eval(value + '>' + maxentropy)){ + append = false + } + } + + if ((expressiondrop == "Entropy" && expressionrel != "Choose operator") && expressionrel != '\u{25C2}\u{25AA}\u{25B8}' && expressionrel != '\u{25AA}\u{25B8}' && expressionrel != '\u{25C2}\u{25AA}') { + if (!eval(value + expressioncomparison)) { + append = false + } + } + + if (append == true) { + positions.push(en.getAttribute("xpos")) + } + } + } + + // console.log(positions) + + if ((expressioncomparison != "Choose operator") || Object.keys(searchfunction).length != 0 || mingenomes != '-1' || maxgenomes != '-1' || minposition != '-1' || maxposition != '-1' || searchpos == true) { + + var nodes = document.querySelectorAll(".node") + for (var node of nodes) { + + var id = node.getAttribute("id") + var node = data['elements']['nodes'][id] + + if (checknode(searchpos, positions, node, mingenomes, maxgenomes, minposition, maxposition, searchfunction, expressiondrop, expressioncomparison) == true) { + if (expressiondrop == "Name" && expressionrel != '\u{2264}' && expressionrel != '\u{2265}' && expressionrel != '\u{003C}' && expressionrel != '\u{003E}') { + if (eval('"' + node["name"] + '"' + expressioncomparison)) { + if (!(id in searched)) { + searched[id] = [id] + } + } + } else { + if (!(id in searched)) { + searched[id] = [id] + } + } + } + } + + var groups = document.querySelectorAll(".group") + for (var group of groups) { + var groupid = group.getAttribute("id") + var members = data["infos"]["groups"][groupid] + for (var id of members) { + + node = data['elements']['nodes'][id] + + if (checknode(searchpos, positions, node, mingenomes, maxgenomes, minposition, maxposition, searchfunction, expressiondrop, expressioncomparison) == true) { + if (expressiondrop == "Name" && expressionrel != '\u{2264}' && expressionrel != '\u{2265}' && expressionrel != 
'\u{003C}' && expressionrel != '\u{003E}') { + if (eval('"' + node["name"] + '"' + expressioncomparison) || eval('"' + group + '"' + expressioncomparison)) { + + if (!(groupid in searched)) { + searched[groupid] = [id] + } else { + searched[groupid].push(id) + } + + } + } else { + if (!(groupid in searched)) { + searched[groupid] = [id] + } else { + searched[groupid].push(id) + } + } + } + } + } + } + + // console.log(searched) + + var toastbody = $('#toastbody') + toastbody.empty() + toastbody.append( + 'You have ' + Object.keys(searched).length + ' item(s) in your queue.' + ) + var searchtoast = bootstrap.Toast.getOrCreateInstance($('#searchtoast')) + searchtoast.show() + }) + + var entropy = document.querySelectorAll(".entropy") + for (var en of entropy) { + tippy(en, { + content: '' + en.getAttribute("name") + '' + '
    ', + allowHTML: true, + arrow: true, + duration: 0, + followCursor: true, + theme: "light", + }); + } + + var nodes = document.querySelectorAll(".node") + var divs = document.querySelectorAll(".node, .group"); + for (var el of divs) { + + if (el.getAttribute("id").startsWith('GCG_')){ + var id = data["infos"]["groups"][el.getAttribute("id")][0] + var name = el.getAttribute("id") + } else { + var id = el.getAttribute("id") + var name = data['elements']['nodes'][el.getAttribute("id")]['name'] + } + + tippy(el, { + content: '' + name + '' + '
    ', + allowHTML: true, + onHide() { + for (var element of Object.keys(data['elements']['nodes'][id]['genome'])) { + $('#number_' + element)[0].innerText = '0'; + } + }, + onShow() { + for (var element of Object.keys(data['elements']['nodes'][id]['genome'])) { + $('#number_' + element)[0].innerText = '1'; + } + }, + arrow: true, + duration: 0, + followCursor: true, + theme: "light", + }); + }; + + var isDown = false + var diff = 0 + + var old_xpos = 0 + var old_ypos = 0 + + var xpos = 0 + var ypos = 0 + + var new_xpos = 0 + var new_ypos = 0 + + $("#svgbox").on('mousedown', function(e) { + old_xpos = e.offsetX + old_ypos = e.offsetY + + xpos = old_xpos + ypos = old_ypos + + isDown = true + diff = 0 + }) + + $("#svgbox").on('mousemove', function(e) { + if (isDown === true) { + new_xpos = e.offsetX + new_ypos = e.offsetY + + diff += Math.sqrt((new_xpos-xpos)^2+(new_ypos-ypos)^2) + + if (!e.shiftKey) { + window.zoomSVG.panBy({x: new_xpos-xpos, y: new_ypos-ypos}) + } + + xpos = new_xpos + ypos = new_ypos + } + }) + + $("#svgbox").on('mouseup', function(e) { + if (isDown === true) { + + var selection = document.querySelector('input[name="binradio"]:checked') + + isDown = false + + if (diff < 10) { + if (e.target.getAttribute('class') === 'group' || e.target.getAttribute('class') === 'node') { + + if (e.shiftKey && selection !== null) { + + var binid = selection.value + bins = marknode(e.target, data, binid, bins); + + } else { + nodeinfo(e.target, data); + } + + } else { + } + + } else { + if (e.shiftKey && selection !== null) { + + var binid = selection.value + + var max_xpos = Math.max(old_xpos, xpos) + var min_xpos = Math.min(old_xpos, xpos) + + var max_ypos = Math.max(old_ypos, ypos) + var min_ypos = Math.min(old_ypos, ypos) + + for (var n of nodes) { + + var bounding = n.getBoundingClientRect(); + var left = bounding.left + var right = bounding.right + var bottom = bounding.bottom + var top = bounding.top + + if ( + min_xpos < left && + max_xpos > right && + 
min_ypos < bottom && + max_ypos > top + ) { + bins = marknode(n, data, binid, bins); + + } + } + + var groups = data['infos']['groups'] + for(var g in groups) { + // var inside = true; + var group = data['infos']['groups'][g] + for (var k of group) { + var node = document.getElementById(k); + + var bounding = node.getBoundingClientRect(); + var left = bounding.left + var right = bounding.right + var bottom = bounding.bottom + var top = bounding.top + + if ( + min_xpos < left && + max_xpos > right && + min_ypos < bottom && + max_ypos > top + ) { + var name = document.getElementById(g); + bins = marknode(name, data, binid, bins); + break + } + // if ( + // min_xpos < left && + // max_xpos > right && + // min_ypos < bottom && + // max_ypos > top + // ) { + // } else { + // inside = false + // } + } + + // if (inside === true){ + // var name = document.getElementById(g); + // bins = marknode(name, data, binid, bins); + // } + } + } + } + } + }) + }) + +} + +//ANCHOR - Main function after loading DOM +$(document).ready(function() { + + main() + +}); From 2bb20ed9e8527455aa2bdbdf5fe47151ad6752a2 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:03:29 +0100 Subject: [PATCH 005/265] new pangraph mode for interactive --- anvio/bottleroutes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anvio/bottleroutes.py b/anvio/bottleroutes.py index ebe7a68685..7b77ae1304 100644 --- a/anvio/bottleroutes.py +++ b/anvio/bottleroutes.py @@ -262,6 +262,8 @@ def redirect_to_app(self): homepage = 'structure.html' elif self.interactive.mode == 'metabolism': homepage = 'metabolism.html' + elif self.interactive.mode == 'pangraph': + homepage = 'pangraph.html' elif self.interactive.mode == 'inspect': redirect('/app/charts.html?id=%s&show_snvs=true&rand=%s' % (self.interactive.inspect_split_name, self.random_hash(8))) From d127d692c5a034f9d51ef5b50c6142578f94da35 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:03:50 +0100 Subject: [PATCH 
006/265] draft pangraph interactive class --- anvio/interactive.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/anvio/interactive.py b/anvio/interactive.py index 1564353064..e2e080e6c5 100644 --- a/anvio/interactive.py +++ b/anvio/interactive.py @@ -2969,6 +2969,17 @@ def get_metabolism_data(self): return self.estimator.get_metabolism_data_for_visualization() +class PangraphInteractive(): + def __init__(self, args, run=run, progress=progress): + self.mode = "pangraph" + + self.args = args + self.run = run + self.progress = progress + + PanSuperclass.__init__(self, self.args) + + class ContigsInteractive(): def __init__(self, args, run=run, progress=progress): self.mode = 'contigs' From bcbaf57684004a09e625ea6b35376cf1f366ab53 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:04:32 +0100 Subject: [PATCH 007/265] += anvi-pan-graph --- bin/anvi-pan-graph | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100755 bin/anvi-pan-graph diff --git a/bin/anvi-pan-graph b/bin/anvi-pan-graph new file mode 100755 index 0000000000..87542e0b78 --- /dev/null +++ b/bin/anvi-pan-graph @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# -*- coding: utf-8 +"""Compute a graph representation of a pangenome""" + +import sys + +import anvio +import anvio.panops as panops +import anvio.terminal as terminal + +from anvio.errors import ConfigError, FilesNPathsError + + +__author__ = "Developers of anvi'o (see AUTHORS.txt)" +__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)" +__credits__ = [] +__license__ = "GPL 3.0" +__version__ = anvio.__version__ +__authors__ = ['ahenoch'] +__requires__ = ['pan-db', 'genomes-storage-db', 'external-genomes'] +__provides__ = [] +__description__ = ("An anvi'o program to compute a graph representation of pangenomes") +__resources__ = [] + + +run = terminal.Run() +progress = terminal.Progress() + + +if __name__ == '__main__': + from anvio.argparse import 
ArgumentParser + + parser = ArgumentParser(description=__description__) + + groupA = parser.add_argument_group('INPUT DATABASES', "Anvi'o artifacts for the pan graph to be computed.") + groupA.add_argument(*anvio.A('pan-db'), **anvio.K('pan-db', {'required': True})) + groupA.add_argument(*anvio.A('genomes-storage'), **anvio.K('genomes-storage', {'required': True})) + groupA.add_argument(*anvio.A('external-genomes'), **anvio.K('external-genomes', {'required': True})) + + groupB = parser.add_argument_group('DETAILS OF GRAPH COMPUTATION', "Variables that will influence the computation of the graph, the organization " + "of the gene clusters, and edges between them.") + groupB.add_argument('--max-edge-length-filter', default = 50, type=int, help = "In the final pan graph, edges that connect gene clusters will vary in " + "their length. The longer edges that connect far gene clusters with one another will add additional layers to the final " + "display, reducing the readability of the overall graph structure. This parameter, which is by default set to %(default)d, " + "will remove edges that span across more than %(default)d gene clusters. You can change the thresold to make graph much " + "more accurate (lower values) or much more readable (higher values). We suggest you to start with the default, but " + "explore other options if you are not satisfied. Please keep in mind that pangenomes that contain a very large number of " + "genomic rearrangement events may take a very long time to compute with very small values of this parameter.") + groupB.add_argument('--gene-cluster-grouping-threshold', default = 2, type=int, help = "This parameters influences how gene clusters that share " + "perfect synteny (across all genomes that contribute genes) are represented in the final display. 
With the default value " + "of %(default)d, the final graph will represent as many gene clusters as possible in groups.") + + args = parser.get_args(parser) + + try: + graph = panops.Pangraph(args, run, progress) + graph.process() + except ConfigError as e: + print(e) + sys.exit(-1) + except FilesNPathsError as e: + print(e) + sys.exit(-2) From 1e59eda00cf2d22530ce485dce8801f3c79cefb4 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Thu, 21 Dec 2023 16:04:53 +0100 Subject: [PATCH 008/265] += anvi-display-pan-graph --- bin/anvi-display-pan-graph | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100755 bin/anvi-display-pan-graph diff --git a/bin/anvi-display-pan-graph b/bin/anvi-display-pan-graph new file mode 100755 index 0000000000..d95a3d4498 --- /dev/null +++ b/bin/anvi-display-pan-graph @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 + +import sys +from anvio.argparse import ArgumentParser + +import anvio +import anvio.utils as utils +import anvio.terminal as terminal +import anvio.interactive as interactive +from anvio.bottleroutes import BottleApplication + +from anvio.errors import ConfigError, FilesNPathsError, DictIOError + +__author__ = "Developers of anvi'o (see AUTHORS.txt)" +__copyright__ = "Copyleft 2015-2018, the Meren Lab (http://merenlab.org/)" +__credits__ = [] +__license__ = "GPL 3.0" +__version__ = anvio.__version__ +__authors__ = ['ahenoch'] +__requires__ = [] +__provides__ = ['interactive', 'svg'] +__description__ = "Start an anvi'o interactive interface to view pan graphs." 
+ +run = terminal.Run() +progress = terminal.Progress() + +if __name__ == '__main__': + parser = ArgumentParser(description=__description__) + groupA = parser.add_argument_group('INPUT DATABASE', "Specify where your pan-db is.") + groupA.add_argument(*anvio.A('pan-db'), **anvio.K('pan-db')) + groupA.add_argument(*anvio.A('genomes-storage'), **anvio.K('genomes-storage')) + + groupB = parser.add_argument_group('SERVER CONFIGURATION', "For power users.") + groupB.add_argument(*anvio.A('dry-run'), **anvio.K('dry-run')) + groupB.add_argument(*anvio.A('ip-address'), **anvio.K('ip-address')) + groupB.add_argument(*anvio.A('port-number'), **anvio.K('port-number')) + groupB.add_argument(*anvio.A('browser-path'), **anvio.K('browser-path')) + groupB.add_argument(*anvio.A('server-only'), **anvio.K('server-only')) + groupB.add_argument(*anvio.A('password-protected'), **anvio.K('password-protected')) + + args = parser.get_args(parser) + + try: + d = interactive.PangraphInteractive(args, run=run, progress=progress) + + if args.dry_run: + run.info_single('Dry run, eh? Fine. 
Bai!', nl_after=1) + sys.exit() + + args.mode = 'pangraph' + port_number = utils.get_port_num(args.port_number, args.ip_address, run=run) + + app = BottleApplication(d) + app.run_application(args.ip_address, port_number) + except ConfigError as e: + print(e) + sys.exit(-1) + except FilesNPathsError as e: + print(e) + sys.exit(-2) + except DictIOError as e: + print(e) + sys.exit(-3) From b87aa6b4bdeb8dd8ced7bb92562ff22dd9d4cba4 Mon Sep 17 00:00:00 2001 From: Alexander Henoch Date: Fri, 22 Dec 2023 12:58:55 +0100 Subject: [PATCH 009/265] updated html page without template --- anvio/data/interactive/css/pangraph.css | 86 ++++++ anvio/data/interactive/js/pangraph.js | 330 +++++++++++++++--------- anvio/data/interactive/pangraph.html | 177 ++++--------- 3 files changed, 338 insertions(+), 255 deletions(-) create mode 100644 anvio/data/interactive/css/pangraph.css diff --git a/anvio/data/interactive/css/pangraph.css b/anvio/data/interactive/css/pangraph.css new file mode 100644 index 0000000000..b7b272d936 --- /dev/null +++ b/anvio/data/interactive/css/pangraph.css @@ -0,0 +1,86 @@ +#svgbox { + position: relative; + height: 100vh; + width: 100vw; + } + + .offcanvas-btn { + visibility: visible; + } + + .offcanvas-btn span:last-child, + .offcanvas.show .offcanvas-btn span:first-child { + display: none; + } + + .offcanvas.show .offcanvas-btn span:last-child { + display: inline; + } + + h5 { + border-bottom: 1px solid #ccc; /* This is the line replacing the hr*/ + margin-bottom: 10px; /* This is the space below the line */ + margin-top: 20px; /* This is the space below the line */ + padding-bottom: 15px; /* This is the space between the heading text and the line */ + } + + #leftoffcanvas-button { + left: 400px; + } + + #rightoffcanvas-button { + right: 200px; + } + + #rightoffcanvas { + width: 200px; + } + + #left { + overflow-y: auto; + /* max-height: calc(100vh - 128px); */ + height: calc(100vh - 128px); + } + + .dropdown-toggle::after { + content: none; + } + + 
.offcanvas-body { + overflow: hidden; + } + + .dropdown-menu { + max-height: 200px; + overflow-y: auto; + } + + .modal-dialog, + .modal-content { + /* 80% of window height */ + height: 80%; + } + + .modal-body { + /* 100% = dialog height, 120px = header + footer */ + max-height: calc(100vh - 210px); + overflow-y: auto; + } + + #right { + overflow-y: auto; + max-height: 100%; + } + + + .btn-bot{ + position: absolute; + top:0px; + left:50%; + -ms-transform: translateX(-50%); + transform: translateX(-50%); + } + + .right { + overflow-y: scroll; + } \ No newline at end of file diff --git a/anvio/data/interactive/js/pangraph.js b/anvio/data/interactive/js/pangraph.js index ed90cbc3c1..06385148a6 100644 --- a/anvio/data/interactive/js/pangraph.js +++ b/anvio/data/interactive/js/pangraph.js @@ -84,7 +84,7 @@ function fetchinfo(info) { var func_maxEl = '' var func_maxCount = '' } - d[source] = [id_maxEl, id_maxCount, func_maxEl, func_maxCount] + d[source] = [id_maxEl, id_maxCount, func_maxEl, func_maxCount] } return(d); @@ -150,7 +150,7 @@ async function appendinfo(body, suffix, drop, position, genomes, group, info) { $('Value') ) ) - + var d = fetchinfo(info) for (var [source, value] of Object.entries(d)) { @@ -218,14 +218,14 @@ function fetchgenome(info) { var evalue = info[source][2] } - d[source] = [func_id, func, evalue] + d[source] = [func_id, func, evalue] } - return(d); + return(d); } //ANCHOR - Append genecall functions async function appendgenome(bodygenome, name, call, genome, length, direction, paralog, partial, info) { - + bodygenome.append( $('
    ').append( $('
    ').append( @@ -307,7 +307,7 @@ async function appendgenome(bodygenome, name, call, genome, length, direction, p $('e-value') ) ) - + var d = fetchgenome(info) for (var [source, value] of Object.entries(d)) { @@ -360,7 +360,7 @@ async function appendgenome(bodygenome, name, call, genome, length, direction, p //ANCHOR - Fetch GC alignment function fetchalignment(alignment) { - + var d = $.ajax({ url: "/alignment", type: "POST", @@ -398,7 +398,7 @@ async function appendalignment(body, alignment) { var size = Object.entries(d)[0][1][1].length; var numbers = ''; var i = 1 - + while (i < size) { var j = i.toString().length if (i+50 <= size){ @@ -452,7 +452,7 @@ async function appendalignment(body, alignment) { ) ) } - + var div = $('
    ') div.append( @@ -462,7 +462,7 @@ async function appendalignment(body, alignment) { ) body.append(div) - + } } @@ -509,15 +509,15 @@ function marknode(e, data, binid, bins){ e.setAttribute("fill", lighter_color('#ffffff', node_color, genome / genome_size)) bins[binid] = bins[binid].filter(item => item !== id) $('#' + binid + 'value')[0].value = bins[binid].length - + } else if (current === '') { e.setAttribute("fill", lighter_color('#ffffff', bincolor, genome / genome_size)) bins[binid].push(id) $('#' + binid + 'value')[0].value = bins[binid].length - + } else { - + e.setAttribute("fill", lighter_color('#ffffff', bincolor, genome / genome_size)) bins[current] = bins[current].filter(item => item !== id) bins[binid].push(id) @@ -535,7 +535,7 @@ function nodeinfo(e, data) { var element = document.getElementById(id); if (element.getAttribute('class') == 'group') { - + var drop = $('') drop.append( @@ -544,9 +544,9 @@ function nodeinfo(e, data) { ) ) - var dropitem = $('') + var dropitem = $('') var grouplist = data['infos']['groups'][id] - + for (var listitem of grouplist) { dropitem.append( $('
  • ').append( @@ -573,7 +573,7 @@ function nodeinfo(e, data) { var info = data['elements']['nodes'][id]['genome'] var alignment = {} - + if (id != 'start' && id != 'stop') { for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { alignment[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] @@ -608,7 +608,7 @@ function deg2rad(degrees) //ANCHOR - General download function function downloadBlob(blob, name) { - + var blobUrl = URL.createObjectURL(blob); var link = document.createElement("a"); @@ -618,10 +618,10 @@ function downloadBlob(blob, name) { document.body.appendChild(link); link.dispatchEvent( - new MouseEvent('click', { - bubbles: true, - cancelable: true, - view: window + new MouseEvent('click', { + bubbles: true, + cancelable: true, + view: window }) ); @@ -638,8 +638,10 @@ function int_to_hex(num) { //ANCHOR - Function to mix two colors function lighter_color(color1, color2, percentage, threshold=0.25) { + + percentage = threshold + (1 - threshold) * percentage - percentage = threshold + (1 - threshold) * percentage + // var color3 = $.xcolor.gradientlevel(color1, color2, percentage, 1); color1 = color1.substring(1); color2 = color2.substring(1); @@ -647,9 +649,9 @@ function lighter_color(color1, color2, percentage, threshold=0.25) { color1 = [parseInt(color1[0] + color1[1], 16), parseInt(color1[2] + color1[3], 16), parseInt(color1[4] + color1[5], 16)]; color2 = [parseInt(color2[0] + color2[1], 16), parseInt(color2[2] + color2[3], 16), parseInt(color2[4] + color2[5], 16)]; - var color3 = [ - (1 - percentage) * color1[0] + percentage * color2[0], - (1 - percentage) * color1[1] + percentage * color2[1], + var color3 = [ + (1 - percentage) * color1[0] + percentage * color2[0], + (1 - percentage) * color1[1] + percentage * color2[1], (1 - percentage) * color1[2] + percentage * color2[2] ]; @@ -658,7 +660,7 @@ function lighter_color(color1, color2, percentage, threshold=0.25) { 
return color3 } -//ANCHOR - Transformation function +//ANCHOR - Transformation function function transform(x, y, node_distance_y, radius, theta) { var circle_x = ((y * node_distance_y) + radius) * Math.sin(deg2rad(theta * x)) var circle_y = ((y * node_distance_y) + radius) * Math.cos(deg2rad(theta * x)) @@ -666,7 +668,7 @@ function transform(x, y, node_distance_y, radius, theta) { } function pickcolor (edgecoloring, genomes) { - + var array = [] for (var name of genomes) { @@ -683,7 +685,7 @@ function pickcolor (edgecoloring, genomes) { async function generate_svg(body, data) { var edgecoloring = {} - + $("#genomecolors :input[type='color']").each((index, element) => { edgecoloring[element.id] = [index, element.value] }) @@ -732,10 +734,10 @@ async function generate_svg(body, data) { var [g_x, g_y] = transform(arrow_stop, - arrow_y_offset, node_distance_y, radius, theta) svg.append( - $('') @@ -743,7 +745,7 @@ async function generate_svg(body, data) { var l = 1 while (l < global_x) { - + if (l+steps <= global_x){ var k = steps; } else { @@ -770,7 +772,7 @@ async function generate_svg(body, data) { var edge = data['elements']['edges'][i]; var genome = Object.keys(edge['genome']).length; - + if ($('#coloredge').prop('checked') == true) { var color = pickcolor (edgecoloring, Object.keys(edge['genome'])) var draw = lighter_color('#ffffff', color, genome / genome_size); @@ -807,9 +809,9 @@ async function generate_svg(body, data) { var bend_n_x = edge['bended'][j]['x'] var bend_n_y = edge['bended'][j]['y'] var [circle_n_x, circle_n_y] = transform(bend_n_x, bend_n_y, node_distance_y, radius, theta); - + circle_svg += 'L ' + circle_n_x + ' ' + circle_n_y - + } circle_svg += 'L ' + circle_j_x + ' ' + circle_j_y + '"' + stroke + ' stroke="' + draw + '" stroke-width="2" fill="none"/>' @@ -817,7 +819,7 @@ async function generate_svg(body, data) { svg.append( $(circle_svg) ) - } + } }; var group_nodes = []; @@ -860,7 +862,7 @@ async function generate_svg(body, data) { } var 
[circle_x, circle_y] = transform(pos_x, pos_y, node_distance_y, radius, theta); - + if (k == 'stop' || k == 'start'){ svg.append( $('') @@ -869,12 +871,12 @@ async function generate_svg(body, data) { svg.append( $('') ) - } + } }; for(var l in groups) { var group = data['infos']['groups'][l] - + var group_length = group.length var left_node_name = group[0] var right_node_name = group[group_length-1] @@ -883,7 +885,7 @@ async function generate_svg(body, data) { var right_node = data['elements']['nodes'][right_node_name]; var genome = Object.keys(left_node['genome']).length; - + if ($('#colorgenecluster').prop('checked') == true) { var color = pickcolor (edgecoloring, Object.keys(left_node['genome'])) var draw = lighter_color('#ffffff', color, genome / genome_size); @@ -892,10 +894,10 @@ async function generate_svg(body, data) { } var left_pos_x = left_node['position']['x'] - var left_pos_y = left_node['position']['y'] + var left_pos_y = left_node['position']['y'] var right_pos_x = right_node['position']['x'] - var right_pos_y = right_node['position']['y'] + var right_pos_y = right_node['position']['y'] var [circle_t_x, circle_t_y] = transform(left_pos_x, left_pos_y, node_distance_y, radius + 10, theta); var [circle_u_x, circle_u_y] = transform(left_pos_x, left_pos_y, node_distance_y, radius - 10, theta); @@ -933,20 +935,20 @@ async function generate_svg(body, data) { if (i in entropy){ var values = [] values.push(entropy[i]) - + for (let j = 1; j <= window; j += 1) { if (i-j in entropy) {values.push(entropy[i-j])} if (i+j in entropy) {values.push(entropy[i+j])} } - + mean_entropy[i] = values.reduce((partialSum, a) => partialSum + a, 0) / values.length } } var max = Math.max(...Object.values(mean_entropy)) - + for (var key of Object.keys(mean_entropy)) { - + var [a_x, a_y] = transform(parseInt(key)-0.5, -2, node_distance_y, radius, theta) var [b_x, b_y] = transform(parseInt(key)+0.5, -2, node_distance_y, radius, theta) var [c_x, c_y] = transform(parseInt(key)-0.5, 
-14, node_distance_y, radius, theta) @@ -985,7 +987,7 @@ async function generate_svg(body, data) { //ANCHOR - Check node function checknode(searchpos, positions, node, mingenomes, maxgenomes, minposition, maxposition, searchfunction, expressiondrop, expressioncomparison) { - + var append = true if (searchpos == true) { @@ -1003,7 +1005,7 @@ function checknode(searchpos, positions, node, mingenomes, maxgenomes, minpositi if (maxgenomes != '-1'){ if (eval(Object.keys(node['genome']).length + '>=' + maxgenomes)){ - append = false + append = false } } @@ -1032,32 +1034,38 @@ function checknode(searchpos, positions, node, mingenomes, maxgenomes, minpositi function main () { $.getJSON('static/json/result.json?' + new Date().getTime(), function(data) { - + $('#redraw').on('click', function() { + + // [...document.querySelectorAll('*')].forEach(node => { + // if (node._tippy) { + // node._tippy.destroy(); + // } + // }); $(document).off().find("*").off(); main() - + }) - + $('#settings').on('click', function() { - + var data = new Object; - + data['conntr'] = $('#conntr')[0].value data['condtr'] = $('#condtr')[0].value data['maxlength'] = $('#maxlength')[0].value - + $("#genomecolors :input[type='checkbox']").each((index, element) => { - + var genome = $(element).attr('name') if ($(element).prop('checked') == true){ - data[genome] = 'on' + data[genome] = 'on' } else { data[genome] = 'off' } }) - + $.ajax({ url: "/settings", type: "POST", @@ -1066,6 +1074,12 @@ function main () { contentType: "application/json", dataType: "json" }); + + // [...document.querySelectorAll('*')].forEach(node => { + // if (node._tippy) { + // node._tippy.destroy(); + // } + // }); $(document).off().find("*").off(); main() @@ -1177,11 +1191,11 @@ function main () { //ANCHOR - Choose genecall $(document).on("click", ".genome", function() { - + var id = $('#name').attr('name') var call = this.innerText; var genome = this.parentNode.parentNode.firstChild.innerText; - + var name = 
data['elements']['nodes'][id]['name']; var length = data['elements']['nodes'][id]['genome'][genome]['length']; var direction = data['elements']['nodes'][id]['genome'][genome]['direction']; @@ -1189,7 +1203,7 @@ function main () { var partial = data['elements']['nodes'][id]['genome'][genome]['partial']; var info = data['elements']['nodes'][id]['genome'][genome]; - + $('#GenomeModalBody').empty() var bodygenome = $('
    ') $('#GenomeModalBody').append( @@ -1200,7 +1214,7 @@ function main () { $('#AlignmentModal').modal('hide'); $('#GenomeModal').modal('show'); }); - + $(document).on("click", ".choice li a", function() { var name = $(this).attr('name') var dropitem = $(this).parent().parent().parent().children(":first") @@ -1219,10 +1233,10 @@ function main () { var id = $(this).attr('name') var binid = $(this).attr('bin') - + var name = document.getElementById(id); bins = marknode(name, data, binid, bins); - + $(this).parent().parent().parent().remove() }) @@ -1254,9 +1268,9 @@ function main () { bodyinfo ) appendinfo(bodyinfo, '', drop, position, genomes, group, info) - + var alignment = {} - + if (id != 'start' && id != 'stop') { for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { alignment[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] @@ -1275,10 +1289,10 @@ function main () { //ANCHOR - Bin dropdown choice function $(document).on("click", ".binchoice li a", function() { - + var id = $(this).attr('name') var drop = $(this).parent().parent().parent() - var group = drop.attr('name') + var group = drop.attr('name') var name = data['elements']['nodes'][id]['name'] var dropitem = $('#' + group + 'name') @@ -1335,12 +1349,12 @@ function main () { //ANCHOR - Remove bin $('#binremove').on('click', function() { - + var selection = document.querySelector('input[name="binradio"]:checked') - + if (selection !== null) { var binid = selection.value - + for (var node of bins[binid]) { var name = document.getElementById(node); bins = marknode(name, data, binid, bins); @@ -1360,16 +1374,16 @@ function main () { //ANCHOR - Change bin $(document).on("change", ".colorchange", function() { - + var binid = this.name var nodes = bins[binid] - + for (var node of nodes) { - + bins[binid] = bins[binid].filter(item => item !== node) var name = document.getElementById(node); bins = marknode(name, data, binid, 
bins); - + } }); @@ -1378,7 +1392,7 @@ function main () { $('#bininfo').on('click', function() { var selection = document.querySelector('input[name="binradio"]:checked') - + // if (selection !== null) { var binid = selection.value var appendlist = [] @@ -1387,7 +1401,7 @@ function main () { for (var id of bins[binid]) { var element = document.getElementById(id); if (element.getAttribute('class') == 'group') { - + var drop = $('') drop.append( @@ -1396,9 +1410,9 @@ function main () { ) ) - var dropitem = $('') + var dropitem = $('') var grouplist = data['infos']['groups'][id] - + for (var listitem of grouplist) { dropitem.append( $('
  • ').append( @@ -1428,41 +1442,41 @@ function main () { } for (var [id, drop, position, genomes, group, info] of appendlist) { - + var body = $('
    ') $('#BinModalBody').append( $('
    ').append( $('
    ').append( $('
    ').append( - $('') + $('') ) ) ).append( body ) ) - + appendinfo(body, id, drop, position, genomes, group, info) } $('#BinModal').modal('show'); // } }) - + $('#InfoDownload').on('click', async function() { - + var id = $('#name').attr('name') var name = $('#name')[0].innerText; if (id != 'Choose GC' && id != 'start' && id != 'stop'){ - + var group = $('#group')[0].innerText; var genomes = $('#genomes')[0].innerText; var position = $('#position')[0].innerText; var csv = "Name\t" + name + "\nGroup\t" + group + "\nGenomes\t" + genomes + "\nPosition\t" + position + "\nSource\tAccession\tFunction\tConfidence"; - + var func = fetchinfo(data['elements']['nodes'][id]['genome']); for (var [key, value] of Object.entries(func)) { @@ -1473,7 +1487,7 @@ function main () { downloadBlob(blob, name + ".csv"); } }); - + $('#GenomeDownload').on('click', async function() { @@ -1503,12 +1517,12 @@ function main () { }); $('#AlignmentDownload').on('click', async function() { - + var id = $('#name').attr('name') var name = $('#name')[0].innerText; if (id != 'Choose GC' && id != 'start' && id != 'stop'){ - + var al = {} for (var genome of Object.keys(data['elements']['nodes'][id]['genome'])) { al[genome] = [data['elements']['nodes'][id]['genome'][genome]['gene_call'], data['elements']['nodes'][id]['name']] @@ -1539,15 +1553,15 @@ function main () { var blob = new Blob([$('#svgbox')[0].innerHTML]); downloadBlob(blob, pass_project_name + ".svg"); }); - + $('#searchadd').on('click', function() { var selection = document.querySelector('input[name="binradio"]:checked') var binid = selection.value - + for (var [id, members] of Object.entries(searched)) { if (!(id in bins[binid])) { - + var e = document.getElementById(id); bins = marknode(e, data, binid, bins); @@ -1560,10 +1574,10 @@ function main () { var selection = document.querySelector('input[name="binradio"]:checked') var binid = selection.value - + for (var [id, members] of Object.entries(searched)) { if (id in bins[binid]) { - + 
var e = document.getElementById(id); bins = marknode(e, data, binid, bins); @@ -1572,7 +1586,7 @@ function main () { }) $('#searcherase').on('click', function() { - + for (var [id, members] of Object.entries(searched)) { for (var mem of members) { var xpos = data['elements']['nodes'][mem]['position']['x'] @@ -1585,7 +1599,7 @@ function main () { }) $('#searchcolor').on('click', function() { - + for (var [id, members] of Object.entries(searched)) { for (var mem of members) { var xpos = data['elements']['nodes'][mem]['position']['x'] @@ -1718,7 +1732,7 @@ function main () { if (checknode(searchpos, positions, node, mingenomes, maxgenomes, minposition, maxposition, searchfunction, expressiondrop, expressioncomparison) == true) { if (expressiondrop == "Name" && expressionrel != '\u{2264}' && expressionrel != '\u{2265}' && expressionrel != '\u{003C}' && expressionrel != '\u{003E}') { if (eval('"' + node["name"] + '"' + expressioncomparison) || eval('"' + group + '"' + expressioncomparison)) { - + if (!(groupid in searched)) { searched[groupid] = [id] } else { @@ -1764,7 +1778,7 @@ function main () { var nodes = document.querySelectorAll(".node") var divs = document.querySelectorAll(".node, .group"); for (var el of divs) { - + if (el.getAttribute("id").startsWith('GCG_')){ var id = data["infos"]["groups"][el.getAttribute("id")][0] var name = el.getAttribute("id") @@ -1776,13 +1790,27 @@ function main () { tippy(el, { content: '' + name + '' + '
    ', allowHTML: true, - onHide() { - for (var element of Object.keys(data['elements']['nodes'][id]['genome'])) { - $('#number_' + element)[0].innerText = '0'; + onHide(instance) { + if (instance.reference.id.startsWith('GCG_')){ + var id = data["infos"]["groups"][instance.reference.id][0] + } else { + var id = instance.reference.id + } + var elements = Object.keys(data['elements']['nodes'][id]['genome']) + + for (var element of elements) { + $('#number_' + element)[0].innerText = '0'; } }, - onShow() { - for (var element of Object.keys(data['elements']['nodes'][id]['genome'])) { + onShow(instance) { + if (instance.reference.id.startsWith('GCG_')){ + var id = data["infos"]["groups"][instance.reference.id][0] + } else { + var id = instance.reference.id + } + var elements = Object.keys(data['elements']['nodes'][id]['genome']) + + for (var element of elements) { $('#number_' + element)[0].innerText = '1'; } }, @@ -1801,10 +1829,10 @@ function main () { var xpos = 0 var ypos = 0 - + var new_xpos = 0 var new_ypos = 0 - + $("#svgbox").on('mousedown', function(e) { old_xpos = e.offsetX old_ypos = e.offsetY @@ -1815,7 +1843,7 @@ function main () { isDown = true diff = 0 }) - + $("#svgbox").on('mousemove', function(e) { if (isDown === true) { new_xpos = e.offsetX @@ -1831,17 +1859,17 @@ function main () { ypos = new_ypos } }) - + $("#svgbox").on('mouseup', function(e) { if (isDown === true) { var selection = document.querySelector('input[name="binradio"]:checked') - + isDown = false - + if (diff < 10) { if (e.target.getAttribute('class') === 'group' || e.target.getAttribute('class') === 'node') { - + if (e.shiftKey && selection !== null) { var binid = selection.value @@ -1852,8 +1880,8 @@ function main () { } } else { - } - + } + } else { if (e.shiftKey && selection !== null) { @@ -1868,19 +1896,19 @@ function main () { for (var n of nodes) { var bounding = n.getBoundingClientRect(); - var left = bounding.left + var left = bounding.left var right = bounding.right var 
bottom = bounding.bottom var top = bounding.top if ( min_xpos < left && - max_xpos > right && - min_ypos < bottom && + max_xpos > right && + min_ypos < bottom && max_ypos > top ) { bins = marknode(n, data, binid, bins); - + } } @@ -1890,17 +1918,17 @@ function main () { var group = data['infos']['groups'][g] for (var k of group) { var node = document.getElementById(k); - + var bounding = node.getBoundingClientRect(); - var left = bounding.left + var left = bounding.left var right = bounding.right var bottom = bounding.bottom var top = bounding.top if ( min_xpos < left && - max_xpos > right && - min_ypos < bottom && + max_xpos > right && + min_ypos < bottom && max_ypos > top ) { var name = document.getElementById(g); @@ -1909,8 +1937,8 @@ function main () { } // if ( // min_xpos < left && - // max_xpos > right && - // min_ypos < bottom && + // max_xpos > right && + // min_ypos < bottom && // max_ypos > top // ) { // } else { @@ -1926,14 +1954,70 @@ function main () { } } } - }) + }) }) } //ANCHOR - Main function after loading DOM $(document).ready(function() { + + //ANCHOR fetch from backend + $.ajax({ + url: "/template", + type: "POST", + success: function(result){ + console.log(result) + + $("#conntr")[0].value = result['conntr']; + $("#condtr")[0].value = result['condtr']; + $("#maxlength")[0].value = result['maxlength']; + + for (var [genome, value] of Object.entries(result['genome_coloring'])) { + + var state = '' + if (value == 'on') { + state = ' checked' + } + + $('#genomecolors').append( + $('
    ').append( + $('
    ').append( + $('
    ').append( + $('
    ').append( + $('') + ) + ) + ).append( + $('
    ').append( + genome + ) + ).append( + $('
    ').append( + $('') + ) + ).append( + $('
    ').append( + $('') + ) + ) + ) + ) + + $('#RightOffcanvasBodyTop').append( + $('
    ').append( + genome + ) + ).append( + $('
    ').append( + 0 + ) + ) + + } + } + }) main() -}); +}); \ No newline at end of file diff --git a/anvio/data/interactive/pangraph.html b/anvio/data/interactive/pangraph.html index 387ae29b92..b6e3a29f8e 100644 --- a/anvio/data/interactive/pangraph.html +++ b/anvio/data/interactive/pangraph.html @@ -3,7 +3,7 @@ - + Gene Cluster Network @@ -18,26 +18,21 @@ + + - - - - - +
    - +
    - +