diff --git a/bin/append_GCF.sh b/bin/append_GCF.sh new file mode 100755 index 0000000..3c559ff --- /dev/null +++ b/bin/append_GCF.sh @@ -0,0 +1,92 @@ + +#!/usr/bin/env bash +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. +# +# FUNCTIONS +usage() +{ +cat << EOF + +Help message for make_gcf_mapping.sh: + +DESCRIPTION: + Optionally merge assembly refseq on a gcf assembly pull FASTA file. FASTA file must be in format >chr GCF description (3 columns) where first 2 are a space and the third is anything after + +NOTES: + + +OPTIONS: + -h help show this message + -i FILE a fasta file/dir of gcf references pulled. Appends the filename's GCF to the middle of the accession (between chr accession and description) + -d FILE optional. If called, it is a directory of GCF files (*fna, *fasta, *faa, *fa) + -o FILE full path to output file + +USAGE: + +bash append_GCF.sh -i GCF_020735385.1.fasta -o test.fasta -t (looks at all fasta files in dir) +bash append_GCF.sh -i GCF_020735385.1.fasta -o test.fasta (appends base assembly accession to center of contig/chromosome header) + +EOF +} + +DIR="FALSE" + +# ARGUMENTS +# parse args +while getopts "hi:do:" OPTION +do + case $OPTION in + h) usage; exit 1 ;; + i) fasta=$OPTARG ;; + d) DIR="TRUE" ;; + o) output=$OPTARG ;; + ?) usage; exit ;; + esac +done + + +# make a function to append the GCF to the middle of the accession +# if the file is GCF_102310239.1_aSDGM I want only GCF. Split on the _ and get first 2 elements +# now get all lines that start with ">" in the filename, and append the gcf to the middle of the accession between NC_12123 and "djaskdjasdklj" (random description). It should be $1 then $gcf then everything AFTER $1 +# use sed after first space +append_f(){ + filename=$1 + # echo "Name BEGINS with GCF: ${basen}" + # get the basename of the file removing extension + basen=$(basename $filename | sed 's/\.[^.]*$//') + # if the file is GCF_102310239.1_aSDGM I want only GCF. Split on the _ and get first 2 elements + gcf=$(echo $basen | awk -F '_' '{print $1"_"$2}') + # now get all lines that start with ">" in the filename, and append the gcf to the middle of the accession between NC_12123 and "djaskdjasdklj" (random description). 
It should be $1 then $gcf then everything AFTER $1 + # use sed after first space + sed -e "s/ / $gcf /" $filename +} + + +if [[ $DIR == "TRUE" ]]; then + echo "Directory detected" + for filename in $(find $fasta \( -name "*.fasta" -o -name "*fna" -o -name "*faa" -o -name "*fa" \) -type f ); do + basen=$(basename $filename) + if [[ $basen =~ "GCF" ]]; then + append_f $filename + fi + + done + +else + append_f $fasta +fi + diff --git a/bin/determine_priority_assembly.py b/bin/determine_priority_assembly.py new file mode 100644 index 0000000..6184f32 --- /dev/null +++ b/bin/determine_priority_assembly.py @@ -0,0 +1,41 @@ +import re +def determine_priority_assembly(line): + cols = line.strip().split("\t") + if len(cols) > 7: + name = cols[7] + level = cols[4] + if level == "representative genome" : + return 0 + elif cols[4] == "reference genome" : + return 1 + elif cols[11] == "Complete Genome" : + return 2 + else: + return 3 +def format_description(id, description): + delimiters = "[ ]" # Split on underscore or comma + description = description.split(',', 1)[0] + linesplit = re.split(delimiters, description) + id = id.replace(">", "") + # remove all after first comma + + if len(linesplit) > 1: + return id, " ".join(linesplit[1:]) + else: + return id, description +def extract_strain_from_description(description): + delimiters = "[ ]" # Split on underscore or comma + # regex match for strain, get value after space until next space +def parse_fasta_header(header): + parts = header.split() + organism_parts = [] + strain = None + for part in parts: + if part.lower() == "strain": + strain_index = parts.index(part) + 1 + if strain_index < len(parts): + strain = parts[strain_index] + break + organism_parts.append(part) + organism_name = " ".join(organism_parts) + return organism_name, strain diff --git a/bin/download_fastas.py b/bin/download_fastas.py index 0738231..fc8d80f 100755 --- a/bin/download_fastas.py +++ b/bin/download_fastas.py @@ -40,6 +40,8 @@ import shutil import urllib.request as request import ssl +from determine_priority_assembly import determine_priority_assembly, format_description + ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE @@ -173,8 +175,7 @@ def import_genome_file(filename, kraken2output): # def import_assembly_file(input, filename, matchcol, idx, nameidx, index_ftp): - refs = dict() - seen = dict() + assemblies = dict() first = dict() # matchcol is the column is number of column where you match the top hits to @@ -200,6 +201,7 @@ def get_url(utl, id): formatted_header = namecol.replace(" ", "_") formatted_header = str(formatted_header) if len(linesplit) >= 12 and (matchidx in input): + if namecol not in priorities: priorities[namecol] = dict() seencols[matchidx] = True @@ -251,16 +253,16 @@ def get_url(utl, id): for key, value in priorities.items(): if value.get('0'): - refs[key] = value['0'] + assemblies[key] = value['0'] elif value.get('1'): - refs[key] = value['1'] + assemblies[key] = value['1'] elif value.get('2'): - refs[key] = value['2'] + assemblies[key] = value['2'] elif value.get('3'): - refs[key] = value['3'] + assemblies[key] = value['3'] else: print("No reference genome found for", key) - return refs + return assemblies def get_assembly_summary(id): @@ -270,7 +272,22 @@ def get_assembly_summary(id): return esummary_record -def get_assemblies(refs, outfile, seen, index_ftp, refresh): +def download_fasta(ftp_site): + encoding = guess_type(ftp_site)[1] # uses file extension + _open = partial( + gzip.open, mode='rt') if 
encoding == 'gzip' else open + try: + with closing(request.urlopen(ftp_site, context=ctx)) as r: + with open('file.gz', 'wb') as f: + shutil.copyfileobj(r, f) + f.close() + r.close() + return _open + except Exception as ex: + print("Could not download", ftp_site, ex) + raise ex + +def get_assemblies(assemblies, outfile, GCFs_to_skip, refresh): """Download genbank assemblies for a given search term. Args: term: search term, usually organism name @@ -278,49 +295,60 @@ def get_assemblies(refs, outfile, seen, index_ftp, refresh): path: folder to save to """ # get the value.accession from the refs dict asa list - ids = list(refs.keys()) - if index_ftp and len(ids) > 0: - typee = "a" - if refresh: - typee = "w" - with open(outfile, typee) as w: - for id in ids: - try: - accession = refs[id]['accession'] - if seen and accession in seen: - refs[id]['chrs'] = seen[accession] - print("key already seen:", id, "; skipping") - else: - ftp_site = refs[id]['reference'] - obj = refs[id]['id'] - name = refs[id]['name'] - - encoding = guess_type(ftp_site)[1] # uses file extension - _open = partial( - gzip.open, mode='rt') if encoding == 'gzip' else open - with closing(request.urlopen(ftp_site, context=ctx)) as r: - with open('file.gz', 'wb') as f: - shutil.copyfileobj(r, f) - f.close() - r.close() - with _open('file.gz') as uncompressed: - for record in SeqIO.parse(uncompressed, "fasta"): - # if (len(record.seq) > 1000): - pattern = f"{record.id}\s*" - record.description = re.sub(pattern, "", record.description) - # record.description = record.description.replace(" ", "_") - newobj = f"{record.id} {accession} {record.description}" - refs[id]['chrs'].append(str(record.id)) - record.id = newobj - record.description = "" - SeqIO.write(record, w, "fasta") - uncompressed.close() - except Exception as ex: - print(ex) - pass - w.close() - return + ids = list(assemblies.keys()) + accessions = [assemblies[id]['accession'] for id in ids] + # filter accessions on those present in GCFS_to_skip + accessions = [x for x in accessions if x not in GCFs_to_skip] + caught_ncs = [ ] + new_mapping = dict() + seen = dict() + typee = "a" + if refresh: + typee = "w" + + with open(outfile, typee) as w: + for id in ids: + try: + accession = assemblies[id]['accession'] + if accession in GCFs_to_skip: + assemblies[id]['chrs'] = GCFs_to_skip[accession] + print("key already seen:", id, "; skipping") + else: + ftp_site = assemblies[id]['reference'] + obj = assemblies[id]['id'] + name = assemblies[id]['name'] + _open = download_fasta(ftp_site) + print(f"Downloading {ftp_site} to for {accession}: {id}") + with _open('file.gz') as uncompressed: + for record in SeqIO.parse(uncompressed, "fasta"): + # pattern = f"{record.id}\s*" + # record.description = re.sub(pattern, "", record.description) + # record.description = record.description.replace(" ", "_") + # newobj = f"{record.id} {accession} {record.description}" + if record.id not in caught_ncs: + caught_ncs.append(record.id) + if (not refresh and record.id not in new_mapping) or refresh: + print("writing", record.id, "to file") + + SeqIO.write(record, w, "fasta") + elif not refresh and record.id in new_mapping: + print("already seen", record.id, "skipping") + new_mapping[record.id] = dict(accession=accession, name=obj) + if record.id not in assemblies[id]['chrs']: + fid, description = format_description(record.id, record.description) + assemblies[id]['chrs'].append(dict(acc=record.id, name=description) ) + uncompressed.close() + try: + os.remove('file.gz') + except Exception as ex: + 
print(f"Could not remove file.gz {ex}") + pass + except Exception as ex: + print(ex) + pass + w.close() + return new_mapping def download(refs, db, outfile, seen): # if refs is not empty @@ -378,24 +406,38 @@ def get_hits_from_file(filename, colnumber): def main(argv=None): """Coordinate argument parsing and program execution.""" args = parse_args(argv) + + + gcf_mapping = dict() + + if args.gcf_map: + if os.path.exists(args.gcf_map): + #import the file, and save them to a dict of column 2 as key, column 1 as value + with open(args.gcf_map, "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip() + linesplit = line.split("\t") + if len(linesplit) > 1: + key = linesplit[0] + value = linesplit[1] + gcf_mapping[key] = value + f.close() # logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") if args.type == 'file': #colnumber file hits is the column from the input top hits file you want to match to the args.assembly_names - taxids = get_hits_from_file(args.input, args.colnumber_file_hits) - + seen_in_tops = get_hits_from_file(args.input, args.colnumber_file_hits) else: - taxids = args.input + seen_in_tops = args.input.split(" ") + + assemblies = import_assembly_file( + seen_in_tops, args.assembly_refseq_file, args.assembly_names, args.assembly_map_idx, args.name_col_assembly, args.ftp_path + ) - if (args.assembly_refseq_file): - refs = import_assembly_file( - # what column to match input to , col match accession (GCF accession), col match refrence name - taxids, args.assembly_refseq_file, args.assembly_names, args.assembly_map_idx, args.name_col_assembly, args.ftp_path) - else: - refs = import_genome_file(taxids, args.kraken2output) - seen = dict() i = 0 + seen_in_fasta = dict() if os.path.exists(args.file_out): for seq_record in SeqIO.parse(args.file_out, "fasta"): line = str(seq_record.id) @@ -408,45 +450,46 @@ def main(argv=None): # Splitting on space or pipe delimiters = "[ ]" # Split on underscore or comma linesplit = re.split(delimiters, desc) - if (args.kraken2output): - # use regex to split on " | " and get the second element - acc = linesplit[0] - desc = linesplit[1:] - else: - acc = linesplit[0] - desc = linesplit[1:] - header = desc[0] - desc = " ".join(desc) + acc, desc = format_description(line, desc) if not args.refresh: - if header not in seen: - seen[header] = [[acc, desc]] - else: - seen[header].append([acc, desc]) - # Find the key where header == value.accession if it exists - for key, value in refs.items(): - if value['accession'] == header: - refs[key]['chrs'].append(acc) - + seen_in_fasta[acc] = desc except Exception as ex: print(ex) pass + GCFs_to_skip = dict() + if not args.refresh: + for key, value in seen_in_fasta.items(): + if key in gcf_mapping: + if gcf_mapping[key] not in GCFs_to_skip: + GCFs_to_skip[gcf_mapping[key]] = [dict(acc=key, name=value)] + print("GCF found in mapping file, skipping", key, "from", gcf_mapping[key]) + else: + GCFs_to_skip[gcf_mapping[key]].append(dict(acc=key, name=value)) + + else: + print("No mapping for chr/contig to GCF. 
Will need to redownload to get gcf mapping for: ", key) + ## Now, check what contigs/chrs in seen_in_fasta are present in the gcf_mapping if args.email: Entrez.email = args.email if not (args.refresh): - print(len(seen.keys()), "already seen reference ids") - if (not args.assembly_refseq_file): - print("downloading refseq file") - download(refs, args.db, args.file_out, seen) - else: - print("get assemblies") - get_assemblies(refs, args.file_out, seen, args.ftp_path, args.refresh) + print(len(seen_in_fasta.keys()), "already seen reference ids") + # Now use the assembly refseq file to get the ftp path and download the fasta files + print("Get assemblies now") + + + get_assemblies( + assemblies, #this is the top hits from the input file you want to retrieve + args.file_out, # this is the file you want to write to + GCFs_to_skip, # this is the list of GCFs you want to skip + args.refresh # this is the boolean to check if you want to refresh the file + ) if args.gcf_map: with open(args.gcf_map, "w") as w: - for key, value in refs.items(): + for key, value in assemblies.items(): for chr in value['chrs']: - outstring = f"{chr}\t{key}\t{value['accession']}" + outstring = f"{chr['acc']}\t{value['accession']}\t{key}\t{chr['name']}" w.write(f"{outstring}\n") - w.close() + w.close() if __name__ == "__main__": sys.exit(main()) diff --git a/bin/download_features.py b/bin/download_features.py new file mode 100755 index 0000000..a0fbd2f --- /dev/null +++ b/bin/download_features.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 + +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. 
+# + +"""Provide a command line tool to fetch a list of refseq genome ids to a single file, useful for kraken2 database building or alignment purposes""" +from xmlrpc.client import Boolean +import sys +import os +import gzip +import argparse +import logging +from pathlib import Path +import urllib.request as request +import ssl + +ctx = ssl.create_default_context() +ctx.check_hostname = False +ctx.verify_mode = ssl.CERT_NONE + + +# import requests +logger = logging.getLogger() + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "-i", + "--input", + metavar="INPUT", + help="List of taxIDs or names", + ) + parser.add_argument( + "-a", + "--assembly_file", + type=Path, + help="Assembly refseq file to download from, with taxid and accession in the header", + ) + parser.add_argument( + "-d", + "--decompress", + action='store_true', + default=False, + help="Assembly refseq file to download from, with taxid and accession in the header", + ) + parser.add_argument( + "-o", + "--dir_out", + metavar="DIR_OUT", + type=Path, + help="Name of output directory for feature table files", + ) + + return parser.parse_args(argv) + +import shutil +from contextlib import closing +from mimetypes import guess_type +from functools import partial + +def download_feature_file(dir_out: str, url: str, outfile: str): + """Download the assembly file from NCBI""" + try: + response = request.urlopen(url, context=ctx) + encoding = guess_type(url)[1] # uses file extension + _open = partial( + gzip.open, mode='rt') if encoding == 'gzip' else open + outpath = os.path.join(dir_out, outfile) + with closing(request.urlopen(url, context=ctx)) as r: + with open(outpath, 'wb') as f: + shutil.copyfileobj(r, f) + f.close() + r.close() + if encoding == 'gzip': + # decompress the outpath to dir_out, write to file + outpathdecompress = outpath.replace(".gz", "") + with gzip.open(outpath, 'rt') as f: + #write contents to outpath + contents = f.readlines() + # remove the gz extension + with open(outpathdecompress, 'w') as r: + # filter lines with CDS in beginning of line only + for line in contents: + if line.startswith("CDS"): + r.write(line) + r.close() + # remove the gz file + os.remove(outpath) + f.close() + except Exception as e: + logger.error(f"Failed to download assembly file from {url} with error {e}") + return None +def main(): + args = parse_args() + logger.setLevel(logging.INFO) + inputfile = args.input + assembly_file = args.assembly_file + dir_out = args.dir_out + if not dir_out.exists(): + dir_out.mkdir() + with open(inputfile, 'r') as f: + # read in lines, remove newline and get rid of empty lines + gcfs = [line.strip() for line in f if line.strip()] + f.close() + assembly_urls = dict() + with open(assembly_file, 'r') as f: + for line in f: + if line.startswith("#"): + continue + line = line.strip().split("\t") + gcf = line[0] + url = line[19] + assembly_urls[gcf] = url + f.close() + for gcf in gcfs: + if gcf in assembly_urls: + try: + url = assembly_urls[gcf] + baseurl = os.path.basename(url) + url = f"{url}/{baseurl}_feature_table.txt.gz" + download_feature_file(dir_out, url, f"{gcf}_feature_table.txt.gz") + print(f"Downloaded feature file for {gcf} at {url} to {dir_out}/{gcf}_feature_table.txt") + except Exception as e: + print(f"Failed to write feature file for {gcf} 
at {url} with error {e}") + +if __name__ == "__main__": + sys.exit(main()) + + diff --git a/bin/fuzzy_match_assembly.py b/bin/fuzzy_match_assembly.py new file mode 100755 index 0000000..3594f3f --- /dev/null +++ b/bin/fuzzy_match_assembly.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. +############################################################################################## + +import argparse +import time +import re +from Bio import SeqIO +import os +# import function determine_priority_assembly from fil in same dir +from determine_priority_assembly import determine_priority_assembly, format_description, parse_fasta_header + +def parse_args(): + parser = argparse.ArgumentParser(description="Fetch assembly accessions for a list of nuccore accessions.") + parser.add_argument("-i", "--input", required=True, help="Path to a file containing nuccore accessions, one per line.") + parser.add_argument("-a", "--assembly", required=True, help="Path to a file of assembly refseq information") + parser.add_argument("-o", "--output", required=True, help="Output File for GCFs") + return parser.parse_args() + +def search_in_dicts(organism_name, strain, assemblies, gcfs): + if organism_name in assemblies: + if strain and strain in assemblies[organism_name]: + # print(f"Match found for {organism_name} and strain {strain}") + # You can return or process the matched data here + return organism_name, gcfs[organism_name][strain] + else: + # print(f"Organism {organism_name} found, but strain {strain} is not present.") + return None, None + else: + # print(f"Organism {organism_name} not found.") + return None, None + +def long_match_names(organism_name, names): + split_name = organism_name.split(" ") + match = False + # iterate through split_name combining index each time until full combine, for each combined iteration check if it is names. 
If so , print and return + # move backwards from the full length to only first word until first match + for i in range(len(split_name), 0, -1): + name = " ".join(split_name[:i]) + if name in names: + match = name + break + return match + +def extract_after_colon(text): + # Regular expression to match everything after ': ' + match = re.search(r':\s*(.*)', text) + if match: + return match.group(1) # Return the matched group, which is everything after ': ' + else: + return text # Return the original text if no ': ' is found + +def main(): + args = parse_args() + assemblies = dict() + gcfs = dict() + with open(args.assembly, 'r') as assembly_file: + lines = assembly_file.readlines() + for line in lines: + if line.startswith("#"): + pass + cols = line.split("\t") + if len(cols) > 7: + name = cols[7] + strain = cols[8] + strain = strain.split("=")[1] if "=" in strain else strain + if strain == "na": + strain = "None" + ## set 2d list for [name] in the assemblies dict + if name not in gcfs: + gcfs[name] = dict() + if name in assemblies and assemblies[name] == 0: + continue + priority = determine_priority_assembly(line) + if name not in assemblies or (name in assemblies and priority < assemblies[name]): + assemblies[name] = priority + # extract value of strain=value. If strain=value is not present, set to just the same value for strain + gcfs[name] = cols[0] + assembly_file.close() + accessions = dict() + total = 0 + count = 0 + final_list = [] + missing_elements = [] + if os.path.exists(args.input): + for seq_record in SeqIO.parse(args.input, "fasta"): + id, desc = format_description(seq_record.id, seq_record.description) + desc = desc.replace("TPA_inf: ", "") + desc = desc.replace("MAG: ", "") + desc = desc.replace("MAGL: ", "") + # if there is something like MAG: or TPA_inf: in the description, remove it but use fuzzy match so that if it is up : and a space then remove it, dont hardcode the values. 
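+            # (hedged sketch, not part of the original patch) one generic way is re.sub(r'^\w+:\s+', '', desc), which strips any leading "WORD: " tag using re (already imported above); alternatively reuse the extract_after_colon() helper defined earlier, as in the commented-out call below: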
+ # desc = extract_after_colon(desc) + total +=1 + name = long_match_names(desc, assemblies.keys()) + # organism, gcf = search_in_dicts(organism_name, explicit_strain, assemblies, gcfs) + if name: + count+=1 + # print(f"{seq_record.id}\t{gcfs[name]}\t{name}\t{desc}") + final_list.append(f"{seq_record.id}\t{gcfs[name]}\t{name}\t{desc}") + else: + print(f"No match found for {seq_record.id}\t{desc}") + missing_elements.append(f"{seq_record.id}\t{desc}") + if total > 0: + print(f"Found {count} out of {total} FASTA accessions ({count/total*100:.2f}%) in the assembly file.") + if len(final_list) == 0: + print("No assemblies found") + else: + with open(args.output, 'w') as infile: + for line in final_list: + infile.write(line + "\n") + infile.close() + if len(missing_elements) > 0: + dirname = os.path.dirname(args.output) + with open(os.path.join(dirname, "missing_assemblies.txt"), 'w') as missing_file: + for line in missing_elements: + missing_file.write(line + "\n") + missing_file.close() + + + + +if __name__ == "__main__": + main() diff --git a/bin/make_gcf_mapping.awk b/bin/make_gcf_mapping.awk new file mode 100755 index 0000000..35a8d17 --- /dev/null +++ b/bin/make_gcf_mapping.awk @@ -0,0 +1,17 @@ +#!/usr/bin/awk -f + +# Check if the line starts with ">" +/^>/ { + line = substr($0, 2); + + # Find the first two spaces to separate chromosome and assembly from description + firstSpace = index(line, " "); + secondSpace = index(substr(line, firstSpace + 1), " ") + firstSpace; + + # Extract chromosome, assembly, and description + chromosome = substr(line, 1, firstSpace-1); + assembly = substr(line, firstSpace+1, secondSpace-firstSpace-1); + description = substr(line, secondSpace+1); + + print chromosome "\t" assembly "\t" description; +} diff --git a/bin/make_gcf_mapping.sh b/bin/make_gcf_mapping.sh new file mode 100755 index 0000000..b966931 --- /dev/null +++ b/bin/make_gcf_mapping.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. +# +# FUNCTIONS +usage() +{ +cat << EOF + +Help message for make_gcf_mapping.sh: + +DESCRIPTION: + Optionally merge assembly refseq on a gcf assembly pull FASTA file. FASTA file must be in format >chr GCF description (3 columns) where first 2 are a space and the third is anything after + +NOTES: + + +OPTIONS: + -h help show this message + -i FILE a fasta file of gcf references pulled. should be in chr \s gcf \s description format + -a FILE a file of the ncbi assembly refseq. Optional. 
If omitted, the 3rd column of the mapping file is duplicated into column 4 + -o DIR full path to output file + +USAGE: + +bash make_gcf_mapping.sh -i ~/Downloads/combined_genomes.fasta -o test_output/test.tsv -a test_output/get/assembly_summary_refseq.txt + +EOF +} + + + + +# ARGUMENTS +# parse args +while getopts "hi:a:o:" OPTION +do + case $OPTION in + h) usage; exit 1 ;; + i) fasta=$OPTARG ;; + a) assembly=$OPTARG ;; + o) output=$OPTARG ;; + ?) usage; exit ;; + esac +done + + +# check args +if [[ -z $fasta ]] || [[ -z $output ]] +then + usage + exit 1 +fi +# warn if no assembly file was provided +if [[ -z $assembly ]] +then + echo "No assembly file provided. Will only output gcf \t chr \t description \t description (repeat)" +fi + + +grep '^>' $fasta | awk -F ' ' '{ + # Initialize the description variable + description = ""; + + # Concatenate fields to form the description, ignoring fields after the first comma + for (i = 2; i <= NF; i++) { + if (index($i, ",") != 0) { + # If there is a comma in the current field, split it and take the part before the comma + split($i, parts, ","); + description = description (description == "" ? "" : " ") parts[1]; + break; # Stop processing further fields after encountering the first comma + } else { + description = description (description == "" ? "" : " ") $i; + } + } + # Remove the ">" from the first column + gsub("^>", "", $1); + print $1 "\t" description; + +}' > $output.tmp + +echo "Name extraction from the FASTA headers is complete. Adding the organism name column" + +if [[ -z $assembly ]] +then + paste $output.tmp <(awk -F '\t' '{print $3}' $output.tmp) > $output +else + # use awk to read both output.tmp and the assembly summary, then print the existing columns plus the mapped organism name + awk -F '\t' 'BEGIN {FS=OFS="\t"} + NR==FNR {map[$1]=$8; next} + { + if ($2 == "") { + print $0, $3 # If assembly is empty, duplicate the third column + } else { + print $0, (map[$2] != "" ? map[$2] : $3) # Otherwise, use mapped value if present, else default to third column + } + }' $assembly $output.tmp > $output +fi + + + + + diff --git a/bin/map_nuccore_gcf.py b/bin/map_nuccore_gcf.py new file mode 100755 index 0000000..27da037 --- /dev/null +++ b/bin/map_nuccore_gcf.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +from Bio import Entrez +import argparse +import time + +def parse_args(): + parser = argparse.ArgumentParser(description="Fetch assembly accessions for a list of nuccore accessions.") + parser.add_argument("-i", "--input", required=True, help="Path to a file containing nuccore accessions, one per line.") + parser.add_argument("-r", "--restart", action='store_true', help="Overwrite the output file instead of appending to it") + parser.add_argument("-o", "--file_out", required=True, help="Output file to append the assembly accessions.") + parser.add_argument("-e", "--email", required=False, help="Email for entrez") + parser.add_argument("-b", "--batch_count", required=False, default=100, help="Batch count for fetching assembly accessions. 
Default is 100.") + parser.add_argument("-f", "--failed", default="failed_accessions.txt", help="File to save failed accessions.") + return parser.parse_args() + +def read_processed_accessions(output_file): + processed = set() + try: + with open(output_file, 'r') as f: + for line in f: + parts = line.strip().split("\t") + if parts: + processed.add(parts[0]) + except FileNotFoundError: + pass # File doesn't exist, which is fine for the first run + return processed + +def fetch_assembly_accession(nuccore_id, retries=2): + try: + handle = Entrez.elink(dbfrom="nuccore", db="assembly", id=nuccore_id, retmax=1) + link_record = Entrez.read(handle) + handle.close() + if link_record[0]["LinkSetDb"]: + assembly_id = link_record[0]["LinkSetDb"][0]["Link"][0]["Id"] + handle = Entrez.esummary(db="assembly", id=assembly_id) + summary_record = Entrez.read(handle) + handle.close() + return summary_record["DocumentSummarySet"]["DocumentSummary"][0]["AssemblyAccession"] + except Exception as e: + if retries > 0: + print(f"Failed to fetch assembly accession for {nuccore_id}. Retrying {retries} more times.") + time.sleep(1) # Wait for 5 seconds before retrying + return fetch_assembly_accession(nuccore_id, retries-1) + return None + +def main(): + args = parse_args() + if args.email: + Entrez.email = args.email + processed_accessions = read_processed_accessions(args.file_out) + failed_accessions = [] + + with open(args.input, 'r') as infile: + for line in infile: + nuccore_id = line.strip() + print(nuccore_id) + if nuccore_id and nuccore_id not in processed_accessions: + assembly_accession = fetch_assembly_accession(nuccore_id) + print(assembly_accession) + if assembly_accession: + mode="w" if args.restart else "a" + with open(args.file_out, mode) as outfile: + print(f"{nuccore_id}\t{assembly_accession}\n") + outfile.write(f"{nuccore_id}\t{assembly_accession}\n") + else: + failed_accessions.append(nuccore_id) + if failed_accessions: + with open(args.failed, 'w') as failed_file: + for acc in failed_accessions: + failed_file.write(acc + "\n") + +if __name__ == "__main__": + main() diff --git a/bin/merge_assemblies_conf.py b/bin/merge_assemblies_conf.py index 5586475..032a05c 100755 --- a/bin/merge_assemblies_conf.py +++ b/bin/merge_assemblies_conf.py @@ -44,11 +44,12 @@ def calculate_stdev(depths, ref_size, weighted_mean_depth): return math.sqrt(variance) def coverage_thresholds(depths, thresholds, ref_size): - coverage_counts = {threshold: 0 for threshold in thresholds} + coverage_counts = [0] * len(thresholds) + print(depths, thresholds, ref_size) for depth, size in zip(depths, ref_size): - for threshold in thresholds: + for i, threshold in enumerate(thresholds): if depth >= threshold: - coverage_counts[threshold] += size + coverage_counts[i] += size return coverage_counts def main(argv=None): @@ -64,12 +65,15 @@ def main(argv=None): file.seek(0) # Reset file pointer to the beginning reader = csv.DictReader(file, delimiter='\t') # Reinitialize reader for row in reader: - name = row['Name'] + name = row['Organism'] data[name]['Mean Depth'].append(float(row['Mean Depth'])) data[name]['Average Coverage'].append(float(row['Average Coverage'])) data[name]['Ref Size'].append(float(row['Ref Size'])) data[name]['Total Reads Aligned'] += float(row['Total Reads Aligned']) # data[name]['Individual Reads Aligned'].append(float(row['Individual Reads Aligned'])) + # split 1:10:50:100:300X Cov. 
column into list on colons, setting thresholds and making it as a dict + data[name]['Coverage Thresholds'] = {int(t): float(p) for t, p in zip([1, 10, 50, 100, 300], row['1:10:50:100:300X Cov.'].split(':'))} + file.close() # Process data aggregated = [] @@ -82,18 +86,22 @@ def main(argv=None): abu_total_aligned = 0 else: abu_total_aligned = values['Total Reads Aligned'] / total_reads_aligned - coverage_counts = coverage_thresholds(values['Mean Depth'], thresholds, values['Ref Size']) - coverage_percentages = [coverage_counts[t] / sum(values['Ref Size']) * 100 for t in thresholds] + # use the Coverage Thresholds column to find weighted threhsolds for merged row + cov_thresholds = values['Coverage Thresholds'] + # coverage_counts = coverage_thresholds(cov_thresholds, thresholds, values['Ref Size']) + # print(coverage_counts) + # exit() aggregated.append( { - 'Name': name, + 'Organism': name, 'Weighted Mean Depth': mean_depth, 'Weighted Avg Coverage': avg_coverage, 'Total Ref Size': sum(values['Ref Size']), 'Total Reads Aligned': values['Total Reads Aligned'], '% Reads Aligned': abu_total_aligned * 100, 'Stdev': stdev, - '1X:10x:50x:100x:300x Cov.': f"{coverage_percentages[0]:.2f}:{coverage_percentages[1]:.2f}:{coverage_percentages[2]:.2f}:{coverage_percentages[3]:.2f}:{coverage_percentages[4]:.2f}", + "1X:10x:50x:100x:300x Cov.": "Disabled" + # '1X:10x:50x:100x:300x Cov.': f"{coverage_percentages[0]:.2f}:{coverage_percentages[1]:.2f}:{coverage_percentages[2]:.2f}:{coverage_percentages[3]:.2f}:{coverage_percentages[4]:.2f}", }) # Output the aggregated data diff --git a/bin/merge_assembly.awk b/bin/merge_assembly.awk new file mode 100644 index 0000000..797d70f --- /dev/null +++ b/bin/merge_assembly.awk @@ -0,0 +1,12 @@ +#!/usr/bin/awk -f + +# Check if the line starts with ">" +BEGIN {FS=OFS="\t"} +NR==FNR {map[$1]=$2; next} +{ + if ($2 == "") { + print $0, $3 # If assembly is empty, duplicate the third column + } else { + print $0, (map[$2] != "" ? map[$2] : $3) # Otherwise, use mapped value if present, else default to third column + } +} diff --git a/bin/reheader.py b/bin/reheader.py new file mode 100755 index 0000000..e95e4e6 --- /dev/null +++ b/bin/reheader.py @@ -0,0 +1,75 @@ + + +#!/usr/bin/env python3 + +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. 
+# +import argparse +import sys +import pysam # Ensure pysam is installed + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Rewrite the reference names in a BAM header using a tab-separated mapping file.", + epilog="Example: python reheader.py -i input.bam -m mapping.tsv", + ) + parser.add_argument( + "-i", + "--input", + metavar="INPUT", + help="Input BAM file", + ) + parser.add_argument( + "-m", + "--mapping", + metavar="MAP", + help="Tab-separated mapping file; column 1 is the current reference name and column 3 is the replacement name", + ) + return parser.parse_args(argv) + +def main(): + args = parse_args() + input_bam = args.input + mapping_tsv = args.mapping + + # Load the mapping from the TSV file + mapping = {} + with open(mapping_tsv, 'r') as f: + for line in f: + old, new, chr = line.strip().split('\t') + mapping[old] = chr + + # Read the current header from the BAM file + bam = pysam.AlignmentFile(input_bam, "rb") + header_dict = bam.header.to_dict() + + # Update the reference names in the header + for ref in header_dict['SQ']: + old_name = ref['SN'] + if old_name in mapping: + ref['SN'] = mapping[old_name] + bam.close() + header = pysam.AlignmentHeader.from_dict(header_dict) + print(header) + # # Write the new header to a SAM file + # with open(new_header_sam, 'w') as f: + # f.write(pysam.AlignmentHeader.from_dict(header_dict).to_string()) + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/reheader_fasta.sh b/bin/reheader_fasta.sh new file mode 100755 index 0000000..f2ff8a2 --- /dev/null +++ b/bin/reheader_fasta.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +############################################################################################## +# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. +# + +usage() { + cat << EOF + +Help message for reheader_fasta.sh: + +DESCRIPTION: + Processes a FASTA file to modify its headers. In each header: + - Replaces the first space with a "|" + - Replaces all subsequent spaces with "_" + - Ignores everything after the first comma + +NOTES: + - This script is designed to work with FASTA files that have descriptive headers. + +OPTIONS: + -h Show this help message + -i FILE Input FASTA file + -o FILE Output FASTA file where the processed headers will be written + +USAGE: + bash reheader_fasta.sh -i INPUT_FASTA -o OUTPUT_FASTA + Example: bash reheader_fasta.sh -i input.fasta -o output.fasta + +EOF +} + +while getopts "hi:o:" OPTION +do + case $OPTION in + h) usage; exit 0 ;; + i) INPUT_FASTA="$OPTARG" ;; + o) OUTPUT_FASTA="$OPTARG" ;; + ?) 
usage; exit 1 ;; + esac +done + +if [ -z "$INPUT_FASTA" ] || [ -z "$OUTPUT_FASTA" ]; then + echo "Both input and output files must be specified." + usage + exit 1 +fi + +awk -F, 'BEGIN{OFS=""} /^>/{split($1, a, " "); gsub(" ", "_", $1); print a[1]"|"substr($1, length(a[1])+2)} !/^>/{print}' "$INPUT_FASTA" > "$OUTPUT_FASTA" + +echo "Processed FASTA file saved to $OUTPUT_FASTA" diff --git a/bin/sam_to_confidence.py b/bin/sam_to_confidence.py index 76d1247..e55b7ca 100755 --- a/bin/sam_to_confidence.py +++ b/bin/sam_to_confidence.py @@ -54,6 +54,15 @@ def parse_args(argv=None): required=False, help="Output accession to name tsv. 2 columns minimum. must be in 1 col for accession second is name", ) + parser.add_argument( + "-s", + "--splitidx", + metavar="SPLITIDX", + default=None, + required=False, + type=int, + help="If using -m mapping, split the index of the chr and get element. Split on | by default. 0 based index.", + ) parser.add_argument( "-o", "--file_out", @@ -150,9 +159,10 @@ def fm(x): # Calculate statistics and output -def output_statistics(depth, total_positions, coverage, count, sumsq, ireads, ilen, total_reads_aligned, FILE_OUT, mapping): +def output_statistics(depth, total_positions, coverage, count, sumsq, ireads, ilen, total_reads_aligned, FILE_OUT, mapping, splitidx): headers = ["Accession", - "Name", + "Organism", + "Full Name", "Mean Depth", "Average Coverage", "Ref Size", @@ -181,11 +191,16 @@ def output_statistics(depth, total_positions, coverage, count, sumsq, ireads, il stdev = math.sqrt( abs(sumsq[ref] / count[ref] - (total_depth / count[ref]) ** 2)) abu_total_aligned = ireads[ref] / total_reads_aligned - if ref in mapping: - name = mapping[ref] + check_idx = ref + if splitidx or splitidx == 0: + check_idx = check_idx.split("|")[splitidx] + if check_idx in mapping: + organism = mapping[check_idx]['organism'] + fullname = mapping[check_idx]['fullname'] else: - name = ref - outstring = f"{ref}\t{name}\t{fm(avgDepth)}\t{fm(avg_coverage)}\t{ilen[ref]}\t{ireads[ref]}\t{fm(100*(ireads[ref]/total_reads_aligned))}\t{fm(stdev)}\t{fm(abu_total_aligned)}\t{':'.join([fm(x) for x in xCov])}" + organism = "N/A" + fullname = "N/A" + outstring = f"{check_idx}\t{organism}\t{fullname}\t{fm(avgDepth)}\t{fm(avg_coverage)}\t{ilen[ref]}\t{ireads[ref]}\t{fm(100*(ireads[ref]/total_reads_aligned))}\t{fm(stdev)}\t{fm(abu_total_aligned)}\t{':'.join([fm(x) for x in xCov])}" f.write(outstring+"\n") @@ -202,7 +217,15 @@ def main(argv=None): with open(args.mapping, 'r') as f: for line in f: cols = line.strip().split("\t") - mapping[cols[0]] = cols[1] + idx = cols[0] + organism = "N/A" + fullname = "N/A" + obj = dict(organism=organism, fullname=fullname) + if len(cols) > 2: + obj['organism'] = cols[2] + if len(cols) > 3: + obj['fullname'] = cols[3] + mapping[idx] = obj f.close() depth, total_positions = load_depth_file(DEPTHFILE) @@ -215,7 +238,7 @@ def ensure_bam_index(bam_path): coverage, count, sumsq, ireads, ilen, total_reads_aligned = process_bam_file( BAMFILE, depth, total_positions) output_statistics(depth, total_positions, coverage, count, - sumsq, ireads, ilen, total_reads_aligned, args.file_out, mapping) + sumsq, ireads, ilen, total_reads_aligned, args.file_out, mapping, args.splitidx) if __name__ == "__main__": diff --git a/bin/samtools_coverage.sh b/bin/samtools_coverage.sh new file mode 100755 index 0000000..181effa --- /dev/null +++ b/bin/samtools_coverage.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +############################################################################################## 
+# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. +# + + +usage() { + cat << EOF + +Usage: $0 [options] + +This script runs samtools coverage on a BAM file and replaces reference names +in the output according to a provided mapping file. + +OPTIONS: + -h Show this help message + -b FILE Input BAM file + -m FILE Reference name mapping file (original and new names, tab-separated) + -a ARGS Extra args to attach to samtools coverage + -o FILE Output file for the modified samtools coverage results + +The mapping file should have two columns: +1. Original reference name (as in the BAM file) +2. New reference name + +Each line in the mapping file corresponds to one reference name mapping. + +Example: +bash $0 -b input.bam -m mapping.txt -o coverage_output.txt + +EOF +} + +# Default values for arguments +bam_file="" +mapping_file="" +output_file="" +args="" +# Parse command-line arguments +while getopts "hb:m:o:a:" opt; do + case ${opt} in + h ) + usage + exit 0 + ;; + b ) + bam_file=${OPTARG} + ;; + m ) + mapping_file=${OPTARG} + ;; + o ) + output_file=${OPTARG} + ;; + a ) + args=${OPTARG} + ;; + \? ) + usage + exit 1 + ;; + esac +done + +# Check for mandatory options +if [ -z "${bam_file}" ] || [ -z "${mapping_file}" ] || [ -z "${output_file}" ]; then + echo "Error: Missing required arguments." + usage + exit 1 +fi + +# Check if samtools is installed +if ! command -v samtools &> /dev/null; then + echo "Error: samtools could not be found. Please install samtools and try again." 
+ exit 1 +fi +# Run samtools coverage and process the output +samtools coverage $args "${bam_file}" | awk -v mapFile="${mapping_file}" ' + BEGIN { + FS=OFS="\t" + # Load the mapping from the mapping file + while ((getline < mapFile) > 0) { + map[$1] = $2; + # print $1,$2 + } + } + /^[^>]/ { + # Replace the reference name if it exists in the mapping + # split on space, get first index + split($1, a, " "); + + if (a[1] in map) { + $1 =a[1]" "map[a[1]]; + } + } {print} +' > "${output_file}" + +echo "Modified samtools coverage output saved to ${output_file}" diff --git a/conf/modules.config b/conf/modules.config index 5e1deec..3313bdf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,9 +72,7 @@ process { withName: SAMTOOLS_VIEW{ ext.prefix = { "${meta.id}.remove" } } - withName: SAMTOOLS_SORT{ - ext.prefix = { "${meta.id}.sort" } - } + withName: BCFTOOLS_CONSENSUS{ errorStrategy = 'ignore' ext.prefix = { "${meta.id}.consensus" } diff --git a/nextflow.config b/nextflow.config index 608c0c2..41f0b60 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,14 +25,19 @@ params { remoteblast = null filter = null remove_taxids = null - skip_features = null + get_features = null fuzzy = null refresh_download = null skip_krona = null k2_minimum_hit_groups = null reference_fasta = null + organisms = null + organisms_file = null skip_kraken2 = null - minmapq = null + get_variants = null + reference_assembly = null + minmapq = 12 + denovo_assembly = null skip_realignment = null skip_stats = null skip_multiqc = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d9fa306..6028121 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -150,17 +150,37 @@ "help_text": "", "fa_icon": "fas fa-file-csv" }, - "refresh_download": { + "organisms": { + "type": "string", + "hint": "Options: Comma-separated taxids or organism names to match to ncbi assembly info. See --assembly for more info. If using organism names, make sure to set it to fuzzy-matching with --fuzzy ", + "default": null, + "description": "Either a string of taxids separated by a space, or organisms names with '' enclosure, separated by space for each enclosed name with ''", + "fa_icon": "fas fa-file-signature" + }, + "organisms_file": { + "type": "string", + "hint": "Options: [string or single column list of taxids or organism names]. If using organism names, make sure to set it to fuzzy-matching with --fuzzy ", + "default": null, + "description": "Either a file or string of taxids separated by a space, or organisms names with '' enclosure", + "fa_icon": "fas fa-file-signature" + }, + "fuzzy": { "type": "boolean", - "description": "Redownload all references from NCBI", + "description": "Fuzzy matching for organism names", "fa_icon": "fas fa-file-signature", "default": false }, - "default_download": { + "minmapq": { + "type": "number", + "default": 5, + "description": "Minimum MapQ Score for alignment", + "fa_icon": "fas fa-file-signature" + }, + "refresh_download": { "type": "boolean", - "description": "Download the default assembly file from NCBI using ncbi-genome-download tool", + "description": "Redownload all references from NCBI", "fa_icon": "fas fa-file-signature", - "default": true + "default": false }, "genome": { "type": "string", @@ -223,21 +243,38 @@ "fa_icon": "fas fa-file-signature", "default": false }, + "skip_kraken2": { + "type": "boolean", + "description": "Skip Kraken2. Will not work with \"unknown taxa\" mode. 
You must provide a reference FASTA if this is toggled on.", + "fa_icon": "fas fa-file-signature", + "default": false + }, "skip_krona": { "type": "boolean", "description": "Skip Krona Plots", "fa_icon": "fas fa-file-signature", "default": false }, - "skip_assembly": { + "denovo_assembly": { "type": "boolean", - "description": "Skip De Novo Assembly", + "description": "Perform De Novo Assembly", "fa_icon": "fas fa-file-signature", "default": true }, - "skip_features": { + "reference_assembly": { + "type": "boolean", + "description": "Perform Reference-Based Assembly. Performs variant analysis by default (mandatory)", + "fa_icon": "fas fa-file-signature", + "default": true + }, + "get_features": { + "type": "boolean", + "description": "Start generating annotations from alignments that take place. Requires LOTS of RAM for large data files/genomes", + "fa_icon": "fas fa-file-signature" + }, + "get_variants": { "type": "boolean", - "description": "Skip generating annotations from alignments that take place", + "description": "Start generating variant call analysis relative to aligned reads to references", "fa_icon": "fas fa-file-signature" }, "skip_multiqc": { diff --git a/params.json b/params.json index e8587f4..3bbcca7 100644 --- a/params.json +++ b/params.json @@ -6,22 +6,23 @@ "top_hits_count": 3, "demux": true, "assembly": "examples/assembly_summary_refseq.txt", - "skip_assembly": true, + "reference_assembly": false, "max_memory": "10GB", "max_cpus": 3, "low_memory": false, "taxtab": "default", "remove_taxids": "9606", "profile": "docker", - "top_per_taxa": "10239:20:S 2:20:S", "blastdb": null, "remoteblast": false, "skip_fastp": false, - "skip_features": false, + "get_features": false, + "get_variants": false, "fuzzy": false, + "minmapq": 5, "refresh_download": false, "skip_stats": false, - "skip_plots": true, + "skip_plots": false, "skip_consensus": false, "skip_variants": false, "subsample": null diff --git a/subworkflows/local/alignment.nf b/subworkflows/local/alignment.nf index d1283b6..1909743 100644 --- a/subworkflows/local/alignment.nf +++ b/subworkflows/local/alignment.nf @@ -11,7 +11,7 @@ include { SAMTOOLS_DEPTH } from '../../modules/nf-core/samtools/depth/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_COVERAGE } from '../../modules/nf-core/samtools/coverage/main' -include { SAMTOOLS_COVERAGE as SAMTOOLS_HIST_COVERAGE } from '../../modules/nf-core/samtools/coverage/main' +include { SAMTOOLS_HIST_COVERAGE } from '../../modules/local/samtools_hist_coverage' include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/bcftools/consensus/main' include { BCFTOOLS_MPILEUP as BCFTOOLS_MPILEUP_ILLUMINA } from '../../modules/nf-core/bcftools/mpileup/main' include { BCFTOOLS_INDEX } from '../../modules/nf-core/bcftools/index/main' @@ -23,7 +23,6 @@ include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { RSEQC_BAMSTAT } from '../../modules/nf-core/rseqc/bamstat/main' include { BEDTOOLS_DEPTHCOVERAGE } from '../../modules/local/bedtools_coverage' -include { SAMTOOLS_SORT } from '../../modules/local/samtools_sort' workflow ALIGNMENT { take: @@ -55,18 +54,18 @@ workflow ALIGNMENT { SAMTOOLS_FAIDX( - fastq_reads.map{ m, report, fastq, fasta, bed -> return [ m, fasta ] }, - fastq_reads.map{ m, report, fastq, fasta, bed -> return [ m, [] ] }, + fastq_reads.map{ m, 
fastq, fasta, bed, map -> return [ m, fasta ] }, + fastq_reads.map{ m, fastq, fasta, bed, map -> return [ m, [] ] }, ) BOWTIE2_BUILD( - ch_aligners.shortreads.map{ m, report, fastq, fasta, bed -> + ch_aligners.shortreads.map{ m, fastq, fasta, bed, map -> return [ m, fasta ] } ) BOWTIE2_ALIGN( - ch_aligners.shortreads.map{ m, report, fastq, fasta, bed -> + ch_aligners.shortreads.map{ m, fastq, fasta, bed, map -> return [ m, fastq ] }, BOWTIE2_BUILD.out.index, @@ -78,8 +77,8 @@ workflow ALIGNMENT { ) MINIMAP2_ALIGN( - ch_aligners.longreads.map{ m, report, fastq, fasta, bed -> [ m, fastq ] }, - ch_aligners.longreads.map{ meta, report, fastq, fasta, bed -> fasta }, + ch_aligners.longreads.map{ m, fastq, fasta, bed, map -> [ m, fastq ] }, + ch_aligners.longreads.map{ m, fastq, fasta, bed, map -> fasta }, true, true, true @@ -89,53 +88,54 @@ workflow ALIGNMENT { ) nanopore_alignments.mix(illumina_alignments).map{ - m, report, fastq, fasta, bed, bam -> [ m, bam ] + m, fastq, fasta, bed, map, bam -> [ m, bam ] }.set { collected_bams } - SAMTOOLS_SORT( - collected_bams - ) + SAMTOOLS_INDEX( - SAMTOOLS_SORT.out.bam + collected_bams ) - sorted_bams = SAMTOOLS_SORT.out.bam + sorted_bams = collected_bams + SAMTOOLS_DEPTH( sorted_bams ) - SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai).set{ sorted_bams_with_index } + collected_bams.join(SAMTOOLS_INDEX.out.bai).set{ sorted_bams_with_index } SAMTOOLS_COVERAGE( sorted_bams_with_index ) + gcf_with_bam = sorted_bams.join(fastq_reads.map{ m, fastq, fasta, bed, map -> return [m, map] }) SAMTOOLS_HIST_COVERAGE ( - sorted_bams_with_index + gcf_with_bam ) - sorted_bams_with_index.join(fastq_reads.map{ m, report, fastq, fasta, bed -> return [m, fasta] }).set{ sorted_bams_with_index_fasta } + sorted_bams_with_index.join(fastq_reads.map{ m, fastq, fasta, bed, map -> return [m, fasta] }).set{ sorted_bams_with_index_fasta } - if (!params.skip_features){ + if (params.get_features){ ch_merged_bed = sorted_bams.join( - fastq_reads.map{ m, report, fastq, fasta, bed -> return [m, bed] } + fastq_reads.map{ m, fastq, fasta, bed, map -> return [m, bed] } ).map{ meta, bam, bed -> [meta, bed, bam] + }.filter{ + it[1] != null } BEDTOOLS_DEPTHCOVERAGE( ch_merged_bed ) - // ch_coverage = BEDTOOLS_DEPTHCOVERAGE.out.coverage } // branch out the samtools_sort output to nanopore and illumina - ch_sorted_bam_split = sorted_bams.join(fastq_reads.map{ m, report, fastq, fasta, bed -> return [m, fasta] }) + ch_sorted_bam_split = sorted_bams.join(fastq_reads.map{ m, fastq, fasta, bed, map -> return [m, fasta] }) ch_sorted_bam_split.branch{ longreads: it[0].platform =~ 'OXFORD' @@ -143,7 +143,7 @@ workflow ALIGNMENT { }.set { ch_sorted_bam_split } - if (!params.skip_variants){ + if (params.get_variants || params.reference_assembly){ BCFTOOLS_MPILEUP_OXFORD( ch_sorted_bam_split.longreads.map{ m, bam, fasta -> [m, bam] }, ch_sorted_bam_split.longreads.map{ m, bam, fasta -> fasta }, @@ -194,14 +194,13 @@ workflow ALIGNMENT { // ) // ch_stats = BCFTOOLS_STATS.out.stats // } - if (!params.skip_consensus){ + if (params.reference_assembly){ BCFTOOLS_CONSENSUS( ch_merged_mpileup ) ch_fasta = BCFTOOLS_CONSENSUS.out.fasta } } - RSEQC_BAMSTAT( sorted_bams ) diff --git a/workflows/taxtriage.nf b/workflows/taxtriage.nf index 55b5416..bec6882 100644 --- a/workflows/taxtriage.nf +++ b/workflows/taxtriage.nf @@ -76,12 +76,24 @@ if (params.assembly && ch_assembly_txt.isEmpty()) { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// // // // +// // // // MODULE: 
MultiQC +// // // // +workflow_summary = WorkflowTaxtriage.paramsSummaryMultiqc(workflow, summary_params) +ch_workflow_summary = Channel.value(workflow_summary) ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_css = file("$projectDir/assets/mqc.css", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + +ch_multiqc_files = Channel.empty() +ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) +ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) +ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) +ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_css)) ch_merged_table_config = Channel.fromPath("$projectDir/assets/table_explanation_mqc.yml", checkIfExists: true) -ch_alignment_stats = Channel.empty() +ch_multiqc_files = ch_multiqc_files.mix(ch_merged_table_config.collect().ifEmpty([])) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -145,6 +157,10 @@ include { DOWNLOAD_ASSEMBLY } from '../modules/local/download_assembly' include { NCBIGENOMEDOWNLOAD_FEATURES } from '../modules/local/get_feature_tables' include { FEATURES_TO_BED } from '../modules/local/convert_features_to_bed' include { CONFIDENCE_MERGE } from '../modules/local/merge_confidence_contigs' +include { MAP_GCF } from '../modules/local/map_gcfs' +include { FEATURES_DOWNLOAD } from '../modules/local/download_features' +include { REFERENCE_REHEADER } from '../modules/local/reheader' +include { MAP_LOCAL_ASSEMBLY_TO_FASTA } from '../modules/local/map_assembly_to_fasta' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -222,17 +238,9 @@ workflow TAXTRIAGE { ch_accession_mapping = Channel.empty() ch_empty_file = file("$projectDir/assets/NO_FILE") - ch_hit_to_kraken_report = Channel.empty() - // // // // - // // // // MODULE: MultiQC - // // // // - workflow_summary = WorkflowTaxtriage.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_css)) + + + // // // // // // // // // // // // @@ -270,21 +278,28 @@ workflow TAXTRIAGE { // // // // MODULE: Run FastQC or Porechop, Trimgalore // // // ch_porechop_out = Channel.empty() + ch_fastp_reads = Channel.empty() + ch_fastp_html = Channel.empty() + if (params.trim) { nontrimmed_reads = ch_reads.filter { !it[0].trim } TRIMGALORE( ch_reads.filter { it[0].platform == 'ILLUMINA' && it[0].trim } ) + + ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.reads.collect { it[1] }.ifEmpty([])) + PORECHOP( ch_reads.filter { it[0].platform == 'OXFORD' && it[0].trim } ) ch_porechop_out = PORECHOP.out.reads - trimmed_reads = TRIMGALORE.out.reads.mix(PORECHOP.out.reads) ch_reads = nontrimmed_reads.mix(trimmed_reads) + ch_multiqc_files = 
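+            // Collect trimming reports for MultiQC as soon as they are produced, rather than
+            // in one aggregation block at the end of the workflow.
+            // NOTE: ch_fastp_html appears to still be empty at this point; FASTP only
+            // populates it further down, so its report may not reach MultiQC from here.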
ch_multiqc_files.mix(ch_porechop_out.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.collect { it[1] }.ifEmpty([])) + } - ch_fastp_reads = Channel.empty() - ch_fastp_html = Channel.empty() + if (!params.skip_fastp) { FASTP( ch_reads, @@ -312,141 +327,194 @@ workflow TAXTRIAGE { NANOPLOT( ch_reads.filter { it[0].platform =~ /(?i)OXFORD/ } ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT.out.txt.collect { it[1] }.ifEmpty([])) } + ch_filtered_reads = ch_reads - // // // // // // - // // // // // // MODULE: Run Kraken2 - // // // // // // - KRAKEN2_KRAKEN2( - ch_reads, - ch_db, - true, - true - ) + if (!params.skip_kraken2){ + // // // // // // + // // // // // // MODULE: Run Kraken2 + // // // // // // + KRAKEN2_KRAKEN2( + ch_reads, + ch_db, + true, + true + ) - ch_kraken2_report = KRAKEN2_KRAKEN2.out.report + ch_kraken2_report = KRAKEN2_KRAKEN2.out.report - KREPORT_TO_KRONATXT( - ch_kraken2_report - ) - ch_krona_txt = KREPORT_TO_KRONATXT.out.txt - ch_combined = ch_krona_txt - .map{ it[1] } // Get the file path - .collect() // Collect all file parts into a list - .map { files -> - // Join the files with single quotes and space - String joinedFiles = files.collect { "'$it'" }.join(' ') - [[id:'combined_krona_kreports'], joinedFiles] // Combine with new ID - } - - KRONA_KTIMPORTTEXT( - ch_combined - ) + KREPORT_TO_KRONATXT( + ch_kraken2_report + ) + ch_krona_txt = KREPORT_TO_KRONATXT.out.txt + ch_combined = ch_krona_txt + .map{ it[1] } // Get the file path + .collect() // Collect all file parts into a list + .map { files -> + // Join the files with single quotes and space + String joinedFiles = files.collect { "'$it'" }.join(' ') + [[id:'combined_krona_kreports'], joinedFiles] // Combine with new ID + } + + KRONA_KTIMPORTTEXT( + ch_combined + ) - if (params.remove_taxids) { - remove_input = ch_kraken2_report.map { - meta, report -> [ - meta, report, params.remove_taxids - ] + if (params.remove_taxids) { + remove_input = ch_kraken2_report.map { + meta, report -> [ + meta, report, params.remove_taxids + ] + } + REMOVETAXIDSCLASSIFICATION( + remove_input + ) + ch_kraken2_report = REMOVETAXIDSCLASSIFICATION.out.report } - REMOVETAXIDSCLASSIFICATION( - remove_input + + TOP_HITS( + ch_kraken2_report + ) + MERGEDKRAKENREPORT( + TOP_HITS.out.krakenreport.map { meta, file -> file }.collect() + ) + + FILTERKRAKEN( + MERGEDKRAKENREPORT.out.krakenreport ) - ch_kraken2_report = REMOVETAXIDSCLASSIFICATION.out.report + ch_filtered_reads = KRAKEN2_KRAKEN2.out.classified_reads_fastq.map { m, r-> [m, r.findAll { it =~ /.*\.classified.*(fq|fastq)(\.gz)?/ }] } + + if (params.fuzzy){ + ch_organisms = TOP_HITS.out.names + } else { + ch_organisms = TOP_HITS.out.taxids + } + + ch_multiqc_files = ch_multiqc_files.mix(MERGEDKRAKENREPORT.out.krakenreport.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_kraken2_report.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FILTERKRAKEN.out.reports.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TOP_HITS.out.krakenreport.collect { it[1] }.ifEmpty([])) + + } else if (params.organisms_file){ + // check if params.organisms is a file or a string + ch_organisms = Channel.fromPath(params.organisms_file, checkIfExists: true) + } else if (params.organisms) { + ch_organisms = Channel.from(params.organisms) } - TOP_HITS( - ch_kraken2_report - ) - MERGEDKRAKENREPORT( - TOP_HITS.out.krakenreport.map { 
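+                // drop the per-sample meta map and collect every top-hits kraken report
+                // so MERGEDKRAKENREPORT can build a single combined table across samples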
meta, file -> file }.collect()
-    )
-    FILTERKRAKEN(
-        MERGEDKRAKENREPORT.out.krakenreport
-    )
-    ch_filtered_reads = KRAKEN2_KRAKEN2.out.classified_reads_fastq.map { m, r-> [m, r.findAll { it =~ /.*\.classified.*(fq|fastq)(\.gz)?/ }] }
-    ch_hit_to_kraken_report = TOP_HITS.out.tops.join(ch_filtered_reads)
    ch_accessions = Channel.empty()
    ch_bedfiles = Channel.empty()
-    ch_hit_to_kraken_report_merged = Channel.empty()
    ch_bedfiles_or_default = Channel.empty()
+    ch_alignment_stats = Channel.empty()
+    ch_mapped_assemblies = Channel.empty()
+
+
    if (params.reference_fasta) {
        // // format of the FASTA file MUST be "kraken:taxid|" in each reference accession
        ch_reference_fasta = params.reference_fasta ? Channel.fromPath(params.reference_fasta, checkIfExists: true) : Channel.empty()
-        ch_hit_to_kraken_report = ch_hit_to_kraken_report.combine(
+
+        // merge ch_reference_fasta on all of the krakenreports. single channel merged to multiple
+        ch_filtered_reads = ch_filtered_reads.combine(
            ch_reference_fasta
        )
+
+        MAP_LOCAL_ASSEMBLY_TO_FASTA(
+            ch_filtered_reads.map {
+                meta, readsclass, fasta -> return [ meta, fasta ]
+            },
+            ch_assembly_txt
+        )
+
+        ch_mapped_assemblies = MAP_LOCAL_ASSEMBLY_TO_FASTA.out.map
+        ch_accessions = MAP_LOCAL_ASSEMBLY_TO_FASTA.out.accessions
    } else {
+
+        if (params.organisms || params.organisms_file){
+            ch_pre_download = ch_filtered_reads.combine(ch_organisms)
+        } else {
+            ch_pre_download = ch_filtered_reads.map {
+                meta, readsclass -> return [ meta, readsclass ]
+            }.join(ch_organisms)
+        }
+
        DOWNLOAD_ASSEMBLY(
-            ch_hit_to_kraken_report.map {
-                meta, report, readsclass -> [ meta, report ]
+            ch_pre_download.map {
+                meta, readsclass, report -> return [ meta, report ]
            },
            ch_assembly_txt
        )
-        ch_hit_to_kraken_report = ch_hit_to_kraken_report.join(
-            DOWNLOAD_ASSEMBLY.out.fasta
-        )
+        ch_filtered_reads = ch_filtered_reads.join(DOWNLOAD_ASSEMBLY.out.fasta)
+
+        // MAP_GCF(
+        //     DOWNLOAD_ASSEMBLY.out.fasta.map({ meta, fasta -> return [ meta, fasta ] }),
+        //     ch_assembly_txt
+        // )
        ch_accessions = DOWNLOAD_ASSEMBLY.out.accessions
-        ch_accession_mapping = DOWNLOAD_ASSEMBLY.out.mappings
+        ch_mapped_assemblies = DOWNLOAD_ASSEMBLY.out.mappings
-        if (!params.skip_features){
-            NCBIGENOMEDOWNLOAD_FEATURES(
-                ch_accessions
-            )
+    }
+    if (params.get_features){
-            FEATURES_TO_BED(
-                NCBIGENOMEDOWNLOAD_FEATURES.out.features
-            )
+        FEATURES_DOWNLOAD(
+            ch_accessions,
+            ch_assembly_txt
+        )
-            ch_bedfiles = FEATURES_TO_BED.out.bed
-        }
-    }
+        FEATURES_TO_BED(
+            FEATURES_DOWNLOAD.out.features
+        )
+        ch_bedfiles = FEATURES_TO_BED.out.bed
+    }
    if (!params.skip_realignment) {
-        // Combine ch_hit_to_kraken_report with ch_bedfiles_or_default
-        if (params.skip_features){
-            ch_hit_to_kraken_report = ch_hit_to_kraken_report.map {
-                meta, report, readsclass, fasta -> [ meta, report, readsclass, fasta, null ]
-            }
+        if (params.get_features){
+            ch_reads_to_align = ch_filtered_reads.join(ch_bedfiles, remainder: true)
        } else {
-            ch_hit_to_kraken_report = ch_hit_to_kraken_report.join(ch_bedfiles)
+            ch_reads_to_align = ch_filtered_reads.map {
+                meta, reads, fasta -> [ meta, reads, fasta, null ]
+            }
        }
+        ch_reads_to_align = ch_reads_to_align.join(ch_mapped_assemblies, remainder: true)
+
        ALIGNMENT(
-            ch_hit_to_kraken_report
+            ch_reads_to_align
        )
        ch_alignment_stats = ALIGNMENT.out.stats
+        ch_multiqc_files = ch_multiqc_files.mix(ch_alignment_stats.collect { it[1] }.ifEmpty([]))
+
        ch_bamstats = ALIGNMENT.out.bamstats
        ch_depth = ALIGNMENT.out.depth
        ch_alignment_outmerg = ALIGNMENT.out.bams.join(ALIGNMENT.out.depth)
-        // ch_alignment_outmerg = ch_alignment_outmerg.join(Channel.empty(), remainder: true)
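+        // Each BAM is paired with its depth table here; the accession-to-assembly mapping
+        // is attached next (ch_empty_file stands in when no mapping exists) before the
+        // confidence metrics are computed.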
        ch_combined = ch_alignment_outmerg
-            .join(ch_accession_mapping, by: 0, remainder: true)
+            .join(ch_mapped_assemblies, by: 0, remainder: true)
            .map { meta, bam, depth, mapping ->
                // If mapping is not present, replace it with null or an empty placeholder
                return [meta, bam, depth, mapping ?: ch_empty_file]
            }
-        CONFIDENCE_METRIC(
-            ch_combined
-        )
-        CONFIDENCE_MERGE(
-            CONFIDENCE_METRIC.out.tsv
-        )
+        if (!params.skip_confidence) {
+            CONFIDENCE_METRIC(
+                ch_combined
+            )
+
+            CONFIDENCE_MERGE(
+                CONFIDENCE_METRIC.out.tsv
+            )
-        ch_joined_confidence_report = KRAKEN2_KRAKEN2.out.report.join(
-            CONFIDENCE_MERGE.out.confidence
-        )
-        if (!params.skip_confidence) {
            CONVERT_CONFIDENCE(
-                ch_joined_confidence_report
+                CONFIDENCE_MERGE.out.confidence
            )
            MERGE_CONFIDENCE(
@@ -454,11 +522,13 @@ workflow TAXTRIAGE {
            )
            ch_mergedtsv = MERGE_CONFIDENCE.out.confidence_report
+            ch_multiqc_files = ch_multiqc_files.mix(ch_mergedtsv.collect().ifEmpty([]))
+
        }
    }
-    if (!params.skip_assembly) {
+    if (params.denovo_assembly) {
-        illumina_reads = ch_hit_to_kraken_report.filter {
+        illumina_reads = ch_filtered_reads.filter {
            it[0].platform == 'ILLUMINA'
        }.map {
            meta, reads -> [meta, reads, [], []]
@@ -467,9 +537,8 @@ workflow TAXTRIAGE {
            illumina_reads
        )
-        println('____________________________')
-        nanopore_reads = ch_hit_to_kraken_report.filter {
+        nanopore_reads = ch_filtered_reads.filter {
            it[0].platform == 'OXFORD'
        }.map {
            meta, reads -> [meta, [], [], [], reads]
@@ -487,24 +556,6 @@ workflow TAXTRIAGE {
    // // //
    // // // MODULE: MultiQC Pt 2
    // // //
-
-    ch_multiqc_files = ch_multiqc_files.mix(MERGEDKRAKENREPORT.out.krakenreport.collect().ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(FILTERKRAKEN.out.reports.collect().ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(TOP_HITS.out.krakenreport.collect { it[1] }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_alignment_stats.collect { it[1] }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_porechop_out.collect { it[1] }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_merged_table_config.collect().ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.collect { it[1] }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_kraken2_report.collect { it[1] }.ifEmpty([]))
-    if (params.trim) {
-        ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.reads.collect { it[1] }.ifEmpty([]))
-    }
-    if (!params.skip_plots) {
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }.ifEmpty([]))
-        ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT.out.txt.collect { it[1] }.ifEmpty([]))
-    }
-    ch_multiqc_files = ch_multiqc_files.mix(ch_mergedtsv.collect().ifEmpty([]))
-
    // Unused or Incomplete
    // if (params.blastdb && !params.remoteblast){
    //     ch_multiqc_files = ch_multiqc_files.mix(BLAST_BLASTN.out.txt.collect{it[1]}.ifEmpty([]))
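+    // Example invocation using the renamed toggles (a sketch: the --input/--db names and the
+    // paths are illustrative placeholders; only the assembly/feature/variant flags and the
+    // docker profile come from this changeset):
+    //   nextflow run . -profile docker --input samplesheet.csv --db /path/to/kraken2_db \
+    //       --reference_assembly --get_variants --get_features --denovo_assembly false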