diff --git a/.gitignore b/.gitignore
index b2592a6..5df9590 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,4 +139,7 @@ slurm/err
 slurm/logs
 tmp
 .Rproj.user
-*.Rproj
\ No newline at end of file
+*.Rproj
+.quarto
+*/libs
+*.html
\ No newline at end of file
diff --git a/README.md b/README.md
index 3f52c95..547aeb5 100644
--- a/README.md
+++ b/README.md
@@ -140,6 +140,32 @@ Here's an example of running up to and including the filtering part of the workf
 snakemake --use-conda -j 10 -p filter
 ```
 
+##### Configuring login to JGI server
+
+The workflow automatically downloads transcript data from the [JGI Mycocosm](https://mycocosm.jgi.doe.gov/fungi/fungi.info.html) database. For this to work you must have [registered an account](https://contacts.jgi.doe.gov/registration/new). After registering, create a file in YAML format containing your login credentials:
+
+```yaml
+jgi_password: <your_password>
+jgi_user: <your_username/email>
+```
+
+Save this file as `jgi_login.yml` and pass it to the workflow with the `--configfile` flag:
+
+```bash
+snakemake --configfile jgi_login.yml <other flags>
+```
+
+Information about genomes found in the JGI Mycocosm database will be stored in `resources/JGI/genomes.tsv`. This file will be available after a dry-run of the workflow so you may inspect it before running the workflow. The file should look like this:
+
+| portal | Name | bp | genes |
+|--------|------|----|------|
+| Aaoar1 | Aaosphaeria arxii CBS 175.79 v1.0 | 38901049 | 14203 |
+| Abobi1 | Abortiporus biennis CCBS 521 v1.0 | 45165060 | 11987 |
+| Abobie1 | Abortiporus biennis ​CIRM-BRFM 1778 v1.0 | 33118568 | 11767 |
+
+The `portal` column contains the JGI portal name, `Name` is the name of the genome, `bp` is the size of the genome in base pairs and `genes` is the number of genes in the genome. You may filter this file to your liking to only download transcripts for the genomes you are interested in. Remember to save it as `resources/JGI/genomes.tsv` before running the workflow.
+
+
 #### Assembly
 
 Assemblies can be generated for single samples or by combining multiple samples
diff --git a/Snakefile b/Snakefile
index 7dc705e..d6ef5c0 100644
--- a/Snakefile
+++ b/Snakefile
@@ -6,7 +6,7 @@ import platform
 from source.utils.parse import parse_sample_list
 from snakemake.utils import min_version, validate
 from snakemake.exceptions import WorkflowError
-
+from source.utils.list_jgi_genomes import parse_genomes
 
 def parse_validation_error(e):
     """
@@ -33,11 +33,6 @@ if not os.getenv("TMPDIR"):
     os.environ["TMPDIR"] = "tmp"
     os.makedirs(os.environ["TMPDIR"],exist_ok=True)
 
-wildcard_constraints:
-    sample_id = "[A-Za-z0-9_\-\.]+",
-    assembler = "megahit|trinity|transabyss",
-    filter_source = "unfiltered|filtered|taxmapper|bowtie2"
-
 # Set default config and validate against schema
 if os.path.exists("config.yml"):
     configfile: "config.yml"
@@ -49,14 +44,23 @@ except WorkflowError as e:
 
 workdir: config["workdir"]
 
+# Parse samples and assemblies
+samples, map_dict, assemblies = parse_sample_list(config["sample_file_list"], config)
+# Parse JGI genomes from fungi info url, also save to file for quick re-use in future runs
+genomes = parse_genomes(config["fungi_info"], f="resources/JGI/genomes.tsv")
+
+wildcard_constraints:
+    sample_id = f"({'|'.join(list(samples.keys()))})",
+    assembler = "megahit|trinity|transabyss",
+    filter_source = "unfiltered|filtered|taxmapper|bowtie2",
+    portals = f"({'|'.join(list(genomes.index.tolist()))})",
+
 # Get environment info
 pythonpath = sys.executable
 envdir = '/'.join(pythonpath.split("/")[0:-2])
 system = platform.system()
 config["system"] = system
 
-# Parse samples and assemblies
-samples, map_dict, assemblies = parse_sample_list(config["sample_file_list"], config)
 
 # Include rules
 include: "source/rules/db.smk"
diff --git a/config.yml b/config.yml
index fa4ab7f..ed88e03 100644
--- a/config.yml
+++ b/config.yml
@@ -5,7 +5,7 @@ sample_file_list: "sample_list.tsv"
 datadir: "data/raw"
 mem_per_cpu: 6
 ## data for filtering
-fungi_url: "https://zenodo.org/record/3550762/files/fungi_transcripts.fasta.gz?download=1"
+fungi_info: "https://mycocosm.jgi.doe.gov/fungi/fungi.info.html"
 host_fna_url: "https://zenodo.org/record/3550762/files/spruce.fna.gz?download=1"
 host_gtf_url: ""
 ## mapping
diff --git a/config/config.schema.yaml b/config/config.schema.yaml
index a131924..02ad98c 100644
--- a/config/config.schema.yaml
+++ b/config/config.schema.yaml
@@ -84,10 +84,10 @@ properties:
       'both_mapped' = requires that both ends are mapped or 'one_mapped' = keep paired reads even if only one end maps"
     default: "both_mapped"
     enum: ["concordant","both_mapped","one_mapped"]
-  fungi_transcript_url:
+  fungi_info:
     type: string
-    description: Dataset url for fungal transcripts to use in filtering
-    default: "https://zenodo.org/record/3550762/files/fungi_transcripts.fasta.gz?download=1"
+    description: URL to fungi info page
+    default: https://mycocosm.jgi.doe.gov/fungi/fungi.info.html
   spruce_transcript_url:
     type: string
     description: Dataset url for spruce transcripts to use in filtering
diff --git a/environment.yml b/environment.yml
index 01aebe4..9d5b144 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,13 +1,16 @@
 name: fungal-trans
 channels:
-  - bioconda
   - conda-forge
+  - bioconda
   - defaults
 dependencies:
-  - python=3.8
-  - snakemake-minimal=6.2.1
-  - biopython=1.78
-  - pandas=1.3.4
+  - python
+  - snakemake-minimal
+  - biopython
+  - pandas
+  - lxml
+  - pycurl
+  - tqdm
   - multiqc=1.11
   - bowtie2=2.4.4
   - samtools=1.14
diff --git a/envs/bowtie2.yaml b/envs/bowtie2.yaml
new file mode 100644
index 0000000..300998b
--- /dev/null
+++ b/envs/bowtie2.yaml
@@ -0,0 +1,8 @@
+name: bowtie2
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bowtie2=2.5.3
+  - samtools=1.20
\ No newline at end of file
diff --git a/local/config.yaml b/local/config.yaml
new file mode 100644
index 0000000..a76717f
--- /dev/null
+++ b/local/config.yaml
@@ -0,0 +1,4 @@
+use-conda: True
+keep-going: True
+rerun-triggers: mtime
+cores: 1
\ No newline at end of file
diff --git a/source/rules/annotate_co.smk b/source/rules/annotate_co.smk
index e6fd9ae..3d17aa8 100644
--- a/source/rules/annotate_co.smk
+++ b/source/rules/annotate_co.smk
@@ -371,7 +371,7 @@ rule create_blob_co:
     conda: "../../envs/blobtools.yaml"
     shell:
         """
-        blobtools create --nodes {input.nodes} --names {input.names} -i {input.fa} -b {input.bam} -t {input.hit} \ 
+        blobtools create --nodes {input.nodes} --names {input.names} -i {input.fa} -b {input.bam} -t {input.hit} \
             --title {wildcards.sample_id} -o {params.prefix}
         """
 
diff --git a/source/rules/db.smk b/source/rules/db.smk
index 8c989f3..0d2e179 100644
--- a/source/rules/db.smk
+++ b/source/rules/db.smk
@@ -6,7 +6,8 @@ localrules:
     get_kegg_files,
     download_refseq_db,
     download_host,
-    download_fungi_transcripts,
+    init_jgi,
+    download_jgi_transcripts,
     prepare_diamond_JGI
 
 ## NCBI Taxonomy ##
@@ -112,18 +113,62 @@ rule press_dbCAN:
         """
 
 ## Fungal transcripts ##
-rule download_fungi_transcripts:
+rule init_jgi:
     """
-    Downloads snapshot of fungal transcripts obtained from JGI 
+    Creates a cookie file for JGI authentication using user-defined credentials
     """
     output:
-        temp("resources/fungi/fungi_transcripts.fasta")
+        cookies="resources/JGI/cookies"
+    log:
+        "resources/JGI/init_jgi.log"
+    shadow: "minimal"
     params:
-        url = config["fungi_url"]
+        user_name = config["jgi_user"],
+        password = config["jgi_password"]
+    retries: 3
+    shell:
+        """
+        curl 'https://signon.jgi.doe.gov/signon/create' \
+            --data-urlencode 'login={params.user_name}' \
+            --data-urlencode 'password={params.password}' -c {output.cookies} > /dev/null 2>{log}
+        """
+
+rule download_jgi_transcripts:
+    """
+    For each 'portal', download the filtered transcripts 
+    """
+    output:
+        temp("resources/JGI/genomes/{portal}.transcripts.fna.gz"),
+    input:
+        cookies=rules.init_jgi.output.cookies,
+    log:
+        "resources/JGI/genomes/download_jgi_transcripts.{portal}.log"
+    params:
+        tmpdir = "$TMPDIR/{portal}",
+        xml = "$TMPDIR/{portal}/files.xml",
+        output = "$TMPDIR/{portal}/transcripts.fna.gz"
+    retries: 3
+    shell:
+        """
+        mkdir -p {params.tmpdir}
+        python source/utils/download_jgi_transcripts.py -p {wildcards.portal} -c {input.cookies} -o {params.output} 2>{log}
+        mv {params.output} {output}
+        rm -rf {params.tmpdir}
+        """
+
+rule concat_transcripts:
+    """
+    Concatenates all transcripts downloaded
+    """
+    output:
+        "resources/fungi/fungi_transcripts.fasta.gz"
+    input:
+        expand(rules.download_jgi_transcripts.output, portal=genomes.index.tolist())
+    log:
+        "resources/fungi/concat_transcripts.log"
     shell:
         """
-        curl -L -s -o {output[0]}.gz {params.url}
-        gunzip {output[0]}.gz
+        cat {input} > {output} 
         """
 
 ## Host data ##
diff --git a/source/rules/filter.smk b/source/rules/filter.smk
index 7608483..fc423d1 100644
--- a/source/rules/filter.smk
+++ b/source/rules/filter.smk
@@ -25,27 +25,29 @@ include: "paired_strategy.smk"
 
 rule bowtie_build_fungi:
     input:
-        "resources/fungi/fungi_transcripts.fasta"
+        rules.concat_transcripts.output
     output:
         expand("resources/fungi/fungi_transcripts.fasta.{index}.bt2l",
                index=range(1,5))
     log:
         "resources/fungi/bowtie2.log"
     params:
-        tmp = "$TMPDIR/fungi_transcripts/fungi_transcripts.fasta",
+        fasta = "$TMPDIR/fungi_transcripts/fungi_transcripts.fasta",
         tmpdir = "$TMPDIR/fungi_transcripts",
         outdir = "resources/fungi/"
     threads: 20
     resources:
         runtime = lambda wildcards, attempt: attempt**2*60*120
+    conda: "../../envs/bowtie2.yaml"
     shell:
         """
         mkdir -p {params.tmpdir}
+        gunzip -c {input} > {params.fasta}
         bowtie2-build \
             --threads {threads} \
             --large-index {input} \
-            {params.tmp} >{log} 2>&1
-        mv {params.tmp}*.bt2l {params.outdir}/
+            {params.fasta} >{log} 2>&1
+        mv {params.fasta}*.bt2l {params.outdir}/
         rm -r {params.tmpdir}
         """
 
@@ -83,6 +85,7 @@ rule bowtie_build_host:
         "resources/host/bowtie2.log"
     resources:
         runtime = lambda wildcards, attempt: attempt**2*60
+    conda: "../../envs/bowtie2.yaml"
     shell:
         """
         bowtie2-build \
diff --git a/source/utils/download_jgi_transcripts.py b/source/utils/download_jgi_transcripts.py
new file mode 100644
index 0000000..38f522a
--- /dev/null
+++ b/source/utils/download_jgi_transcripts.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+import xml.etree.ElementTree as ET
+import pycurl
+from datetime import datetime
+import pytz
+import tempfile
+import os
+from argparse import ArgumentParser
+import sys
+
+
+def tz_convert(timestamp):
+    """
+    Converts timestamp to UTC
+    """
+    timezones = {
+        "PDT": "America/Los_Angeles",
+        "PST": "America/Los_Angeles"
+                 }
+    tz = timestamp.split(" ")[-2]
+    timestamp = " ".join(timestamp.split(" ")[0:-2]+timestamp.split(" ")[-1:])
+    t = datetime.strptime(timestamp, "%a %b %d %H:%M:%S %Y")
+    tz = pytz.timezone(timezones[tz])
+    return tz.localize(t).astimezone(pytz.utc)
+
+def get_xml(portal, cookie):
+    """
+    Get XML tree from JGI portal
+    The xml file is stored in a temporary file and read into an ElementTree object
+    """
+    url=f"https://genome-downloads.jgi.doe.gov/portal/ext-api/downloads/get-directory?organism={portal}"
+    with tempfile.TemporaryFile() as fp:
+        c = pycurl.Curl()
+        c.setopt(c.COOKIEFILE, cookie)
+        c.setopt(c.URL, url)
+        c.setopt(c.WRITEDATA, fp)
+        c.setopt(c.VERBOSE, False)
+        c.perform()
+        c.close()
+        fp.seek(0)
+        tree = ET.fromstring(fp.read())
+    return tree
+    
+def find_files(root):
+    """
+    Find files in the XML tree
+    Searches for the latest transcript file in the folder "Filtered Models ("best")/Transcripts"
+    """
+    f = ""
+    for child in root:
+        if child.tag == "folder" and child.attrib["name"] == "Files":
+            for item in child:
+                if item.tag == "folder" and item.attrib["name"] == "Annotation":
+                    for annot_item in item:
+                        if annot_item.tag == "folder" and annot_item.attrib["name"] == 'Filtered Models ("best")':
+                            for filtered_item in annot_item:
+                                if filtered_item.tag == "folder" and filtered_item.attrib["name"] == "Transcripts":
+                                    timestamp = False
+                                    for transcript_item in filtered_item:
+                                        if transcript_item.tag == "file":
+                                            filename=transcript_item.attrib["filename"]
+                                            if "transcripts" in filename and filename.endswith(".nt.fasta.gz"):
+                                                url = transcript_item.attrib["url"]
+                                                _timestamp = tz_convert(transcript_item.attrib["timestamp"])
+                                                if timestamp and _timestamp>timestamp:
+                                                    sys.stderr.write(f"Found newer file {filename} with timestamp {_timestamp}\n")
+                                                    f = url
+                                                elif not timestamp:
+                                                    sys.stderr.write(f"Found file {filename} with timestamp {_timestamp}\n")
+                                                    f = url
+                                                    timestamp = _timestamp
+                                    break
+    return f
+
+def main(args):
+    """
+    Main function
+    """
+    root = get_xml(args.portal, args.cookie)
+    f = find_files(root)
+    url = f"{args.base}{f}"
+    if not args.outfile:
+        outfile = os.path.basename(f)
+    else:
+        outfile = args.outfile
+    sys.stderr.write(f"Downloading {os.path.basename(f)} to {outfile}\n")
+    with open(outfile, 'wb') as fhout:
+        c = pycurl.Curl()
+        c.setopt(c.COOKIEFILE, args.cookie)
+        c.setopt(c.URL, url)
+        c.setopt(c.WRITEDATA, fhout)
+        c.setopt(c.VERBOSE, False)
+        c.perform()
+        c.close()
+    
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Download all genomes from JGI Mycocosm')
+    parser.add_argument('-p', '--portal', help='Portal shorthand name (e.g. Aaoar1)', required=True)
+    parser.add_argument('-c', '--cookie', help="Cookie file for JGI", )
+    parser.add_argument('-o', '--outfile', help='Output file name. If not given, the file will be saved in the current directory with the remote filename')
+    parser.add_argument('-b', '--base', help='Base URL for JGI downloads (default: https://genome-downloads.jgi.doe.gov)', default="https://genome-downloads.jgi.doe.gov")
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
diff --git a/source/utils/list_jgi_genomes.py b/source/utils/list_jgi_genomes.py
new file mode 100644
index 0000000..e2cd094
--- /dev/null
+++ b/source/utils/list_jgi_genomes.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+import pandas as pd
+import numpy as np
+import xml.etree.ElementTree as ET
+import os
+import sys
+from argparse import ArgumentParser
+
+def parse_genomes(url, f = None):
+    """
+    Parses available genome portals at JGI (default: https://mycocosm.jgi.doe.gov/fungi/fungi.info.html)
+
+    :param url: str
+    :return: dict
+    """
+    def extract_name_portal(genome_table):
+        df = {}
+        for _, items in genome_table.iterrows():
+            name, portal = items["Name"]
+            if name == "Name":
+                continue
+            asm_length, _ = items["Assembly Length"]
+            genes, _ = items["# Genes"]
+            if asm_length == "NO DATA":
+                asm_length = np.nan
+            else:
+                asm_length = int(asm_length.replace(",",""))
+            if genes == "NO DATA":
+                genes = np.nan
+            else:
+                genes = int(genes.replace(",",""))
+            df[portal.lstrip("/")] = {'Name': name, 'bp': asm_length, 'genes': genes}
+        df = pd.DataFrame(df).T
+        df.index.name = "portal"
+        return df
+    if f is not None and os.path.exists(f):
+        genomes = pd.read_csv(f, sep="\t", header=0, index_col=0)
+        return genomes
+    genome_table = pd.read_html(url, extract_links="body", header=None)[0]
+    genome_table.columns = ["##","Name","Assembly Length","# Genes", "Published"]
+    genome_table.set_index("##", inplace=True)
+    genomes = extract_name_portal(genome_table)
+    if f is not None:
+        os.makedirs(os.path.dirname(f), exist_ok=True)
+        genomes.to_csv(f, sep="\t")
+    else:
+        with sys.stdout as f:
+            genomes.to_csv(f, sep="\t")
+    return genomes
+
+
+def main(args):
+    """
+    Main function
+    """
+    genomes = parse_genomes(args.info, args.file)
+    
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Download a table of all available genomes from JGI Mycocosm')
+    parser.add_argument('-i', '--info', help='Info file at JGI Mycocosm (default: https://mycocosm.jgi.doe.gov/fungi/fungi.info.html)', default="https://mycocosm.jgi.doe.gov/fungi/fungi.info.html")
+    parser.add_argument('-f', '--file', help='Output file name. Will print to stdout if not provided.')
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file