From 7ad57cb09985e93b5e5e5b63f9b348f258c9337f Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 30 Jun 2022 17:16:34 +0100
Subject: [PATCH 01/10] Add ffq to fetch metadata

---
 CITATIONS.md         |  4 ++++
 conf/modules.config  |  8 ++++++++
 modules/local/ffq.nf | 34 ++++++++++++++++++++++++++++++++++
 workflows/sra.nf     |  9 +++++++++
 4 files changed, 55 insertions(+)
 create mode 100644 modules/local/ffq.nf

diff --git a/CITATIONS.md b/CITATIONS.md
index 3826bfe7..852085ca 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,6 +10,10 @@
 
 ## Pipeline tools
 
+- [ffq](https://www.biorxiv.org/content/10.1101/2022.05.18.492548v2)
+ 
+ > Gálvez-Merchán A, Min, KHJ, Pachter L, Booeshaghi SA. Metadata retrieval from sequence databases with ffq. bioRxiv, 19 May 2022. doi: 10.1101/2022.05.18.492548.
+
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
diff --git a/conf/modules.config b/conf/modules.config
index 82a5f75c..ccf12423 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,6 +38,14 @@ if (params.input_type == 'sra') {
 
     process {
 
+        withName: FFQ {
+            publishDir = [
+                path: { "${params.outdir}/metadata" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
         withName: SRA_IDS_TO_RUNINFO {
             publishDir = [
                 path: { "${params.outdir}/metadata" },
diff --git a/modules/local/ffq.nf b/modules/local/ffq.nf
new file mode 100644
index 00000000..2e6e572c
--- /dev/null
+++ b/modules/local/ffq.nf
@@ -0,0 +1,34 @@
+process FFQ {
+    tag "$id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::ffq=0.2.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ffq:0.2.1--pyhdfd78af_0':
+        'quay.io/biocontainers/ffq:0.2.1--pyhdfd78af_0' }"
+
+    input:
+    val id
+
+    output:
+    path "*.json"      , emit: json
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "$id"   
+    """
+    ffq \\
+        $id \\
+        $args \\
+        > ${prefix}.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ffq: \$(echo \$(ffq --help 2>&1) | sed 's/^.*ffq //; s/: A command.*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/workflows/sra.nf b/workflows/sra.nf
index 2b3cb498..ccd0edfb 100644
--- a/workflows/sra.nf
+++ b/workflows/sra.nf
@@ -19,6 +19,7 @@ WorkflowSra.initialise(params, log, valid_params)
 ========================================================================================
 */
 
+include { FFQ                     } from '../modules/local/ffq'
 include { SRA_IDS_TO_RUNINFO      } from '../modules/local/sra_ids_to_runinfo'
 include { SRA_RUNINFO_TO_FTP      } from '../modules/local/sra_runinfo_to_ftp'
 include { SRA_FASTQ_FTP           } from '../modules/local/sra_fastq_ftp'
@@ -50,6 +51,14 @@ workflow SRA {
     main:
     ch_versions = Channel.empty()
 
+    // //
+    // // MODULE: Get id metadata from ffq
+    // //
+    // FFQ (
+    //     ids
+    // )
+    // ch_versions = ch_versions.mix(FFQ.out.versions.first())
+
     //
     // MODULE: Get SRA run information for public database ids
     //

From c73ce9e8346579ad6577fa809d884ff9d782e292 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 30 Jun 2022 21:36:47 +0100
Subject: [PATCH 02/10] Fix linting

---
 CITATIONS.md         |   4 +-
 modules/local/ffq.nf |  10 ++-
 workflows/sra.nf     | 181 ++++++++++++++++++++++---------------------
 3 files changed, 99 insertions(+), 96 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 852085ca..e9a6d19e 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -11,8 +11,8 @@
 ## Pipeline tools
 
 - [ffq](https://www.biorxiv.org/content/10.1101/2022.05.18.492548v2)
- 
- > Gálvez-Merchán A, Min, KHJ, Pachter L, Booeshaghi SA. Metadata retrieval from sequence databases with ffq. bioRxiv, 19 May 2022. doi: 10.1101/2022.05.18.492548.
+
+> Gálvez-Merchán A, Min KHJ, Pachter L, Booeshaghi SA. Metadata retrieval from sequence databases with ffq. bioRxiv, 19 May 2022. doi: 10.1101/2022.05.18.492548.
 
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
diff --git a/modules/local/ffq.nf b/modules/local/ffq.nf
index 2e6e572c..be7c56c2 100644
--- a/modules/local/ffq.nf
+++ b/modules/local/ffq.nf
@@ -1,5 +1,5 @@
 process FFQ {
-    tag "$id"
+    tag "${ids.size() == 1 ? ids[0] : "${ids[0]+'..'+ids[-1]}"}"
     label 'process_low'
 
     conda (params.enable_conda ? "bioconda::ffq=0.2.1" : null)
@@ -8,7 +8,7 @@ process FFQ {
         'quay.io/biocontainers/ffq:0.2.1--pyhdfd78af_0' }"
 
     input:
-    val id
+    val ids
 
     output:
     path "*.json"      , emit: json
@@ -19,10 +19,12 @@ process FFQ {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "$id"   
+    def id_list = ids.sort()
+    def name = id_list.size() == 1 ? ids[0] : 'metadata'
+    def prefix = task.ext.prefix ?: "${name}"
     """
     ffq \\
-        $id \\
+        ${id_list.join(' ')} \\
         $args \\
         > ${prefix}.json
 
diff --git a/workflows/sra.nf b/workflows/sra.nf
index ccd0edfb..907e62f7 100644
--- a/workflows/sra.nf
+++ b/workflows/sra.nf
@@ -51,101 +51,102 @@ workflow SRA {
     main:
     ch_versions = Channel.empty()
 
-    // //
-    // // MODULE: Get id metadata from ffq
-    // //
-    // FFQ (
-    //     ids
-    // )
-    // ch_versions = ch_versions.mix(FFQ.out.versions.first())
-
     //
-    // MODULE: Get SRA run information for public database ids
+    // MODULE: Get id metadata from ffq
     //
-    SRA_IDS_TO_RUNINFO (
-        ids,
-        params.ena_metadata_fields ?: ''
+    FFQ (
+        //ids.map { [ it ] }
+        ids
     )
-    ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first())
+    ch_versions = ch_versions.mix(FFQ.out.versions.first())
 
-    //
-    // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ]
-    //
-    SRA_RUNINFO_TO_FTP (
-        SRA_IDS_TO_RUNINFO.out.tsv
-    )
-    ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
-
-    SRA_RUNINFO_TO_FTP
-        .out
-        .tsv
-        .splitCsv(header:true, sep:'\t')
-        .map {
-            meta ->
-                meta.single_end = meta.single_end.toBoolean()
-                [ meta, [ meta.fastq_1, meta.fastq_2 ] ]
-        }
-        .unique()
-        .branch {
-            ftp: it[0].fastq_1  && !params.force_sratools_download
-            sra: !it[0].fastq_1 || params.force_sratools_download
-        }
-        .set { ch_sra_reads }
-    ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
-
-    if (!params.skip_fastq_download) {
-
-        //
-        // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
-        //
-        SRA_FASTQ_FTP (
-            ch_sra_reads.ftp
-        )
-        ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first())
-
-        //
-        // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
-        //
-        SRAFASTQ (
-            ch_sra_reads.sra.map { meta, reads -> [ meta, meta.run_accession ] }
-        )
-        ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first())
-
-        //
-        // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet
-        //
-        SRA_TO_SAMPLESHEET (
-            SRA_FASTQ_FTP.out.fastq.mix(SRAFASTQ.out.reads),
-            params.nf_core_pipeline ?: '',
-            params.sample_mapping_fields
-        )
-
-        //
-        // MODULE: Create a merged samplesheet across all samples for the pipeline
-        //
-        SRA_MERGE_SAMPLESHEET (
-            SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
-            SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
-        )
-        ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions)
-
-        //
-        // MODULE: Create a MutiQC config file with sample name mappings
-        //
-        if (params.sample_mapping_fields) {
-            MULTIQC_MAPPINGS_CONFIG (
-                SRA_MERGE_SAMPLESHEET.out.mappings
-            )
-            ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions)
-        }
-    }
+    // //
+    // // MODULE: Get SRA run information for public database ids
+    // //
+    // SRA_IDS_TO_RUNINFO (
+    //     ids,
+    //     params.ena_metadata_fields ?: ''
+    // )
+    // ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first())
 
-    //
-    // MODULE: Dump software versions for all tools used in the workflow
-    //
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
+    // //
+    // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ]
+    // //
+    // SRA_RUNINFO_TO_FTP (
+    //     SRA_IDS_TO_RUNINFO.out.tsv
+    // )
+    // ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
+
+    // SRA_RUNINFO_TO_FTP
+    //     .out
+    //     .tsv
+    //     .splitCsv(header:true, sep:'\t')
+    //     .map {
+    //         meta ->
+    //             meta.single_end = meta.single_end.toBoolean()
+    //             [ meta, [ meta.fastq_1, meta.fastq_2 ] ]
+    //     }
+    //     .unique()
+    //     .branch {
+    //         ftp: it[0].fastq_1  && !params.force_sratools_download
+    //         sra: !it[0].fastq_1 || params.force_sratools_download
+    //     }
+    //     .set { ch_sra_reads }
+    // ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
+
+    // if (!params.skip_fastq_download) {
+
+    //     //
+    //     // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
+    //     //
+    //     SRA_FASTQ_FTP (
+    //         ch_sra_reads.ftp
+    //     )
+    //     ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first())
+
+    //     //
+    //     // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
+    //     //
+    //     SRAFASTQ (
+    //         ch_sra_reads.sra.map { meta, reads -> [ meta, meta.run_accession ] }
+    //     )
+    //     ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first())
+
+    //     //
+    //     // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet
+    //     //
+    //     SRA_TO_SAMPLESHEET (
+    //         SRA_FASTQ_FTP.out.fastq.mix(SRAFASTQ.out.reads),
+    //         params.nf_core_pipeline ?: '',
+    //         params.sample_mapping_fields
+    //     )
+
+    //     //
+    //     // MODULE: Create a merged samplesheet across all samples for the pipeline
+    //     //
+    //     SRA_MERGE_SAMPLESHEET (
+    //         SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
+    //         SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
+    //     )
+    //     ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions)
+
+    //     //
+    //     // MODULE: Create a MutiQC config file with sample name mappings
+    //     //
+    //     if (params.sample_mapping_fields) {
+    //         MULTIQC_MAPPINGS_CONFIG (
+    //             SRA_MERGE_SAMPLESHEET.out.mappings
+    //         )
+    //         ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions)
+    //     }
+    // }
+
+    // //
+    // // MODULE: Dump software versions for all tools used in the workflow
+    // //
+    // CUSTOM_DUMPSOFTWAREVERSIONS (
+    //     ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    // )
 }
 
 /*

From ba6099da4643998e1ec1bfbeda4a330a21ec8fea Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 30 Jun 2022 21:44:39 +0100
Subject: [PATCH 03/10] Revert comments

---
 workflows/sra.nf | 181 +++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 91 deletions(-)

diff --git a/workflows/sra.nf b/workflows/sra.nf
index 907e62f7..ccd0edfb 100644
--- a/workflows/sra.nf
+++ b/workflows/sra.nf
@@ -51,102 +51,101 @@ workflow SRA {
     main:
     ch_versions = Channel.empty()
 
-    //
-    // MODULE: Get id metadata from ffq
-    //
-    FFQ (
-        //ids.map { [ it ] }
-        ids
-    )
-    ch_versions = ch_versions.mix(FFQ.out.versions.first())
-
     // //
-    // // MODULE: Get SRA run information for public database ids
+    // // MODULE: Get id metadata from ffq
     // //
-    // SRA_IDS_TO_RUNINFO (
-    //     ids,
-    //     params.ena_metadata_fields ?: ''
+    // FFQ (
+    //     ids
     // )
-    // ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first())
+    // ch_versions = ch_versions.mix(FFQ.out.versions.first())
 
-    // //
-    // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ]
-    // //
-    // SRA_RUNINFO_TO_FTP (
-    //     SRA_IDS_TO_RUNINFO.out.tsv
-    // )
-    // ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
-
-    // SRA_RUNINFO_TO_FTP
-    //     .out
-    //     .tsv
-    //     .splitCsv(header:true, sep:'\t')
-    //     .map {
-    //         meta ->
-    //             meta.single_end = meta.single_end.toBoolean()
-    //             [ meta, [ meta.fastq_1, meta.fastq_2 ] ]
-    //     }
-    //     .unique()
-    //     .branch {
-    //         ftp: it[0].fastq_1  && !params.force_sratools_download
-    //         sra: !it[0].fastq_1 || params.force_sratools_download
-    //     }
-    //     .set { ch_sra_reads }
-    // ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
-
-    // if (!params.skip_fastq_download) {
-
-    //     //
-    //     // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
-    //     //
-    //     SRA_FASTQ_FTP (
-    //         ch_sra_reads.ftp
-    //     )
-    //     ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first())
-
-    //     //
-    //     // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
-    //     //
-    //     SRAFASTQ (
-    //         ch_sra_reads.sra.map { meta, reads -> [ meta, meta.run_accession ] }
-    //     )
-    //     ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first())
-
-    //     //
-    //     // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet
-    //     //
-    //     SRA_TO_SAMPLESHEET (
-    //         SRA_FASTQ_FTP.out.fastq.mix(SRAFASTQ.out.reads),
-    //         params.nf_core_pipeline ?: '',
-    //         params.sample_mapping_fields
-    //     )
-
-    //     //
-    //     // MODULE: Create a merged samplesheet across all samples for the pipeline
-    //     //
-    //     SRA_MERGE_SAMPLESHEET (
-    //         SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
-    //         SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
-    //     )
-    //     ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions)
-
-    //     //
-    //     // MODULE: Create a MutiQC config file with sample name mappings
-    //     //
-    //     if (params.sample_mapping_fields) {
-    //         MULTIQC_MAPPINGS_CONFIG (
-    //             SRA_MERGE_SAMPLESHEET.out.mappings
-    //         )
-    //         ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions)
-    //     }
-    // }
+    //
+    // MODULE: Get SRA run information for public database ids
+    //
+    SRA_IDS_TO_RUNINFO (
+        ids,
+        params.ena_metadata_fields ?: ''
+    )
+    ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first())
 
-    // //
-    // // MODULE: Dump software versions for all tools used in the workflow
-    // //
-    // CUSTOM_DUMPSOFTWAREVERSIONS (
-    //     ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    // )
+    //
+    // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ]
+    //
+    SRA_RUNINFO_TO_FTP (
+        SRA_IDS_TO_RUNINFO.out.tsv
+    )
+    ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
+
+    SRA_RUNINFO_TO_FTP
+        .out
+        .tsv
+        .splitCsv(header:true, sep:'\t')
+        .map {
+            meta ->
+                meta.single_end = meta.single_end.toBoolean()
+                [ meta, [ meta.fastq_1, meta.fastq_2 ] ]
+        }
+        .unique()
+        .branch {
+            ftp: it[0].fastq_1  && !params.force_sratools_download
+            sra: !it[0].fastq_1 || params.force_sratools_download
+        }
+        .set { ch_sra_reads }
+    ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first())
+
+    if (!params.skip_fastq_download) {
+
+        //
+        // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
+        //
+        SRA_FASTQ_FTP (
+            ch_sra_reads.ftp
+        )
+        ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first())
+
+        //
+        // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
+        //
+        SRAFASTQ (
+            ch_sra_reads.sra.map { meta, reads -> [ meta, meta.run_accession ] }
+        )
+        ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first())
+
+        //
+        // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet
+        //
+        SRA_TO_SAMPLESHEET (
+            SRA_FASTQ_FTP.out.fastq.mix(SRAFASTQ.out.reads),
+            params.nf_core_pipeline ?: '',
+            params.sample_mapping_fields
+        )
+
+        //
+        // MODULE: Create a merged samplesheet across all samples for the pipeline
+        //
+        SRA_MERGE_SAMPLESHEET (
+            SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
+            SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
+        )
+        ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions)
+
+        //
+        // MODULE: Create a MutiQC config file with sample name mappings
+        //
+        if (params.sample_mapping_fields) {
+            MULTIQC_MAPPINGS_CONFIG (
+                SRA_MERGE_SAMPLESHEET.out.mappings
+            )
+            ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions)
+        }
+    }
+
+    //
+    // MODULE: Dump software versions for all tools used in the workflow
+    //
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
 }
 
 /*

From e38275384ea7d9d5ea993cbd44fd6bb30ee10898 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 09:11:10 +0100
Subject: [PATCH 04/10] Install ffq module from nf-core modules

---
 modules.json                                  |  3 ++
 .../ffq.nf => nf-core/modules/ffq/main.nf}    |  0
 modules/nf-core/modules/ffq/meta.yml          | 36 +++++++++++++++++++
 workflows/sra.nf                              |  4 +--
 4 files changed, 41 insertions(+), 2 deletions(-)
 rename modules/{local/ffq.nf => nf-core/modules/ffq/main.nf} (100%)
 create mode 100644 modules/nf-core/modules/ffq/meta.yml

diff --git a/modules.json b/modules.json
index fefca2b7..0a8c4bb2 100644
--- a/modules.json
+++ b/modules.json
@@ -9,6 +9,9 @@
             "custom/sratoolsncbisettings": {
                 "git_sha": "b2dbaa99309a2057efc32ef9d029ed91140068df"
             },
+            "ffq": {
+                "git_sha": "b96066565a52fdd42901f62e03c4ff5551980b43"
+            },
             "sratools/fasterqdump": {
                 "git_sha": "0cdf7767a79faf424645beeff83ecfa5528b6a7c"
             },
diff --git a/modules/local/ffq.nf b/modules/nf-core/modules/ffq/main.nf
similarity index 100%
rename from modules/local/ffq.nf
rename to modules/nf-core/modules/ffq/main.nf
diff --git a/modules/nf-core/modules/ffq/meta.yml b/modules/nf-core/modules/ffq/meta.yml
new file mode 100644
index 00000000..611e0adc
--- /dev/null
+++ b/modules/nf-core/modules/ffq/meta.yml
@@ -0,0 +1,36 @@
+name: "ffq"
+description: A command line tool that makes it easier to find sequencing data from the SRA / GEO / ENA.
+keywords:
+  - SRA
+  - ENA
+  - GEO
+  - metadata
+  - fetch
+  - public
+  - databases
+tools:
+  - "ffq":
+      description: "A command line tool that makes it easier to find sequencing data from the SRA / GEO / ENA."
+      homepage: https://github.com/pachterlab/ffq
+      documentation: https://github.com/pachterlab/ffq#usage
+      tool_dev_url: https://github.com/pachterlab/ffq
+      doi: "10.1101/2022.05.18.492548"
+      licence: "['MIT']"
+
+input:
+  - ids:
+      type: list
+      description: List of supported database ids e.g. SRA / GEO / ENA
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - json:
+      type: file
+      description: JSON file containing metadata for ids
+      pattern: "*.{json}"
+
+authors:
+  - "@drpatelh"
diff --git a/workflows/sra.nf b/workflows/sra.nf
index ccd0edfb..fe7b8d4c 100644
--- a/workflows/sra.nf
+++ b/workflows/sra.nf
@@ -19,7 +19,6 @@ WorkflowSra.initialise(params, log, valid_params)
 ========================================================================================
 */
 
-include { FFQ                     } from '../modules/local/ffq'
 include { SRA_IDS_TO_RUNINFO      } from '../modules/local/sra_ids_to_runinfo'
 include { SRA_RUNINFO_TO_FTP      } from '../modules/local/sra_runinfo_to_ftp'
 include { SRA_FASTQ_FTP           } from '../modules/local/sra_fastq_ftp'
@@ -35,6 +34,7 @@ include { SRAFASTQ                } from '../subworkflows/nf-core/srafastq/main'
 ========================================================================================
 */
 
+include { FFQ                         } from '../modules/nf-core/modules/ffq/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
 
 /*
@@ -55,7 +55,7 @@ workflow SRA {
     // // MODULE: Get id metadata from ffq
     // //
     // FFQ (
-    //     ids
+    //     ids.map { [it] }
     // )
     // ch_versions = ch_versions.mix(FFQ.out.versions.first())
 

From 7caf7628943480920dc23bd11277804154bc8fd4 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 11:56:37 +0100
Subject: [PATCH 05/10] Fix API call for SRA ids

---
 bin/sra_ids_to_runinfo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index bbd1e6f0..f802da9e 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -246,13 +246,13 @@ def _content_check(cls, response, identifier):
     def _id_to_srx(cls, identifier):
         """Resolve the identifier to SRA experiments."""
         params = {
-            "save": "efetch",
+            "id": identifier,
             "db": "sra",
             "rettype": "runinfo",
-            "term": identifier,
+            "retmode": "text"
         }
         response = fetch_url(
-            f"https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?{urlencode(params)}"
+            f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}"
         )
         cls._content_check(response, identifier)
         return [row["Experiment"] for row in open_table(response, delimiter=",")]

From a6c11e04c73b842f06febec66d3c7f247741ee5d Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 11:57:37 +0100
Subject: [PATCH 06/10] Update API call for GEO ids - still broken

---
 bin/sra_ids_to_runinfo.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index f802da9e..0f85d2fa 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -261,9 +261,14 @@ def _id_to_srx(cls, identifier):
     def _gse_to_srx(cls, identifier):
         """Resolve the identifier to SRA experiments."""
         ids = []
-        params = {"acc": identifier, "targ": "gsm", "view": "data", "form": "text"}
+        params = {
+            "id": identifier,
+            "db": "gds",
+            "rettype": "runinfo",
+            "retmode": "text"
+        }
         response = fetch_url(
-            f"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?{urlencode(params)}"
+            f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}"
         )
         cls._content_check(response, identifier)
         gsm_ids = [

From 2cb54e3e390049c7d0b7637e2b3636eaff41728b Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 13:21:33 +0100
Subject: [PATCH 07/10] Change default function to resolve GSM ids

---
 bin/sra_ids_to_runinfo.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py
index 0f85d2fa..21c7225d 100755
--- a/bin/sra_ids_to_runinfo.py
+++ b/bin/sra_ids_to_runinfo.py
@@ -193,9 +193,11 @@ def is_valid(cls, identifier):
 class DatabaseResolver:
     """Define a service class for resolving various identifiers to experiments."""
 
-    _GEO_PREFIXES = {"GSE"}
+    _GEO_PREFIXES = {
+        "GSE",
+        "GSM"
+    }
     _SRA_PREFIXES = {
-        "GSM",
         "PRJNA",
         "SAMN",
         "SRR",
@@ -207,7 +209,9 @@ class DatabaseResolver:
         "PRJDB",
         "SAMD",
     }
-    _ENA_PREFIXES = {"ERR"}
+    _ENA_PREFIXES = {
+        "ERR"
+    }
 
     @classmethod
     def expand_identifier(cls, identifier):

From edf3dafcc4585d879eca35687b9833bdf2279962 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 16:20:17 +0100
Subject: [PATCH 08/10] Bump pipeline version to 1.7

---
 CHANGELOG.md    | 7 ++++++-
 nextflow.config | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e635ea4..dada8a72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,11 +3,16 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unpublished Version / DEV]
+## [[1.7](https://github.com/nf-core/fetchngs/releases/tag/1.7)] - 2022-07-01
+
+### :warning: Major enhancements
+
+Support for GEO ids has been dropped in this release due to breaking changes introduced in the NCBI API. Please see [this PR](https://github.com/nf-core/fetchngs/pull/102).
 
 ### Enhancements & fixes
 
 - [#97](https://github.com/nf-core/fetchngs/pull/97) - Add support for generating nf-core/taxprofiler compatible samplesheets.
+- [#99](https://github.com/nf-core/fetchngs/issues/99) - SRA_IDS_TO_RUNINFO fails due to bad request
 - Add `enum` field for `--nf_core_pipeline` to parameter schema so only accept supported pipelines are accepted
 
 ## [[1.6](https://github.com/nf-core/fetchngs/releases/tag/1.6)] - 2022-05-17
diff --git a/nextflow.config b/nextflow.config
index 667f15e1..80ed6507 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -158,7 +158,7 @@ manifest {
     description     = 'Pipeline to fetch metadata and raw FastQ files from public databases'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version         = '1.7dev'
+    version         = '1.7'
 }
 
 // Load modules.config for DSL2 module specific options

From d1e07712666a224b44a5326594d56c5ec31cab48 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 16:28:18 +0100
Subject: [PATCH 09/10] Strip out mention of GEO ids everywhere

---
 CHANGELOG.md             | 10 +++++++++-
 README.md                | 16 ++++++++++++++--
 assets/schema_input.json |  2 +-
 docs/output.md           |  6 +++---
 docs/usage.md            | 18 +++++++++---------
 lib/WorkflowMain.groovy  |  4 ++--
 lib/WorkflowSra.groovy   | 17 +++++++++++++++++
 main.nf                  |  4 ++--
 nextflow_schema.json     |  2 +-
 workflows/sra.nf         |  7 +++++++
 10 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dada8a72..f3bbf9a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### :warning: Major enhancements
 
-Support for GEO ids has been dropped in this release due to breaking changes introduced in the NCBI API. Please see [this PR](https://github.com/nf-core/fetchngs/pull/102).
+Support for GEO ids has been dropped in this release due to breaking changes introduced in the NCBI API. For more detailed information please see [this PR](https://github.com/nf-core/fetchngs/pull/102).
+
+As a workaround, if you have a GEO accession you can directly download a text file containing the appropriate SRA ids to pass to the pipeline:
+
+- Search for your GEO accession on [GEO](https://www.ncbi.nlm.nih.gov/geo)
+- Click `SRA Run Selector` at the bottom of the GEO accession page
+- Select the desired samples in the `SRA Run Selector` and then download the `Accession List`
+
+This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline e.g. `--input SRR_Acc_List.txt`.
 
 ### Enhancements & fixes
 
diff --git a/README.md b/README.md
index 7806362e..1151db3d 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@
 
 ## Introduction
 
-**nf-core/fetchngs** is a bioinformatics pipeline to fetch metadata and raw FastQ files from both public and private databases. At present, the pipeline supports SRA / ENA / DDBJ / GEO / Synapse ids (see [usage docs](https://nf-co.re/fetchngs/usage#introduction)).
+**nf-core/fetchngs** is a bioinformatics pipeline to fetch metadata and raw FastQ files from both public and private databases. At present, the pipeline supports SRA / ENA / DDBJ / Synapse ids (see [usage docs](https://nf-co.re/fetchngs/usage#introduction)).
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies.
 
@@ -27,7 +27,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 Via a single file of ids, provided one-per-line (see [example input file](https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.txt)) the pipeline performs the following steps:
 
-### SRA / ENA / DDBJ / GEO ids
+### SRA / ENA / DDBJ ids
 
 1. Resolve database ids back to appropriate experiment-level ids and to be compatible with the [ENA API](https://ena-docs.readthedocs.io/en/latest/retrieval/programmatic-access.html)
 2. Fetch extensive id metadata via ENA API
@@ -36,6 +36,18 @@ Via a single file of ids, provided one-per-line (see [example input file](https:
    - Otherwise use [`sra-tools`](https://github.com/ncbi/sra-tools) to download `.sra` files and convert them to FastQ
 4. Collate id metadata and paths to FastQ files in a single samplesheet
 
+### GEO ids
+
+Support for GEO ids was dropped in [v1.7](https://github.com/nf-core/fetchngs/releases/tag/1.7) due to breaking changes introduced in the NCBI API. For more detailed information, please see [this PR](https://github.com/nf-core/fetchngs/pull/102).
+
+As a workaround, if you have a GEO accession you can directly download a text file containing the appropriate SRA ids to pass to the pipeline instead:
+
+- Search for your GEO accession on [GEO](https://www.ncbi.nlm.nih.gov/geo)
+- Click `SRA Run Selector` at the bottom of the GEO accession page
+- Select the desired samples in the `SRA Run Selector` and then download the `Accession List`
+
+This downloads a text file called `SRR_Acc_List.txt` that can be provided directly to the pipeline, e.g. `--input SRR_Acc_List.txt`.
+
 ### Synapse ids
 
 1. Resolve Synapse directory ids to their corresponding FastQ files ids via the `synapse list` command.
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 9a800216..8bf05fd8 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -9,7 +9,7 @@
         "items": {
             "type": "string",
             "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM])|(syn))(\\d+)$",
-            "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier"
+            "errorMessage": "Please provide a valid SRA, ENA, DDBJ identifier"
         }
     }
 }
diff --git a/docs/output.md b/docs/output.md
index 7402976c..daaca914 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -9,19 +9,19 @@ This document describes the output produced by the pipeline. The directories lis
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data depending on the type of ids provided:
 
 - Download FastQ files and create samplesheet from:
-  1. [SRA / ENA / DDBJ / GEO ids](#sra--ena--ddbj--geo-ids)
+  1. [SRA / ENA / DDBJ ids](#sra--ena--ddbj-ids)
   2. [Synapse ids](#synapse-ids)
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
 Please see the [usage documentation](https://nf-co.re/fetchngs/usage#introduction) for a list of supported public repository identifiers and how to provide them to the pipeline.
 
-### SRA / ENA / DDBJ / GEO ids
+### SRA / ENA / DDBJ ids
 
 <details markdown="1">
 <summary>Output files</summary>
 
 - `fastq/`
-  - `*.fastq.gz`: Paired-end/single-end reads downloaded from the SRA / ENA / DDBJ / GEO.
+  - `*.fastq.gz`: Paired-end/single-end reads downloaded from the SRA / ENA / DDBJ.
 - `fastq/md5/`
   - `*.md5`: Files containing `md5` sum for FastQ files downloaded from the ENA.
 - `samplesheet/`
diff --git a/docs/usage.md b/docs/usage.md
index 0cb6a976..ba4de1b2 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -8,15 +8,15 @@
 
 The pipeline has been set-up to automatically download and process the raw FastQ files from both public and private repositories. Identifiers can be provided in a file, one-per-line via the `--input` parameter. Currently, the following types of example identifiers are supported:
 
-| `SRA`        | `ENA`        | `DDBJ`       | `GEO`      | `Synapse`   |
-| ------------ | ------------ | ------------ | ---------- | ----------- |
-| SRR11605097  | ERR4007730   | DRR171822    | GSM4432381 | syn26240435 |
-| SRX8171613   | ERX4009132   | DRX162434    | GSE147507  |             |
-| SRS6531847   | ERS4399630   | DRS090921    |            |             |
-| SAMN14689442 | SAMEA6638373 | SAMD00114846 |            |             |
-| SRP256957    | ERP120836    | DRP004793    |            |             |
-| SRA1068758   | ERA2420837   | DRA008156    |            |             |
-| PRJNA625551  | PRJEB37513   | PRJDB4176    |            |             |
+| `SRA`        | `ENA`        | `DDBJ`       | `Synapse`   |
+| ------------ | ------------ | ------------ | ----------- |
+| SRR11605097  | ERR4007730   | DRR171822    | syn26240435 |
+| SRX8171613   | ERX4009132   | DRX162434    |             |
+| SRS6531847   | ERS4399630   | DRS090921    |             |
+| SAMN14689442 | SAMEA6638373 | SAMD00114846 |             |
+| SRP256957    | ERP120836    | DRP004793    |             |
+| SRA1068758   | ERA2420837   | DRA008156    |             |
+| PRJNA625551  | PRJEB37513   | PRJDB4176    |             |
 
 ### SRR / ERR / DRR ids
 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index f64fa80f..77b7ffde 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -104,7 +104,7 @@ class WorkflowMain {
             if (num_match == total_ids) {
                 is_sra = true
             } else {
-                log.error "Mixture of ids provided via --input: ${no_match_ids.join(', ')}\nPlease provide either SRA / ENA / DDBJ / GEO or Synapse ids!"
+                log.error "Mixture of ids provided via --input: ${no_match_ids.join(', ')}\nPlease provide either SRA / ENA / DDBJ or Synapse ids!"
                 System.exit(1)
             }
         }
@@ -129,7 +129,7 @@ class WorkflowMain {
             if (num_match == total_ids) {
                 is_synapse = true
             } else {
-                log.error "Mixture of ids provided via --input: ${no_match_ids.join(', ')}\nPlease provide either SRA / ENA / DDBJ / GEO or Synapse ids!"
+                log.error "Mixture of ids provided via --input: ${no_match_ids.join(', ')}\nPlease provide either SRA / ENA / DDBJ or Synapse ids!"
                 System.exit(1)
             }
         }
diff --git a/lib/WorkflowSra.groovy b/lib/WorkflowSra.groovy
index a2c16219..90d86f1c 100755
--- a/lib/WorkflowSra.groovy
+++ b/lib/WorkflowSra.groovy
@@ -29,4 +29,21 @@ class WorkflowSra {
             "  running nf-core/other pipelines.\n" +
             "==================================================================================="
     }
+
+    // Fail the pipeline if any id in `ids` is a GEO id ("GSE" or "GSM" followed by digits): log an error naming the id via `log`, then exit with status 1
+    public static void isGeoFail(ids, log) {
+        def pattern = /^(GS[EM])(\d+)$/
+        for (id in ids) {
+            if (id =~ pattern) {
+                log.error "===================================================================================\n" +
+                    "  GEO id detected: ${id}\n" +
+                    "  Support for GEO ids was dropped in v1.7 due to breaking changes in the NCBI API.\n" +
+                    "  Please remove any GEO ids from the input samplesheet.\n\n" +
+                    "  Please see:\n" +
+                    "  https://github.com/nf-core/fetchngs/pull/102\n" +
+                    "==================================================================================="
+                System.exit(1) // Exits on the first GEO id found; remaining ids are not checked
+            }
+        }
+    }
 }
diff --git a/main.nf b/main.nf
index c6303a41..2c4b52f2 100644
--- a/main.nf
+++ b/main.nf
@@ -44,7 +44,7 @@ if (WorkflowMain.isSraId(ch_input, log)) {
 } else if (WorkflowMain.isSynapseId(ch_input, log)) {
     input_type = 'synapse'
 } else {
-    exit 1, 'Ids provided via --input not recognised please make sure they are either SRA / ENA / DDBJ / GEO or Synapse ids!'
+    exit 1, 'Ids provided via --input not recognised please make sure they are either SRA / ENA / DDBJ or Synapse ids!'
 }
 
 if (params.input_type == input_type) {
@@ -63,7 +63,7 @@ if (params.input_type == input_type) {
 workflow NFCORE_FETCHNGS {
 
     //
-    // WORKFLOW: Download FastQ files for SRA / ENA / DDBJ / GEO ids
+    // WORKFLOW: Download FastQ files for SRA / ENA / DDBJ ids
     //
     if (params.input_type == 'sra') {
         SRA ( ch_ids )
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a31b9396..a51dc45a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -19,7 +19,7 @@
                     "pattern": "^\\S+\\.txt$",
                     "schema": "assets/schema_input.json",
                     "fa_icon": "fas fa-file-excel",
-                    "description": "File containing SRA/ENA/DDBJ/GEO identifiers one per line to download their associated metadata and FastQ files."
+                    "description": "File containing SRA/ENA/DDBJ identifiers one per line to download their associated metadata and FastQ files."
                 },
                 "input_type": {
                     "type": "string",
diff --git a/workflows/sra.nf b/workflows/sra.nf
index 2b3cb498..0dde4588 100644
--- a/workflows/sra.nf
+++ b/workflows/sra.nf
@@ -50,6 +50,13 @@ workflow SRA {
     main:
     ch_versions = Channel.empty()
 
+    //
+    // Fail the pipeline if GEO ids detected
+    //
+    ids
+        .collect()
+        .map { WorkflowSra.isGeoFail(it, log) }
+
     //
     // MODULE: Get SRA run information for public database ids
     //

From 3696f40cf5bcd4ec3266275b4cb77e4359d76c06 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Fri, 1 Jul 2022 16:35:45 +0100
Subject: [PATCH 10/10] Strip out regex for GEO from input schema

---
 assets/schema_input.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 8bf05fd8..71f0f976 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -8,7 +8,7 @@
         "type": "array",
         "items": {
             "type": "string",
-            "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM])|(syn))(\\d+)$",
+            "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(syn))(\\d+)$",
             "errorMessage": "Please provide a valid SRA, ENA, DDBJ identifier"
         }
     }