From 885db860b184cfd8dbec46d99bdc11b1516d38b6 Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Fri, 16 Aug 2024 18:40:40 -0400 Subject: [PATCH 01/10] PD-5085 new AMRFinderPlus column names --- Makefile | 6 +++--- amrfinder_columns.hpp | 30 +++++++++++++++++++++++++++ stxtyper.cpp | 48 ++++++++++++++++++++++--------------------- version.txt | 2 +- 4 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 amrfinder_columns.hpp diff --git a/Makefile b/Makefile index b02d9c3..004b60c 100644 --- a/Makefile +++ b/Makefile @@ -90,17 +90,17 @@ all: $(BINARIES) common.o: common.hpp common.inc -stxtyper.o: common.hpp common.inc +stxtyper.o: common.hpp common.inc amrfinder_columns.hpp tsv.hpp version.txt stxtyperOBJS=stxtyper.o common.o tsv.o stxtyper: $(stxtyperOBJS) $(CXX) -o $@ $(stxtyperOBJS) -pthread $(DBDIR) -fasta_check.o: common.hpp common.inc +fasta_check.o: common.hpp common.inc version.txt fasta_checkOBJS=fasta_check.o common.o fasta_check: $(fasta_checkOBJS) $(CXX) -o $@ $(fasta_checkOBJS) -fasta_extract.o: common.hpp common.inc +fasta_extract.o: common.hpp common.inc version.txt fasta_extractOBJS=fasta_extract.o common.o fasta_extract: $(fasta_extractOBJS) $(CXX) -o $@ $(fasta_extractOBJS) diff --git a/amrfinder_columns.hpp b/amrfinder_columns.hpp new file mode 100644 index 0000000..5501b6f --- /dev/null +++ b/amrfinder_columns.hpp @@ -0,0 +1,30 @@ +// AMRFinderPlus column names +// PD-5085 + +constexpr const char* prot_colName = "Protein id"; // PD-2534 +constexpr const char* contig_colName = "Contig id"; +// Target +constexpr const char* start_colName = "Start"; +constexpr const char* stop_colName = "Stop"; +constexpr const char* strand_colName = "Strand"; +// +constexpr const char* genesymbol_colName = "Element symbol"; // PD-4924 +constexpr const char* elemName_colName = "Element name"; // PD-4910 +constexpr const char* scope_colName = "Scope"; // PD-2825 +// PD-1856 +constexpr const char* type_colName = "Type"; +constexpr const char* 
subtype_colName = "Subtype"; +constexpr const char* class_colName = "Class"; +constexpr const char* subclass_colName = "Subclass"; +// +constexpr const char* method_colName = "Method"; +constexpr const char* targetLen_colName = "Element length"; // was: "Target length" ?? +constexpr const char* refLen_colName = "Reference sequence length"; +constexpr const char* refCov_colName = "% Coverage of reference"; +constexpr const char* refIdent_colName = "% Identity to reference"; +constexpr const char* alignLen_colName = "Alignment length"; +constexpr const char* closestRefAccession_colName = "Closest reference accession"; +constexpr const char* closestRefName_colName = "Closest reference name"; +constexpr const char* hmmAccession_colName = "HMM accession"; +constexpr const char* hmmDescr_colName = "HMM description"; +constexpr const char* hierarchyNode_colName = "Hierarchy node"; diff --git a/stxtyper.cpp b/stxtyper.cpp index cac0c5e..9650c5d 100644 --- a/stxtyper.cpp +++ b/stxtyper.cpp @@ -32,6 +32,7 @@ * Dependencies: NCBI BLAST, gunzip (optional) * * Release changes: +* 1.0.25 08/16/2024 PD-5085 AMRFinderPlus column names to match MicroBIGG-E * 1.0.24 08/05/2024 PD-5076 "na" -> "NA" * 1.0.23 07/29/2024 PD-5064 AMBIGUOUS operon type * 1.0.22 07/25/2024 First codon L|I|V -> M @@ -94,6 +95,7 @@ #include "common.hpp" #include "tsv.hpp" using namespace Common_sp; +#include "amrfinder_columns.hpp" #include "common.inc" @@ -950,31 +952,31 @@ struct ThisApplication : ShellApplication td << "name"; if (amrfinder) { - td << /* 1*/ "Protein identifier" - << /* 2*/ "Contig id" - << /* 3*/ "Start" - << /* 4*/ "Stop" - << /* 5*/ "Strand" - << /* 6*/ "Element symbol" // PD-4924 - << /* 7*/ "Element name" // PD-4910 - << /* 8*/ "Scope" - << /* 9*/ "Element type" - << /*10*/ "Element subtype" - << /*11*/ "Class" - << /*12*/ "Subclass" - << /*13*/ "Method" - << /*14*/ "Target length" - << /*15*/ "Reference sequence length" - << /*16*/ "% Coverage of reference sequence" - << /*17*/ "% 
Identity to reference sequence" - << /*18*/ "Alignment length" - << /*19*/ "Accession of closest sequence" - << /*20*/ "Name of closest sequence" - << /*21*/ "HMM id" - << /*22*/ "HMM description" + td << /* 1*/ prot_colName + << /* 2*/ contig_colName + << /* 3*/ start_colName + << /* 4*/ stop_colName + << /* 5*/ strand_colName + << /* 6*/ genesymbol_colName + << /* 7*/ elemName_colName + << /* 8*/ scope_colName + << /* 9*/ type_colName + << /*10*/ subtype_colName + << /*11*/ class_colName + << /*12*/ subclass_colName + << /*13*/ method_colName + << /*14*/ targetLen_colName + << /*15*/ refLen_colName + << /*16*/ refCov_colName + << /*17*/ refIdent_colName + << /*18*/ alignLen_colName + << /*19*/ closestRefAccession_colName + << /*20*/ closestRefName_colName + << /*21*/ hmmAccession_colName + << /*22*/ hmmDescr_colName ; if (print_node) - td << "Hierarchy node"; + td << hierarchyNode_colName; } else td << "target_contig" diff --git a/version.txt b/version.txt index 79728fe..4a4127c 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.24 +1.0.25 From e7620714ad6a8d0a1748c341bf545c38ec3111e7 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Tue, 20 Aug 2024 15:54:27 -0400 Subject: [PATCH 02/10] Update for new AMRFinderPlus format --- README.md | 2 ++ test/amrfinder_integration.expected | 2 +- test/amrfinder_integration2.expected | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 993894f..1413810 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ StxTyper is used to determine stx type from nucleotide sequence. Stx (Shiga-toxin) genes are found in some strains of _Escherichia coli_ and code for powerful toxins that can cause severe illness. StxTyper is software to classify these genes from assembled sequence using a standard algorithm. +## WARNING: This is currently beta software and changes and new releases may come quickly. 
Please report any issues or comments to pd-help@ncbi.nlm.nih.gov or open an issue on GitHub. + # Installation ## Installing with Bioconda diff --git a/test/amrfinder_integration.expected b/test/amrfinder_integration.expected index 0ddbf18..b447c3b 100644 --- a/test/amrfinder_integration.expected +++ b/test/amrfinder_integration.expected @@ -1,4 +1,4 @@ -#Protein identifier Contig id Start Stop Strand Element symbol Element name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA diff --git a/test/amrfinder_integration2.expected b/test/amrfinder_integration2.expected index 9660200..e16e1b5 100644 --- a/test/amrfinder_integration2.expected +++ b/test/amrfinder_integration2.expected @@ -1,4 +1,4 @@ -#Protein identifier Contig id Start Stop Strand Element symbol Element name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest 
sequence HMM id HMM description Hierarchy node +#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c, stxB2a NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a, stxA2c NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a, stxB1a From dd36bcdf7b3ce42f5c4471f4bec39921c28dd5f9 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Wed, 21 Aug 2024 10:25:05 -0400 Subject: [PATCH 03/10] Added conda action test --- .github/workflows/conda.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/conda.yml diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml new file mode 100644 index 0000000..19533eb --- /dev/null +++ b/.github/workflows/conda.yml @@ -0,0 +1,34 @@ +name: Linux bioconda + +on: + workflow_dispatch: + schedule: + - cron: '30 10 * * *' # 3:15am everyday + repository_dispatch: + types: [linux-bioconda-test, install-test] +jobs: + x86_linux: + runs-on: ubuntu-latest + steps: + - name: When was this run + run: date + - name: configure conda + run: | + . $CONDA/bin/activate + conda config --add channels defaults + conda config --add channels bioconda + conda config --add channels conda-forge + - name: install StxTyper + run: | + . 
$CONDA/bin/activate + conda install --update-deps -c conda-forge -c bioconda --strict-channel-priority -y stxtyper + stxtyper --version + - name: download tests + run: | + BASE_URL=https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/master + curl --silent --location -O ${BASE_URL}/test_stxtyper.sh + - name: run tests + run: | + source /usr/share/miniconda/bin/activate + echo CONDA_PREFIX = $CONDA_PREFIX + bash ./test_stxtyper.sh path From 4c2999e53374660cde04e0d8c22605e8183b84eb Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Wed, 21 Aug 2024 10:50:36 -0400 Subject: [PATCH 04/10] Add trigger for testing --- .github/workflows/conda.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 19533eb..24f9dca 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,9 +1,10 @@ name: Linux bioconda on: + push: workflow_dispatch: schedule: - - cron: '30 10 * * *' # 3:15am everyday + - cron: '30 10 * * *' # repository_dispatch: types: [linux-bioconda-test, install-test] jobs: From 2021afd139e36aee90f018cd23efafb7d69df664 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 22 Aug 2024 08:15:03 -0400 Subject: [PATCH 05/10] Fix bug in conda action --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 24f9dca..538ebbe 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -22,7 +22,7 @@ jobs: - name: install StxTyper run: | . 
$CONDA/bin/activate - conda install --update-deps -c conda-forge -c bioconda --strict-channel-priority -y stxtyper + conda install --update-deps -c conda-forge -c bioconda --strict-channel-priority -y ncbi-stxtyper stxtyper --version - name: download tests run: | From 5f0c0eb51d0e50b4f5b16125c76263716dd4ecab Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 22 Aug 2024 08:35:47 -0400 Subject: [PATCH 06/10] Add MacOS to conda test --- .github/workflows/conda.yml | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 538ebbe..faca34f 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,4 +1,4 @@ -name: Linux bioconda +name: Bioconda on: push: @@ -33,3 +33,38 @@ jobs: source /usr/share/miniconda/bin/activate echo CONDA_PREFIX = $CONDA_PREFIX bash ./test_stxtyper.sh path + x86_mac: + runs-on: macos-latest + steps: + - name: Install conda b/c built-in conda was borked + run: | + curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh + bash ./Miniconda3-latest-MacOSX-x86_64.sh -b -p /Users/runner/miniconda3 + - name: Configure conda + run: | + source /Users/runner/miniconda3/bin/activate + conda init + # THIS DOESN"T WORK! Just install miniconda myself + # . $CONDA/bin/activate + conda config --add channels defaults + conda config --add channels bioconda + conda config --add channels conda-forge + # permissions are messed up on the mac runner + # Is this faster than installing miniconda myself? 
+ # sudo chown -R 501:20 $CONDA + conda update conda + - name: Install StxTyper + run: | + source /Users/runner/miniconda3/bin/activate + conda install --update-deps -c bioconda -c conda-forge -y ncbi-stxtyper + - name: Download tests + run: | + BASE_URL=https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/master + curl --silent --location -O ${BASE_URL}/test_stxtyper.sh + - name: Run tests + source /usr/share/miniconda/bin/activate + echo CONDA_PREFIX = $CONDA_PREFIX + bash ./test_stxtyper.sh path + + + From b059503fc952a7f41d0c572eb5efbe1cb0d01cfd Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 22 Aug 2024 08:42:27 -0400 Subject: [PATCH 07/10] Fix whitespace in conda action (?) --- .github/workflows/conda.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index faca34f..420e86f 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -53,15 +53,15 @@ jobs: # Is this faster than installing miniconda myself? 
# sudo chown -R 501:20 $CONDA conda update conda - - name: Install StxTyper - run: | + - name: Install StxTyper + run: | source /Users/runner/miniconda3/bin/activate conda install --update-deps -c bioconda -c conda-forge -y ncbi-stxtyper - - name: Download tests - run: | - BASE_URL=https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/master + - name: Download tests + run: | + BASE_URL=https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/master curl --silent --location -O ${BASE_URL}/test_stxtyper.sh - - name: Run tests + - name: Run tests source /usr/share/miniconda/bin/activate echo CONDA_PREFIX = $CONDA_PREFIX bash ./test_stxtyper.sh path From 821eca166352208c91d2a3b0f0d83fc6445f49b0 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 22 Aug 2024 08:44:40 -0400 Subject: [PATCH 08/10] Another fix for the conda action --- .github/workflows/conda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 420e86f..b0e5eb2 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -62,6 +62,7 @@ jobs: BASE_URL=https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/master curl --silent --location -O ${BASE_URL}/test_stxtyper.sh - name: Run tests + run: | source /usr/share/miniconda/bin/activate echo CONDA_PREFIX = $CONDA_PREFIX bash ./test_stxtyper.sh path From 12dc21b2f1d3f9512155200a8456108f43e7a402 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 22 Aug 2024 08:53:28 -0400 Subject: [PATCH 09/10] Fix erroneous path in mac conda test --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index b0e5eb2..7e47f58 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -63,7 +63,7 @@ jobs: curl --silent --location -O ${BASE_URL}/test_stxtyper.sh - name: Run tests run: | - source /usr/share/miniconda/bin/activate + source /Users/runner/miniconda3/bin/activate 
echo CONDA_PREFIX = $CONDA_PREFIX bash ./test_stxtyper.sh path From 94ac374d0eca8e6d97d198c1db8cf26baa0c785c Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Fri, 18 Oct 2024 14:50:00 -0400 Subject: [PATCH 10/10] Don't run conda test on push --- .github/workflows/conda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 7e47f58..824ebd1 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,7 +1,6 @@ name: Bioconda on: - push: workflow_dispatch: schedule: - cron: '30 10 * * *' #