diff --git a/README.md b/README.md index cbec31445..50e9f67e0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,23 @@ # STELLAR3 - the SwifT Exact LocaL AligneR -This is similar to [STELLAR](https://github.com/seqan/seqan/tree/master/apps/stellar) but adapted for seqan3 usage and -added new cool features. +This is similar to [STELLAR](https://github.com/seqan/seqan/tree/main/apps/stellar) but uses the [sharg parser](https://docs.seqan.de/seqan3/3-master-user/tutorial_argument_parser.html) and can be used to narrow the search to only a segment of the reference database. - -Instructions: +## Building Stellar3: 1. clone this repository: `git clone --recurse-submodules https://github.com/seqan/stellar3` 2. create a build directory and visit it: `mkdir stellar3/build && cd $_` 3. run cmake: `cmake ..` 4. build the application: `make` -5. optional: build and run the tests: `make test` -6. execute the app: `./bin/stellar --help` +5. show the help page: `./bin/stellar --help` + +## Running Stellar3: +The `example` folder contains example data for a test run. Stellar3 can be used to produce soft-clipped read alignments or to find all local alignments between two genomes. + +Searching the reference sequences for local alignments with a minimum length of 50 and a maximum error rate of 0.02: +`./bin/stellar -e 0.02 -l 50 -o mapped_reads.gff ../example/NC_001477.fasta ../example/reads.fasta` + +Searching only a segment of the reference for local alignments: +`./bin/stellar --sequenceOfInterest 0 --segmentBegin 5000 --segmentEnd 7500 -e 0.02 -l 50 -o segment.mapped.reads.gff ../example/NC_001477.fasta ../example/reads.fasta` + +Finding all local alignments between two related genomes: +`./bin/stellar -e 0.1 -l 50 -o alignments.gff ../example/NC_001474.fasta ../example/NC_001477.fasta` + +See the original [documentation](https://github.com/seqan/seqan/tree/main/apps/stellar) for details on the Stellar output formats. 
diff --git a/src/example/NC_001474.fasta b/example/NC_001474.fasta similarity index 100% rename from src/example/NC_001474.fasta rename to example/NC_001474.fasta diff --git a/src/example/NC_001477.fasta b/example/NC_001477.fasta similarity index 100% rename from src/example/NC_001477.fasta rename to example/NC_001477.fasta diff --git a/example/alignments.gff b/example/alignments.gff new file mode 100644 index 000000000..b5f691e8c --- /dev/null +++ b/example/alignments.gff @@ -0,0 +1,14 @@ +gi|158976983|ref|NC_001474.2| Stellar eps-matches 10542 10723 90.3225 + . gi|9626685|ref|NC_001477.1|;seq2Range=10553,10735;eValue=2.35373e-61;cigar=30M1D5M1I22M1D7M1I1M1I50M1D2M1I62M;mutations=10C,11A,13C,16G,17G,27C,28C,32G,36T,66C,68C,74C,97G,121A,149A +gi|158976983|ref|NC_001474.2| Stellar eps-matches 10558 10723 91.7647 + . gi|9626685|ref|NC_001477.1|;seq2Range=10570,10735;eValue=2.74788e-59;cigar=1D15M1D3M1I28M1I1M1D1M1I50M1D2M1I62M;mutations=10C,11C,14G,19T,48G,51C,57C,80G,104A,132A +gi|158976983|ref|NC_001474.2| Stellar eps-matches 9601 9710 90.09 + . gi|9626685|ref|NC_001477.1|;seq2Range=9602,9711;eValue=1.90335e-33;cigar=78M1D1M1I30M;mutations=10T,12G,28G,30A,33A,44C,45G,62A,80A,90T +gi|158976983|ref|NC_001474.2| Stellar eps-matches 10290 10384 90.625 + . gi|9626685|ref|NC_001477.1|;seq2Range=10311,10405;eValue=8.52734e-29;cigar=79M1D4M1I11M;mutations=10A,21C,37C,42T,45G,47G,55G,84T +gi|158976983|ref|NC_001474.2| Stellar eps-matches 10441 10520 90 + . gi|9626685|ref|NC_001477.1|;seq2Range=10456,10534;eValue=4.46015e-22;cigar=9M1D70M;mutations=9C,19T,20G,46G,51C,52A,59T +gi|158976983|ref|NC_001474.2| Stellar eps-matches 43 121 90 + . gi|9626685|ref|NC_001477.1|;seq2Range=42,118;eValue=4.46015e-22;cigar=3M1I4M1D26M1D43M1D;mutations=4A,8T,12T,59C,75A +gi|158976983|ref|NC_001474.2| Stellar eps-matches 3093 3157 90.7692 + . 
gi|9626685|ref|NC_001477.1|;seq2Range=3091,3155;eValue=1.99822e-17;cigar=65M;mutations=13C,19T,22A,37C,40G,58C +gi|158976983|ref|NC_001474.2| Stellar eps-matches 5419 5477 90 + . gi|9626685|ref|NC_001477.1|;seq2Range=5420,5478;eValue=7.66823e-15;cigar=16M1D1M1I41M;mutations=9C,18T,27A,35C,42C +gi|158976983|ref|NC_001474.2| Stellar eps-matches 9046 9101 91.0714 + . gi|9626685|ref|NC_001477.1|;seq2Range=9047,9102;eValue=2.52061e-14;cigar=56M;mutations=19A,20G,30T,33A,36C +gi|158976983|ref|NC_001474.2| Stellar eps-matches 3400 3452 90.566 + . gi|9626685|ref|NC_001477.1|;seq2Range=3398,3450;eValue=8.95234e-13;cigar=53M;mutations=3C,18C,34G,36C,45G +gi|158976983|ref|NC_001474.2| Stellar eps-matches 7785 7835 90.3846 + . gi|9626685|ref|NC_001477.1|;seq2Range=7789,7839;eValue=2.94271e-12;cigar=15M1I1M1D34M;mutations=16C,19A,34A,40T +gi|158976983|ref|NC_001474.2| Stellar eps-matches 131 180 90.196 + . gi|9626685|ref|NC_001477.1|;seq2Range=129,178;eValue=9.67292e-12;cigar=42M1I3M1D4M;mutations=22C,38A,43T,45T +gi|158976983|ref|NC_001474.2| Stellar eps-matches 7066 7115 90 + . gi|9626685|ref|NC_001477.1|;seq2Range=7070,7118;eValue=3.17957e-11;cigar=45M1D4M;mutations=10A,12A,27A,36A +gi|158976983|ref|NC_001474.2| Stellar eps-matches 8989 9038 90.196 + . gi|9626685|ref|NC_001477.1|;seq2Range=8990,9039;eValue=9.67292e-12;cigar=15M1D2M1I32M;mutations=18G,24G,30T,48T diff --git a/example/mapped_reads.gff b/example/mapped_reads.gff new file mode 100644 index 000000000..2d848d5af --- /dev/null +++ b/example/mapped_reads.gff @@ -0,0 +1,7 @@ +gi|9626685|ref|NC_001477.1| Stellar eps-matches 10580 10629 98.0392 + . read01;seq2Range=24,74;eValue=4.77542e-20;cigar=19M1I31M;mutations=20C +gi|9626685|ref|NC_001477.1| Stellar eps-matches 6870 6923 98.1818 + . read03;seq2Range=46,100;eValue=4.09044e-22;cigar=29M1I25M;mutations=30C +gi|9626685|ref|NC_001477.1| Stellar eps-matches 6418 6519 98.0392 + . 
read04;seq2Range=1,100;eValue=7.44922e-45;cigar=76M1D24M1D;mutations= +gi|9626685|ref|NC_001477.1| Stellar eps-matches 9160 9222 98.4126 + . read08;seq2Range=11,73;eValue=3.00114e-26;cigar=63M;mutations=52N +gi|9626685|ref|NC_001477.1| Stellar eps-matches 7149 7217 98.5507 + . read10;seq2Range=1,69;eValue=2.37916e-29;cigar=69M;mutations=48A +gi|9626685|ref|NC_001477.1| Stellar eps-matches 9870 9939 98.5915 - . read02;seq2Range=1,71;eValue=2.20193e-30;cigar=20M1I50M;mutations=21C +gi|9626685|ref|NC_001477.1| Stellar eps-matches 9056 9151 98.9583 - . read06;seq2Range=6,100;eValue=2.64571e-43;cigar=2M1D93M;mutations= diff --git a/src/example/reads.fasta b/example/reads.fasta similarity index 100% rename from src/example/reads.fasta rename to example/reads.fasta diff --git a/example/segment.mapped.reads.gff b/example/segment.mapped.reads.gff new file mode 100644 index 000000000..fbfc91d5e --- /dev/null +++ b/example/segment.mapped.reads.gff @@ -0,0 +1,3 @@ +gi|9626685|ref|NC_001477.1| Stellar eps-matches 6870 6923 98.1818 + . read03;seq2Range=46,100;eValue=4.09044e-22;cigar=29M1I25M;mutations=30C +gi|9626685|ref|NC_001477.1| Stellar eps-matches 6418 6519 98.0392 + . read04;seq2Range=1,100;eValue=7.44922e-45;cigar=76M1D24M1D;mutations= +gi|9626685|ref|NC_001477.1| Stellar eps-matches 7149 7217 98.5507 + . read10;seq2Range=1,69;eValue=2.37916e-29;cigar=69M;mutations=48A