Skip to content

Commit

Permalink
[TEST] update small error rate edge case test
Browse files Browse the repository at this point in the history
  • Loading branch information
eaasna committed Mar 18, 2024
1 parent 98a5257 commit 147b070
Show file tree
Hide file tree
Showing 17 changed files with 117 additions and 68 deletions.
22 changes: 11 additions & 11 deletions include/stellar/utils/fraction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct fraction
}

template <typename TDenominator>
constexpr explicit fraction(uint32_t const numerator, TDenominator const denominator, std::enable_if_t<std::is_unsigned_v<TDenominator>, int> = 0) :
constexpr explicit fraction(difference_t const numerator, TDenominator const denominator, std::enable_if_t<std::is_unsigned_v<TDenominator>, int> = 0) :
_numerator{numerator},
_denominator{denominator}
{
Expand All @@ -35,8 +35,8 @@ struct fraction
}

template <typename TDenominator>
constexpr fraction(uint32_t const numerator, TDenominator const denominator, std::enable_if_t<std::is_signed_v<TDenominator>, int> = 0) :
fraction{_abs(numerator), static_cast<uint32_t>(_abs(denominator))}
constexpr fraction(difference_t const numerator, TDenominator const denominator, std::enable_if_t<std::is_signed_v<TDenominator>, int> = 0) :
fraction{_abs(numerator), static_cast<size_t>(_abs(denominator))}
{
if (numerator < 0)
_numerator = -_numerator;
Expand Down Expand Up @@ -71,7 +71,7 @@ struct fraction
};

difference_t numerator = parse_int(float_significand_str);
uint32_t denominator{1u};
std::size_t denominator = 1u;
for (std::size_t exponent = float_fraction_str.size(); exponent > 0u; --exponent)
denominator *= 10;

Expand Down Expand Up @@ -112,8 +112,8 @@ struct fraction
exponent--;
}

uint32_t numerator = std::llround(normalized_value);
uint32_t denominator = 1;
difference_t numerator = std::llround(normalized_value);
difference_t denominator = 1;

if (exponent > 0) // normalized_value * 2^exponent
{
Expand All @@ -135,7 +135,7 @@ struct fraction
return _numerator;
}

constexpr uint32_t denominator() const
constexpr size_t denominator() const
{
return _denominator;
}
Expand Down Expand Up @@ -199,7 +199,7 @@ struct fraction
};
}

constexpr fraction limit_denominator(uint64_t max_denominator=1000000) const
constexpr fraction limit_denominator(size_t max_denominator=1000000) const
{
// https://stackoverflow.com/questions/17537613/does-python-have-a-function-to-reduce-fractions
// https://hg.python.org/cpython/file/822c7c0d27d1/Lib/fractions.py#l211
Expand Down Expand Up @@ -280,9 +280,9 @@ struct fraction
// std::abs is not constexpr
constexpr static auto _abs = [](auto a) { return a < 0 ? -a : a; };

uint32_t _limiter{1000u};
uint32_t _numerator{0u};
uint32_t _denominator{1u};
uint32_t _limiter{10000u};
difference_t _numerator{0u};
size_t _denominator{1u};
};

} // namespace stellar::utils
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ add_library ("${PROJECT_NAME}_interface" INTERFACE)
target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE seqan3::seqan3)
target_link_libraries ("${PROJECT_NAME}_interface" INTERFACE sharg::sharg)
target_include_directories ("${PROJECT_NAME}_interface" INTERFACE ../include)
target_compile_options ("${PROJECT_NAME}_interface" INTERFACE "-pedantic" "-Wall" "-Wextra")
target_compile_options ("${PROJECT_NAME}_interface" INTERFACE "-pedantic" "-Wall")

# ----------------------------------------------------------------------------
# Dependencies
Expand Down
2 changes: 1 addition & 1 deletion src/stellar3.arg_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ void run_stellar(sharg::parser & parser)
}

options.outputFormat = options.outputFile.substr(options.outputFile.size() - 3); // file extension previously validated
options.epsilon = stellar::utils::fraction::from_double(options.numEpsilon).limit_denominator();
options.epsilon = stellar::utils::fraction::from_double_with_limit(options.numEpsilon, options.minLength).limit_denominator();

if (options.strVerificationMethod == to_string(StellarVerificationMethod{AllLocal{}}))
options.verificationMethod = StellarVerificationMethod{AllLocal{}};
Expand Down
4 changes: 3 additions & 1 deletion test/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ target_use_datasources (stellar_test FILES ref.fasta)
target_use_datasources (stellar_test FILES multi_seq_ref.fasta)
target_use_datasources (stellar_test FILES query_e0.fasta)
target_use_datasources (stellar_test FILES query_e0.05.fasta)
target_use_datasources (stellar_test FILES query_e0.0009.fasta)
target_use_datasources (stellar_test FILES query_e0.001.fasta)

target_use_datasources (stellar_test FILES segment_0_0_400_e0.gff)
target_use_datasources (stellar_test FILES segment_0_500_800_e0.gff)
Expand Down Expand Up @@ -92,5 +92,7 @@ target_use_datasources (stellar_test FILES subset_0_1_2_e0.05.stdout)

target_use_datasources (stellar_test FILES er_edge_case_e0.gff)
target_use_datasources (stellar_test FILES er_edge_case_e0.stdout)
target_use_datasources (stellar_test FILES er_edge_case_e0.001.gff)
target_use_datasources (stellar_test FILES er_edge_case_e0.001.stdout)
target_use_datasources (stellar_test FILES er_edge_case_e0.0009.gff)
target_use_datasources (stellar_test FILES er_edge_case_e0.0009.stdout)
4 changes: 2 additions & 2 deletions test/cli/create_gold_standard.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

# hack to stop seqan.app_tests from complaining about relative paths
for eps in "e-1" "75e-3" "5e-2" "25e-3" "e-4"
for eps in "0" "e-1" "75e-3" "5e-2" "25e-3"
do
ln -s ../512_simSeq1_$eps.fa gold_standard/512_simSeq1_$eps.fa
ln -s ../512_simSeq2_$eps.fa gold_standard/512_simSeq2_$eps.fa
Expand All @@ -28,7 +28,7 @@ echo "./gold_standard_for.sh reverse protein" >> $com_file
parallel --jobs 6 < $com_file
rm $com_file

for eps in "e-1" "75e-3" "5e-2" "25e-3" "e-4"
for eps in "0" "e-1" "75e-3" "5e-2" "25e-3"
do
rm gold_standard/512_simSeq1_$eps.fa
rm gold_standard/512_simSeq2_$eps.fa
Expand Down
8 changes: 4 additions & 4 deletions test/cli/generate_dna5_reverse_outputs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ call_stellar_reverse()
${STELLAR} -e $1 -l 50 -x 10 -k 7 -n 5000 -s 10000 -r -v -o $2.txt ../../512_simSeq1_$2.fa ../../512_simSeq2_$2.fa > $2.txt.stdout
}

eps="0"
errRate=0
call_stellar_reverse $errRate $eps

eps="e-1"
errRate=0.1
call_stellar_reverse $errRate $eps
Expand All @@ -32,10 +36,6 @@ eps="25e-3"
errRate=0.025
call_stellar_reverse $errRate $eps

eps="e-4"
errRate=0.0001
call_stellar_reverse $errRate $eps

# ============================================================
# Varying minimal lengths
# ============================================================
Expand Down
8 changes: 4 additions & 4 deletions test/cli/gold_standard_for.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ call_stellar()
$ref_file $query_file > $dir/$2.txt.stdout
}

eps="0"
errRate=0
call_stellar $errRate $eps

eps="e-1"
errRate=0.1
call_stellar $errRate $eps
Expand All @@ -58,10 +62,6 @@ eps="25e-3"
errRate=0.025
call_stellar $errRate $eps

eps="e-4"
errRate=0.0001
call_stellar $errRate $eps

# ============================================================
# Varying minimal lengths
# ============================================================
Expand Down
4 changes: 2 additions & 2 deletions test/cli/stellar_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ TEST_P(search_small_error, edge_case)

cli_test_result const result = execute_app("stellar",
data("ref.fasta"),
data("query_e0.0009.fasta"),
data("query_e0.001.fasta"),
"-o small_er_out.gff",
"--epsilon ", err_str,
"--minLength 1000",
Expand Down Expand Up @@ -153,7 +153,7 @@ TEST_P(search_small_error, edge_case)

INSTANTIATE_TEST_SUITE_P(edge_case_suite,
search_small_error,
testing::Values(0.0f, 0.0009f),
testing::Values(0.0f, 0.0009f, 0.001f),
[] (testing::TestParamInfo<search_small_error::ParamType> const & info)
{
std::string name = "er_edge_case_" + std::to_string((int) std::round(info.param * 10000));
Expand Down
10 changes: 7 additions & 3 deletions test/data/create_output.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ done
# ZERO ERRORS EDGE CASE
############################################################
minLen=1000
simulationErrRate=0.0009
searchErrRate=0
${STELLAR} -e $searchErrRate -l $minLen -v -o er_edge_case/e$searchErrRate.gff ref.fasta query_e$simulationErrRate.fasta > er_edge_case/e$searchErrRate.stdout
simulationErrRate=0.001
${STELLAR} -e $simulationErrRate -l $minLen -v -o er_edge_case/e$simulationErrRate.gff ref.fasta query_e$simulationErrRate.fasta > er_edge_case/e$simulationErrRate.stdout

for searchErrRate in 0 0.0009
do
${STELLAR} -e $searchErrRate -l $minLen -v -o er_edge_case/e$searchErrRate.gff ref.fasta query_e$simulationErrRate.fasta > er_edge_case/e$searchErrRate.stdout
done

15 changes: 12 additions & 3 deletions test/data/datasources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ declare_datasource (FILE multi_seq_ref.fasta
declare_datasource (FILE query_e0.0009.fasta
URL ${CMAKE_SOURCE_DIR}/test/data/query_e0.0009.fasta
URL_HASH SHA256=1ad1a3d42c801b6518d23ff193b0002da316a290eedde84c96a2ea27693fdc51)
declare_datasource (FILE query_e0.001.fasta
URL ${CMAKE_SOURCE_DIR}/test/data/query_e0.001.fasta
URL_HASH SHA256=5aa1989d674d0c0f23b44d001b22b167117b35ea8d5588270c76809d40e3386e)
declare_datasource (FILE query_e0.05.fasta
URL ${CMAKE_SOURCE_DIR}/test/data/query_e0.05.fasta
URL_HASH SHA256=5c055b271113473970757db3e313353906b387a46424a88bc137e7948d10066d)
Expand Down Expand Up @@ -115,13 +118,19 @@ declare_datasource (FILE segment_all_e0.stdout
URL_HASH SHA256=b9755a5a95e7c7ae98e0b553dfb12b0a2027a985626c1484272d2c572d5fe9bc)
declare_datasource (FILE er_edge_case_e0.0009.gff
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.0009.gff
URL_HASH SHA256=ce7648a8aa6496624c491ee899ce1ff8e00abe244383352d1e8425c36c4592fb)
URL_HASH SHA256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855)
declare_datasource (FILE er_edge_case_e0.0009.stdout
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.0009.stdout
URL_HASH SHA256=5a79eb40971d32c93a7c71a90a9ce7bed24992b261b61f38112d15490a5d6b3d)
URL_HASH SHA256=ff67d13e22be4cf67a07e9fa264a04562c557e1dcfba7ac91b03d27e7affa70c)
declare_datasource (FILE er_edge_case_e0.001.gff
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.001.gff
URL_HASH SHA256=3c91be74c5473c098e507a6974755cba5f57f3266dc6c507e9c49fd92d9c11e5)
declare_datasource (FILE er_edge_case_e0.001.stdout
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.001.stdout
URL_HASH SHA256=ff38ce270a2aeeb86bd6c9a31c2a341a9280ef8483d5c502b355c8250b6d5007)
declare_datasource (FILE er_edge_case_e0.gff
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.gff
URL_HASH SHA256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855)
declare_datasource (FILE er_edge_case_e0.stdout
URL ${CMAKE_SOURCE_DIR}/test/data/er_edge_case/e0.stdout
URL_HASH SHA256=1a29b22b43675a844353931154ce2f88c195fc04ab073fdc83be16fa19ce5766)
URL_HASH SHA256=92030d9313d5e83dc6d3d237db0d9773038ba15660b1706e816e1a2765d02d4c)
1 change: 0 additions & 1 deletion test/data/er_edge_case/e0.0009.gff
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
1 Stellar eps-matches 544 1693 99.913 + . 1;seq2Range=1807,2956;eValue=0;cigar=1150M;mutations=865A
17 changes: 6 additions & 11 deletions test/data/er_edge_case/e0.0009.stdout
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
I/O options:
database file : ref.fasta
query file : query_e0.0009.fasta
query file : query_e0.001.fasta
alphabet : dna5
output file : er_edge_case/e0.0009.gff
output format : gff

User specified parameters:
minimal match length : 1000
maximal error rate (epsilon) : 0.0009 = (9/10000)
maximal error rate (epsilon) : 0 = (0/1)
maximal x-drop : 5
search forward strand : yes
search reverse complement : yes
Expand All @@ -18,11 +18,11 @@ User specified parameters:

Calculated parameters:
k-mer length : 32
s^min : 556
s^min : 1000
threshold : 969
distance cut : 1032
distance cut : 1000
delta : 16
overlap : 1
overlap : 0

Loaded 1 query sequence.
Loaded 1 database sequence.
Expand All @@ -34,11 +34,6 @@ Constructing index...

Aligning all query sequences to database sequence...
1
# SWIFT hits : 1
Longest hit : 1150
Avg hit length : 1150
1, complement

# Eps-matches : 1
Longest eps-match : 1150
Avg match length : 1150
# Eps-matches : 0
1 change: 1 addition & 0 deletions test/data/er_edge_case/e0.001.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 Stellar eps-matches 639 1640 99.9001 + . 1;seq2Range=1806,2807;eValue=0;cigar=1002M;mutations=754A
44 changes: 44 additions & 0 deletions test/data/er_edge_case/e0.001.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
I/O options:
database file : ref.fasta
query file : query_e0.001.fasta
alphabet : dna5
output file : er_edge_case/e0.001.gff
output format : gff

User specified parameters:
minimal match length : 1000
maximal error rate (epsilon) : 0.001 = (1/1000)
maximal x-drop : 5
search forward strand : yes
search reverse complement : yes

verification strategy : exact
maximal number of matches : 50
duplicate removal every : 500

Calculated parameters:
k-mer length : 32
s^min : 500
threshold : 937
distance cut : 1000
delta : 16
overlap : 1

Loaded 1 query sequence.
Loaded 1 database sequence.

All matches resulting from your search have an E-value of:
0 or smaller (match score = 1, error penalty = -2)

Constructing index...

Aligning all query sequences to database sequence...
1
# SWIFT hits : 1
Longest hit : 1002
Avg hit length : 1002
1, complement

# Eps-matches : 1
Longest eps-match : 1002
Avg match length : 1002
5 changes: 1 addition & 4 deletions test/data/er_edge_case/e0.stdout
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
I/O options:
database file : ref.fasta
query file : query_e0.0009.fasta
query file : query_e0.001.fasta
alphabet : dna5
output file : er_edge_case/e0.gff
output format : gff
Expand Down Expand Up @@ -34,9 +34,6 @@ Constructing index...

Aligning all query sequences to database sequence...
1
# SWIFT hits : 1
Longest hit : 1150
Avg hit length : 1150
1, complement

# Eps-matches : 0
34 changes: 16 additions & 18 deletions test/data/query_e0.0009.fasta → test/data/query_e0.001.fasta
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,19 @@ TCAGAAAAATGGGAGTTCGGTGGCAACAAAGATTTGCACTACATTAGGAAGCTCTGCGCGGAACTGTTTTCTTAGGGAGA
TGTCGCATGTGACAACCTTCTCAGCAGCAGCTGCCGTAGAGGAGCGGAATTAACACAGTTCGTCATTGTATACCAGCGTG
TGTAGGGCGTGGGTTCTCCCTAGTGTGGCACAAAAGGACCAACCGCCTCGCCGAAATTCCATACAAGGGGCTTCGCTGCA
TTACGGTAAGCTCGAGACCCAGACACTGGAAACTACCGCAGCCTTATCGTGCTACAGCGTGATCACTAGTTAACAGGTGG
TGAGGTACACCGGAAAAGCGCATCTAGGAAGCAGGTACGGAACTCGTACACCTCTTATGCCTGTTGTGGGAGGCTCGGTC
TTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAAGTGATCCAGAGTAGACCCGCGGGGGCC
TGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTCGCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGG
CGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAGCCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAG
CGATGACTTCCTGATAGAACTCAAGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACC
AGGTATGGTGCTCATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGAT
TTAGCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTAACTAAAA
ACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCCATACTGCCAGAGTATTA
TGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTATGCAATTGCGGCATCTCAACTATGTCTT
AATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGCGCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAG
TATGGGCTTGTGCGGGTCAACGGGCGCCGTCAAAGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACG
GGAGCCTAGGCAATCCCGACGTCCCGCGTGATGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATT
GTTGGCAGCTTTACAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGA
TTACTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGCGGACAAC
CTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAATGTGCCGATTCTTTGGC
TCGGGTCGATAATCACTATGCTCGTTAATCGTGCCATCCACTGCCCGCTTTAATCCCAACCGGCGTTTCTGCTACATGCT
AGTCACGAAGGGGCGCCAGGGGATAGGCGAAAGCACTGGCTACGTCATCCTTGCAATAAAAACCGTTGCCTGGCACGGTC
ATCAAGTATCTCGCGTCGCCCAGTGGCCCC
TGAGGTACACCGGAAAAGCGCATCTAGGAAGCAGGTACGGAACTCGAGTAGACCCGCGGGGGCCTGACCTAACCTATATA
AGTTGTATCGTGGCTATGAGGGTAGTCGCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCG
GATCGGTTGTAAGCTAGCCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATA
GAACTCAAGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTCATC
TCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTAGCCCCGGCAAATA
ACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTAACTAAAAACGCCACGGACCTGGC
TAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCCATACTGCCAGAGTATTATGTGGGTGGTAAACAT
AGATTCTATATAGTCAACGACATACACTCATTATTATGCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGA
TGGCGAAAACGATCTTACAGGAGAAGCGCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGG
TCAACGGGCGCCGTCAAAGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATAC
CGACGTCCCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTACAG
TTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTACTATCATCGAAGG
GTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGCGGACAACCTCCGATGTACCTACT
CCATAGTGGCTCGGGTCGATAATCACTATGCTCGTTAATCGTGCCATCCACTGCCCGCTTTAATCCCAACCGGCGTTTCT
GCTACATGCTAGTCACGAAGGGGCGCCAGGGGATAGGCGAAAGCACTGGCTACGTCATCCTTGCAATAAAAACCGTTGCC
TGGCACGGTCATCAAGTATCTCGCGTCGCCCAGTGGCCCC
4 changes: 2 additions & 2 deletions test/data/simulate_input.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ do
done

# Low error rate edge case
error_rate=0.0009
error_rate=0.001
match_count=1
match_length=1150
match_length=1000
echo "Generating $match_count reads of length $match_length with error rate $error_rate"
generate_local_matches \
--matches-out matches_e$error_rate.fasta \
Expand Down

0 comments on commit 147b070

Please sign in to comment.