diff --git a/data/2024-04-12_leung/1/adapters.fasta b/data/2024-04-12_leung/1/adapters.fasta new file mode 100644 index 0000000..c993fc0 --- /dev/null +++ b/data/2024-04-12_leung/1/adapters.fasta @@ -0,0 +1,41 @@ +>0 +TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG +>1 +GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG +>2 +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>3 +unspecified +>4 +GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +>5 +AGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>6 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>7 +CTGTCTCTTATACACATCTGACGCTGCCGACGA +>8 +CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC +T +>9 +TGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>10 +CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT +>11 +ACACTCTTTCCCTACACGACGCTCTTCCGATCT +>12 +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>13 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC +>14 +GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +>15 +CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +>16 +CAAGCAGAAGACGGCATACGAGAT +>17 +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>18 +heifigepsna +>19 +GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG diff --git a/data/2024-04-12_leung/1/bracken_counts.tsv b/data/2024-04-12_leung/1/bracken_counts.tsv new file mode 100644 index 0000000..a091a15 --- /dev/null +++ b/data/2024-04-12_leung/1/bracken_counts.tsv @@ -0,0 +1,401 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads sample +Eukaryota 2759 D 628649 846 629495 0.50701 SRR10002747 +Bacteria 2 D 610755 127 610882 0.49202 SRR10002747 +Archaea 2157 D 874 0 874 7e-4 SRR10002747 +Viruses 10239 D 312 12 324 2.6e-4 SRR10002747 +Bacteria 2 D 645143 227 645370 0.67442 SRR10002760 +Eukaryota 2759 D 307163 689 307852 0.32171 SRR10002760 +Archaea 2157 D 3325 1 3326 0.00348 SRR10002760 +Viruses 10239 D 368 7 375 3.9e-4 SRR10002760 +Eukaryota 2759 D 943543 950 944493 0.54109 SRR10002752 +Bacteria 2 D 797023 197 797220 0.45672 SRR10002752 +Archaea 2157 D 3386 0 3386 0.00194 SRR10002752 +Viruses 10239 D 443 7 450 2.6e-4 SRR10002752 +Bacteria 2 D 680400 300 680700 0.61376 SRR10002699 
+Eukaryota 2759 D 426007 737 426744 0.38478 SRR10002699 +Archaea 2157 D 881 0 881 7.9e-4 SRR10002699 +Viruses 10239 D 726 16 742 6.7e-4 SRR10002699 +Bacteria 2 D 697509 209 697718 0.60802 SRR10002726 +Eukaryota 2759 D 446733 741 447474 0.38995 SRR10002726 +Archaea 2157 D 2010 0 2010 0.00175 SRR10002726 +Viruses 10239 D 305 18 323 2.8e-4 SRR10002726 +Bacteria 2 D 1547817 600 1548417 0.81416 SRR10002737 +Eukaryota 2759 D 348050 1158 349208 0.18361 SRR10002737 +Archaea 2157 D 3818 2 3820 0.00201 SRR10002737 +Viruses 10239 D 413 8 421 2.2e-4 SRR10002737 +Bacteria 2 D 423832 113 423945 0.5327 SRR10002711 +Eukaryota 2759 D 370273 429 370702 0.4658 SRR10002711 +Archaea 2157 D 1018 0 1018 0.00128 SRR10002711 +Viruses 10239 D 164 6 170 2.1e-4 SRR10002711 +Eukaryota 2759 D 881551 782 882333 0.64056 SRR10002704 +Bacteria 2 D 494069 105 494174 0.35876 SRR10002704 +Archaea 2157 D 695 0 695 5e-4 SRR10002704 +Viruses 10239 D 230 1 231 1.7e-4 SRR10002704 +Bacteria 2 D 1247006 487 1247493 0.6594 SRR10002731 +Eukaryota 2759 D 640257 1351 641608 0.33914 SRR10002731 +Archaea 2157 D 2330 1 2331 0.00123 SRR10002731 +Viruses 10239 D 347 77 424 2.2e-4 SRR10002731 +Eukaryota 2759 D 799507 727 800234 0.55472 SRR10002740 +Bacteria 2 D 639247 193 639440 0.44326 SRR10002740 +Archaea 2157 D 1887 0 1887 0.00131 SRR10002740 +Viruses 10239 D 1017 17 1034 7.2e-4 SRR10002740 +Eukaryota 2759 D 1386259 1166 1387425 0.62982 SRR10002703 +Bacteria 2 D 813892 221 814113 0.36956 SRR10002703 +Archaea 2157 D 898 0 898 4.1e-4 SRR10002703 +Viruses 10239 D 454 9 463 2.1e-4 SRR10002703 +Eukaryota 2759 D 1208737 628 1209365 0.6341 SRR10002698 +Bacteria 2 D 696363 297 696660 0.36527 SRR10002698 +Archaea 2157 D 416 0 416 2.2e-4 SRR10002698 +Viruses 10239 D 665 114 779 4.1e-4 SRR10002698 +Bacteria 2 D 839069 119 839188 0.50766 SRR10002684 +Eukaryota 2759 D 811732 619 812351 0.49142 SRR10002684 +Archaea 2157 D 696 0 696 4.2e-4 SRR10002684 +Viruses 10239 D 723 103 826 5e-4 SRR10002684 +Eukaryota 2759 D 1958028 1167 
1959195 0.58634 SRR10002772 +Bacteria 2 D 1379465 347 1379812 0.41295 SRR10002772 +Archaea 2157 D 1463 0 1463 4.4e-4 SRR10002772 +Viruses 10239 D 891 10 901 2.7e-4 SRR10002772 +Eukaryota 2759 D 783160 551 783711 0.52419 SRR10002692 +Bacteria 2 D 710035 376 710411 0.47517 SRR10002692 +Archaea 2157 D 452 0 452 3e-4 SRR10002692 +Viruses 10239 D 449 56 505 3.4e-4 SRR10002692 +Eukaryota 2759 D 1099460 868 1100328 0.6034 SRR10002748 +Bacteria 2 D 721681 241 721922 0.39589 SRR10002748 +Archaea 2157 D 704 0 704 3.9e-4 SRR10002748 +Viruses 10239 D 578 23 601 3.3e-4 SRR10002748 +Bacteria 2 D 750960 357 751317 0.51188 SRR10002751 +Eukaryota 2759 D 713349 810 714159 0.48656 SRR10002751 +Archaea 2157 D 1519 0 1519 0.00103 SRR10002751 +Viruses 10239 D 731 44 775 5.3e-4 SRR10002751 +Eukaryota 2759 D 930747 1068 931815 0.54011 SRR10002761 +Bacteria 2 D 791078 307 791385 0.45871 SRR10002761 +Archaea 2157 D 1668 0 1668 9.7e-4 SRR10002761 +Viruses 10239 D 339 17 356 2.1e-4 SRR10002761 +Bacteria 2 D 1137395 423 1137818 0.67629 SRR10002764 +Eukaryota 2759 D 540425 907 541332 0.32176 SRR10002764 +Archaea 2157 D 2728 1 2729 0.00162 SRR10002764 +Viruses 10239 D 534 18 552 3.3e-4 SRR10002764 +Bacteria 2 D 1351953 277 1352230 0.54357 SRR10002725 +Eukaryota 2759 D 1130802 1103 1131905 0.455 SRR10002725 +Archaea 2157 D 2953 0 2953 0.00119 SRR10002725 +Viruses 10239 D 597 5 602 2.4e-4 SRR10002725 +Bacteria 2 D 688960 144 689104 0.53483 SRR10002705 +Eukaryota 2759 D 596790 620 597410 0.46366 SRR10002705 +Archaea 2157 D 1696 0 1696 0.00132 SRR10002705 +Viruses 10239 D 236 10 246 1.9e-4 SRR10002705 +Eukaryota 2759 D 2252516 1000 2253516 0.78035 SRR10002768 +Bacteria 2 D 633370 99 633469 0.21936 SRR10002768 +Archaea 2157 D 426 0 426 1.5e-4 SRR10002768 +Viruses 10239 D 404 0 404 1.4e-4 SRR10002768 +Bacteria 2 D 1095202 207 1095409 0.5382 SRR10002681 +Eukaryota 2759 D 932314 1311 933625 0.45871 SRR10002681 +Archaea 2157 D 5572 2 5574 0.00274 SRR10002681 +Viruses 10239 D 724 3 727 3.6e-4 SRR10002681 
+Bacteria 2 D 513353 277 513630 0.84433 SRR10002694 +Eukaryota 2759 D 92484 630 93114 0.15307 SRR10002694 +Archaea 2157 D 1398 1 1399 0.0023 SRR10002694 +Viruses 10239 D 177 5 182 3e-4 SRR10002694 +Eukaryota 2759 D 1512741 1050 1513791 0.5684 SRR10002773 +Bacteria 2 D 1146428 368 1146796 0.4306 SRR10002773 +Archaea 2157 D 1233 0 1233 4.6e-4 SRR10002773 +Viruses 10239 D 1400 6 1406 5.3e-4 SRR10002773 +Bacteria 2 D 799213 355 799568 0.75145 SRR10002759 +Eukaryota 2759 D 262284 860 263144 0.24731 SRR10002759 +Archaea 2157 D 986 0 986 0.00093000000000000005 SRR10002759 +Viruses 10239 D 317 13 330 3.1e-4 SRR10002759 +Bacteria 2 D 448278 202 448480 0.64225 SRR10002750 +Eukaryota 2759 D 246535 570 247105 0.35387 SRR10002750 +Archaea 2157 D 2497 1 2498 0.00358 SRR10002750 +Viruses 10239 D 187 23 210 3e-4 SRR10002750 +Eukaryota 2759 D 2160896 1088 2161984 0.62625 SRR10002771 +Bacteria 2 D 1286727 353 1287080 0.37282 SRR10002771 +Archaea 2157 D 892 0 892 2.6e-4 SRR10002771 +Viruses 10239 D 2304 32 2336 6.8e-4 SRR10002771 +Bacteria 2 D 491439 184 491623 0.60764 SRR10002707 +Eukaryota 2759 D 315388 570 315958 0.39052 SRR10002707 +Archaea 2157 D 1337 0 1337 0.00165 SRR10002707 +Viruses 10239 D 129 25 154 1.9e-4 SRR10002707 +Bacteria 2 D 440784 163 440947 0.81836 SRR10002702 +Eukaryota 2759 D 96321 628 96949 0.17993 SRR10002702 +Archaea 2157 D 780 1 781 0.00145 SRR10002702 +Viruses 10239 D 141 1 142 2.6e-4 SRR10002702 +Eukaryota 2759 D 1696779 931 1697710 0.60946 SRR10002766 +Bacteria 2 D 1085917 260 1086177 0.38993 SRR10002766 +Archaea 2157 D 921 0 921 3.3e-4 SRR10002766 +Viruses 10239 D 775 7 782 2.8e-4 SRR10002766 +Eukaryota 2759 D 1547872 1541 1549413 0.57446 SRR10002715 +Bacteria 2 D 1145067 395 1145462 0.42469 SRR10002715 +Archaea 2157 D 1515 0 1515 5.6e-4 SRR10002715 +Viruses 10239 D 694 67 761 2.8e-4 SRR10002715 +Eukaryota 2759 D 900876 1151 902027 0.52985 SRR10002720 +Bacteria 2 D 798926 258 799184 0.46944 SRR10002720 +Archaea 2157 D 835 0 835 4.9e-4 SRR10002720 
+Viruses 10239 D 367 3 370 2.2e-4 SRR10002720 +Bacteria 2 D 743309 286 743595 0.63308 SRR10002719 +Eukaryota 2759 D 429089 640 429729 0.36586 SRR10002719 +Archaea 2157 D 931 0 931 7.9e-4 SRR10002719 +Viruses 10239 D 290 17 307 2.6e-4 SRR10002719 +Eukaryota 2759 D 1727108 896 1728004 0.6328 SRR10002765 +Bacteria 2 D 1000733 217 1000950 0.36655 SRR10002765 +Archaea 2157 D 1012 0 1012 3.7e-4 SRR10002765 +Viruses 10239 D 778 2 780 2.9e-4 SRR10002765 +Bacteria 2 D 1260090 449 1260539 0.50979 SRR10002714 +Eukaryota 2759 D 1206857 1439 1208296 0.48866 SRR10002714 +Archaea 2157 D 2954 0 2954 0.00119 SRR10002714 +Viruses 10239 D 873 23 896 3.6e-4 SRR10002714 +Bacteria 2 D 460375 205 460580 0.54277 SRR10002723 +Eukaryota 2759 D 384214 724 384938 0.45363 SRR10002723 +Archaea 2157 D 2812 1 2813 0.00331 SRR10002723 +Viruses 10239 D 225 16 241 2.8e-4 SRR10002723 +Eukaryota 2759 D 1546306 837 1547143 0.56696 SRR10002727 +Bacteria 2 D 1178845 325 1179170 0.43212 SRR10002727 +Archaea 2157 D 1809 0 1809 6.6e-4 SRR10002727 +Viruses 10239 D 686 10 696 2.6e-4 SRR10002727 +Bacteria 2 D 1439979 416 1440395 0.54023 SRR10002733 +Eukaryota 2759 D 1222546 1010 1223556 0.45891 SRR10002733 +Archaea 2157 D 1404 0 1404 5.3e-4 SRR10002733 +Viruses 10239 D 874 10 884 3.3e-4 SRR10002733 +Bacteria 2 D 1998972 370 1999342 0.59273 SRR10002678 +Eukaryota 2759 D 1369980 1177 1371157 0.40649 SRR10002678 +Archaea 2157 D 1339 0 1339 4e-4 SRR10002678 +Viruses 10239 D 1279 5 1284 3.8e-4 SRR10002678 +Eukaryota 2759 D 3039642 1222 3040864 0.85061 SRR10002770 +Bacteria 2 D 533355 129 533484 0.14923 SRR10002770 +Archaea 2157 D 296 0 296 8e-5 SRR10002770 +Viruses 10239 D 267 1 268 7e-5 SRR10002770 +Bacteria 2 D 553135 138 553273 0.58918 SRR10002712 +Eukaryota 2759 D 383335 590 383925 0.40884 SRR10002712 +Archaea 2157 D 1638 0 1638 0.00174 SRR10002712 +Viruses 10239 D 210 2 212 2.3e-4 SRR10002712 +Bacteria 2 D 845781 8467 854248 0.98487 SRR10002688 +Eukaryota 2759 D 12921 11 12932 0.01491 SRR10002688 +Archaea 2157 
D 13 0 13 1e-5 SRR10002688 +Viruses 10239 D 13 162 175 2e-4 SRR10002688 +Bacteria 2 D 943795 288 944083 0.7035 SRR10002717 +Eukaryota 2759 D 395680 716 396396 0.29538 SRR10002717 +Archaea 2157 D 1137 0 1137 8.5e-4 SRR10002717 +Viruses 10239 D 341 14 355 2.6e-4 SRR10002717 +Bacteria 2 D 912720 423 913143 0.61133 SRR10002722 +Eukaryota 2759 D 577483 964 578447 0.38725 SRR10002722 +Archaea 2157 D 1720 0 1720 0.00115 SRR10002722 +Viruses 10239 D 382 17 399 2.7e-4 SRR10002722 +Bacteria 2 D 894613 340 894953 0.53304 SRR10002701 +Eukaryota 2759 D 781289 957 782246 0.46591 SRR10002701 +Archaea 2157 D 929 0 929 5.5e-4 SRR10002701 +Viruses 10239 D 812 24 836 5e-4 SRR10002701 +Bacteria 2 D 596387 172 596559 0.67465 SRR10002710 +Eukaryota 2759 D 283481 649 284130 0.32132 SRR10002710 +Archaea 2157 D 3276 1 3277 0.00371 SRR10002710 +Viruses 10239 D 277 6 283 3.2e-4 SRR10002710 +Eukaryota 2759 D 1078557 680 1079237 0.52948 SRR10002683 +Bacteria 2 D 956848 208 957056 0.46953 SRR10002683 +Archaea 2157 D 1051 0 1051 5.2e-4 SRR10002683 +Viruses 10239 D 958 3 961 4.7e-4 SRR10002683 +Eukaryota 2759 D 2022033 663 2022696 0.86846 SRR10002769 +Bacteria 2 D 305927 35 305962 0.13137 SRR10002769 +Archaea 2157 D 217 0 217 9e-5 SRR10002769 +Viruses 10239 D 194 2 196 8e-5 SRR10002769 +Bacteria 2 D 465896 209 466105 0.53287 SRR10002741 +Eukaryota 2759 D 404343 670 405013 0.46302 SRR10002741 +Archaea 2157 D 3381 1 3382 0.00387 SRR10002741 +Viruses 10239 D 195 15 210 2.4e-4 SRR10002741 +Eukaryota 2759 D 2715830 1000 2716830 0.69138 SRR10002687 +Bacteria 2 D 1210799 182 1210981 0.30817 SRR10002687 +Archaea 2157 D 1123 0 1123 2.9e-4 SRR10002687 +Viruses 10239 D 654 8 662 1.7e-4 SRR10002687 +Eukaryota 2759 D 1602111 843 1602954 0.63754 SRR10002680 +Bacteria 2 D 909542 144 909686 0.36181 SRR10002680 +Archaea 2157 D 993 0 993 3.9e-4 SRR10002680 +Viruses 10239 D 657 1 658 2.6e-4 SRR10002680 +Eukaryota 2759 D 1269117 717 1269834 0.61285 SRR10002690 +Bacteria 2 D 799876 216 800092 0.38614 SRR10002690 
+Archaea 2157 D 1592 0 1592 7.7e-4 SRR10002690 +Viruses 10239 D 500 10 510 2.5e-4 SRR10002690 +Bacteria 2 D 879133 288 879421 0.5014 SRR10002718 +Eukaryota 2759 D 868586 1119 869705 0.49586 SRR10002718 +Archaea 2157 D 4211 1 4212 0.0024 SRR10002718 +Viruses 10239 D 568 9 577 3.3e-4 SRR10002718 +Bacteria 2 D 1397470 376 1397846 0.50116 SRR10002732 +Eukaryota 2759 D 1386870 1008 1387878 0.49759 SRR10002732 +Archaea 2157 D 2078 0 2078 7.5e-4 SRR10002732 +Viruses 10239 D 1376 14 1390 5e-4 SRR10002732 +Bacteria 2 D 1943132 373 1943505 0.62719 SRR10002679 +Eukaryota 2759 D 1148169 1447 1149616 0.37099 SRR10002679 +Archaea 2157 D 4867 1 4868 0.00157 SRR10002679 +Viruses 10239 D 757 8 765 2.5e-4 SRR10002679 +Eukaryota 2759 D 640496 552 641048 0.59165 SRR10002696 +Bacteria 2 D 441169 163 441332 0.40733 SRR10002696 +Archaea 2157 D 739 0 739 6.8e-4 SRR10002696 +Viruses 10239 D 360 4 364 3.4e-4 SRR10002696 +Bacteria 2 D 622050 206 622256 0.53884 SRR10002763 +Eukaryota 2759 D 530554 658 531212 0.46 SRR10002763 +Archaea 2157 D 932 0 932 8.1e-4 SRR10002763 +Viruses 10239 D 403 5 408 3.5e-4 SRR10002763 +Bacteria 2 D 757362 226 757588 0.62781 SRR10002708 +Eukaryota 2759 D 446586 933 447519 0.37085 SRR10002708 +Archaea 2157 D 1353 0 1353 0.00112 SRR10002708 +Viruses 10239 D 240 21 261 2.2e-4 SRR10002708 +Bacteria 2 D 1184415 272 1184687 0.63928 SRR10002734 +Eukaryota 2759 D 660390 1130 661520 0.35697 SRR10002734 +Archaea 2157 D 6056 2 6058 0.00327 SRR10002734 +Viruses 10239 D 899 4 903 4.9e-4 SRR10002734 +Eukaryota 2759 D 1695354 881 1696235 0.68165 SRR10002685 +Bacteria 2 D 790626 198 790824 0.3178 SRR10002685 +Archaea 2157 D 719 0 719 2.9e-4 SRR10002685 +Viruses 10239 D 639 1 640 2.6e-4 SRR10002685 +Eukaryota 2759 D 778573 840 779413 0.55794 SRR10002738 +Bacteria 2 D 615020 167 615187 0.44038 SRR10002738 +Archaea 2157 D 2064 0 2064 0.00148 SRR10002738 +Viruses 10239 D 265 18 283 2e-4 SRR10002738 +Eukaryota 2759 D 1223873 797 1224670 0.51813 SRR10002686 +Bacteria 2 D 1135794 241 
1136035 0.48063 SRR10002686 +Archaea 2157 D 2104 0 2104 8.9e-4 SRR10002686 +Viruses 10239 D 815 10 825 3.5e-4 SRR10002686 +Bacteria 2 D 955167 326 955493 0.56761 SRR10002693 +Eukaryota 2759 D 725641 515 726156 0.43137 SRR10002693 +Archaea 2157 D 467 0 467 2.8e-4 SRR10002693 +Viruses 10239 D 1212 24 1236 7.3e-4 SRR10002693 +Bacteria 2 D 565845 457 566302 0.91245 SRR10002689 +Eukaryota 2759 D 51849 572 52421 0.08446 SRR10002689 +Archaea 2157 D 1707 3 1710 0.00276 SRR10002689 +Viruses 10239 D 204 3 207 3.3e-4 SRR10002689 +Bacteria 2 D 1078533 258 1078791 0.53988 SRR10002774 +Eukaryota 2759 D 915958 1229 917187 0.45901 SRR10002774 +Archaea 2157 D 1279 0 1279 6.4e-4 SRR10002774 +Viruses 10239 D 933 6 939 4.7e-4 SRR10002774 +Eukaryota 2759 D 751215 809 752024 0.5451 SRR10002739 +Bacteria 2 D 626008 159 626167 0.45388 SRR10002739 +Archaea 2157 D 1005 0 1005 7.3e-4 SRR10002739 +Viruses 10239 D 394 9 403 2.9e-4 SRR10002739 +Bacteria 2 D 1432736 314 1433050 0.5171 SRR10002735 +Eukaryota 2759 D 1335005 915 1335920 0.48205 SRR10002735 +Archaea 2157 D 1327 0 1327 4.8e-4 SRR10002735 +Viruses 10239 D 1033 7 1040 3.8e-4 SRR10002735 +Eukaryota 2759 D 1071557 1232 1072789 0.50359 SRR10002716 +Bacteria 2 D 1055282 371 1055653 0.49554 SRR10002716 +Archaea 2157 D 1211 0 1211 5.7e-4 SRR10002716 +Viruses 10239 D 625 15 640 3e-4 SRR10002716 +Bacteria 2 D 712912 236 713148 0.59343 SRR10002713 +Eukaryota 2759 D 483508 835 484343 0.40304 SRR10002713 +Archaea 2157 D 3876 1 3877 0.00323 SRR10002713 +Viruses 10239 D 355 12 367 3.1e-4 SRR10002713 +Eukaryota 2759 D 1342721 858 1343579 0.64985 SRR10002754 +Bacteria 2 D 721798 169 721967 0.3492 SRR10002754 +Archaea 2157 D 1463 0 1463 7.1e-4 SRR10002754 +Viruses 10239 D 481 23 504 2.4e-4 SRR10002754 +Bacteria 2 D 851577 349 851926 0.60275 SRR10002762 +Eukaryota 2759 D 557790 893 558683 0.39527 SRR10002762 +Archaea 2157 D 2395 1 2396 0.0017 SRR10002762 +Viruses 10239 D 387 14 401 2.8e-4 SRR10002762 +Bacteria 2 D 1173856 259 1174115 0.52212 
SRR10002729 +Eukaryota 2759 D 1072037 804 1072841 0.47709 SRR10002729 +Archaea 2157 D 928 0 928 4.1e-4 SRR10002729 +Viruses 10239 D 833 16 849 3.8e-4 SRR10002729 +Bacteria 2 D 1619520 425 1619945 0.55031 SRR10002724 +Eukaryota 2759 D 1319763 1012 1320775 0.44868 SRR10002724 +Archaea 2157 D 1945 0 1945 6.6e-4 SRR10002724 +Viruses 10239 D 1026 18 1044 3.5e-4 SRR10002724 +Bacteria 2 D 853825 329 854154 0.63784 SRR10002757 +Eukaryota 2759 D 478203 873 479076 0.35775 SRR10002757 +Archaea 2157 D 5567 3 5570 0.00416 SRR10002757 +Viruses 10239 D 321 9 330 2.5e-4 SRR10002757 +Bacteria 2 D 304292 92 304384 0.51999 SRR10002746 +Eukaryota 2759 D 279699 422 280121 0.47854 SRR10002746 +Archaea 2157 D 393 0 393 6.7e-4 SRR10002746 +Viruses 10239 D 463 1 464 7.9e-4 SRR10002746 +Eukaryota 2759 D 1431683 1381 1433064 0.65503 SRR10002676 +Bacteria 2 D 752846 270 753116 0.34424 SRR10002676 +Archaea 2157 D 785 0 785 3.6e-4 SRR10002676 +Viruses 10239 D 784 27 811 3.7e-4 SRR10002676 +Bacteria 2 D 547597 607 548204 0.94287 SRR10002709 +Eukaryota 2759 D 31662 376 32038 0.0551 SRR10002709 +Archaea 2157 D 1008 2 1010 0.00174 SRR10002709 +Viruses 10239 D 142 25 167 2.9e-4 SRR10002709 +Eukaryota 2759 D 1077738 1311 1079049 0.54157 SRR10002677 +Bacteria 2 D 911302 293 911595 0.45753 SRR10002677 +Archaea 2157 D 1006 0 1006 5e-4 SRR10002677 +Viruses 10239 D 752 30 782 3.9e-4 SRR10002677 +Eukaryota 2759 D 1665404 1521 1666925 0.57245 SRR10002753 +Bacteria 2 D 1238995 338 1239333 0.42561 SRR10002753 +Archaea 2157 D 4395 1 4396 0.00151 SRR10002753 +Viruses 10239 D 1214 48 1262 4.3e-4 SRR10002753 +Bacteria 2 D 686513 323 686836 0.61064 SRR10002758 +Eukaryota 2759 D 435838 752 436590 0.38816 SRR10002758 +Archaea 2157 D 983 0 983 8.7e-4 SRR10002758 +Viruses 10239 D 354 15 369 3.3e-4 SRR10002758 +Eukaryota 2759 D 1556793 925 1557718 0.56993 SRR10002730 +Bacteria 2 D 1172868 289 1173157 0.42923 SRR10002730 +Archaea 2157 D 1094 0 1094 4e-4 SRR10002730 +Viruses 10239 D 1174 13 1187 4.3e-4 SRR10002730 
+Bacteria 2 D 1793712 420 1794132 0.57882 SRR10002728 +Eukaryota 2759 D 1301335 1017 1302352 0.42016 SRR10002728 +Archaea 2157 D 1966 0 1966 6.3e-4 SRR10002728 +Viruses 10239 D 1156 18 1174 3.8e-4 SRR10002728 +Eukaryota 2759 D 879071 708 879779 0.66892 SRR10002736 +Bacteria 2 D 433693 123 433816 0.32984 SRR10002736 +Archaea 2157 D 1267 0 1267 9.6e-4 SRR10002736 +Viruses 10239 D 345 6 351 2.7e-4 SRR10002736 +Eukaryota 2759 D 880878 847 881725 0.54895 SRR10002749 +Bacteria 2 D 722532 248 722780 0.44999 SRR10002749 +Archaea 2157 D 1182 0 1182 7.4e-4 SRR10002749 +Viruses 10239 D 509 20 529 3.3e-4 SRR10002749 +Eukaryota 2759 D 1844883 1552 1846435 0.65403 SRR10002743 +Bacteria 2 D 974948 291 975239 0.34544 SRR10002743 +Archaea 2157 D 1208 0 1208 4.3e-4 SRR10002743 +Viruses 10239 D 286 5 291 1e-4 SRR10002743 +Bacteria 2 D 918913 432 919345 0.60088 SRR10002700 +Eukaryota 2759 D 608162 964 609126 0.39812 SRR10002700 +Archaea 2157 D 1044 0 1044 6.8e-4 SRR10002700 +Viruses 10239 D 419 56 475 3.1e-4 SRR10002700 +Eukaryota 2759 D 747785 630 748415 0.60704 SRR10002745 +Bacteria 2 D 483055 100 483155 0.39189 SRR10002745 +Archaea 2157 D 1017 0 1017 8.2e-4 SRR10002745 +Viruses 10239 D 292 6 298 2.4e-4 SRR10002745 +Bacteria 2 D 997708 216 997924 0.66843 SRR10002767 +Eukaryota 2759 D 492583 710 493293 0.33042 SRR10002767 +Archaea 2157 D 773 0 773 5.2e-4 SRR10002767 +Viruses 10239 D 953 1 954 6.4e-4 SRR10002767 +Eukaryota 2759 D 1600992 1515 1602507 0.68271 SRR10002675 +Bacteria 2 D 736439 271 736710 0.31386 SRR10002675 +Archaea 2157 D 7401 1 7402 0.00315 SRR10002675 +Viruses 10239 D 605 37 642 2.7e-4 SRR10002675 +Bacteria 2 D 1087170 392 1087562 0.73115 SRR10002721 +Eukaryota 2759 D 397279 743 398022 0.26759 SRR10002721 +Archaea 2157 D 1513 0 1513 0.00102 SRR10002721 +Viruses 10239 D 326 34 360 2.4e-4 SRR10002721 +Bacteria 2 D 556053 183 556236 0.60586 SRR10002742 +Eukaryota 2759 D 356414 767 357181 0.38905 SRR10002742 +Archaea 2157 D 4444 1 4445 0.00484 SRR10002742 +Viruses 10239 D 
224 7 231 2.5e-4 SRR10002742 +Bacteria 2 D 944483 313 944796 0.6728 SRR10002755 +Eukaryota 2759 D 457167 777 457944 0.32611 SRR10002755 +Archaea 2157 D 1151 0 1151 8.2e-4 SRR10002755 +Viruses 10239 D 361 15 376 2.7e-4 SRR10002755 +Bacteria 2 D 301491 206 301697 0.82792 SRR10002691 +Eukaryota 2759 D 61882 244 62126 0.17049 SRR10002691 +Archaea 2157 D 420 0 420 0.00115 SRR10002691 +Viruses 10239 D 128 30 158 4.3e-4 SRR10002691 +Bacteria 2 D 390914 131 391045 0.64989 SRR10002744 +Eukaryota 2759 D 209343 463 209806 0.34868 SRR10002744 +Archaea 2157 D 435 0 435 7.2e-4 SRR10002744 +Viruses 10239 D 249 174 423 7e-4 SRR10002744 +Bacteria 2 D 898709 251 898960 0.54708 SRR10002756 +Eukaryota 2759 D 741720 949 742669 0.45196 SRR10002756 +Archaea 2157 D 1060 0 1060 6.5e-4 SRR10002756 +Viruses 10239 D 515 4 519 3.2e-4 SRR10002756 +Bacteria 2 D 1179357 218 1179575 0.54076 SRR10002682 +Eukaryota 2759 D 998259 797 999056 0.45801 SRR10002682 +Archaea 2157 D 1635 0 1635 7.5e-4 SRR10002682 +Viruses 10239 D 1043 3 1046 4.8e-4 SRR10002682 +Eukaryota 2759 D 1364124 754 1364878 0.66125 SRR10002697 +Bacteria 2 D 697932 312 698244 0.33828 SRR10002697 +Archaea 2157 D 290 0 290 1.4e-4 SRR10002697 +Viruses 10239 D 528 158 686 3.3e-4 SRR10002697 +Bacteria 2 D 737921 164 738085 0.60224 SRR10002706 +Eukaryota 2759 D 485103 787 485890 0.39646 SRR10002706 +Archaea 2157 D 1331 0 1331 0.00109 SRR10002706 +Viruses 10239 D 259 8 267 2.2e-4 SRR10002706 +Eukaryota 2759 D 657523 779 658302 0.61983 SRR10002695 +Bacteria 2 D 402409 191 402600 0.37907 SRR10002695 +Archaea 2157 D 803 0 803 7.6e-4 SRR10002695 +Viruses 10239 D 355 8 363 3.4e-4 SRR10002695 diff --git a/data/2024-04-12_leung/1/hv_clade_counts.tsv.gz b/data/2024-04-12_leung/1/hv_clade_counts.tsv.gz new file mode 100644 index 0000000..8db5229 Binary files /dev/null and b/data/2024-04-12_leung/1/hv_clade_counts.tsv.gz differ diff --git a/data/2024-04-12_leung/1/hv_hits_putative_filtered.tsv.gz 
b/data/2024-04-12_leung/1/hv_hits_putative_filtered.tsv.gz new file mode 100644 index 0000000..e386c83 Binary files /dev/null and b/data/2024-04-12_leung/1/hv_hits_putative_filtered.tsv.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002675.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002675.report.gz new file mode 100644 index 0000000..9ed6bd5 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002675.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002676.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002676.report.gz new file mode 100644 index 0000000..5d9fa22 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002676.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002677.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002677.report.gz new file mode 100644 index 0000000..8dc5c6a Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002677.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002678.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002678.report.gz new file mode 100644 index 0000000..1d4e30a Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002678.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002679.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002679.report.gz new file mode 100644 index 0000000..e974ddf Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002679.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002680.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002680.report.gz new file mode 100644 index 0000000..121bc87 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002680.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002681.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002681.report.gz new file mode 100644 index 0000000..e408b9a Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002681.report.gz differ diff --git 
a/data/2024-04-12_leung/1/kraken/SRR10002682.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002682.report.gz new file mode 100644 index 0000000..0e7278b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002682.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002683.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002683.report.gz new file mode 100644 index 0000000..473e213 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002683.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002684.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002684.report.gz new file mode 100644 index 0000000..a65edb0 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002684.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002685.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002685.report.gz new file mode 100644 index 0000000..fa62d15 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002685.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002686.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002686.report.gz new file mode 100644 index 0000000..7d9f65c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002686.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002687.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002687.report.gz new file mode 100644 index 0000000..b851c7b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002687.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002688.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002688.report.gz new file mode 100644 index 0000000..b6569fd Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002688.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002689.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002689.report.gz new file mode 100644 index 0000000..f1e5318 Binary files /dev/null and 
b/data/2024-04-12_leung/1/kraken/SRR10002689.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002690.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002690.report.gz new file mode 100644 index 0000000..d37235e Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002690.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002691.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002691.report.gz new file mode 100644 index 0000000..dc1a954 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002691.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002692.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002692.report.gz new file mode 100644 index 0000000..09fe3a3 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002692.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002693.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002693.report.gz new file mode 100644 index 0000000..e44f31d Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002693.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002694.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002694.report.gz new file mode 100644 index 0000000..84d2b47 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002694.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002695.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002695.report.gz new file mode 100644 index 0000000..be63e0c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002695.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002696.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002696.report.gz new file mode 100644 index 0000000..623ea48 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002696.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002697.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002697.report.gz new file mode 100644 index 
0000000..3900801 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002697.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002698.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002698.report.gz new file mode 100644 index 0000000..e97f4bf Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002698.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002699.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002699.report.gz new file mode 100644 index 0000000..efb359b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002699.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002700.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002700.report.gz new file mode 100644 index 0000000..382b863 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002700.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002701.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002701.report.gz new file mode 100644 index 0000000..2618299 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002701.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002702.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002702.report.gz new file mode 100644 index 0000000..4527a4b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002702.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002703.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002703.report.gz new file mode 100644 index 0000000..0e32aa8 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002703.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002704.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002704.report.gz new file mode 100644 index 0000000..cfac438 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002704.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002705.report.gz 
b/data/2024-04-12_leung/1/kraken/SRR10002705.report.gz new file mode 100644 index 0000000..886fbed Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002705.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002706.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002706.report.gz new file mode 100644 index 0000000..3cee345 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002706.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002707.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002707.report.gz new file mode 100644 index 0000000..94e6ed8 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002707.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002708.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002708.report.gz new file mode 100644 index 0000000..957a3c9 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002708.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002709.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002709.report.gz new file mode 100644 index 0000000..343a7b3 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002709.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002710.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002710.report.gz new file mode 100644 index 0000000..e727381 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002710.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002711.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002711.report.gz new file mode 100644 index 0000000..27b0494 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002711.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002712.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002712.report.gz new file mode 100644 index 0000000..737618d Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002712.report.gz differ diff --git 
a/data/2024-04-12_leung/1/kraken/SRR10002713.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002713.report.gz new file mode 100644 index 0000000..0fb2f25 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002713.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002714.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002714.report.gz new file mode 100644 index 0000000..05c43dc Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002714.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002715.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002715.report.gz new file mode 100644 index 0000000..7d8ae68 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002715.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002716.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002716.report.gz new file mode 100644 index 0000000..c4d05fe Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002716.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002717.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002717.report.gz new file mode 100644 index 0000000..ca330db Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002717.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002718.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002718.report.gz new file mode 100644 index 0000000..074ec54 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002718.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002719.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002719.report.gz new file mode 100644 index 0000000..a1aa2d4 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002719.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002720.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002720.report.gz new file mode 100644 index 0000000..0cefb45 Binary files /dev/null and 
b/data/2024-04-12_leung/1/kraken/SRR10002720.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002721.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002721.report.gz new file mode 100644 index 0000000..5c5ce58 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002721.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002722.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002722.report.gz new file mode 100644 index 0000000..d251b8b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002722.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002723.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002723.report.gz new file mode 100644 index 0000000..446d766 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002723.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002724.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002724.report.gz new file mode 100644 index 0000000..d84774b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002724.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002725.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002725.report.gz new file mode 100644 index 0000000..51f934e Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002725.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002726.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002726.report.gz new file mode 100644 index 0000000..4f78c19 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002726.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002727.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002727.report.gz new file mode 100644 index 0000000..e44411d Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002727.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002728.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002728.report.gz new file mode 100644 index 
0000000..4fb0dcb Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002728.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002729.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002729.report.gz new file mode 100644 index 0000000..a60f326 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002729.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002730.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002730.report.gz new file mode 100644 index 0000000..b2f9abd Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002730.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002731.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002731.report.gz new file mode 100644 index 0000000..778c001 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002731.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002732.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002732.report.gz new file mode 100644 index 0000000..401c788 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002732.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002733.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002733.report.gz new file mode 100644 index 0000000..35d6590 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002733.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002734.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002734.report.gz new file mode 100644 index 0000000..4586df5 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002734.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002735.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002735.report.gz new file mode 100644 index 0000000..f30e914 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002735.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002736.report.gz 
b/data/2024-04-12_leung/1/kraken/SRR10002736.report.gz new file mode 100644 index 0000000..de67eb6 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002736.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002737.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002737.report.gz new file mode 100644 index 0000000..736c525 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002737.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002738.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002738.report.gz new file mode 100644 index 0000000..084c924 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002738.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002739.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002739.report.gz new file mode 100644 index 0000000..aa2ce15 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002739.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002740.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002740.report.gz new file mode 100644 index 0000000..d5f016c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002740.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002741.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002741.report.gz new file mode 100644 index 0000000..08bf92b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002741.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002742.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002742.report.gz new file mode 100644 index 0000000..faa0724 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002742.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002743.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002743.report.gz new file mode 100644 index 0000000..f086ee5 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002743.report.gz differ diff --git 
a/data/2024-04-12_leung/1/kraken/SRR10002744.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002744.report.gz new file mode 100644 index 0000000..8d4dbb2 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002744.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002745.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002745.report.gz new file mode 100644 index 0000000..ae88341 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002745.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002746.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002746.report.gz new file mode 100644 index 0000000..4b53d89 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002746.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002747.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002747.report.gz new file mode 100644 index 0000000..2075388 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002747.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002748.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002748.report.gz new file mode 100644 index 0000000..c0e5ab3 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002748.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002749.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002749.report.gz new file mode 100644 index 0000000..c3b5150 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002749.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002750.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002750.report.gz new file mode 100644 index 0000000..8c80729 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002750.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002751.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002751.report.gz new file mode 100644 index 0000000..d6997b0 Binary files /dev/null and 
b/data/2024-04-12_leung/1/kraken/SRR10002751.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002752.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002752.report.gz new file mode 100644 index 0000000..c8aae2c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002752.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002753.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002753.report.gz new file mode 100644 index 0000000..0f5fff8 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002753.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002754.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002754.report.gz new file mode 100644 index 0000000..27a752a Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002754.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002755.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002755.report.gz new file mode 100644 index 0000000..f41dc4d Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002755.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002756.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002756.report.gz new file mode 100644 index 0000000..6f41dd0 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002756.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002757.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002757.report.gz new file mode 100644 index 0000000..28786c3 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002757.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002758.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002758.report.gz new file mode 100644 index 0000000..9cfd395 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002758.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002759.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002759.report.gz new file mode 100644 index 
0000000..c540f92 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002759.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002760.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002760.report.gz new file mode 100644 index 0000000..36c45b7 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002760.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002761.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002761.report.gz new file mode 100644 index 0000000..041bd27 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002761.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002762.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002762.report.gz new file mode 100644 index 0000000..ef51467 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002762.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002763.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002763.report.gz new file mode 100644 index 0000000..edf6904 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002763.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002764.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002764.report.gz new file mode 100644 index 0000000..c41e015 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002764.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002765.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002765.report.gz new file mode 100644 index 0000000..e6031c1 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002765.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002766.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002766.report.gz new file mode 100644 index 0000000..ed7e70a Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002766.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002767.report.gz 
b/data/2024-04-12_leung/1/kraken/SRR10002767.report.gz new file mode 100644 index 0000000..5b5e71b Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002767.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002768.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002768.report.gz new file mode 100644 index 0000000..a9ca32c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002768.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002769.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002769.report.gz new file mode 100644 index 0000000..36874af Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002769.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002770.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002770.report.gz new file mode 100644 index 0000000..dd23cb2 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002770.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002771.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002771.report.gz new file mode 100644 index 0000000..87a60e6 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002771.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002772.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002772.report.gz new file mode 100644 index 0000000..fcc878c Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002772.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002773.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002773.report.gz new file mode 100644 index 0000000..2d6ad09 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002773.report.gz differ diff --git a/data/2024-04-12_leung/1/kraken/SRR10002774.report.gz b/data/2024-04-12_leung/1/kraken/SRR10002774.report.gz new file mode 100644 index 0000000..23a0d07 Binary files /dev/null and b/data/2024-04-12_leung/1/kraken/SRR10002774.report.gz differ diff --git 
a/data/2024-04-12_leung/1/qc_adapter_stats.tsv.gz b/data/2024-04-12_leung/1/qc_adapter_stats.tsv.gz new file mode 100644 index 0000000..6060e50 Binary files /dev/null and b/data/2024-04-12_leung/1/qc_adapter_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/1/qc_basic_stats.tsv.gz b/data/2024-04-12_leung/1/qc_basic_stats.tsv.gz new file mode 100644 index 0000000..81c8213 Binary files /dev/null and b/data/2024-04-12_leung/1/qc_basic_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/1/qc_quality_base_stats.tsv.gz b/data/2024-04-12_leung/1/qc_quality_base_stats.tsv.gz new file mode 100644 index 0000000..f37f310 Binary files /dev/null and b/data/2024-04-12_leung/1/qc_quality_base_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/1/qc_quality_sequence_stats.tsv.gz b/data/2024-04-12_leung/1/qc_quality_sequence_stats.tsv.gz new file mode 100644 index 0000000..f9297ba Binary files /dev/null and b/data/2024-04-12_leung/1/qc_quality_sequence_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/1/sample-metadata.csv b/data/2024-04-12_leung/1/sample-metadata.csv new file mode 100644 index 0000000..6efcf2a --- /dev/null +++ b/data/2024-04-12_leung/1/sample-metadata.csv @@ -0,0 +1,101 @@ +sample,library,country,region,city,location,instrument,date +SRR10002675,SRR10002675,Sweden,Europe,Stockholm,"Stockholm, T-centralen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002676,SRR10002676,Sweden,Europe,Stockholm,"Stockholm, Slussen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002677,SRR10002677,Sweden,Europe,Stockholm,"Stockholm, Medborgarplatsen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002678,SRR10002678,Hong Kong,Asia,Hong Kong,"Hong Kong, Admiralty",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002679,SRR10002679,Hong Kong,Asia,Hong Kong,"Hong Kong, Ocean Park",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002680,SRR10002680,Hong Kong,Asia,Hong Kong,"Hong Kong, Fortress Hill",SASS 3100 electret filter air 
sampler,2017-07-14 +SRR10002681,SRR10002681,Hong Kong,Asia,Hong Kong,"Hong Kong, North Point",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002682,SRR10002682,Hong Kong,Asia,Hong Kong,"Hong Kong, Quarry Bay",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002683,SRR10002683,Hong Kong,Asia,Hong Kong,"Hong Kong, Yau Tong",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002684,SRR10002684,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsing Yi",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002685,SRR10002685,Hong Kong,Asia,Hong Kong,"Hong Kong, Lai King",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002686,SRR10002686,Hong Kong,Asia,Hong Kong,"Hong Kong, Cheung Sha Wan",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002687,SRR10002687,Hong Kong,Asia,Hong Kong,"Hong Kong, Sham Shui Po",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002688,SRR10002688,uncalculated,uncalculated,,"Hong Kong, Sham Shui Po",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002689,SRR10002689,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002690,SRR10002690,USA,North America,Denver,"Denver, Train Cabin - Back wagon",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002691,SRR10002691,USA,North America,Denver,"Denver, Boulder Junction",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002692,SRR10002692,USA,North America,Denver,"Denver, Boulder Junction",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002693,SRR10002693,USA,North America,Denver,"Denver, Boulder Junction",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002694,SRR10002694,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002695,SRR10002695,Norway,Europe,Oslo,"Oslo, Nydalen",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002696,SRR10002696,Norway,Europe,Oslo,"Oslo, Loeren",Bobcat ACD-200 electret filter air sampler,2017-06-21 
+SRR10002697,SRR10002697,USA,North America,Denver,"Denver, Boulder Junction",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002698,SRR10002698,USA,North America,Denver,"Denver, Boulder Junction",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002699,SRR10002699,United Kingdom,Europe,London,"London, Warren Street",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002700,SRR10002700,United Kingdom,Europe,London,"London, Warren Street",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002701,SRR10002701,United Kingdom,Europe,London,"London, Green Park",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002702,SRR10002702,Norway,Europe,Oslo,"Oslo, Forskningsparken",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002703,SRR10002703,United Kingdom,Europe,London,"London, Green Park",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002704,SRR10002704,Norway,Europe,Oslo,"Oslo, Majorstua",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002705,SRR10002705,Norway,Europe,Oslo,"Oslo, Toeyen",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002706,SRR10002706,Norway,Europe,Oslo,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002707,SRR10002707,Norway,Europe,Oslo,"Oslo, Lindeberg",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002708,SRR10002708,Norway,Europe,Oslo,"Oslo, Ellingsrudaasen",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002709,SRR10002709,Norway,Europe,Oslo,"Oslo, Montebello",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002710,SRR10002710,Norway,Europe,Oslo,"Oslo, Nationaltheateret",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002711,SRR10002711,Norway,Europe,Oslo,"Oslo, Stortinget",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002712,SRR10002712,Norway,Europe,Oslo,"Oslo, Jernbanetorget",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002713,SRR10002713,United Kingdom,Europe,London,"London, Finsbury Park",SASS 3100 electret filter air 
sampler,2017-06-21 +SRR10002714,SRR10002714,United Kingdom,Europe,London,"London, Highbury & Islington",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002715,SRR10002715,United Kingdom,Europe,London,"London, Kings Cross St Pancras",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002716,SRR10002716,United Kingdom,Europe,London,"London, Kings Cross St Pancras",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002717,SRR10002717,United Kingdom,Europe,London,"London, Warren Street",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002718,SRR10002718,United Kingdom,Europe,London,"London, Finsbury Park",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002719,SRR10002719,United Kingdom,Europe,London,"London, Pimlico",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002720,SRR10002720,United Kingdom,Europe,London,"London, Pimlico",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002721,SRR10002721,United Kingdom,Europe,London,"London, Vauxhall",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002722,SRR10002722,United Kingdom,Europe,London,"London, Vauxhall",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002723,SRR10002723,Norway,Europe,Oslo,"Oslo, Lindeberg",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002724,SRR10002724,Hong Kong,Asia,Hong Kong,"Hong Kong, Admiralty",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002725,SRR10002725,Hong Kong,Asia,Hong Kong,"Hong Kong, Ocean Park",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002726,SRR10002726,Norway,Europe,Oslo,"Oslo, Stortinget",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002727,SRR10002727,Hong Kong,Asia,Hong Kong,"Hong Kong, Cheung Sha Wan",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002728,SRR10002728,Hong Kong,Asia,Hong Kong,"Hong Kong, Sham Shui Po",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002729,SRR10002729,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsing Yi",SASS 3100 electret filter air 
sampler,2017-07-11 +SRR10002730,SRR10002730,Hong Kong,Asia,Hong Kong,"Hong Kong, Lai King",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002731,SRR10002731,Norway,Europe,Oslo,"Oslo, Carl Berners plass",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002732,SRR10002732,Hong Kong,Asia,Hong Kong,"Hong Kong, Yau Tong",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002733,SRR10002733,Hong Kong,Asia,Hong Kong,"Hong Kong, Fortress Hill",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002734,SRR10002734,Hong Kong,Asia,Hong Kong,"Hong Kong, North Point",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002735,SRR10002735,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002736,SRR10002736,Norway,Europe,Oslo,"Oslo, Toeyen",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002737,SRR10002737,USA,North America,New York City,"New York City, 63rd St.",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002738,SRR10002738,Norway,Europe,Oslo,"Oslo, Stortinget",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002739,SRR10002739,Norway,Europe,Oslo,"Oslo, Carl Berners plass",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002740,SRR10002740,Norway,Europe,Oslo,"Oslo, Groenland",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002741,SRR10002741,Norway,Europe,Oslo,"Oslo, Ellingsrudaasen",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002742,SRR10002742,Norway,Europe,Oslo,"Oslo, Nationaltheateret",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002743,SRR10002743,USA,North America,New York City,"New York City, 72nd St.",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002744,SRR10002744,Norway,Europe,Oslo,"Oslo, Montebello",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002745,SRR10002745,Norway,Europe,Oslo,"Oslo, Nationaltheateret",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002746,SRR10002746,Norway,Europe,Oslo,"Oslo, 
Montebello",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002747,SRR10002747,Norway,Europe,Oslo,"Oslo, Nydalen",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002748,SRR10002748,Norway,Europe,Oslo,"Oslo, Loeren",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002749,SRR10002749,Norway,Europe,Oslo,"Oslo, Vestli",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002750,SRR10002750,Norway,Europe,Oslo,"Oslo, Romsaas",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002751,SRR10002751,Norway,Europe,Oslo,"Oslo, Carl Berners plass",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002752,SRR10002752,Norway,Europe,Oslo,"Oslo, Groenland",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002753,SRR10002753,Norway,Europe,Oslo,"Oslo, Jernbanetorget",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002754,SRR10002754,Norway,Europe,Oslo,"Oslo, Stortinget",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002755,SRR10002755,United Kingdom,Europe,London,"London, Warren Street",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002756,SRR10002756,United Kingdom,Europe,London,"London, Oxford Circus",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002757,SRR10002757,United Kingdom,Europe,London,"London, Finsbury Park",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002758,SRR10002758,United Kingdom,Europe,London,"London, Warren Street",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002759,SRR10002759,United Kingdom,Europe,London,"London, Brixton",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002760,SRR10002760,United Kingdom,Europe,London,"London, Finsbury Park",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002761,SRR10002761,United Kingdom,Europe,London,"London, Highbury & Islington",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002762,SRR10002762,United Kingdom,Europe,London,"London, Highbury & Islington",SASS 3100 electret filter air sampler,2017-08-21 
+SRR10002763,SRR10002763,United Kingdom,Europe,London,"London, Vauxhall",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002764,SRR10002764,United Kingdom,Europe,London,"London, Highbury & Islington",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002765,SRR10002765,Hong Kong,Asia,Hong Kong,"Hong Kong, Shek Kip Mei",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002766,SRR10002766,Hong Kong,Asia,Hong Kong,"Hong Kong, Prince Edward",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002767,SRR10002767,Hong Kong,Asia,Hong Kong,"Hong Kong, Kowloon Tong",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002768,SRR10002768,Hong Kong,Asia,Hong Kong,"Hong Kong, Tai Wai",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002769,SRR10002769,Hong Kong,Asia,Hong Kong,"Hong Kong, Che Kung Temple",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002770,SRR10002770,Hong Kong,Asia,Hong Kong,"Hong Kong, Sha Tin Wai",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002771,SRR10002771,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002772,SRR10002772,Hong Kong,Asia,Hong Kong,"Hong Kong, East Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002773,SRR10002773,Hong Kong,Asia,Hong Kong,"Hong Kong, Hung Hom",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002774,SRR10002774,Hong Kong,Asia,Hong Kong,"Hong Kong, Mong Kok East",SASS 3100 electret filter air sampler,2017-07-05 diff --git a/data/2024-04-12_leung/1/taxonomic_composition.tsv b/data/2024-04-12_leung/1/taxonomic_composition.tsv new file mode 100644 index 0000000..e07be97 --- /dev/null +++ b/data/2024-04-12_leung/1/taxonomic_composition.tsv @@ -0,0 +1,801 @@ +sample classification n_reads p_reads +SRR10002675 Filtered 416158 0.07219161122368457 +SRR10002675 Duplicate 287447 0.04986390282396219 +SRR10002675 Ribosomal 17673 0.003065764313448684 +SRR10002675 Unassigned 2696092 0.4676955038405754 
+SRR10002675 Bacterial 736710 0.12779829272680246 +SRR10002675 Archaeal 7402 0.0012840370875429842 +SRR10002675 Viral 642 1.11368793596676e-4 +SRR10002675 Human 1602507 0.27798951919038706 +SRR10002676 Filtered 374568 0.06795372511113723 +SRR10002676 Duplicate 260153 0.04719667843712673 +SRR10002676 Ribosomal 15965 0.0028963531892721908 +SRR10002676 Unassigned 2673642 0.4850492661241515 +SRR10002676 Bacterial 753116 0.13662949755664988 +SRR10002676 Archaeal 785 1.4241385866449544e-4 +SRR10002676 Viral 811 1.4713075079860613e-4 +SRR10002676 Human 1433064 0.2599849349721994 +SRR10002677 Filtered 424418 0.072292846514011 +SRR10002677 Duplicate 296132 0.050441369649466106 +SRR10002677 Ribosomal 17548 0.0029890223096755205 +SRR10002677 Unassigned 3140286 0.534897704169233 +SRR10002677 Bacterial 911595 0.15527568910352496 +SRR10002677 Archaeal 1006 1.7135607724718334e-4 +SRR10002677 Viral 782 1.3320124493767136e-4 +SRR10002677 Human 1079049 0.18379881093190453 +SRR10002678 Filtered 324841 0.053517675820362126 +SRR10002678 Duplicate 518702 0.08545634782362287 +SRR10002678 Ribosomal 12907 0.002126433060523191 +SRR10002678 Unassigned 1840217 0.3031764366108937 +SRR10002678 Bacterial 1999342 0.3293923396678204 +SRR10002678 Archaeal 1339 2.2060074905404453e-4 +SRR10002678 Viral 1284 2.1153947855518537e-4 +SRR10002678 Human 1371157 0.22589862678916844 +SRR10002679 Filtered 506588 0.07346259634218372 +SRR10002679 Duplicate 635649 0.09217831038696679 +SRR10002679 Ribosomal 12461 0.001807025458597423 +SRR10002679 Unassigned 2642411 0.3831878620558442 +SRR10002679 Bacterial 1943505 0.2818363705891489 +SRR10002679 Archaeal 4868 7.059304977491578e-4 +SRR10002679 Viral 765 1.1093607863149254e-4 +SRR10002679 Human 1149616 0.16671096859087833 +SRR10002680 Filtered 241483 0.05523916927250726 +SRR10002680 Duplicate 386492 0.08840993780295041 +SRR10002680 Ribosomal 8916 0.002039532527066811 +SRR10002680 Unassigned 1220408 0.2791679915088103 +SRR10002680 Bacterial 909686 
0.20809042019036553 +SRR10002680 Archaeal 993 2.2714847458247457e-4 +SRR10002680 Viral 658 1.5051731749775253e-4 +SRR10002680 Human 1602954 0.3666752829062195 +SRR10002681 Filtered 346899 0.06208871032839456 +SRR10002681 Duplicate 571543 0.10229596443697334 +SRR10002681 Ribosomal 27521 0.004925766280524726 +SRR10002681 Unassigned 2605853 0.46640103337103295 +SRR10002681 Bacterial 1095409 0.1960585994543552 +SRR10002681 Archaeal 5574 9.976462064476153e-4 +SRR10002681 Viral 727 1.3011998422809764e-4 +SRR10002681 Human 933625 0.16710215993804356 +SRR10002682 Filtered 300409 0.06953612645381473 +SRR10002682 Duplicate 410836 0.09509683147901503 +SRR10002682 Ribosomal 11174 0.0025864627124850643 +SRR10002682 Unassigned 1416455 0.3278689852705416 +SRR10002682 Bacterial 1179575 0.2730380127151933 +SRR10002682 Archaeal 1635 3.7845592759200644e-4 +SRR10002682 Viral 1046 2.421192050527454e-4 +SRR10002682 Human 999056 0.23125300623630557 +SRR10002683 Filtered 225620 0.06164438767494758 +SRR10002683 Duplicate 322471 0.08810622878259028 +SRR10002683 Ribosomal 8565 0.002340147949809086 +SRR10002683 Unassigned 1065064 0.2909991051973689 +SRR10002683 Bacterial 957056 0.26148892425598186 +SRR10002683 Archaeal 1051 2.8715650849379443e-4 +SRR10002683 Viral 961 2.6256651252382155e-4 +SRR10002683 Human 1079237 0.2948714831182847 +SRR10002684 Filtered 162792 0.049692186343867295 +SRR10002684 Duplicate 309110 0.09435569143909295 +SRR10002684 Ribosomal 9312 0.0028424839011382146 +SRR10002684 Unassigned 1141733 0.34851349569353923 +SRR10002684 Bacterial 839188 0.2561617676147311 +SRR10002684 Archaeal 696 2.1245369364177377e-4 +SRR10002684 Viral 826 2.5213613641969127e-4 +SRR10002684 Human 812351 0.24796978517756976 +SRR10002685 Filtered 282582 0.06665697963789698 +SRR10002685 Duplicate 399879 0.09432563418980192 +SRR10002685 Ribosomal 8483 0.002001016194479054 +SRR10002685 Unassigned 1059984 0.250034793102521 +SRR10002685 Bacterial 790824 0.1865438678513148 +SRR10002685 Archaeal 719 
1.6960163194983376e-4 +SRR10002685 Viral 640 1.509666821250259e-4 +SRR10002685 Human 1696235 0.4001171407099114 +SRR10002686 Filtered 339980 0.07510838888551372 +SRR10002686 Duplicate 400667 0.08851536222599013 +SRR10002686 Ribosomal 11747 0.0025951474917293065 +SRR10002686 Unassigned 1410497 0.31160702746588165 +SRR10002686 Bacterial 1136035 0.250972876544369 +SRR10002686 Archaeal 2104 4.648157250871253e-4 +SRR10002686 Viral 825 1.822590176791247e-4 +SRR10002686 Human 1224670 0.2705541226437499 +SRR10002687 Filtered 449834 0.07262698994697565 +SRR10002687 Duplicate 600205 0.09690481933585393 +SRR10002687 Ribosomal 11737 0.001894972325363697 +SRR10002687 Unassigned 1202386 0.19412866954117355 +SRR10002687 Bacterial 1210981 0.19551635695162775 +SRR10002687 Archaeal 1123 1.813115720698161e-4 +SRR10002687 Viral 662 1.068817993857687e-4 +SRR10002687 Human 2716830 0.4386399985275498 +SRR10002688 Filtered 159982 0.13426309799959046 +SRR10002688 Duplicate 43753 0.03671921420394845 +SRR10002688 Ribosomal 10154 0.008521630540234786 +SRR10002688 Unassigned 110299 0.09256719784886316 +SRR10002688 Bacterial 854248 0.7169180466549621 +SRR10002688 Archaeal 13 1.0910104099177882e-5 +SRR10002688 Viral 175 1.4686678595047148e-4 +SRR10002688 Human 12932 0.010853035862351413 +SRR10002689 Filtered 599909 0.13453296636143233 +SRR10002689 Duplicate 211271 0.04737870966454274 +SRR10002689 Ribosomal 26182 0.005871460713666609 +SRR10002689 Unassigned 3001195 0.6730348535846252 +SRR10002689 Bacterial 566302 0.12699640764917988 +SRR10002689 Archaeal 1710 3.8347711482583077e-4 +SRR10002689 Viral 207 4.6420913899968986e-5 +SRR10002689 Human 52421 0.011755703997827412 +SRR10002690 Filtered 429228 0.08937474037311312 +SRR10002690 Duplicate 251210 0.05230746486512936 +SRR10002690 Ribosomal 20431 0.0042541850032222365 +SRR10002690 Unassigned 2029668 0.42262166154960945 +SRR10002690 Bacterial 800092 0.1665968081639707 +SRR10002690 Archaeal 1592 3.314895269507024e-4 +SRR10002690 Viral 510 
1.0619325298043858e-4 +SRR10002690 Human 1269834 0.264407457265024 +SRR10002691 Filtered 628381 0.2539530827296175 +SRR10002691 Duplicate 109317 0.04417923066539821 +SRR10002691 Ribosomal 9916 0.004007439385256535 +SRR10002691 Unassigned 1362383 0.5505916994759937 +SRR10002691 Bacterial 301697 0.1219274344709299 +SRR10002691 Archaeal 420 1.6973825552720297e-4 +SRR10002691 Viral 158 6.385391517451922e-5 +SRR10002691 Human 62126 0.02510752110210241 +SRR10002692 Filtered 1483857 0.2899388744480812 +SRR10002692 Duplicate 232137 0.04535850860140447 +SRR10002692 Ribosomal 16213 0.003167946083367023 +SRR10002692 Unassigned 1890541 0.36940306891968017 +SRR10002692 Bacterial 710411 0.13881106180416025 +SRR10002692 Archaeal 452 8.83187337125698e-5 +SRR10002692 Viral 505 9.867469142665432e-5 +SRR10002692 Human 783711 0.1531335467181677 +SRR10002693 Filtered 700543 0.15459003826006518 +SRR10002693 Duplicate 238612 0.052654923693921246 +SRR10002693 Ribosomal 17365 0.0038319646536844015 +SRR10002693 Unassigned 1891746 0.4174548693204061 +SRR10002693 Bacterial 955493 0.21085029673728015 +SRR10002693 Archaeal 467 1.030536995836807e-4 +SRR10002693 Viral 1236 2.727502627096988e-4 +SRR10002693 Human 726156 0.16024210337234956 +SRR10002694 Filtered 643701 0.14051726076247686 +SRR10002694 Duplicate 206231 0.04501937266573513 +SRR10002694 Ribosomal 28334 0.006185194782117815 +SRR10002694 Unassigned 3094348 0.6754833452268192 +SRR10002694 Bacterial 513630 0.11212330048490059 +SRR10002694 Archaeal 1399 3.0539590245580656e-4 +SRR10002694 Viral 182 3.9729845780526655e-5 +SRR10002694 Human 93114 0.02032640032971406 +SRR10002695 Filtered 327947 0.09200553132382848 +SRR10002695 Duplicate 217599 0.061047399764394106 +SRR10002695 Ribosomal 11536 0.0032364248166675876 +SRR10002695 Unassigned 1945277 0.5457474651605995 +SRR10002695 Bacterial 402600 0.11294943058168956 +SRR10002695 Archaeal 803 2.252816511602005e-4 +SRR10002695 Viral 363 1.018396505244742e-4 +SRR10002695 Human 658302 
0.18468662705113614 +SRR10002696 Filtered 504787 0.13672787256460747 +SRR10002696 Duplicate 223865 0.06063663523758705 +SRR10002696 Ribosomal 12076 0.003270935640359597 +SRR10002696 Unassigned 1867699 0.5058896343627012 +SRR10002696 Bacterial 441332 0.11954029215230057 +SRR10002696 Archaeal 739 2.0016739302962424e-4 +SRR10002696 Viral 364 9.859395272365795e-5 +SRR10002696 Human 641048 0.17363586869669087 +SRR10002697 Filtered 1524213 0.3304099893000276 +SRR10002697 Duplicate 295463 0.06404874297001406 +SRR10002697 Ribosomal 8821 0.0019121648454747094 +SRR10002697 Unassigned 720501 0.1561859974299256 +SRR10002697 Bacterial 698244 0.15136125500097983 +SRR10002697 Archaeal 290 6.286450574624938e-5 +SRR10002697 Viral 686 1.487070722135416e-4 +SRR10002697 Human 1364878 0.29587027887561845 +SRR10002698 Filtered 651509 0.15681758335287008 +SRR10002698 Duplicate 224068 0.05393294991582755 +SRR10002698 Ribosomal 12864 0.0030963523025028367 +SRR10002698 Unassigned 1358905 0.32708711331099327 +SRR10002698 Bacterial 696660 0.1676853851882483 +SRR10002698 Archaeal 416 1.0013079585208178e-4 +SRR10002698 Viral 779 1.8750454319416276e-4 +SRR10002698 Human 1209365 0.29109298059051175 +SRR10002699 Filtered 393779 0.08964481186561758 +SRR10002699 Duplicate 216493 0.049285193611703894 +SRR10002699 Ribosomal 21665 0.004932093506938168 +SRR10002699 Unassigned 2651654 0.6036559185805042 +SRR10002699 Bacterial 680700 0.1549631225558648 +SRR10002699 Archaeal 881 2.0056193766962964e-4 +SRR10002699 Viral 742 1.6891822673196957e-4 +SRR10002699 Human 426744 0.09714937971496984 +SRR10002700 Filtered 384742 0.07601049134519339 +SRR10002700 Duplicate 295215 0.058323336683988924 +SRR10002700 Ribosomal 17689 0.003494678463503142 +SRR10002700 Unassigned 2834060 0.5599032419173336 +SRR10002700 Bacterial 919345 0.18162785754023947 +SRR10002700 Archaeal 1044 2.062549785684482e-4 +SRR10002700 Viral 475 9.384206400384377e-5 +SRR10002700 Human 609126 0.12034029700716914 +SRR10002701 Filtered 308676 
0.07063900661064995 +SRR10002701 Duplicate 252152 0.05770376315258914 +SRR10002701 Ribosomal 12185 0.002788478195748194 +SRR10002701 Unassigned 2117790 0.48464597769171675 +SRR10002701 Bacterial 894953 0.20480565668604298 +SRR10002701 Archaeal 929 2.1259714762823738e-4 +SRR10002701 Viral 836 1.9131454835006077e-4 +SRR10002701 Human 782246 0.17901320596727469 +SRR10002702 Filtered 548125 0.15268952234399621 +SRR10002702 Duplicate 193231 0.05382777485437215 +SRR10002702 Ribosomal 14429 0.004019442860481681 +SRR10002702 Unassigned 2295197 0.6393660818524481 +SRR10002702 Bacterial 440947 0.12283327125932607 +SRR10002702 Archaeal 781 2.1756080629539075e-4 +SRR10002702 Viral 142 3.955651023552559e-5 +SRR10002702 Human 96949 0.027006789512844864 +SRR10002703 Filtered 340611 0.06918870233324557 +SRR10002703 Duplicate 290098 0.05892793882014931 +SRR10002703 Ribosomal 13012 0.0026431424550592657 +SRR10002703 Unassigned 2076308 0.42176282082532995 +SRR10002703 Bacterial 814113 0.16537170561909498 +SRR10002703 Archaeal 898 1.8241176795598068e-4 +SRR10002703 Viral 463 9.404972000403012e-5 +SRR10002703 Human 1387425 0.28182922845916086 +SRR10002704 Filtered 361793 0.09093315102785604 +SRR10002704 Duplicate 244627 0.06148461671865221 +SRR10002704 Ribosomal 11081 0.0027851015540369018 +SRR10002704 Unassigned 1983736 0.4985927458170695 +SRR10002704 Bacterial 494174 0.1242058275755466 +SRR10002704 Archaeal 695 1.7468148904030743e-4 +SRR10002704 Viral 231 5.805960283210218e-5 +SRR10002704 Human 882333 0.2217658162149663 +SRR10002705 Filtered 477607 0.12983153952233242 +SRR10002705 Duplicate 189186 0.05142786775753282 +SRR10002705 Ribosomal 12733 0.003461308131450876 +SRR10002705 Unassigned 1710685 0.46502850081293035 +SRR10002705 Bacterial 689104 0.18732437592203915 +SRR10002705 Archaeal 1696 4.6103656569077873e-4 +SRR10002705 Viral 246 6.687204903297852e-5 +SRR10002705 Human 597410 0.16239849923899063 +SRR10002706 Filtered 535194 0.13630792897238153 +SRR10002706 Duplicate 192905 
0.04913074705325034 +SRR10002706 Ribosomal 13741 0.0034996790920852902 +SRR10002706 Unassigned 1958947 0.49892190221986776 +SRR10002706 Bacterial 738085 0.1879819985941177 +SRR10002706 Archaeal 1331 3.38990821014884e-4 +SRR10002706 Viral 267 6.800191525993542e-5 +SRR10002706 Human 485890 0.12375075133202254 +SRR10002707 Filtered 444979 0.1496137094317571 +SRR10002707 Duplicate 153172 0.05150047777778525 +SRR10002707 Ribosomal 11360 0.0038195324703969424 +SRR10002707 Unassigned 1555603 0.5230348740798323 +SRR10002707 Bacterial 491623 0.165296655958975 +SRR10002707 Archaeal 1337 4.4953476346133026e-4 +SRR10002707 Viral 154 5.177887327826841e-5 +SRR10002707 Human 315958 0.10623343664451383 +SRR10002708 Filtered 545969 0.12120521229765517 +SRR10002708 Duplicate 259964 0.057712052899977155 +SRR10002708 Ribosomal 15621 0.003467864697998735 +SRR10002708 Unassigned 2476226 0.5497225996841826 +SRR10002708 Bacterial 757588 0.1681846668476708 +SRR10002708 Archaeal 1353 3.003662336849298e-4 +SRR10002708 Viral 261 5.794204507891107e-5 +SRR10002708 Human 447519 0.09934929529375174 +SRR10002709 Filtered 695024 0.1812239206271852 +SRR10002709 Duplicate 168701 0.04398791499822563 +SRR10002709 Ribosomal 15955 0.004160183898119691 +SRR10002709 Unassigned 2374068 0.6190259772260244 +SRR10002709 Bacterial 548204 0.14294136343997538 +SRR10002709 Archaeal 1010 2.633522868756432e-4 +SRR10002709 Viral 167 4.354438802795289e-5 +SRR10002709 Human 32038 0.008353743135566196 +SRR10002710 Filtered 420704 0.12400486818381314 +SRR10002710 Duplicate 172755 0.050920507062197265 +SRR10002710 Ribosomal 11874 0.00349992822700663 +SRR10002710 Unassigned 1903059 0.5609373346605196 +SRR10002710 Bacterial 596559 0.1758391176667381 +SRR10002710 Archaeal 3277 9.659141653950417e-4 +SRR10002710 Viral 283 8.341584034385012e-5 +SRR10002710 Human 284130 0.08374891419398633 +SRR10002711 Filtered 382448 0.17199032226149683 +SRR10002711 Duplicate 111861 0.05030490272793502 +SRR10002711 Ribosomal 6812 
0.0030634179685743324 +SRR10002711 Unassigned 926704 0.41674716458451383 +SRR10002711 Bacterial 423945 0.19065189822185047 +SRR10002711 Archaeal 1018 4.578038009407913e-4 +SRR10002711 Viral 170 7.645053650288264e-5 +SRR10002711 Human 370702 0.16670803989818586 +SRR10002712 Filtered 487784 0.16104955721474837 +SRR10002712 Duplicate 174534 0.05762514436496255 +SRR10002712 Ribosomal 6750 0.002228618632836566 +SRR10002712 Unassigned 1420666 0.46905521757591007 +SRR10002712 Bacterial 553273 0.18267178027339043 +SRR10002712 Archaeal 1638 5.408114549016734e-4 +SRR10002712 Viral 212 6.999513335723733e-5 +SRR10002712 Human 383925 0.12675887534989314 +SRR10002713 Filtered 280363 0.07521067939443657 +SRR10002713 Duplicate 217141 0.05825063269542469 +SRR10002713 Ribosomal 11303 0.0030321629786930395 +SRR10002713 Unassigned 2017160 0.5411269463063303 +SRR10002713 Bacterial 713148 0.19131035689011622 +SRR10002713 Archaeal 3877 0.001040050948278591 +SRR10002713 Viral 367 9.845207583653414e-5 +SRR10002713 Human 484343 0.12993071871088407 +SRR10002714 Filtered 498998 0.0801438032932329 +SRR10002714 Duplicate 346492 0.05564989577248577 +SRR10002714 Ribosomal 15231 0.002446242806502692 +SRR10002714 Unassigned 2892877 0.4646234358444677 +SRR10002714 Bacterial 1260539 0.20245449813315586 +SRR10002714 Archaeal 2954 4.744403683546026e-4 +SRR10002714 Viral 896 1.439060832923913e-4 +SRR10002714 Human 1208296 0.19406377769850808 +SRR10002715 Filtered 437335 0.06659467160399304 +SRR10002715 Duplicate 405126 0.06169008409626325 +SRR10002715 Ribosomal 16510 0.0025140407883703 +SRR10002715 Unassigned 3010995 0.45849571432943864 +SRR10002715 Bacterial 1145462 0.17442387580425323 +SRR10002715 Archaeal 1515 2.3069483915087854e-4 +SRR10002715 Viral 761 1.1588037794971522e-4 +SRR10002715 Human 1549413 0.23593503816058098 +SRR10002716 Filtered 347743 0.05851385775705769 +SRR10002716 Duplicate 369782 0.06222230598206235 +SRR10002716 Ribosomal 19177 0.003226866537089446 +SRR10002716 Unassigned 3075922 
0.5175778157426731 +SRR10002716 Bacterial 1055653 0.17763212913793008 +SRR10002716 Archaeal 1211 2.0377198604658284e-4 +SRR10002716 Viral 640 1.0769122301388359e-4 +SRR10002716 Human 1072789 0.1805155616341268 +SRR10002717 Filtered 318050 0.07346548265701817 +SRR10002717 Duplicate 212495 0.04908362753404521 +SRR10002717 Ribosomal 16105 0.003720049043204772 +SRR10002717 Unassigned 2440623 0.56375270139544 +SRR10002717 Bacterial 944083 0.21807109971163557 +SRR10002717 Archaeal 1137 2.6263245961650577e-4 +SRR10002717 Viral 355 8.200046012652556e-5 +SRR10002717 Human 396396 0.0915624067389133 +SRR10002718 Filtered 350261 0.07735019047093247 +SRR10002718 Duplicate 250823 0.055390713851929556 +SRR10002718 Ribosomal 10830 0.0023916524043504665 +SRR10002718 Unassigned 2162421 0.47754010931375257 +SRR10002718 Bacterial 879421 0.19420769612985148 +SRR10002718 Archaeal 4212 9.30160658090874e-4 +SRR10002718 Viral 577 1.27422293380445e-4 +SRR10002718 Human 869705 0.19206205487771214 +SRR10002719 Filtered 254946 0.07135885685208318 +SRR10002719 Duplicate 186570 0.052220556207562226 +SRR10002719 Ribosomal 14293 0.0040005810680960865 +SRR10002719 Unassigned 1942360 0.5436625371459536 +SRR10002719 Bacterial 743595 0.20813069889672633 +SRR10002719 Archaeal 931 2.6058496987318666e-4 +SRR10002719 Viral 307 8.592866353498207e-5 +SRR10002719 Human 429729 0.12028025619617037 +SRR10002720 Filtered 465297 0.08675327009545344 +SRR10002720 Duplicate 348380 0.06495443606095477 +SRR10002720 Ribosomal 16572 0.0030898011206215697 +SRR10002720 Unassigned 2830787 0.5277919891890521 +SRR10002720 Bacterial 799184 0.14900552852901452 +SRR10002720 Archaeal 835 1.5568331738589252e-4 +SRR10002720 Viral 370 6.898542207518591e-5 +SRR10002720 Human 902027 0.16818030626544248 +SRR10002721 Filtered 456021 0.08943006841673512 +SRR10002721 Duplicate 306485 0.06010463228382698 +SRR10002721 Ribosomal 19080 0.00374177001802835 +SRR10002721 Unassigned 2830148 0.555019021644806 +SRR10002721 Bacterial 1087562 
0.2132812832466954 +SRR10002721 Archaeal 1513 2.967137336099001e-4 +SRR10002721 Viral 360 7.05994343024217e-5 +SRR10002721 Human 398022 0.0780559112219958 +SRR10002722 Filtered 378250 0.07983073562466364 +SRR10002722 Duplicate 225835 0.047663117461456475 +SRR10002722 Ribosomal 14878 0.0031400441100429494 +SRR10002722 Unassigned 2625478 0.5541145805852495 +SRR10002722 Bacterial 913143 0.19272142080769922 +SRR10002722 Archaeal 1720 3.6301087977375134e-4 +SRR10002722 Viral 399 8.42100819940272e-5 +SRR10002722 Human 578447 0.12208288044912044 +SRR10002723 Filtered 437569 0.11774502306362179 +SRR10002723 Duplicate 217354 0.05848757965708369 +SRR10002723 Ribosomal 16059 0.004321300927119386 +SRR10002723 Unassigned 2196688 0.5911046697174188 +SRR10002723 Bacterial 460580 0.12393703101143574 +SRR10002723 Archaeal 2813 7.569474754335159e-4 +SRR10002723 Viral 241 6.485045914663254e-5 +SRR10002723 Human 384938 0.1035825976887404 +SRR10002724 Filtered 368965 0.06855887285210598 +SRR10002724 Duplicate 481425 0.08945551844436496 +SRR10002724 Ribosomal 14078 0.0026158898866069894 +SRR10002724 Unassigned 1573548 0.292387292178623 +SRR10002724 Bacterial 1619945 0.30100850563713305 +SRR10002724 Archaeal 1945 3.614082845184397e-4 +SRR10002724 Viral 1044 1.9398984526336816e-4 +SRR10002724 Human 1320775 0.2454185228713842 +SRR10002725 Filtered 426227 0.07611574288866484 +SRR10002725 Duplicate 535631 0.09565314135237428 +SRR10002725 Ribosomal 10931 0.001952061191609155 +SRR10002725 Unassigned 2139243 0.3820266434655149 +SRR10002725 Bacterial 1352230 0.2414816306952381 +SRR10002725 Archaeal 2953 5.27347607613378e-4 +SRR10002725 Viral 602 1.0750533687208044e-4 +SRR10002725 Human 1131905 0.20213592746211329 +SRR10002726 Filtered 207528 0.06327868012689399 +SRR10002726 Duplicate 327545 0.09987382561468088 +SRR10002726 Ribosomal 10385 0.0031665562869482387 +SRR10002726 Unassigned 1586605 0.4837818042998084 +SRR10002726 Bacterial 697718 0.21274562536513733 +SRR10002726 Archaeal 2010 
6.128818619899817e-4 +SRR10002726 Viral 323 9.848798080734531e-5 +SRR10002726 Human 447474 0.13644213846373385 +SRR10002727 Filtered 291998 0.06405569197951433 +SRR10002727 Duplicate 403089 0.08842575916386568 +SRR10002727 Ribosomal 10924 0.0023964012739272683 +SRR10002727 Unassigned 1123673 0.2465004951187912 +SRR10002727 Bacterial 1179170 0.25867488925089865 +SRR10002727 Archaeal 1809 3.9684089202988174e-4 +SRR10002727 Viral 696 1.5268173623703577e-4 +SRR10002727 Human 1547143 0.33939724058473597 +SRR10002728 Filtered 419722 0.07103260457509261 +SRR10002728 Duplicate 536630 0.09081779509563936 +SRR10002728 Ribosomal 17363 0.0029384666832744837 +SRR10002728 Unassigned 1835525 0.31063923623897927 +SRR10002728 Bacterial 1794132 0.3036339980070619 +SRR10002728 Archaeal 1966 3.327204687736932e-4 +SRR10002728 Viral 1174 1.9868455256374153e-4 +SRR10002728 Human 1302352 0.2204064943786149 +SRR10002729 Filtered 235486 0.05299765108548118 +SRR10002729 Duplicate 447043 0.10060992557607146 +SRR10002729 Ribosomal 11223 0.002525808914892415 +SRR10002729 Unassigned 1500844 0.3377746729985558 +SRR10002729 Bacterial 1174115 0.2642421931844345 +SRR10002729 Archaeal 928 2.0885241673529014e-4 +SRR10002729 Viral 849 1.9107295453476436e-4 +SRR10002729 Human 1072841 0.24144982286929462 +SRR10002730 Filtered 327503 0.06305212216097185 +SRR10002730 Duplicate 503820 0.09699734105379443 +SRR10002730 Ribosomal 12906 0.002484712166329782 +SRR10002730 Unassigned 1616778 0.31126824475858766 +SRR10002730 Bacterial 1173157 0.22586064395745764 +SRR10002730 Archaeal 1094 2.1062103749920825e-4 +SRR10002730 Viral 1187 2.2852575092464369e-4 +SRR10002730 Human 1557718 0.2998977891144348 +SRR10002731 Filtered 411257 0.07107422476838839 +SRR10002731 Duplicate 522396 0.09028148024740495 +SRR10002731 Ribosomal 17963 0.0031044001670842334 +SRR10002731 Unassigned 2942831 0.5085857066247654 +SRR10002731 Bacterial 1247493 0.21559413670525032 +SRR10002731 Archaeal 2331 4.028478978719227e-4 +SRR10002731 Viral 
424 7.327649450780576e-5 +SRR10002731 Human 641608 0.11088392709472698 +SRR10002732 Filtered 358858 0.06816789081831179 +SRR10002732 Duplicate 470287 0.08933470305600374 +SRR10002732 Ribosomal 12791 0.0024297507411205157 +SRR10002732 Unassigned 1633198 0.3102387656083609 +SRR10002732 Bacterial 1397846 0.2655318078705612 +SRR10002732 Archaeal 2078 3.94732393092677e-4 +SRR10002732 Viral 1390 2.640413986519832e-4 +SRR10002732 Human 1387878 0.2636383081138972 +SRR10002733 Filtered 427820 0.0811492072199312 +SRR10002733 Duplicate 491323 0.09319450221803155 +SRR10002733 Ribosomal 11279 0.0021394088827862276 +SRR10002733 Unassigned 1675356 0.317782738560972 +SRR10002733 Bacterial 1440395 0.2732151660360731 +SRR10002733 Archaeal 1404 2.663117360964504e-4 +SRR10002733 Viral 884 1.676777597644317e-4 +SRR10002733 Human 1223556 0.23208498758634505 +SRR10002734 Filtered 296175 0.061768367838529445 +SRR10002734 Duplicate 492109 0.10263111244585427 +SRR10002734 Ribosomal 9584 0.001998777875797978 +SRR10002734 Unassigned 2143894 0.44711685050668104 +SRR10002734 Bacterial 1184687 0.2470707601570826 +SRR10002734 Archaeal 6058 0.0012634178184040227 +SRR10002734 Viral 903 1.8832391713747646e-4 +SRR10002734 Human 661520 0.1379623894405132 +SRR10002735 Filtered 309462 0.06397132740369996 +SRR10002735 Duplicate 435139 0.08995100993052005 +SRR10002735 Ribosomal 11888 0.002457462112230856 +SRR10002735 Unassigned 1309685 0.2707353016871693 +SRR10002735 Bacterial 1433050 0.2962370524842217 +SRR10002735 Archaeal 1327 2.743146217135217e-4 +SRR10002735 Viral 1040 2.1498659124496048e-4 +SRR10002735 Human 1335920 0.27615854516919963 +SRR10002736 Filtered 317364 0.09135021959353463 +SRR10002736 Duplicate 206794 0.059523693016931355 +SRR10002736 Ribosomal 10094 0.002905462234459922 +SRR10002736 Unassigned 1624681 0.46764902799133945 +SRR10002736 Bacterial 433816 0.12486982412368391 +SRR10002736 Archaeal 1267 3.6469394205079466e-4 +SRR10002736 Viral 351 1.0103202340949402e-4 +SRR10002736 Human 
879779 0.2532360470745904 +SRR10002737 Filtered 548466 0.08965721521321432 +SRR10002737 Duplicate 396457 0.06480844860353296 +SRR10002737 Ribosomal 13311 0.0021759365060060164 +SRR10002737 Unassigned 3257266 0.5324621740794976 +SRR10002737 Bacterial 1548417 0.2531182538367003 +SRR10002737 Archaeal 3820 6.244517656782347e-4 +SRR10002737 Viral 421 6.882046946349131e-5 +SRR10002737 Human 349208 0.057084699525907064 +SRR10002738 Filtered 458654 0.12145103393343805 +SRR10002738 Duplicate 204551 0.054164861621437264 +SRR10002738 Ribosomal 11035 0.0029220548811424055 +SRR10002738 Unassigned 1705265 0.4515521447114911 +SRR10002738 Bacterial 615187 0.1629007862406301 +SRR10002738 Archaeal 2064 5.465447462327073e-4 +SRR10002738 Viral 283 7.493806355806985e-5 +SRR10002738 Human 779413 0.2063876358020703 +SRR10002739 Filtered 705090 0.15567673976357377 +SRR10002739 Duplicate 270251 0.059668687114901045 +SRR10002739 Ribosomal 13356 0.002948869699304048 +SRR10002739 Unassigned 2160897 0.477104199357369 +SRR10002739 Bacterial 626167 0.13825133969782255 +SRR10002739 Archaeal 1005 2.218938340671285e-4 +SRR10002739 Viral 403 8.897832351149532e-5 +SRR10002739 Human 752024 0.166039292209451 +SRR10002740 Filtered 356790 0.0926827418235321 +SRR10002740 Duplicate 252479 0.06558604773918429 +SRR10002740 Ribosomal 11878 0.0030855282025278575 +SRR10002740 Unassigned 1785842 0.4639051907946417 +SRR10002740 Bacterial 639440 0.166106259793266 +SRR10002740 Archaeal 1887 4.901828353401302e-4 +SRR10002740 Viral 1034 2.686004513734471e-4 +SRR10002740 Human 800234 0.2078754483601345 +SRR10002741 Filtered 266006 0.09372236397703643 +SRR10002741 Duplicate 168546 0.059384109978247036 +SRR10002741 Ribosomal 8877 0.003127649094472126 +SRR10002741 Unassigned 1520095 0.5355777571546251 +SRR10002741 Bacterial 466105 0.16422359819521576 +SRR10002741 Archaeal 3382 0.0011915860355418194 +SRR10002741 Viral 210 7.398967104192255e-5 +SRR10002741 Human 405013 0.1426989458938199 +SRR10002742 Filtered 476284 
0.13575854177373367 +SRR10002742 Duplicate 174675 0.04978883037080173 +SRR10002742 Ribosomal 10229 0.0029156430276967562 +SRR10002742 Unassigned 1929036 0.5498465503544863 +SRR10002742 Bacterial 556236 0.15854781651715052 +SRR10002742 Archaeal 4445 0.0012669892714939955 +SRR10002742 Viral 231 6.584353694378245e-5 +SRR10002742 Human 357181 0.10180978514769333 +SRR10002743 Filtered 427623 0.0774600203892211 +SRR10002743 Duplicate 277697 0.05030228795463652 +SRR10002743 Ribosomal 9163 0.0016597941804496787 +SRR10002743 Unassigned 1982908 0.3591857643530625 +SRR10002743 Bacterial 975239 0.1766556822817379 +SRR10002743 Archaeal 1208 2.1881822219613792e-4 +SRR10002743 Viral 291 5.271200551248025e-5 +SRR10002743 Human 1846435 0.3344649206131837 +SRR10002744 Filtered 546275 0.14597634129082876 +SRR10002744 Duplicate 192270 0.05137864837304955 +SRR10002744 Ribosomal 18130 0.004844723019729487 +SRR10002744 Unassigned 2383832 0.6370107978801866 +SRR10002744 Bacterial 391045 0.10449557160783877 +SRR10002744 Archaeal 435 1.1624128591187682e-4 +SRR10002744 Viral 423 1.1303462974879055e-4 +SRR10002744 Human 209806 0.05606464191270627 +SRR10002745 Filtered 286653 0.08429168233733551 +SRR10002745 Duplicate 207228 0.06093638213240875 +SRR10002745 Ribosomal 10559 0.0031049243294154457 +SRR10002745 Unassigned 1663402 0.48913129457318977 +SRR10002745 Bacterial 483155 0.14207403299353344 +SRR10002745 Archaeal 1017 2.9905370234070537e-4 +SRR10002745 Viral 298 8.762832182648004e-5 +SRR10002745 Human 748415 0.22007500160994986 +SRR10002746 Filtered 997779 0.2510736530928532 +SRR10002746 Duplicate 230192 0.057923795101670866 +SRR10002746 Ribosomal 14234 0.003581737416926666 +SRR10002746 Unassigned 2146482 0.5401246939833907 +SRR10002746 Bacterial 304384 0.07659291568875976 +SRR10002746 Archaeal 393 9.889158387327384e-5 +SRR10002746 Viral 464 1.1675749342798743e-4 +SRR10002746 Human 280121 0.07048755563909756 +SRR10002747 Filtered 554002 0.12873983214617357 +SRR10002747 Duplicate 221848 
0.051553377572579724 +SRR10002747 Ribosomal 13585 0.003156903079241172 +SRR10002747 Unassigned 2272258 0.5280307896231422 +SRR10002747 Bacterial 610882 0.14195769354825216 +SRR10002747 Archaeal 874 2.0310145684628519e-4 +SRR10002747 Viral 324 7.529161558146042e-5 +SRR10002747 Human 629495 0.1462830109581834 +SRR10002748 Filtered 793430 0.17086512975697324 +SRR10002748 Duplicate 261688 0.05635450395856322 +SRR10002748 Ribosomal 12334 0.0026561265775462335 +SRR10002748 Unassigned 1752597 0.3774217181310034 +SRR10002748 Bacterial 721922 0.1554658838264417 +SRR10002748 Archaeal 704 1.5160638159498528e-4 +SRR10002748 Viral 601 1.2942533428776442e-4 +SRR10002748 Human 1100328 0.23695560603358942 +SRR10002749 Filtered 622582 0.138474859725093 +SRR10002749 Duplicate 252885 0.05624675127385652 +SRR10002749 Ribosomal 12074 0.002685502401805341 +SRR10002749 Unassigned 2002236 0.44533788197623975 +SRR10002749 Bacterial 722780 0.16076092645162038 +SRR10002749 Archaeal 1182 2.629007651924725e-4 +SRR10002749 Viral 529 1.1766032553876307e-4 +SRR10002749 Human 881725 0.1961135170806538 +SRR10002750 Filtered 443324 0.14529310879940902 +SRR10002750 Duplicate 164072 0.05377225448416201 +SRR10002750 Ribosomal 11771 0.0038577771193931383 +SRR10002750 Unassigned 1733779 0.5682213028871222 +SRR10002750 Bacterial 448480 0.14698291415388962 +SRR10002750 Archaeal 2498 8.186838199170895e-4 +SRR10002750 Viral 210 6.882450047341425e-5 +SRR10002750 Human 247105 0.08098513423563346 +SRR10002751 Filtered 783507 0.16034560667195485 +SRR10002751 Duplicate 257993 0.05279856351266504 +SRR10002751 Ribosomal 17286 0.003537599736736764 +SRR10002751 Unassigned 2359808 0.4829374152232621 +SRR10002751 Bacterial 751317 0.15375788623197126 +SRR10002751 Archaeal 1519 3.108650931449233e-4 +SRR10002751 Viral 775 1.5860463935965475e-4 +SRR10002751 Human 714159 0.1461534588909054 +SRR10002752 Filtered 444157 0.09893484391751305 +SRR10002752 Duplicate 266390 0.05933769606509928 +SRR10002752 Ribosomal 11689 
0.0026036950685271423 +SRR10002752 Unassigned 2021604 0.45030715761098 +SRR10002752 Bacterial 797220 0.1775787306468653 +SRR10002752 Archaeal 3386 7.542229020474723e-4 +SRR10002752 Viral 450 1.0023635733058552e-4 +SRR10002752 Human 944493 0.21038341743163713 +SRR10002753 Filtered 755186 0.09941690221352835 +SRR10002753 Duplicate 486678 0.06406900966844664 +SRR10002753 Ribosomal 22191 0.002921347160858924 +SRR10002753 Unassigned 3420182 0.45025185774957405 +SRR10002753 Bacterial 1239333 0.16315271690815075 +SRR10002753 Archaeal 4396 5.787139885149759e-4 +SRR10002753 Viral 1262 1.6613672736712912e-4 +SRR10002753 Human 1666925 0.2194433155835592 +SRR10002754 Filtered 590984 0.1206077981276918 +SRR10002754 Duplicate 271505 0.055408640894946334 +SRR10002754 Ribosomal 14306 0.0029195632369315564 +SRR10002754 Unassigned 1955740 0.3991267024323027 +SRR10002754 Bacterial 721967 0.14733876076315988 +SRR10002754 Archaeal 1463 2.985685038187381e-4 +SRR10002754 Viral 504 1.0285613528683801e-4 +SRR10002754 Human 1343579 0.27419710990586216 +SRR10002755 Filtered 406652 0.09502080797081977 +SRR10002755 Duplicate 213513 0.0498907610740231 +SRR10002755 Ribosomal 15903 0.00371599281242917 +SRR10002755 Unassigned 2239275 0.5232427721217587 +SRR10002755 Bacterial 944796 0.22076684557705026 +SRR10002755 Archaeal 1151 2.6894974074740454e-4 +SRR10002755 Viral 376 8.78584730851643e-5 +SRR10002755 Human 457944 0.10700601223008639 +SRR10002756 Filtered 301404 0.06805109137690774 +SRR10002756 Duplicate 260294 0.05876926244794635 +SRR10002756 Ribosomal 12475 0.002816609484037783 +SRR10002756 Unassigned 2211703 0.499359009673332 +SRR10002756 Bacterial 898960 0.2029674758934353 +SRR10002756 Archaeal 1060 2.393271385234509e-4 +SRR10002756 Viral 519 1.1717998574874624e-4 +SRR10002756 Human 742669 0.16768004400006864 +SRR10002757 Filtered 324496 0.0721417722239415 +SRR10002757 Duplicate 283059 0.06292952117726153 +SRR10002757 Ribosomal 16032 0.003564225421250894 +SRR10002757 Unassigned 2535315 
0.5636498361950293 +SRR10002757 Bacterial 854154 0.18989504743407784 +SRR10002757 Archaeal 5570 0.0012383193361007658 +SRR10002757 Viral 330 7.336541847634699e-5 +SRR10002757 Human 479076 0.10650791279386185 +SRR10002758 Filtered 282708 0.0765140461685735 +SRR10002758 Duplicate 198981 0.05385359247233515 +SRR10002758 Ribosomal 10677 0.0028896970405572512 +SRR10002758 Unassigned 2077707 0.5623249760274501 +SRR10002758 Bacterial 686836 0.18589003995019016 +SRR10002758 Archaeal 983 2.660459109176527e-4 +SRR10002758 Viral 369 9.986870918475468e-5 +SRR10002758 Human 436590 0.11816173372079145 +SRR10002759 Filtered 388323 0.07982557408106328 +SRR10002759 Duplicate 244837 0.050329890532585735 +SRR10002759 Ribosomal 18819 0.0038685256310636504 +SRR10002759 Unassigned 3148637 0.6472492128920431 +SRR10002759 Bacterial 799568 0.16436310652948088 +SRR10002759 Archaeal 986 2.0268697976665918e-4 +SRR10002759 Viral 330 6.783641310648837e-5 +SRR10002759 Human 263144 0.054093166940890225 +SRR10002760 Filtered 303623 0.08522198279915043 +SRR10002760 Duplicate 206180 0.05787133522008819 +SRR10002760 Ribosomal 10142 0.0028466926074407527 +SRR10002760 Unassigned 2085863 0.5854674405673626 +SRR10002760 Bacterial 645370 0.1811447454214197 +SRR10002760 Archaeal 3326 9.335535015133053e-4 +SRR10002760 Viral 375 1.0525633285252241e-4 +SRR10002760 Human 307852 0.0864089935501726 +SRR10002761 Filtered 311465 0.0713299153723737 +SRR10002761 Duplicate 277631 0.0635814481073234 +SRR10002761 Ribosomal 11971 0.002741529279125056 +SRR10002761 Unassigned 2040250 0.4672462711331464 +SRR10002761 Bacterial 791385 0.1812384218996226 +SRR10002761 Archaeal 1668 3.8199572613654607e-4 +SRR10002761 Viral 356 8.15290638516849e-5 +SRR10002761 Human 931815 0.21339888941842067 +SRR10002762 Filtered 385868 0.0817836806513734 +SRR10002762 Duplicate 313857 0.06652114365067355 +SRR10002762 Ribosomal 17627 0.0037359950523022352 +SRR10002762 Unassigned 2587396 0.5483915955265555 +SRR10002762 Bacterial 851926 
0.18056341526792047 +SRR10002762 Archaeal 2396 5.078257301478502e-4 +SRR10002762 Viral 401 8.499086719085473e-5 +SRR10002762 Human 558683 0.11841135325383614 +SRR10002763 Filtered 276976 0.08285659242631108 +SRR10002763 Duplicate 161871 0.04842325498468965 +SRR10002763 Ribosomal 11149 0.003335192034547911 +SRR10002763 Unassigned 1738032 0.519927390993755 +SRR10002763 Bacterial 622256 0.1861461345994838 +SRR10002763 Archaeal 932 2.7880518218662237e-4 +SRR10002763 Viral 408 1.2205205400444414e-4 +SRR10002763 Human 531212 0.1589105777250215 +SRR10002764 Filtered 452427 0.08035837982734281 +SRR10002764 Duplicate 314471 0.05585515467176875 +SRR10002764 Ribosomal 21213 0.0037677731684391583 +SRR10002764 Unassigned 3159574 0.5611916344174791 +SRR10002764 Bacterial 1137818 0.20209494795489116 +SRR10002764 Archaeal 2729 4.847147021482328e-4 +SRR10002764 Viral 552 9.804416107945201e-5 +SRR10002764 Human 541332 0.09614935109685128 +SRR10002765 Filtered 252457 0.05315553410724129 +SRR10002765 Duplicate 451875 0.09514355702044174 +SRR10002765 Ribosomal 10813 0.0022767076781455855 +SRR10002765 Unassigned 1303511 0.27445792122881996 +SRR10002765 Bacterial 1000950 0.21075284846386977 +SRR10002765 Archaeal 1012 2.1307945716113314e-4 +SRR10002765 Viral 780 1.6423120215976664e-4 +SRR10002765 Human 1728004 0.36383612084216077 +SRR10002766 Filtered 263413 0.057221898786972994 +SRR10002766 Duplicate 419430 0.09111388203399257 +SRR10002766 Ribosomal 9147 0.001987026867331688 +SRR10002766 Unassigned 1125780 0.2445561502902228 +SRR10002766 Bacterial 1086177 0.23595308644120816 +SRR10002766 Archaeal 921 2.000712523026659e-4 +SRR10002766 Viral 782 1.6987591672169893e-4 +SRR10002766 Human 1697710 0.36879800841124744 +SRR10002767 Filtered 378056 0.07128723027531274 +SRR10002767 Duplicate 520504 0.09814759852302671 +SRR10002767 Ribosomal 24337 0.004589048509242774 +SRR10002767 Unassigned 2887437 0.5444626889256041 +SRR10002767 Bacterial 997924 0.18817116507940937 +SRR10002767 Archaeal 773 
1.4575890609543757e-4 +SRR10002767 Viral 954 1.798887405110575e-4 +SRR10002767 Human 493293 0.09301662104079779 +SRR10002768 Filtered 296444 0.06300947148553813 +SRR10002768 Duplicate 403852 0.08583915032308816 +SRR10002768 Ribosomal 8886 0.001888728271175979 +SRR10002768 Unassigned 1107756 0.23545465617429864 +SRR10002768 Bacterial 633469 0.1346444754910619 +SRR10002768 Archaeal 426 9.054673008338589e-5 +SRR10002768 Viral 404 8.587060787250681e-5 +SRR10002768 Human 2253516 0.4789871009168813 +SRR10002769 Filtered 186340 0.057426188671872234 +SRR10002769 Duplicate 274868 0.08470871325458934 +SRR10002769 Ribosomal 3884 0.001196969608251324 +SRR10002769 Unassigned 450698 0.138895934217213 +SRR10002769 Bacterial 305962 0.09429125007203698 +SRR10002769 Archaeal 217 6.687497553824339e-5 +SRR10002769 Viral 196 6.040320371196177e-5 +SRR10002769 Human 2022696 0.6233536659967869 +SRR10002770 Filtered 241902 0.047577295256077415 +SRR10002770 Duplicate 533023 0.10483498544567697 +SRR10002770 Ribosomal 7404 0.0014562190228935568 +SRR10002770 Unassigned 727159 0.14301766186767367 +SRR10002770 Bacterial 533484 0.10492565494453623 +SRR10002770 Archaeal 296 5.821729210919676e-5 +SRR10002770 Viral 268 5.27102509637322e-5 +SRR10002770 Human 3040864 0.5980772559200692 +SRR10002771 Filtered 336581 0.059911722231725494 +SRR10002771 Duplicate 525437 0.09352826093650904 +SRR10002771 Ribosomal 12235 0.002177841059076898 +SRR10002771 Unassigned 1291404 0.22987107928534062 +SRR10002771 Bacterial 1287080 0.22910140337692636 +SRR10002771 Archaeal 892 1.5877680626862222e-4 +SRR10002771 Viral 2336 4.1581011148374614e-4 +SRR10002771 Human 2161984 0.38483510619266925 +SRR10002772 Filtered 425667 0.07019251587863949 +SRR10002772 Duplicate 591014 0.0974582468913452 +SRR10002772 Ribosomal 12275 0.0020241482952878653 +SRR10002772 Unassigned 1693952 0.279332794549855 +SRR10002772 Bacterial 1379812 0.22753108819696455 +SRR10002772 Archaeal 1463 2.4124879478665148e-4 +SRR10002772 Viral 901 
1.4857495837510115e-4 +SRR10002772 Human 1959195 0.32307138243474615 +SRR10002773 Filtered 323993 0.057220803249081534 +SRR10002773 Duplicate 531750 0.09391302320636281 +SRR10002773 Ribosomal 14936 0.002637865377734339 +SRR10002773 Unassigned 2128249 0.3758726802556059 +SRR10002773 Bacterial 1146796 0.20253705568587502 +SRR10002773 Archaeal 1233 2.1776165042490896e-4 +SRR10002773 Viral 1406 2.483153937529781e-4 +SRR10002773 Human 1513791 0.2673524951811625 +SRR10002774 Filtered 337651 0.05724844752300528 +SRR10002774 Duplicate 564630 0.09573254906668267 +SRR10002774 Ribosomal 14818 0.0025123796328039667 +SRR10002774 Unassigned 2982699 0.5057141461995384 +SRR10002774 Bacterial 1078791 0.18290812096451778 +SRR10002774 Archaeal 1279 2.1685339116994694e-4 +SRR10002774 Viral 939 1.592066726415795e-4 +SRR10002774 Human 917187 0.15550829654964043 diff --git a/data/2024-04-12_leung/2/adapters.fasta b/data/2024-04-12_leung/2/adapters.fasta new file mode 100644 index 0000000..ebb5304 --- /dev/null +++ b/data/2024-04-12_leung/2/adapters.fasta @@ -0,0 +1,41 @@ +>0 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>1 +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>2 +CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT +>3 +GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +>4 +GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +>5 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC +>6 +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>7 +GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG +>8 +unspecified +>9 +TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG +>10 +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>11 +heifigepsna +>12 +AGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>13 +GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG +>14 +TGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>15 +ACACTCTTTCCCTACACGACGCTCTTCCGATCT +>16 +CAAGCAGAAGACGGCATACGAGAT +>17 +CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +>18 +CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC +T +>19 +CTGTCTCTTATACACATCTGACGCTGCCGACGA diff --git a/data/2024-04-12_leung/2/bracken_counts.tsv b/data/2024-04-12_leung/2/bracken_counts.tsv new file mode 100644 index 
0000000..ab634a2 --- /dev/null +++ b/data/2024-04-12_leung/2/bracken_counts.tsv @@ -0,0 +1,401 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads sample +Eukaryota 2759 D 1107392 1302 1108694 0.51895 SRR10002824 +Bacteria 2 D 1022579 424 1023003 0.47884 SRR10002824 +Archaea 2157 D 3633 1 3634 0.0017 SRR10002824 +Viruses 10239 D 1022 48 1070 5e-4 SRR10002824 +Bacteria 2 D 1029761 189 1029950 0.53563 SRR10002789 +Eukaryota 2759 D 891036 672 891708 0.46374 SRR10002789 +Archaea 2157 D 812 0 812 4.2e-4 SRR10002789 +Viruses 10239 D 373 14 387 2e-4 SRR10002789 +Bacteria 2 D 1187268 451 1187719 0.52052 SRR10002826 +Eukaryota 2759 D 1088894 1492 1090386 0.47786 SRR10002826 +Archaea 2157 D 2949 0 2949 0.00129 SRR10002826 +Viruses 10239 D 712 22 734 3.2e-4 SRR10002826 +Eukaryota 2759 D 569699 799 570498 0.5073 SRR10002857 +Bacteria 2 D 552656 140 552796 0.49156 SRR10002857 +Archaea 2157 D 908 0 908 8.1e-4 SRR10002857 +Viruses 10239 D 368 0 368 3.3e-4 SRR10002857 +Bacteria 2 D 1308914 519 1309433 0.63179 SRR10002783 +Eukaryota 2759 D 759698 1140 760838 0.3671 SRR10002783 +Archaea 2157 D 1763 0 1763 8.5e-4 SRR10002783 +Viruses 10239 D 516 13 529 2.6e-4 SRR10002783 +Eukaryota 2759 D 1685363 816 1686179 0.69973 SRR10002836 +Bacteria 2 D 722099 217 722316 0.29975 SRR10002836 +Archaea 2157 D 565 0 565 2.3e-4 SRR10002836 +Viruses 10239 D 679 25 704 2.9e-4 SRR10002836 +Bacteria 2 D 542655 135 542790 0.62234 SRR10002803 +Eukaryota 2759 D 327837 748 328585 0.37674 SRR10002803 +Archaea 2157 D 630 0 630 7.2e-4 SRR10002803 +Viruses 10239 D 166 6 172 2e-4 SRR10002803 +Eukaryota 2759 D 1642719 909 1643628 0.64572 SRR10002784 +Bacteria 2 D 900069 176 900245 0.35367 SRR10002784 +Archaea 2157 D 1081 0 1081 4.2e-4 SRR10002784 +Viruses 10239 D 446 3 449 1.8e-4 SRR10002784 +Eukaryota 2759 D 860253 728 860981 0.58358 SRR10002830 +Bacteria 2 D 612998 220 613218 0.41564 SRR10002830 +Archaea 2157 D 604 0 604 4.1e-4 SRR10002830 +Viruses 10239 D 538 6 
544 3.7e-4 SRR10002830 +Eukaryota 2759 D 321847 420 322267 0.60782 SRR10002845 +Bacteria 2 D 206610 75 206685 0.38983 SRR10002845 +Archaea 2157 D 1107 0 1107 0.00209 SRR10002845 +Viruses 10239 D 137 1 138 2.6e-4 SRR10002845 +Eukaryota 2759 D 782195 977 783172 0.5379 SRR10002853 +Bacteria 2 D 669623 269 669892 0.4601 SRR10002853 +Archaea 2157 D 2381 0 2381 0.00164 SRR10002853 +Viruses 10239 D 512 23 535 3.7e-4 SRR10002853 +Eukaryota 2759 D 281916 459 282375 0.5755 SRR10002850 +Bacteria 2 D 207666 79 207745 0.4234 SRR10002850 +Archaea 2157 D 431 0 431 8.8e-4 SRR10002850 +Viruses 10239 D 109 1 110 2.2e-4 SRR10002850 +Eukaryota 2759 D 1054730 940 1055670 0.7078 SRR10002844 +Bacteria 2 D 434696 151 434847 0.29155 SRR10002844 +Archaea 2157 D 415 0 415 2.8e-4 SRR10002844 +Viruses 10239 D 532 18 550 3.7e-4 SRR10002844 +Eukaryota 2759 D 1410733 1151 1411884 0.63756 SRR10002829 +Bacteria 2 D 800624 325 800949 0.36168 SRR10002829 +Archaea 2157 D 899 0 899 4.1e-4 SRR10002829 +Viruses 10239 D 776 12 788 3.6e-4 SRR10002829 +Bacteria 2 D 103143 40 103183 0.90275 SRR10002817 +Eukaryota 2759 D 11055 17 11072 0.09687 SRR10002817 +Archaea 2157 D 2 0 2 2e-5 SRR10002817 +Viruses 10239 D 15 25 40 3.5e-4 SRR10002817 +Eukaryota 2759 D 1305557 1492 1307049 0.51599 SRR10002825 +Bacteria 2 D 1222579 480 1223059 0.48283 SRR10002825 +Archaea 2157 D 1853 0 1853 7.3e-4 SRR10002825 +Viruses 10239 D 1134 14 1148 4.5e-4 SRR10002825 +Bacteria 2 D 263328 197 263525 0.89614 SRR10002812 +Eukaryota 2759 D 30271 35 30306 0.10306 SRR10002812 +Archaea 2157 D 20 0 20 7e-5 SRR10002812 +Viruses 10239 D 63 150 213 7.2e-4 SRR10002812 +Bacteria 2 D 804873 275 805148 0.56094 SRR10002869 +Eukaryota 2759 D 625378 875 626253 0.43631 SRR10002869 +Archaea 2157 D 3390 1 3391 0.00236 SRR10002869 +Viruses 10239 D 548 9 557 3.9e-4 SRR10002869 +Bacteria 2 D 1096322 470 1096792 0.62977 SRR10002827 +Eukaryota 2759 D 640314 1534 641848 0.36855 SRR10002827 +Archaea 2157 D 2234 1 2235 0.00128 SRR10002827 +Viruses 10239 D 680 12 
692 4e-4 SRR10002827 +Eukaryota 2759 D 3205656 2118 3207774 0.79493 SRR10002823 +Bacteria 2 D 825507 317 825824 0.20465 SRR10002823 +Archaea 2157 D 643 0 643 1.6e-4 SRR10002823 +Viruses 10239 D 1049 13 1062 2.6e-4 SRR10002823 +Bacteria 2 D 608912 128 609040 0.51621 SRR10002808 +Eukaryota 2759 D 569102 577 569679 0.48285 SRR10002808 +Archaea 2157 D 690 0 690 5.8e-4 SRR10002808 +Viruses 10239 D 414 4 418 3.5e-4 SRR10002808 +Eukaryota 2759 D 923198 1129 924327 0.50134 SRR10002861 +Bacteria 2 D 917090 393 917483 0.49763 SRR10002861 +Archaea 2157 D 1213 0 1213 6.6e-4 SRR10002861 +Viruses 10239 D 685 5 690 3.7e-4 SRR10002861 +Bacteria 2 D 1057109 381 1057490 0.56613 SRR10002865 +Eukaryota 2759 D 806173 1081 807254 0.43216 SRR10002865 +Archaea 2157 D 2412 0 2412 0.00129 SRR10002865 +Viruses 10239 D 774 11 785 4.2e-4 SRR10002865 +Eukaryota 2759 D 1499683 778 1500461 0.63711 SRR10002847 +Bacteria 2 D 852610 268 852878 0.36214 SRR10002847 +Archaea 2157 D 798 0 798 3.4e-4 SRR10002847 +Viruses 10239 D 953 4 957 4.1e-4 SRR10002847 +Bacteria 2 D 1394599 560 1395159 0.75942 SRR10002828 +Eukaryota 2759 D 435562 1148 436710 0.23771 SRR10002828 +Archaea 2157 D 4702 2 4704 0.00256 SRR10002828 +Viruses 10239 D 532 20 552 3e-4 SRR10002828 +Eukaryota 2759 D 1028832 982 1029814 0.59746 SRR10002866 +Bacteria 2 D 691960 352 692312 0.40165 SRR10002866 +Archaea 2157 D 646 0 646 3.7e-4 SRR10002866 +Viruses 10239 D 861 14 875 5.1e-4 SRR10002866 +Eukaryota 2759 D 1179559 647 1180206 0.62655 SRR10002795 +Bacteria 2 D 702077 178 702255 0.37282 SRR10002795 +Archaea 2157 D 552 0 552 2.9e-4 SRR10002795 +Viruses 10239 D 621 8 629 3.3e-4 SRR10002795 +Bacteria 2 D 942221 389 942610 0.51156 SRR10002860 +Eukaryota 2759 D 896760 1135 897895 0.4873 SRR10002860 +Archaea 2157 D 1372 0 1372 7.4e-4 SRR10002860 +Viruses 10239 D 707 22 729 4e-4 SRR10002860 +Bacteria 2 D 1329143 292 1329435 0.55402 SRR10002841 +Eukaryota 2759 D 1066073 1152 1067225 0.44475 SRR10002841 +Archaea 2157 D 1276 0 1276 5.3e-4 
SRR10002841 +Viruses 10239 D 1671 7 1678 7e-4 SRR10002841 +Eukaryota 2759 D 1372193 502 1372695 0.75965 SRR10002859 +Bacteria 2 D 433544 109 433653 0.23999 SRR10002859 +Archaea 2157 D 287 0 287 1.6e-4 SRR10002859 +Viruses 10239 D 345 18 363 2e-4 SRR10002859 +Eukaryota 2759 D 802138 732 802870 0.59809 SRR10002799 +Bacteria 2 D 538298 166 538464 0.40112 SRR10002799 +Archaea 2157 D 655 0 655 4.9e-4 SRR10002799 +Viruses 10239 D 389 12 401 3e-4 SRR10002799 +Bacteria 2 D 43529 32 43561 0.57227 SRR10002815 +Eukaryota 2759 D 32496 12 32508 0.42707 SRR10002815 +Archaea 2157 D 6 0 6 8e-5 SRR10002815 +Viruses 10239 D 27 15 42 5.5e-4 SRR10002815 +Eukaryota 2759 D 554906 599 555505 0.66454 SRR10002843 +Bacteria 2 D 279781 87 279868 0.3348 SRR10002843 +Archaea 2157 D 317 0 317 3.8e-4 SRR10002843 +Viruses 10239 D 232 3 235 2.8e-4 SRR10002843 +Eukaryota 2759 D 1513576 882 1514458 0.68234 SRR10002838 +Bacteria 2 D 703441 216 703657 0.31703 SRR10002838 +Archaea 2157 D 513 0 513 2.3e-4 SRR10002838 +Viruses 10239 D 872 16 888 4e-4 SRR10002838 +Bacteria 2 D 1500019 577 1500596 0.66544 SRR10002780 +Eukaryota 2759 D 747265 1466 748731 0.33203 SRR10002780 +Archaea 2157 D 4702 2 4704 0.00209 SRR10002780 +Viruses 10239 D 976 32 1008 4.5e-4 SRR10002780 +Bacteria 2 D 927058 169 927227 0.70168 SRR10002800 +Eukaryota 2759 D 391974 726 392700 0.29718 SRR10002800 +Archaea 2157 D 1206 0 1206 9.1e-4 SRR10002800 +Viruses 10239 D 298 2 300 2.3e-4 SRR10002800 +Bacteria 2 D 491683 1321 493004 0.72346 SRR10002804 +Eukaryota 2759 D 186633 768 187401 0.275 SRR10002804 +Archaea 2157 D 812 0 812 0.00119 SRR10002804 +Viruses 10239 D 228 5 233 3.4e-4 SRR10002804 +Bacteria 2 D 899956 308 900264 0.57387 SRR10002868 +Eukaryota 2759 D 665392 897 666289 0.42473 SRR10002868 +Archaea 2157 D 1570 0 1570 0.001 SRR10002868 +Viruses 10239 D 612 12 624 4e-4 SRR10002868 +Bacteria 2 D 1172617 322 1172939 0.50964 SRR10002781 +Eukaryota 2759 D 1125450 1496 1126946 0.48966 SRR10002781 +Archaea 2157 D 1106 0 1106 4.8e-4 
SRR10002781 +Viruses 10239 D 500 13 513 2.2e-4 SRR10002781 +Bacteria 2 D 1869057 410 1869467 0.54041 SRR10002818 +Eukaryota 2759 D 1585255 1344 1586599 0.45864 SRR10002818 +Archaea 2157 D 1981 0 1981 5.7e-4 SRR10002818 +Viruses 10239 D 1274 16 1290 3.7e-4 SRR10002818 +Bacteria 2 D 743861 323 744184 0.52455 SRR10002867 +Eukaryota 2759 D 672066 842 672908 0.47431 SRR10002867 +Archaea 2157 D 969 0 969 6.8e-4 SRR10002867 +Viruses 10239 D 615 21 636 4.5e-4 SRR10002867 +Eukaryota 2759 D 1280920 1300 1282220 0.58161 SRR10002833 +Bacteria 2 D 919269 457 919726 0.41718 SRR10002833 +Archaea 2157 D 1655 0 1655 7.5e-4 SRR10002833 +Viruses 10239 D 973 29 1002 4.5e-4 SRR10002833 +Bacteria 2 D 637233 143 637376 0.51723 SRR10002805 +Eukaryota 2759 D 592745 775 593520 0.48164 SRR10002805 +Archaea 2157 D 907 0 907 7.4e-4 SRR10002805 +Viruses 10239 D 478 1 479 3.9e-4 SRR10002805 +Eukaryota 2759 D 886584 962 887546 0.53659 SRR10002786 +Bacteria 2 D 764141 280 764421 0.46215 SRR10002786 +Archaea 2157 D 1426 0 1426 8.6e-4 SRR10002786 +Viruses 10239 D 645 19 664 4e-4 SRR10002786 +Eukaryota 2759 D 1790474 978 1791452 0.61297 SRR10002791 +Bacteria 2 D 1128757 272 1129029 0.38631 SRR10002791 +Archaea 2157 D 900 0 900 3.1e-4 SRR10002791 +Viruses 10239 D 1186 14 1200 4.1e-4 SRR10002791 +Bacteria 2 D 581929 261 582190 0.52145 SRR10002855 +Eukaryota 2759 D 531845 728 532573 0.47701 SRR10002855 +Archaea 2157 D 1454 0 1454 0.0013 SRR10002855 +Viruses 10239 D 245 24 269 2.4e-4 SRR10002855 +Eukaryota 2759 D 1317723 918 1318641 0.51452 SRR10002797 +Bacteria 2 D 1238637 279 1238916 0.48341 SRR10002797 +Archaea 2157 D 4462 1 4463 0.00174 SRR10002797 +Viruses 10239 D 823 3 826 3.2e-4 SRR10002797 +Bacteria 2 D 781718 265 781983 0.53047 SRR10002778 +Eukaryota 2759 D 689969 996 690965 0.46872 SRR10002778 +Archaea 2157 D 799 0 799 5.4e-4 SRR10002778 +Viruses 10239 D 382 7 389 2.6e-4 SRR10002778 +Eukaryota 2759 D 1728921 1091 1730012 0.57707 SRR10002870 +Bacteria 2 D 1256778 391 1257169 0.41935 SRR10002870 
+Archaea 2157 D 9398 2 9400 0.00314 SRR10002870 +Viruses 10239 D 1315 11 1326 4.4e-4 SRR10002870 +Eukaryota 2759 D 661193 668 661861 0.55717 SRR10002802 +Bacteria 2 D 524891 129 525020 0.44198 SRR10002802 +Archaea 2157 D 737 0 737 6.2e-4 SRR10002802 +Viruses 10239 D 263 6 269 2.3e-4 SRR10002802 +Eukaryota 2759 D 1309775 1398 1311173 0.51975 SRR10002776 +Bacteria 2 D 1208540 419 1208959 0.47923 SRR10002776 +Archaea 2157 D 1609 0 1609 6.4e-4 SRR10002776 +Viruses 10239 D 949 31 980 3.9e-4 SRR10002776 +Bacteria 2 D 1307315 380 1307695 0.69098 SRR10002779 +Eukaryota 2759 D 580118 1111 581229 0.30712 SRR10002779 +Archaea 2157 D 2976 1 2977 0.00157 SRR10002779 +Viruses 10239 D 563 51 614 3.2e-4 SRR10002779 +Bacteria 2 D 534341 238 534579 0.65873 SRR10002872 +Eukaryota 2759 D 274744 833 275577 0.33958 SRR10002872 +Archaea 2157 D 1097 0 1097 0.00135 SRR10002872 +Viruses 10239 D 269 9 278 3.4e-4 SRR10002872 +Bacteria 2 D 630870 158 631028 0.64257 SRR10002801 +Eukaryota 2759 D 348552 923 349475 0.35586 SRR10002801 +Archaea 2157 D 1031 0 1031 0.00105 SRR10002801 +Viruses 10239 D 507 1 508 5.2e-4 SRR10002801 +Eukaryota 2759 D 2279389 772 2280161 0.77699 SRR10002794 +Bacteria 2 D 653129 129 653258 0.22261 SRR10002794 +Archaea 2157 D 498 0 498 1.7e-4 SRR10002794 +Viruses 10239 D 682 6 688 2.3e-4 SRR10002794 +Eukaryota 2759 D 2090547 1019 2091566 0.6574 SRR10002821 +Bacteria 2 D 1087608 186 1087794 0.3419 SRR10002821 +Archaea 2157 D 1447 0 1447 4.5e-4 SRR10002821 +Viruses 10239 D 763 2 765 2.4e-4 SRR10002821 +Eukaryota 2759 D 2909272 689 2909961 0.8735 SRR10002792 +Bacteria 2 D 420541 100 420641 0.12627 SRR10002792 +Archaea 2157 D 380 0 380 1.1e-4 SRR10002792 +Viruses 10239 D 378 1 379 1.1e-4 SRR10002792 +Bacteria 2 D 1604652 770 1605422 0.56517 SRR10002862 +Eukaryota 2759 D 1230588 1715 1232303 0.43382 SRR10002862 +Archaea 2157 D 1520 0 1520 5.4e-4 SRR10002862 +Viruses 10239 D 1302 37 1339 4.7e-4 SRR10002862 +Bacteria 2 D 693949 213 694162 0.58435 SRR10002777 +Eukaryota 2759 D 
491570 701 492271 0.4144 SRR10002777 +Archaea 2157 D 1074 0 1074 9e-4 SRR10002777 +Viruses 10239 D 406 8 414 3.5e-4 SRR10002777 +Eukaryota 2759 D 1713749 1403 1715152 0.70909 SRR10002863 +Bacteria 2 D 699264 266 699530 0.28921 SRR10002863 +Archaea 2157 D 3743 0 3743 0.00155 SRR10002863 +Viruses 10239 D 351 14 365 1.5e-4 SRR10002863 +Bacteria 2 D 1488951 576 1489527 0.71249 SRR10002864 +Eukaryota 2759 D 593390 1314 594704 0.28447 SRR10002864 +Archaea 2157 D 5690 3 5693 0.00272 SRR10002864 +Viruses 10239 D 638 24 662 3.2e-4 SRR10002864 +Bacteria 2 D 319653 195 319848 0.91237 SRR10002813 +Eukaryota 2759 D 30363 25 30388 0.08668 SRR10002813 +Archaea 2157 D 19 0 19 5e-5 SRR10002813 +Viruses 10239 D 77 237 314 9e-4 SRR10002813 +Bacteria 2 D 1062182 433 1062615 0.53366 SRR10002822 +Eukaryota 2759 D 924735 1185 925920 0.46501 SRR10002822 +Archaea 2157 D 1742 0 1742 8.7e-4 SRR10002822 +Viruses 10239 D 888 18 906 4.6e-4 SRR10002822 +Eukaryota 2759 D 1045798 807 1046605 0.71666 SRR10002849 +Bacteria 2 D 412675 100 412775 0.28265 SRR10002849 +Archaea 2157 D 617 0 617 4.2e-4 SRR10002849 +Viruses 10239 D 366 35 401 2.7e-4 SRR10002849 +Bacteria 2 D 39019 25 39044 0.71601 SRR10002814 +Eukaryota 2759 D 15440 8 15448 0.28329 SRR10002814 +Archaea 2157 D 10 0 10 1.8e-4 SRR10002814 +Viruses 10239 D 20 6 26 4.8e-4 SRR10002814 +Eukaryota 2759 D 971514 680 972194 0.53659 SRR10002798 +Bacteria 2 D 837326 235 837561 0.46228 SRR10002798 +Archaea 2157 D 1233 0 1233 6.8e-4 SRR10002798 +Viruses 10239 D 797 6 803 4.4e-4 SRR10002798 +Eukaryota 2759 D 1493477 613 1494090 0.67283 SRR10002858 +Bacteria 2 D 724720 168 724888 0.32644 SRR10002858 +Archaea 2157 D 1106 0 1106 5e-4 SRR10002858 +Viruses 10239 D 530 3 533 2.4e-4 SRR10002858 +Eukaryota 2759 D 1503359 1040 1504399 0.55114 SRR10002810 +Bacteria 2 D 1222448 312 1222760 0.44796 SRR10002810 +Archaea 2157 D 1231 0 1231 4.5e-4 SRR10002810 +Viruses 10239 D 1213 6 1219 4.5e-4 SRR10002810 +Bacteria 2 D 878429 192 878621 0.63358 SRR10002806 +Eukaryota 
2759 D 505211 856 506067 0.36493 SRR10002806 +Archaea 2157 D 1735 0 1735 0.00125 SRR10002806 +Viruses 10239 D 335 6 341 2.5e-4 SRR10002806 +Bacteria 2 D 126632 83 126715 0.82559 SRR10002816 +Eukaryota 2759 D 26619 35 26654 0.17366 SRR10002816 +Archaea 2157 D 12 0 12 8e-5 SRR10002816 +Viruses 10239 D 35 68 103 6.7e-4 SRR10002816 +Bacteria 2 D 511519 256 511775 0.83371 SRR10002831 +Eukaryota 2759 D 99618 631 100249 0.16331 SRR10002831 +Archaea 2157 D 1245 1 1246 0.00203 SRR10002831 +Viruses 10239 D 578 3 581 9.5e-4 SRR10002831 +Bacteria 2 D 1721905 295 1722200 0.57406 SRR10002788 +Eukaryota 2759 D 1274622 916 1275538 0.42517 SRR10002788 +Archaea 2157 D 1175 0 1175 3.9e-4 SRR10002788 +Viruses 10239 D 1108 12 1120 3.7e-4 SRR10002788 +Bacteria 2 D 376012 223 376235 0.71512 SRR10002871 +Eukaryota 2759 D 148322 565 148887 0.28299 SRR10002871 +Archaea 2157 D 832 0 832 0.00158 SRR10002871 +Viruses 10239 D 144 12 156 3e-4 SRR10002871 +Bacteria 2 D 1171812 269 1172081 0.68083 SRR10002807 +Eukaryota 2759 D 546962 864 547826 0.31822 SRR10002807 +Archaea 2157 D 1086 0 1086 6.3e-4 SRR10002807 +Viruses 10239 D 552 4 556 3.2e-4 SRR10002807 +Bacteria 2 D 425749 174 425923 0.59488 SRR10002840 +Eukaryota 2759 D 288328 536 288864 0.40345 SRR10002840 +Archaea 2157 D 460 0 460 6.4e-4 SRR10002840 +Viruses 10239 D 717 12 729 0.00102 SRR10002840 +Bacteria 2 D 799655 235 799890 0.65885 SRR10002854 +Eukaryota 2759 D 412072 583 412655 0.3399 SRR10002854 +Archaea 2157 D 1221 0 1221 0.00101 SRR10002854 +Viruses 10239 D 283 15 298 2.5e-4 SRR10002854 +Eukaryota 2759 D 3187943 666 3188609 0.97483 SRR10002839 +Bacteria 2 D 82187 21 82208 0.02513 SRR10002839 +Archaea 2157 D 66 0 66 2e-5 SRR10002839 +Viruses 10239 D 57 2 59 2e-5 SRR10002839 +Eukaryota 2759 D 731744 794 732538 0.53145 SRR10002809 +Bacteria 2 D 643705 139 643844 0.4671 SRR10002809 +Archaea 2157 D 1580 0 1580 0.00115 SRR10002809 +Viruses 10239 D 415 2 417 3e-4 SRR10002809 +Eukaryota 2759 D 1359821 664 1360485 0.6305 SRR10002793 +Bacteria 
2 D 795729 152 795881 0.36884 SRR10002793 +Archaea 2157 D 671 0 671 3.1e-4 SRR10002793 +Viruses 10239 D 734 4 738 3.4e-4 SRR10002793 +Eukaryota 2759 D 1298227 1021 1299248 0.62275 SRR10002848 +Bacteria 2 D 785283 233 785516 0.37651 SRR10002848 +Archaea 2157 D 802 0 802 3.8e-4 SRR10002848 +Viruses 10239 D 687 40 727 3.5e-4 SRR10002848 +Eukaryota 2759 D 1323078 921 1323999 0.53793 SRR10002787 +Bacteria 2 D 1134298 271 1134569 0.46097 SRR10002787 +Archaea 2157 D 1737 0 1737 7.1e-4 SRR10002787 +Viruses 10239 D 951 11 962 3.9e-4 SRR10002787 +Eukaryota 2759 D 2780491 771 2781262 0.9145 SRR10002790 +Bacteria 2 D 259346 47 259393 0.08529 SRR10002790 +Archaea 2157 D 454 0 454 1.5e-4 SRR10002790 +Viruses 10239 D 178 0 178 6e-5 SRR10002790 +Bacteria 2 D 529249 319 529568 0.8568 SRR10002837 +Eukaryota 2759 D 86063 519 86582 0.14008 SRR10002837 +Archaea 2157 D 1540 1 1541 0.00249 SRR10002837 +Viruses 10239 D 381 2 383 6.2e-4 SRR10002837 +Bacteria 2 D 989530 301 989831 0.5572 SRR10002782 +Eukaryota 2759 D 783907 1011 784918 0.44185 SRR10002782 +Archaea 2157 D 1101 0 1101 6.2e-4 SRR10002782 +Viruses 10239 D 575 9 584 3.3e-4 SRR10002782 +Bacteria 2 D 1107590 472 1108062 0.58141 SRR10002832 +Eukaryota 2759 D 794202 1083 795285 0.41729 SRR10002832 +Archaea 2157 D 1723 0 1723 9e-4 SRR10002832 +Viruses 10239 D 724 27 751 3.9e-4 SRR10002832 +Bacteria 2 D 958263 192 958455 0.68471 SRR10002796 +Eukaryota 2759 D 435103 875 435978 0.31146 SRR10002796 +Archaea 2157 D 4932 2 4934 0.00352 SRR10002796 +Viruses 10239 D 411 9 420 3e-4 SRR10002796 +Bacteria 2 D 530410 440 530850 0.93557 SRR10002834 +Eukaryota 2759 D 34279 388 34667 0.0611 SRR10002834 +Archaea 2157 D 1686 3 1689 0.00298 SRR10002834 +Viruses 10239 D 199 0 199 3.5e-4 SRR10002834 +Eukaryota 2759 D 2127377 686 2128063 0.89153 SRR10002873 +Bacteria 2 D 258309 108 258417 0.10826 SRR10002873 +Archaea 2157 D 197 0 197 8e-5 SRR10002873 +Viruses 10239 D 255 37 292 1.2e-4 SRR10002873 +Bacteria 2 D 1141371 273 1141644 0.55115 SRR10002775 
+Eukaryota 2759 D 922188 1334 923522 0.44585 SRR10002775 +Archaea 2157 D 5553 2 5555 0.00268 SRR10002775 +Viruses 10239 D 650 14 664 3.2e-4 SRR10002775 +Bacteria 2 D 382375 133 382508 0.71231 SRR10002856 +Eukaryota 2759 D 153092 491 153583 0.286 SRR10002856 +Archaea 2157 D 635 0 635 0.00118 SRR10002856 +Viruses 10239 D 267 1 268 5e-4 SRR10002856 +Bacteria 2 D 986046 359 986405 0.52105 SRR10002785 +Eukaryota 2759 D 903496 1035 904531 0.4778 SRR10002785 +Archaea 2157 D 1552 0 1552 8.2e-4 SRR10002785 +Viruses 10239 D 595 12 607 3.2e-4 SRR10002785 +Bacteria 2 D 1660330 304 1660634 0.59456 SRR10002819 +Eukaryota 2759 D 1127566 1455 1129021 0.40423 SRR10002819 +Archaea 2157 D 2349 0 2349 8.4e-4 SRR10002819 +Viruses 10239 D 1021 19 1040 3.7e-4 SRR10002819 +Eukaryota 2759 D 317229 422 317651 0.55785 SRR10002842 +Bacteria 2 D 250813 84 250897 0.44062 SRR10002842 +Archaea 2157 D 679 0 679 0.00119 SRR10002842 +Viruses 10239 D 192 3 195 3.4e-4 SRR10002842 +Bacteria 2 D 2116353 409 2116762 0.57843 SRR10002820 +Eukaryota 2759 D 1538250 1260 1539510 0.42069 SRR10002820 +Archaea 2157 D 1926 0 1926 5.3e-4 SRR10002820 +Viruses 10239 D 1307 13 1320 3.6e-4 SRR10002820 +Eukaryota 2759 D 863413 955 864368 0.55553 SRR10002852 +Bacteria 2 D 689114 235 689349 0.44305 SRR10002852 +Archaea 2157 D 1743 0 1743 0.00112 SRR10002852 +Viruses 10239 D 450 15 465 3e-4 SRR10002852 +Bacteria 2 D 560044 127 560171 0.56145 SRR10002851 +Eukaryota 2759 D 435241 965 436206 0.4372 SRR10002851 +Archaea 2157 D 1118 0 1118 0.00112 SRR10002851 +Viruses 10239 D 204 22 226 2.3e-4 SRR10002851 +Eukaryota 2759 D 2298036 833 2298869 0.77509 SRR10002835 +Bacteria 2 D 665320 172 665492 0.22438 SRR10002835 +Archaea 2157 D 753 0 753 2.5e-4 SRR10002835 +Viruses 10239 D 817 3 820 2.8e-4 SRR10002835 +Bacteria 2 D 654246 392 654638 0.91584 SRR10002874 +Eukaryota 2759 D 57951 722 58673 0.08208 SRR10002874 +Archaea 2157 D 1280 3 1283 0.00179 SRR10002874 +Viruses 10239 D 177 24 201 2.8e-4 SRR10002874 +Eukaryota 2759 D 613215 
583 613798 0.64373 SRR10002811 +Bacteria 2 D 338750 108 338858 0.35538 SRR10002811 +Archaea 2157 D 580 0 580 6.1e-4 SRR10002811 +Viruses 10239 D 265 4 269 2.8e-4 SRR10002811 +Eukaryota 2759 D 1275959 913 1276872 0.5194 SRR10002846 +Bacteria 2 D 1179256 294 1179550 0.47981 SRR10002846 +Archaea 2157 D 1002 0 1002 4.1e-4 SRR10002846 +Viruses 10239 D 911 9 920 3.7e-4 SRR10002846 diff --git a/data/2024-04-12_leung/2/hv_clade_counts.tsv.gz b/data/2024-04-12_leung/2/hv_clade_counts.tsv.gz new file mode 100644 index 0000000..c5cf264 Binary files /dev/null and b/data/2024-04-12_leung/2/hv_clade_counts.tsv.gz differ diff --git a/data/2024-04-12_leung/2/hv_hits_putative_filtered.tsv.gz b/data/2024-04-12_leung/2/hv_hits_putative_filtered.tsv.gz new file mode 100644 index 0000000..74d14e2 Binary files /dev/null and b/data/2024-04-12_leung/2/hv_hits_putative_filtered.tsv.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002775.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002775.report.gz new file mode 100644 index 0000000..19f44cd Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002775.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002776.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002776.report.gz new file mode 100644 index 0000000..e1c60da Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002776.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002777.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002777.report.gz new file mode 100644 index 0000000..0789635 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002777.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002778.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002778.report.gz new file mode 100644 index 0000000..e83bf09 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002778.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002779.report.gz 
b/data/2024-04-12_leung/2/kraken/SRR10002779.report.gz new file mode 100644 index 0000000..3a4b1d9 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002779.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002780.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002780.report.gz new file mode 100644 index 0000000..61731eb Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002780.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002781.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002781.report.gz new file mode 100644 index 0000000..4b625f2 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002781.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002782.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002782.report.gz new file mode 100644 index 0000000..79b90f4 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002782.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002783.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002783.report.gz new file mode 100644 index 0000000..3b86512 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002783.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002784.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002784.report.gz new file mode 100644 index 0000000..96cdf10 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002784.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002785.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002785.report.gz new file mode 100644 index 0000000..33969eb Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002785.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002786.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002786.report.gz new file mode 100644 index 0000000..57a92e8 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002786.report.gz differ diff --git 
a/data/2024-04-12_leung/2/kraken/SRR10002787.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002787.report.gz new file mode 100644 index 0000000..7410258 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002787.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002788.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002788.report.gz new file mode 100644 index 0000000..1bce9fc Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002788.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002789.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002789.report.gz new file mode 100644 index 0000000..9ae71de Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002789.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002790.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002790.report.gz new file mode 100644 index 0000000..9bf30ae Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002790.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002791.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002791.report.gz new file mode 100644 index 0000000..9aff068 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002791.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002792.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002792.report.gz new file mode 100644 index 0000000..4e89972 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002792.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002793.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002793.report.gz new file mode 100644 index 0000000..2eff505 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002793.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002794.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002794.report.gz new file mode 100644 index 0000000..ea04fbc Binary files /dev/null and 
b/data/2024-04-12_leung/2/kraken/SRR10002794.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002795.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002795.report.gz new file mode 100644 index 0000000..d118cbc Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002795.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002796.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002796.report.gz new file mode 100644 index 0000000..81e8ea8 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002796.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002797.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002797.report.gz new file mode 100644 index 0000000..cab9b1d Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002797.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002798.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002798.report.gz new file mode 100644 index 0000000..c7ef692 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002798.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002799.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002799.report.gz new file mode 100644 index 0000000..1359d0e Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002799.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002800.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002800.report.gz new file mode 100644 index 0000000..ced3d40 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002800.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002801.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002801.report.gz new file mode 100644 index 0000000..44d8a5d Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002801.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002802.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002802.report.gz new file mode 100644 index 
0000000..fb330c0 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002802.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002803.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002803.report.gz new file mode 100644 index 0000000..0a39c16 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002803.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002804.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002804.report.gz new file mode 100644 index 0000000..ef040ac Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002804.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002805.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002805.report.gz new file mode 100644 index 0000000..c97bdfe Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002805.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002806.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002806.report.gz new file mode 100644 index 0000000..ae0e036 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002806.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002807.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002807.report.gz new file mode 100644 index 0000000..7f25a68 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002807.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002808.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002808.report.gz new file mode 100644 index 0000000..9a78746 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002808.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002809.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002809.report.gz new file mode 100644 index 0000000..193df50 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002809.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002810.report.gz 
b/data/2024-04-12_leung/2/kraken/SRR10002810.report.gz new file mode 100644 index 0000000..5221fc8 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002810.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002811.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002811.report.gz new file mode 100644 index 0000000..ad16a47 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002811.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002812.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002812.report.gz new file mode 100644 index 0000000..6c12005 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002812.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002813.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002813.report.gz new file mode 100644 index 0000000..cf89ece Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002813.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002814.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002814.report.gz new file mode 100644 index 0000000..daa9b9b Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002814.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002815.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002815.report.gz new file mode 100644 index 0000000..8b15a22 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002815.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002816.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002816.report.gz new file mode 100644 index 0000000..2752f27 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002816.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002817.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002817.report.gz new file mode 100644 index 0000000..8df4914 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002817.report.gz differ diff --git 
a/data/2024-04-12_leung/2/kraken/SRR10002818.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002818.report.gz new file mode 100644 index 0000000..f9d9fc0 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002818.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002819.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002819.report.gz new file mode 100644 index 0000000..5dc587f Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002819.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002820.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002820.report.gz new file mode 100644 index 0000000..d3b45ef Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002820.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002821.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002821.report.gz new file mode 100644 index 0000000..3896496 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002821.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002822.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002822.report.gz new file mode 100644 index 0000000..97d63e3 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002822.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002823.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002823.report.gz new file mode 100644 index 0000000..c204956 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002823.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002824.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002824.report.gz new file mode 100644 index 0000000..49fb9c3 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002824.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002825.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002825.report.gz new file mode 100644 index 0000000..71b1b19 Binary files /dev/null and 
b/data/2024-04-12_leung/2/kraken/SRR10002825.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002826.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002826.report.gz new file mode 100644 index 0000000..16e44d2 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002826.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002827.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002827.report.gz new file mode 100644 index 0000000..b7c1e70 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002827.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002828.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002828.report.gz new file mode 100644 index 0000000..7e5605c Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002828.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002829.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002829.report.gz new file mode 100644 index 0000000..e580b0a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002829.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002830.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002830.report.gz new file mode 100644 index 0000000..4350e54 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002830.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002831.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002831.report.gz new file mode 100644 index 0000000..1e02134 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002831.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002832.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002832.report.gz new file mode 100644 index 0000000..5d520b7 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002832.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002833.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002833.report.gz new file mode 100644 index 
0000000..7d05e3e Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002833.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002834.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002834.report.gz new file mode 100644 index 0000000..0edd622 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002834.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002835.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002835.report.gz new file mode 100644 index 0000000..acc11d9 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002835.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002836.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002836.report.gz new file mode 100644 index 0000000..64e7b2e Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002836.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002837.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002837.report.gz new file mode 100644 index 0000000..02ff91a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002837.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002838.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002838.report.gz new file mode 100644 index 0000000..fc06902 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002838.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002839.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002839.report.gz new file mode 100644 index 0000000..c89660d Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002839.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002840.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002840.report.gz new file mode 100644 index 0000000..ba55c9f Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002840.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002841.report.gz 
b/data/2024-04-12_leung/2/kraken/SRR10002841.report.gz new file mode 100644 index 0000000..7cb320a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002841.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002842.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002842.report.gz new file mode 100644 index 0000000..2738ec5 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002842.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002843.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002843.report.gz new file mode 100644 index 0000000..c3a69fc Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002843.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002844.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002844.report.gz new file mode 100644 index 0000000..d0f6699 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002844.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002845.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002845.report.gz new file mode 100644 index 0000000..37cf9a1 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002845.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002846.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002846.report.gz new file mode 100644 index 0000000..626042a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002846.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002847.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002847.report.gz new file mode 100644 index 0000000..afef26b Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002847.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002848.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002848.report.gz new file mode 100644 index 0000000..c523677 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002848.report.gz differ diff --git 
a/data/2024-04-12_leung/2/kraken/SRR10002849.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002849.report.gz new file mode 100644 index 0000000..3bb5c5c Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002849.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002850.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002850.report.gz new file mode 100644 index 0000000..fc32a00 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002850.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002851.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002851.report.gz new file mode 100644 index 0000000..b71108b Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002851.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002852.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002852.report.gz new file mode 100644 index 0000000..2eb77ca Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002852.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002853.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002853.report.gz new file mode 100644 index 0000000..61b96a5 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002853.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002854.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002854.report.gz new file mode 100644 index 0000000..6323240 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002854.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002855.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002855.report.gz new file mode 100644 index 0000000..ee5ec94 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002855.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002856.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002856.report.gz new file mode 100644 index 0000000..5963872 Binary files /dev/null and 
b/data/2024-04-12_leung/2/kraken/SRR10002856.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002857.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002857.report.gz new file mode 100644 index 0000000..86ef5a2 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002857.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002858.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002858.report.gz new file mode 100644 index 0000000..bea413a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002858.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002859.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002859.report.gz new file mode 100644 index 0000000..2290e13 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002859.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002860.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002860.report.gz new file mode 100644 index 0000000..d703d76 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002860.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002861.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002861.report.gz new file mode 100644 index 0000000..08c657a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002861.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002862.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002862.report.gz new file mode 100644 index 0000000..edacdae Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002862.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002863.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002863.report.gz new file mode 100644 index 0000000..97a34a9 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002863.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002864.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002864.report.gz new file mode 100644 index 
0000000..3eebe57 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002864.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002865.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002865.report.gz new file mode 100644 index 0000000..0c82a1d Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002865.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002866.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002866.report.gz new file mode 100644 index 0000000..14f5bab Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002866.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002867.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002867.report.gz new file mode 100644 index 0000000..2603593 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002867.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002868.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002868.report.gz new file mode 100644 index 0000000..d7c4468 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002868.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002869.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002869.report.gz new file mode 100644 index 0000000..e9435d0 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002869.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002870.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002870.report.gz new file mode 100644 index 0000000..aa84d16 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002870.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002871.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002871.report.gz new file mode 100644 index 0000000..1ee4a7a Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002871.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002872.report.gz 
b/data/2024-04-12_leung/2/kraken/SRR10002872.report.gz new file mode 100644 index 0000000..ed9437d Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002872.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002873.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002873.report.gz new file mode 100644 index 0000000..338db01 Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002873.report.gz differ diff --git a/data/2024-04-12_leung/2/kraken/SRR10002874.report.gz b/data/2024-04-12_leung/2/kraken/SRR10002874.report.gz new file mode 100644 index 0000000..8f4cffa Binary files /dev/null and b/data/2024-04-12_leung/2/kraken/SRR10002874.report.gz differ diff --git a/data/2024-04-12_leung/2/qc_adapter_stats.tsv.gz b/data/2024-04-12_leung/2/qc_adapter_stats.tsv.gz new file mode 100644 index 0000000..d35043f Binary files /dev/null and b/data/2024-04-12_leung/2/qc_adapter_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/2/qc_basic_stats.tsv.gz b/data/2024-04-12_leung/2/qc_basic_stats.tsv.gz new file mode 100644 index 0000000..be715c3 Binary files /dev/null and b/data/2024-04-12_leung/2/qc_basic_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/2/qc_quality_base_stats.tsv.gz b/data/2024-04-12_leung/2/qc_quality_base_stats.tsv.gz new file mode 100644 index 0000000..105df09 Binary files /dev/null and b/data/2024-04-12_leung/2/qc_quality_base_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/2/qc_quality_sequence_stats.tsv.gz b/data/2024-04-12_leung/2/qc_quality_sequence_stats.tsv.gz new file mode 100644 index 0000000..bbd8c53 Binary files /dev/null and b/data/2024-04-12_leung/2/qc_quality_sequence_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/2/sample-metadata.csv b/data/2024-04-12_leung/2/sample-metadata.csv new file mode 100644 index 0000000..a18c2e5 --- /dev/null +++ b/data/2024-04-12_leung/2/sample-metadata.csv @@ -0,0 +1,101 @@ +sample,library,country,region,city,location,instrument,date 
+SRR10002775,SRR10002775,Norway,Europe,Oslo,"Oslo, Jernbanetorget",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002776,SRR10002776,United Kingdom,Europe,London,"London, Kings Cross St Pancras",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002777,SRR10002777,United Kingdom,Europe,London,"London, Vauxhall",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002778,SRR10002778,United Kingdom,Europe,London,"London, Green Park",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002779,SRR10002779,United Kingdom,Europe,London,"London, Euston",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002780,SRR10002780,USA,North America,New York City,"New York City, 28th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002781,SRR10002781,United Kingdom,Europe,London,"London, Green Park",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002782,SRR10002782,United Kingdom,Europe,London,"London, Oxford Circus",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002783,SRR10002783,United Kingdom,Europe,London,"London, Kings Cross St Pancras",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002784,SRR10002784,Hong Kong,Asia,Hong Kong,"Hong Kong, East Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002785,SRR10002785,United Kingdom,Europe,London,"London, Euston",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002786,SRR10002786,United Kingdom,Europe,London,"London, Brixton",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002787,SRR10002787,Hong Kong,Asia,Hong Kong,"Hong Kong, Hung Hom",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002788,SRR10002788,Hong Kong,Asia,Hong Kong,"Hong Kong, Admiralty",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002789,SRR10002789,Hong Kong,Asia,Hong Kong,"Hong Kong, Ocean Park",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002790,SRR10002790,Hong Kong,Asia,Hong Kong,"Hong Kong, Mong Kok East",SASS 3100 electret filter air 
sampler,2017-07-12 +SRR10002791,SRR10002791,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsing Yi",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002792,SRR10002792,Hong Kong,Asia,Hong Kong,"Hong Kong, Lai King",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002793,SRR10002793,Hong Kong,Asia,Hong Kong,"Hong Kong, Cheung Sha Wan",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002794,SRR10002794,Hong Kong,Asia,Hong Kong,"Hong Kong, Sham Shui Po",SASS 3100 electret filter air sampler,2017-07-06 +SRR10002795,SRR10002795,Hong Kong,Asia,Hong Kong,"Hong Kong, Fortress Hill",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002796,SRR10002796,Hong Kong,Asia,Hong Kong,"Hong Kong, North Point",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002797,SRR10002797,Hong Kong,Asia,Hong Kong,"Hong Kong, Quarry Bay",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002798,SRR10002798,Hong Kong,Asia,Hong Kong,"Hong Kong, Yau Tong",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002799,SRR10002799,Norway,Europe,Oslo,"Oslo, Vestli",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002800,SRR10002800,Norway,Europe,Oslo,"Oslo, Romsaas",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002801,SRR10002801,Norway,Europe,Oslo,"Oslo, Nydalen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002802,SRR10002802,Norway,Europe,Oslo,"Oslo, Loeren",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002803,SRR10002803,Norway,Europe,Oslo,"Oslo, Majorstua",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002804,SRR10002804,Norway,Europe,Oslo,"Oslo, Forskningsparken",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002805,SRR10002805,Norway,Europe,Oslo,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002806,SRR10002806,Norway,Europe,Oslo,"Oslo, Toeyen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002807,SRR10002807,Hong Kong,Asia,Hong Kong,"Hong Kong, Che Kung Temple",SASS 3100 electret filter air 
sampler,2017-07-13 +SRR10002808,SRR10002808,Norway,Europe,Oslo,"Oslo, Carl Berners plass",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002809,SRR10002809,Norway,Europe,Oslo,"Oslo, Groenland",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002810,SRR10002810,Hong Kong,Asia,Hong Kong,"Hong Kong, Fortress Hill",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002811,SRR10002811,Norway,Europe,Oslo,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002812,SRR10002812,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002813,SRR10002813,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002814,SRR10002814,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002815,SRR10002815,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002816,SRR10002816,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002817,SRR10002817,uncalculated,uncalculated,,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002818,SRR10002818,Hong Kong,Asia,Hong Kong,"Hong Kong, Hung Hom",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002819,SRR10002819,Hong Kong,Asia,Hong Kong,"Hong Kong, Mong Kok East",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002820,SRR10002820,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002821,SRR10002821,Hong Kong,Asia,Hong Kong,"Hong Kong, East Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-14 +SRR10002822,SRR10002822,USA,North America,New York City,"New York City, Time Square 42nd St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002823,SRR10002823,USA,North America,New York City,"New York City, Grand Central 42nd St.",SASS 3100 electret filter air sampler,2017-06-23 
+SRR10002824,SRR10002824,USA,North America,New York City,"New York City, 59th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002825,SRR10002825,USA,North America,New York City,"New York City, 77th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002826,SRR10002826,USA,North America,New York City,"New York City, 39th St. Herald Sq.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002827,SRR10002827,USA,North America,New York City,"New York City, Union Square 14th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002828,SRR10002828,USA,North America,New York City,"New York City, 33rd St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002829,SRR10002829,USA,North America,New York City,"New York City, 51st St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002830,SRR10002830,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002831,SRR10002831,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002832,SRR10002832,USA,North America,New York City,"New York City, Canal St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002833,SRR10002833,USA,North America,New York City,"New York City, 86th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002834,SRR10002834,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002835,SRR10002835,USA,North America,Denver,"Denver, Train Cabin - front wagon",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002836,SRR10002836,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002837,SRR10002837,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002838,SRR10002838,Hong Kong,Asia,Hong Kong,"Hong Kong, Che Kung Temple",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002839,SRR10002839,Hong 
Kong,Asia,Hong Kong,"Hong Kong, Sha Tin Wai",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002840,SRR10002840,Hong Kong,Asia,Hong Kong,"Hong Kong, Kowloon Tong",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002841,SRR10002841,Hong Kong,Asia,Hong Kong,"Hong Kong, Tai Wai",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002842,SRR10002842,Sweden,Europe,Stockholm,"Stockholm, Universitetet",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002843,SRR10002843,Sweden,Europe,Stockholm,"Stockholm, Danderyd",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002844,SRR10002844,Sweden,Europe,Stockholm,"Stockholm, T-centralen",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002845,SRR10002845,Sweden,Europe,Stockholm,"Stockholm, Tekniska hoegskolan",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002846,SRR10002846,Hong Kong,Asia,Hong Kong,"Hong Kong, Shek Kip Mei",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002847,SRR10002847,Hong Kong,Asia,Hong Kong,"Hong Kong, Prince Edward",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002848,SRR10002848,Norway,Europe,Oslo,"Oslo, Loeren",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002849,SRR10002849,Norway,Europe,Oslo,"Oslo, Nydalen",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002850,SRR10002850,Norway,Europe,Oslo,"Oslo, Forskningsparken",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002851,SRR10002851,Norway,Europe,Oslo,"Oslo, Majorstua",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002852,SRR10002852,Norway,Europe,Oslo,"Oslo, Toeyen",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002853,SRR10002853,Norway,Europe,Oslo,"Oslo, Helsfyr",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002854,SRR10002854,Norway,Europe,Oslo,"Oslo, Lindeberg",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002855,SRR10002855,Norway,Europe,Oslo,"Oslo, Ellingsrudaasen",Bobcat ACD-200 electret filter air sampler,2017-06-21 
+SRR10002856,SRR10002856,Norway,Europe,Oslo,"Oslo, Forskningsparken",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002857,SRR10002857,Norway,Europe,Oslo,"Oslo, Majorstua",Bobcat ACD-200 electret filter air sampler,2017-06-21 +SRR10002858,SRR10002858,Hong Kong,Asia,Hong Kong,"Hong Kong, Quarry Bay",SASS 3100 electret filter air sampler,2017-07-12 +SRR10002859,SRR10002859,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002860,SRR10002860,USA,North America,New York City,"New York City, 68th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002861,SRR10002861,USA,North America,New York City,"New York City, 57th St.",SASS 3100 electret filter air sampler,2017-06-23 +SRR10002862,SRR10002862,USA,North America,New York City,"New York City, 96th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002863,SRR10002863,USA,North America,New York City,"New York City, 28th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002864,SRR10002864,USA,North America,New York City,"New York City, 33rd St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002865,SRR10002865,USA,North America,New York City,"New York City, 86th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002866,SRR10002866,USA,North America,New York City,"New York City, Grand Central 42nd St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002867,SRR10002867,USA,North America,New York City,"New York City, 51st St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002868,SRR10002868,USA,North America,New York City,"New York City, 77th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002869,SRR10002869,USA,North America,New York City,"New York City, 59th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002870,SRR10002870,Hong Kong,Asia,Hong Kong,"Hong Kong, Quarry Bay",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002871,SRR10002871,USA,North 
America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-29 +SRR10002872,SRR10002872,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-29 +SRR10002873,SRR10002873,USA,North America,Denver,"Denver, Boulder Central",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002874,SRR10002874,USA,North America,Denver,"Denver, Boulder Central",SASS 3100 electret filter air sampler,2017-06-27 diff --git a/data/2024-04-12_leung/2/taxonomic_composition.tsv b/data/2024-04-12_leung/2/taxonomic_composition.tsv new file mode 100644 index 0000000..4601d83 --- /dev/null +++ b/data/2024-04-12_leung/2/taxonomic_composition.tsv @@ -0,0 +1,801 @@ +sample classification n_reads p_reads +SRR10002775 Filtered 423580 0.07354294409291712 +SRR10002775 Duplicate 500944 0.0869750615838384 +SRR10002775 Ribosomal 17017 0.002954531091244087 +SRR10002775 Unassigned 2746702 0.4768887851784872 +SRR10002775 Bacterial 1141644 0.19821488471130427 +SRR10002775 Archaeal 5555 9.644720110396018e-4 +SRR10002775 Viral 664 1.1528522328178139e-4 +SRR10002775 Human 923522 0.16034403610788753 +SRR10002776 Filtered 367603 0.06076302596738626 +SRR10002776 Duplicate 384727 0.06359354165051595 +SRR10002776 Ribosomal 15038 0.002485709813297374 +SRR10002776 Unassigned 2759692 0.4561639503975433 +SRR10002776 Bacterial 1208959 0.1998351675870581 +SRR10002776 Archaeal 1609 2.6596004053700456e-4 +SRR10002776 Viral 980 1.619893348205497e-4 +SRR10002776 Human 1311173 0.21673065520884144 +SRR10002777 Filtered 292208 0.08578711930806157 +SRR10002777 Duplicate 233894 0.0686671565577936 +SRR10002777 Ribosomal 10088 0.0029616590222708654 +SRR10002777 Unassigned 1682088 0.49383139387921843 +SRR10002777 Bacterial 694162 0.20379373019603375 +SRR10002777 Archaeal 1074 3.1530747322748906e-4 +SRR10002777 Viral 414 1.2154310420500975e-4 +SRR10002777 Human 492271 0.14452209045918926 +SRR10002778 Filtered 295701 0.07669338260505558 +SRR10002778 Duplicate 188047 
0.04877210600820723 +SRR10002778 Ribosomal 10243 0.0026566373398249727 +SRR10002778 Unassigned 1887499 0.4895441103468023 +SRR10002778 Bacterial 781983 0.20281609263969066 +SRR10002778 Archaeal 799 2.072296431240997e-4 +SRR10002778 Viral 389 1.0089152837956793e-4 +SRR10002778 Human 690965 0.17920954988891558 +SRR10002779 Filtered 520309 0.08736655757369988 +SRR10002779 Duplicate 293549 0.04929064384664118 +SRR10002779 Ribosomal 20642 0.0034660566729314943 +SRR10002779 Unassigned 3228456 0.5420991891321442 +SRR10002779 Bacterial 1307695 0.21957877051202163 +SRR10002779 Archaeal 2977 4.998765001122497e-4 +SRR10002779 Viral 614 1.0309847869295308e-4 +SRR10002779 Human 581229 0.0975958072837564 +SRR10002780 Filtered 635366 0.08926973477433317 +SRR10002780 Duplicate 409347 0.057513776501526614 +SRR10002780 Ribosomal 16185 0.002274013178738841 +SRR10002780 Unassigned 3801436 0.5341066148985025 +SRR10002780 Bacterial 1500596 0.21083565523403086 +SRR10002780 Archaeal 4704 6.609180100579245e-4 +SRR10002780 Viral 1008 1.4162528786955525e-4 +SRR10002780 Human 748731 0.10519766211494044 +SRR10002781 Filtered 456341 0.07470456858509032 +SRR10002781 Duplicate 365387 0.0598150904731339 +SRR10002781 Ribosomal 17766 0.002908354422422519 +SRR10002781 Unassigned 2967611 0.4858079801801032 +SRR10002781 Bacterial 1172939 0.19201409027816316 +SRR10002781 Archaeal 1106 1.8105594907122063e-4 +SRR10002781 Viral 513 8.397983894533108e-5 +SRR10002781 Human 1126946 0.18448488027307036 +SRR10002782 Filtered 385318 0.0786244526336839 +SRR10002782 Duplicate 270066 0.05510718789407314 +SRR10002782 Ribosomal 14777 0.003015258920081457 +SRR10002782 Unassigned 2454145 0.5007702918334782 +SRR10002782 Bacterial 989831 0.2019758240592237 +SRR10002782 Archaeal 1101 2.246599493137771e-4 +SRR10002782 Viral 584 1.1916567702020512e-4 +SRR10002782 Human 784918 0.1601631590331256 +SRR10002783 Filtered 527781 0.079639764216768 +SRR10002783 Duplicate 381806 0.05761279738480036 +SRR10002783 Ribosomal 26267 
0.00396357141822431 +SRR10002783 Unassigned 3618687 0.5460434904899636 +SRR10002783 Bacterial 1309433 0.19758751333915991 +SRR10002783 Archaeal 1763 2.6602872084095856e-4 +SRR10002783 Viral 529 7.982370579969772e-5 +SRR10002783 Human 760838 0.11480701072444313 +SRR10002784 Filtered 302051 0.06609490369607254 +SRR10002784 Duplicate 390109 0.08536378553943264 +SRR10002784 Ribosomal 8179 0.0017897315927779657 +SRR10002784 Unassigned 1324217 0.2897656193414427 +SRR10002784 Bacterial 900245 0.1969919204964421 +SRR10002784 Archaeal 1081 2.3654479175852562e-4 +SRR10002784 Viral 449 9.825033441219057e-5 +SRR10002784 Human 1643628 0.3596592442076614 +SRR10002785 Filtered 355080 0.07550851501621685 +SRR10002785 Duplicate 292494 0.06219946939042844 +SRR10002785 Ribosomal 12173 0.0025886142652146215 +SRR10002785 Unassigned 2149674 0.4571327349019121 +SRR10002785 Bacterial 986405 0.20976111511369658 +SRR10002785 Archaeal 1552 3.300360913179243e-4 +SRR10002785 Viral 607 1.2907983726158508e-4 +SRR10002785 Human 904531 0.1923504353839519 +SRR10002786 Filtered 312797 0.07664827221179434 +SRR10002786 Duplicate 203448 0.04985322009144952 +SRR10002786 Ribosomal 11467 0.002809891838645018 +SRR10002786 Unassigned 1899171 0.4653758692850177 +SRR10002786 Bacterial 764421 0.18731493234401878 +SRR10002786 Archaeal 1426 3.4942929815189635e-4 +SRR10002786 Viral 664 1.6270761148166844e-4 +SRR10002786 Human 887546 0.2174856773194411 +SRR10002787 Filtered 299470 0.06212799857267629 +SRR10002787 Duplicate 472060 0.09793349252418464 +SRR10002787 Ribosomal 12343 0.0025606768169851523 +SRR10002787 Unassigned 1575070 0.3267637717028926 +SRR10002787 Bacterial 1134569 0.23537750429960522 +SRR10002787 Archaeal 1737 3.6035774374975363e-4 +SRR10002787 Viral 962 1.9957636700475704e-4 +SRR10002787 Human 1323999 0.2746766219729016 +SRR10002788 Filtered 295123 0.05382781512033345 +SRR10002788 Duplicate 568155 0.10362642796289362 +SRR10002788 Ribosomal 13451 0.0024533429830396318 +SRR10002788 Unassigned 
1605961 0.29291302879974057 +SRR10002788 Bacterial 1722200 0.3141139904386926 +SRR10002788 Archaeal 1175 2.1430956843889433e-4 +SRR10002788 Viral 1120 2.0427805672473332e-4 +SRR10002788 Human 1275538 0.2326468070701365 +SRR10002789 Filtered 220996 0.05354542551454208 +SRR10002789 Duplicate 404084 0.09790606944749328 +SRR10002789 Ribosomal 9725 0.0023562836573011356 +SRR10002789 Unassigned 1569600 0.38030054791772366 +SRR10002789 Bacterial 1029950 0.24954800543314187 +SRR10002789 Archaeal 812 1.9674059945794573e-4 +SRR10002789 Viral 387 9.376676353475984e-5 +SRR10002789 Human 891708 0.21605316066680524 +SRR10002790 Filtered 215563 0.0523442775771901 +SRR10002790 Duplicate 332486 0.08073620924986954 +SRR10002790 Ribosomal 4942 0.0012000455541371826 +SRR10002790 Unassigned 523899 0.1272162415554261 +SRR10002790 Bacterial 259393 0.06298733638694985 +SRR10002790 Archaeal 454 1.1024295458888727e-4 +SRR10002790 Viral 178 4.322300862736109e-5 +SRR10002790 Human 2781262 0.675362423713211 +SRR10002791 Filtered 381295 0.07059106023682843 +SRR10002791 Duplicate 475416 0.08801615414194265 +SRR10002791 Ribosomal 13671 0.0025309809583070365 +SRR10002791 Unassigned 1608500 0.2977896914224905 +SRR10002791 Bacterial 1129029 0.20902281474482007 +SRR10002791 Archaeal 900 1.6662152457584177e-4 +SRR10002791 Viral 1200 2.2216203276778903e-4 +SRR10002791 Human 1791452 0.33166051493826765 +SRR10002792 Filtered 307801 0.06569935109828338 +SRR10002792 Duplicate 444222 0.0948180712329773 +SRR10002792 Ribosomal 7725 0.0016488818659921157 +SRR10002792 Unassigned 593884 0.12676304959260346 +SRR10002792 Bacterial 420641 0.08978476595375916 +SRR10002792 Archaeal 380 8.111004648246005e-5 +SRR10002792 Viral 379 8.089659899171675e-5 +SRR10002792 Human 2909961 0.6211238736109104 +SRR10002793 Filtered 242074 0.06526476588569702 +SRR10002793 Duplicate 331856 0.08947059224767578 +SRR10002793 Ribosomal 8866 0.0023903327674289257 +SRR10002793 Unassigned 968536 0.26112376914443286 +SRR10002793 Bacterial 
795881 0.21457482892782548 +SRR10002793 Archaeal 671 1.8090607793196583e-4 +SRR10002793 Viral 738 1.989697250578104e-4 +SRR10002793 Human 1360485 0.36679583522395015 +SRR10002794 Filtered 296345 0.0655764730614119 +SRR10002794 Duplicate 405507 0.08973230141124013 +SRR10002794 Ribosomal 8389 0.0018563533466472674 +SRR10002794 Unassigned 874229 0.19345308497867372 +SRR10002794 Bacterial 653258 0.14455568894076776 +SRR10002794 Archaeal 498 1.1019954304807953e-4 +SRR10002794 Viral 688 1.5224354541582073e-4 +SRR10002794 Human 2280161 0.5045636551727953 +SRR10002795 Filtered 195589 0.05968991845886367 +SRR10002795 Duplicate 298949 0.09123335889727355 +SRR10002795 Ribosomal 7110 0.0021698322515198744 +SRR10002795 Unassigned 891461 0.272056375354734 +SRR10002795 Bacterial 702255 0.21431442303672144 +SRR10002795 Archaeal 552 1.6845955032896914e-4 +SRR10002795 Viral 629 1.9195843687848117e-4 +SRR10002795 Human 1180206 0.36017567401368 +SRR10002796 Filtered 313492 0.08013906463693649 +SRR10002796 Duplicate 404688 0.1034518194716055 +SRR10002796 Ribosomal 8365 0.002138374426422281 +SRR10002796 Unassigned 1785518 0.45643825811316896 +SRR10002796 Bacterial 958455 0.24501322903485562 +SRR10002796 Archaeal 4934 0.0012612958063320423 +SRR10002796 Viral 420 1.0736607998772959e-4 +SRR10002796 Human 435978 0.11145059243069136 +SRR10002797 Filtered 253307 0.05282730910796473 +SRR10002797 Duplicate 459415 0.09581124174948034 +SRR10002797 Ribosomal 11898 0.0024813342061868185 +SRR10002797 Unassigned 1507535 0.3143972232748231 +SRR10002797 Bacterial 1238916 0.2583765884511807 +SRR10002797 Archaeal 4463 9.307610154825828e-4 +SRR10002797 Viral 826 1.7226273779713498e-4 +SRR10002797 Human 1318641 0.2750032794570846 +SRR10002798 Filtered 192327 0.056400846685720415 +SRR10002798 Duplicate 316576 0.0928374822067553 +SRR10002798 Ribosomal 8096 0.0023741921558990287 +SRR10002798 Unassigned 1081212 0.3170707817766676 +SRR10002798 Bacterial 837561 0.24561891752556156 +SRR10002798 Archaeal 1233 
3.6158336564025474e-4 +SRR10002798 Viral 803 2.3548373285411562e-4 +SRR10002798 Human 972194 0.28510071255090175 +SRR10002799 Filtered 562188 0.14210870359819233 +SRR10002799 Duplicate 209718 0.05301207621152657 +SRR10002799 Ribosomal 11102 0.0028063402764682477 +SRR10002799 Unassigned 1830644 0.46274635102458467 +SRR10002799 Bacterial 538464 0.13611180063305697 +SRR10002799 Archaeal 655 1.655695263093769e-4 +SRR10002799 Viral 401 1.013639390077254e-4 +SRR10002799 Human 802870 0.2029477947908541 +SRR10002800 Filtered 880308 0.17700293701965794 +SRR10002800 Duplicate 337323 0.06782530855596232 +SRR10002800 Ribosomal 17226 0.003463620225081026 +SRR10002800 Unassigned 2417119 0.48600849035339744 +SRR10002800 Bacterial 927227 0.1864369087682111 +SRR10002800 Archaeal 1206 2.4248960823451278e-4 +SRR10002800 Viral 300 6.032079806828676e-5 +SRR10002800 Human 392700 0.07895992467138738 +SRR10002801 Filtered 295494 0.0764175352710938 +SRR10002801 Duplicate 333045 0.08612857802311193 +SRR10002801 Ribosomal 13154 0.003401748458364528 +SRR10002801 Unassigned 2243100 0.580086815186063 +SRR10002801 Bacterial 631028 0.1631897921685306 +SRR10002801 Archaeal 1031 2.666263235953952e-4 +SRR10002801 Viral 508 1.313735910634925e-4 +SRR10002801 Human 349475 0.09037753097817725 +SRR10002802 Filtered 311960 0.08498741779750296 +SRR10002802 Duplicate 350498 0.09548634428513011 +SRR10002802 Ribosomal 11971 0.0032612654777981406 +SRR10002802 Unassigned 1808345 0.492648326827239 +SRR10002802 Bacterial 525020 0.14303145945648482 +SRR10002802 Archaeal 737 2.0078127617886805e-4 +SRR10002802 Viral 269 7.328380365280259e-5 +SRR10002802 Human 661861 0.1803111210760133 +SRR10002803 Filtered 191576 0.055890734512592743 +SRR10002803 Duplicate 305104 0.08901160198944595 +SRR10002803 Ribosomal 9274 0.0027056138131591905 +SRR10002803 Unassigned 2049557 0.5979415279336976 +SRR10002803 Bacterial 542790 0.15835455268974305 +SRR10002803 Archaeal 630 1.8379735845269464e-4 +SRR10002803 Viral 172 
5.0179596275973775e-5 +SRR10002803 Human 328585 0.0958619921066328 +SRR10002804 Filtered 224687 0.06598859244679123 +SRR10002804 Duplicate 320583 0.09415240281978786 +SRR10002804 Ribosomal 11613 0.003410635791499226 +SRR10002804 Unassigned 2166604 0.6363125073973469 +SRR10002804 Bacterial 493004 0.14479093152090627 +SRR10002804 Archaeal 812 2.3847724642188682e-4 +SRR10002804 Viral 233 6.843004731071383e-5 +SRR10002804 Human 187401 0.05503802272993597 +SRR10002805 Filtered 228431 0.05653222369665619 +SRR10002805 Duplicate 384710 0.09520823258813647 +SRR10002805 Ribosomal 13164 0.0032578336247828976 +SRR10002805 Unassigned 2182135 0.5400359143737183 +SRR10002805 Bacterial 637376 0.15773814679653786 +SRR10002805 Archaeal 907 2.2446483573975145e-4 +SRR10002805 Viral 479 1.1854317124513886e-4 +SRR10002805 Human 593520 0.14688464091318335 +SRR10002806 Filtered 248892 0.06168177328035156 +SRR10002806 Duplicate 368765 0.09138935411234127 +SRR10002806 Ribosomal 11259 0.0027902668039289256 +SRR10002806 Unassigned 2019418 0.5004631857764049 +SRR10002806 Bacterial 878621 0.21774464957232761 +SRR10002806 Archaeal 1735 4.299771653625265e-4 +SRR10002806 Viral 341 8.450848033926314e-5 +SRR10002806 Human 506067 0.12541628480894393 +SRR10002807 Filtered 395948 0.08585856066975003 +SRR10002807 Duplicate 444022 0.0962830720844751 +SRR10002807 Ribosomal 14728 0.0031936640203867137 +SRR10002807 Unassigned 2035384 0.44135881643609387 +SRR10002807 Bacterial 1172081 0.25415758546162953 +SRR10002807 Archaeal 1086 2.3549152132943855e-4 +SRR10002807 Viral 556 1.205647199439851e-4 +SRR10002807 Human 547826 0.11879224508639134 +SRR10002808 Filtered 584309 0.1530167108539369 +SRR10002808 Duplicate 219667 0.0575255931761307 +SRR10002808 Ribosomal 12257 0.0032098184777860765 +SRR10002808 Unassigned 1822536 0.4772790837260606 +SRR10002808 Bacterial 609040 0.1594931749784476 +SRR10002808 Archaeal 690 1.8069468464325633e-4 +SRR10002808 Viral 418 1.0946431620417556e-4 +SRR10002808 Human 569679 
0.14918545978679074 +SRR10002809 Filtered 362182 0.09239395659411093 +SRR10002809 Duplicate 237170 0.06050293688097501 +SRR10002809 Ribosomal 12553 0.0032023163413031973 +SRR10002809 Unassigned 1929691 0.49227125172992175 +SRR10002809 Bacterial 643844 0.16424696586075166 +SRR10002809 Archaeal 1580 4.0306379504971335e-4 +SRR10002809 Viral 417 1.0637822945299396e-4 +SRR10002809 Human 732538 0.18687313056843474 +SRR10002810 Filtered 333483 0.06316040074712999 +SRR10002810 Duplicate 511006 0.09678257585600437 +SRR10002810 Ribosomal 12806 0.00242540726803989 +SRR10002810 Unassigned 1693034 0.32065414404487325 +SRR10002810 Bacterial 1222760 0.23158605271501295 +SRR10002810 Archaeal 1231 2.3314667710113262e-4 +SRR10002810 Viral 1219 2.3087392314076414e-4 +SRR10002810 Human 1504399 0.28492739876869766 +SRR10002811 Filtered 253661 0.08564345103324773 +SRR10002811 Duplicate 192837 0.06510747086425739 +SRR10002811 Ribosomal 9523 0.0032152462703750996 +SRR10002811 Unassigned 1552300 0.5241023611785433 +SRR10002811 Bacterial 338858 0.11440847639260375 +SRR10002811 Archaeal 580 1.9582514300299882e-4 +SRR10002811 Viral 269 9.082235080656325e-5 +SRR10002811 Human 613798 0.20723634676716324 +SRR10002812 Filtered 1076352 0.7372965819395382 +SRR10002812 Duplicate 32575 0.022313737658944708 +SRR10002812 Ribosomal 1584 0.0010850333216199054 +SRR10002812 Unassigned 55288 0.037872046897551345 +SRR10002812 Bacterial 263525 0.1805135139393217 +SRR10002812 Archaeal 20 1.3699915677019008e-5 +SRR10002812 Viral 213 1.4590410196025245e-4 +SRR10002812 Human 30306 0.020759482225386903 +SRR10002813 Filtered 435894 0.5055560903588118 +SRR10002813 Duplicate 34163 0.03962273560757452 +SRR10002813 Ribosomal 701 8.130298176655953e-4 +SRR10002813 Unassigned 40880 0.04741320819710348 +SRR10002813 Bacterial 319848 0.37096428119929437 +SRR10002813 Archaeal 19 2.2036471520180187e-5 +SRR10002813 Viral 314 3.64181687228241e-4 +SRR10002813 Human 30388 0.035244436660801876 +SRR10002814 Filtered 588862 
0.8735049403904548 +SRR10002814 Duplicate 8186 0.01214293237131325 +SRR10002814 Ribosomal 785 0.0011644517360714513 +SRR10002814 Unassigned 21776 0.03230203949642283 +SRR10002814 Bacterial 39044 0.057917010933979295 +SRR10002814 Archaeal 10 1.483378007734333e-5 +SRR10002814 Viral 26 3.856782820109266e-5 +SRR10002814 Human 15448 0.022915223463479975 +SRR10002815 Filtered 645707 0.8601639315872697 +SRR10002815 Duplicate 9172 0.01221827172466527 +SRR10002815 Ribosomal 749 9.977633582396736e-4 +SRR10002815 Unassigned 18934 0.02522249856463282 +SRR10002815 Bacterial 43561 0.0580287979282756 +SRR10002815 Archaeal 6 7.992763884429963e-6 +SRR10002815 Viral 42 5.5949347191009736e-5 +SRR10002815 Human 32508 0.04330479472584154 +SRR10002816 Filtered 1110735 0.8475017911657189 +SRR10002816 Duplicate 17756 0.013548003622770963 +SRR10002816 Ribosomal 1300 9.919128581663804e-4 +SRR10002816 Unassigned 27324 0.020848482258875523 +SRR10002816 Bacterial 126715 0.09668479832504069 +SRR10002816 Archaeal 12 9.156118690766588e-6 +SRR10002816 Viral 103 7.859001876241322e-5 +SRR10002816 Human 26654 0.020337265631974386 +SRR10002817 Filtered 340709 0.7141653373802332 +SRR10002817 Duplicate 8784 0.018412276527910824 +SRR10002817 Ribosomal 474 9.935586377766085e-4 +SRR10002817 Unassigned 12809 0.02684914048793371 +SRR10002817 Bacterial 103183 0.2162834618601346 +SRR10002817 Archaeal 2 4.192230539141809e-6 +SRR10002817 Viral 40 8.384461078283617e-5 +SRR10002817 Human 11072 0.023208188264689053 +SRR10002818 Filtered 411842 0.06097689895242362 +SRR10002818 Duplicate 658827 0.09754524163666746 +SRR10002818 Ribosomal 16139 0.0023895235847562046 +SRR10002818 Unassigned 2207921 0.326902491032809 +SRR10002818 Bacterial 1869467 0.27679134317017334 +SRR10002818 Archaeal 1981 2.9330480335845103e-4 +SRR10002818 Viral 1290 1.909960607432619e-4 +SRR10002818 Human 1586599 0.23491020075906868 +SRR10002819 Filtered 369571 0.05436254691454364 +SRR10002819 Duplicate 694346 0.1021357655225267 +SRR10002819 
Ribosomal 17185 0.0025278508560640104 +SRR10002819 Unassigned 2924119 0.43012724570166067 +SRR10002819 Bacterial 1660634 0.24427320794349736 +SRR10002819 Archaeal 2349 3.455293372647286e-4 +SRR10002819 Viral 1040 1.5298020892095263e-4 +SRR10002819 Human 1129021 0.16607487351552197 +SRR10002820 Filtered 412697 0.06145065396210699 +SRR10002820 Duplicate 580920 0.08649908746530068 +SRR10002820 Ribosomal 15565 0.0023176311650440766 +SRR10002820 Unassigned 2047209 0.3048297706237532 +SRR10002820 Bacterial 2116762 0.31518622423263926 +SRR10002820 Archaeal 1926 2.867817297703111e-4 +SRR10002820 Viral 1320 1.965482260108051e-4 +SRR10002820 Human 1539510 0.22923330259537467 +SRR10002821 Filtered 282787 0.05640751395392164 +SRR10002821 Duplicate 434645 0.08669862441520391 +SRR10002821 Ribosomal 9852 0.0019651781286764806 +SRR10002821 Unassigned 1104430 0.22030061719997623 +SRR10002821 Bacterial 1087794 0.21698223480567436 +SRR10002821 Archaeal 1447 2.8863304427475313e-4 +SRR10002821 Viral 765 1.5259452582597522e-4 +SRR10002821 Human 2091566 0.41720460392644665 +SRR10002822 Filtered 393246 0.07297302262437226 +SRR10002822 Duplicate 305456 0.056682197908561695 +SRR10002822 Ribosomal 16030 0.002974620346217602 +SRR10002822 Unassigned 2683008 0.49787462170084823 +SRR10002822 Bacterial 1062615 0.1971850404988158 +SRR10002822 Archaeal 1742 3.232556857835972e-4 +SRR10002822 Viral 906 1.6812264714118202e-4 +SRR10002822 Human 925920 0.17181911858825966 +SRR10002823 Filtered 546529 0.07745579230874323 +SRR10002823 Duplicate 387502 0.05491799050228373 +SRR10002823 Ribosomal 16066 0.0022769235653227345 +SRR10002823 Unassigned 2070612 0.29345358256193443 +SRR10002823 Bacterial 825824 0.11703834970802204 +SRR10002823 Archaeal 643 9.11279629343034e-5 +SRR10002823 Viral 1062 1.5050994811233314e-4 +SRR10002823 Human 3207774 0.4546157234426472 +SRR10002824 Filtered 457611 0.07566633469742214 +SRR10002824 Duplicate 351784 0.05816775795424049 +SRR10002824 Ribosomal 16773 0.0027734285930186586 
+SRR10002824 Unassigned 3085180 0.5101369120973771 +SRR10002824 Bacterial 1023003 0.1691543415574952 +SRR10002824 Archaeal 3634 6.008847258707331e-4 +SRR10002824 Viral 1070 1.7692533205329784e-4 +SRR10002824 Human 1108694 0.18332341504252242 +SRR10002825 Filtered 509932 0.07527688241290152 +SRR10002825 Duplicate 396975 0.058602010456024686 +SRR10002825 Ribosomal 19424 0.0028673983275970113 +SRR10002825 Unassigned 3314645 0.4893125787467975 +SRR10002825 Bacterial 1223059 0.1805496978558728 +SRR10002825 Archaeal 1853 2.735424784306663e-4 +SRR10002825 Viral 1148 1.6946938221176734e-4 +SRR10002825 Human 1307049 0.19294842034016402 +SRR10002826 Filtered 519934 0.08435595463150264 +SRR10002826 Duplicate 348527 0.05654626894923918 +SRR10002826 Ribosomal 13750 0.002230849254296048 +SRR10002826 Unassigned 2999573 0.4866614683822952 +SRR10002826 Bacterial 1187719 0.19269978512459982 +SRR10002826 Archaeal 2949 4.784563237032033e-4 +SRR10002826 Viral 734 1.190867892838763e-4 +SRR10002826 Human 1090386 0.17690813054508003 +SRR10002827 Filtered 485863 0.07335290264246062 +SRR10002827 Duplicate 422417 0.06377417723827558 +SRR10002827 Ribosomal 16981 0.002563697255752391 +SRR10002827 Unassigned 3956809 0.597377090562179 +SRR10002827 Bacterial 1096792 0.165587576734655 +SRR10002827 Archaeal 2235 3.3742791158392283e-4 +SRR10002827 Viral 692 1.0447432430249423e-4 +SRR10002827 Human 641848 0.09690265333079093 +SRR10002828 Filtered 457571 0.0701953198108779 +SRR10002828 Duplicate 408330 0.06264132765926113 +SRR10002828 Ribosomal 19409 0.0029775072332148 +SRR10002828 Unassigned 3796105 0.5823550979207001 +SRR10002828 Bacterial 1395159 0.21402936853958096 +SRR10002828 Archaeal 4704 7.216339855243658e-4 +SRR10002828 Viral 552 8.468153911765518e-5 +SRR10002828 Human 436710 0.06699506331172318 +SRR10002829 Filtered 314732 0.06469544125431027 +SRR10002829 Duplicate 266491 0.05477915444029333 +SRR10002829 Ribosomal 12951 0.0026621718150190397 +SRR10002829 Unassigned 2056131 
0.42265261340335986 +SRR10002829 Bacterial 800949 0.1646408658070948 +SRR10002829 Archaeal 899 1.8479595874466194e-4 +SRR10002829 Viral 788 1.6197910510655573e-4 +SRR10002829 Human 1411884 0.2902229782160715 +SRR10002830 Filtered 308664 0.07253496865851292 +SRR10002830 Duplicate 265912 0.06248839704637563 +SRR10002830 Ribosomal 19520 0.0045871322480566966 +SRR10002830 Unassigned 2185939 0.5136880778270905 +SRR10002830 Bacterial 613218 0.14410410158241962 +SRR10002830 Archaeal 604 1.4193790357716417e-4 +SRR10002830 Viral 544 1.2783811183108825e-4 +SRR10002830 Human 860981 0.2023275466221364 +SRR10002831 Filtered 282456 0.07073759626507416 +SRR10002831 Duplicate 238660 0.05976943214030715 +SRR10002831 Ribosomal 19467 0.004875268312559119 +SRR10002831 Unassigned 2838577 0.7108863461683427 +SRR10002831 Bacterial 511775 0.1281676909980964 +SRR10002831 Archaeal 1246 3.1204522101241393e-4 +SRR10002831 Viral 581 1.4550423226983346e-4 +SRR10002831 Human 100249 0.025106116662338272 +SRR10002832 Filtered 412309 0.08262851128436734 +SRR10002832 Duplicate 262315 0.05256906334219922 +SRR10002832 Ribosomal 12631 0.0025313071653367833 +SRR10002832 Unassigned 2396836 0.48033632657249264 +SRR10002832 Bacterial 1108062 0.22206042912179613 +SRR10002832 Archaeal 1723 3.4529667056252693e-4 +SRR10002832 Viral 751 1.505036561767021e-4 +SRR10002832 Human 795285 0.15937856218706864 +SRR10002833 Filtered 424908 0.08140575454471781 +SRR10002833 Duplicate 280586 0.05375590726624162 +SRR10002833 Ribosomal 14340 0.0027473206439305765 +SRR10002833 Unassigned 2295194 0.43972342106175705 +SRR10002833 Bacterial 919726 0.17620517618965786 +SRR10002833 Archaeal 1655 3.170722221551677e-4 +SRR10002833 Viral 1002 1.9196759311146709e-4 +SRR10002833 Human 1282220 0.24565338047842847 +SRR10002834 Filtered 508408 0.12187046648519023 +SRR10002834 Duplicate 179085 0.04292845999767961 +SRR10002834 Ribosomal 22271 0.005338580744385753 +SRR10002834 Unassigned 2894539 0.6938498571807998 +SRR10002834 Bacterial 
530850 0.1272500376344653 +SRR10002834 Archaeal 1689 4.04870139520791e-4 +SRR10002834 Viral 199 4.770228405247922e-5 +SRR10002834 Human 34667 0.008310025533906017 +SRR10002835 Filtered 1124694 0.17414442405067015 +SRR10002835 Duplicate 381806 0.05911775644672255 +SRR10002835 Ribosomal 20672 0.0032007937572134763 +SRR10002835 Unassigned 1965292 0.30430023049059535 +SRR10002835 Bacterial 665492 0.10304289082215125 +SRR10002835 Archaeal 753 1.1659238095886936e-4 +SRR10002835 Viral 820 1.269664706325005e-4 +SRR10002835 Human 2298869 0.35595034558105587 +SRR10002836 Filtered 961536 0.1806800078620577 +SRR10002836 Duplicate 325284 0.06112336478031148 +SRR10002836 Ribosomal 17574 0.003302289730356224 +SRR10002836 Unassigned 1607604 0.3020811528211897 +SRR10002836 Bacterial 722316 0.13572873044679562 +SRR10002836 Archaeal 565 1.061678444094268e-4 +SRR10002836 Viral 704 1.32287013211038e-4 +SRR10002836 Human 1686179 0.3168459995016688 +SRR10002837 Filtered 368188 0.08703360334643528 +SRR10002837 Duplicate 213523 0.05047333451209953 +SRR10002837 Ribosomal 23378 0.0055261757010901065 +SRR10002837 Unassigned 3007249 0.7108643318901327 +SRR10002837 Bacterial 529568 0.1251811880261308 +SRR10002837 Archaeal 1541 3.6426712102745546e-4 +SRR10002837 Viral 383 9.053491716646038e-5 +SRR10002837 Human 86582 0.020466564485917684 +SRR10002838 Filtered 354560 0.07305105167471844 +SRR10002838 Duplicate 421150 0.08677078749099636 +SRR10002838 Ribosomal 13742 0.002831305144725803 +SRR10002838 Unassigned 1844624 0.3800533707818869 +SRR10002838 Bacterial 703657 0.14497654520610714 +SRR10002838 Archaeal 513 1.0569491625995758e-4 +SRR10002838 Viral 888 1.8295728194706105e-4 +SRR10002838 Human 1514458 0.3120282875033583 +SRR10002839 Filtered 260109 0.06356826301149175 +SRR10002839 Duplicate 333170 0.08142370386083797 +SRR10002839 Ribosomal 3865 9.445706859025086e-4 +SRR10002839 Unassigned 223720 0.054675123893948054 +SRR10002839 Bacterial 82208 0.020090884074171648 +SRR10002839 Archaeal 66 
1.612979696495875e-5 +SRR10002839 Viral 59 1.44190609232207e-5 +SRR10002839 Human 3188609 0.7792669056157598 +SRR10002840 Filtered 429205 0.0845144610198327 +SRR10002840 Duplicate 371403 0.07313270899954318 +SRR10002840 Ribosomal 34880 0.006868196783289488 +SRR10002840 Unassigned 3527016 0.6945022920243853 +SRR10002840 Bacterial 425923 0.08386820465966195 +SRR10002840 Archaeal 460 9.057828326585908e-5 +SRR10002840 Viral 729 1.4354688804524187e-4 +SRR10002840 Human 288864 0.05688001134197634 +SRR10002841 Filtered 496384 0.08255033545885679 +SRR10002841 Duplicate 664065 0.11043625200748963 +SRR10002841 Ribosomal 15313 0.0025466036110782663 +SRR10002841 Unassigned 2437731 0.40540289737069374 +SRR10002841 Bacterial 1329435 0.22108952992188563 +SRR10002841 Archaeal 1276 2.1220310897511054e-4 +SRR10002841 Viral 1678 2.7905706650488676e-4 +SRR10002841 Human 1067225 0.17748312145451595 +SRR10002842 Filtered 873846 0.3322082894872902 +SRR10002842 Duplicate 179647 0.06829604138660957 +SRR10002842 Ribosomal 7140 0.002714399547448008 +SRR10002842 Unassigned 1000361 0.3803052444936466 +SRR10002842 Bacterial 250897 0.09538301166051301 +SRR10002842 Archaeal 679 2.5813407461025177e-4 +SRR10002842 Viral 195 7.413276074963047e-5 +SRR10002842 Human 317651 0.12076074658913268 +SRR10002843 Filtered 608452 0.21145050503646376 +SRR10002843 Duplicate 222240 0.07723330721125693 +SRR10002843 Ribosomal 7493 0.0026039829505667216 +SRR10002843 Unassigned 1203405 0.41820980950577147 +SRR10002843 Bacterial 279868 0.0972603096769261 +SRR10002843 Archaeal 317 1.1016449957689187e-4 +SRR10002843 Viral 235 8.166768896078735e-5 +SRR10002843 Human 555505 0.19305025343047733 +SRR10002844 Filtered 387751 0.10652154682897642 +SRR10002844 Duplicate 334131 0.09179125511865274 +SRR10002844 Ribosomal 8442 0.002319155587813362 +SRR10002844 Unassigned 1418312 0.3896335228693136 +SRR10002844 Bacterial 434847 0.11945958894739127 +SRR10002844 Archaeal 415 1.1400729317016646e-4 +SRR10002844 Viral 550 
1.5109400299660616e-4 +SRR10002844 Human 1055670 0.29000982935168584 +SRR10002845 Filtered 341543 0.18184165635561728 +SRR10002845 Duplicate 148505 0.07906587216570371 +SRR10002845 Ribosomal 4888 0.0026024307810912748 +SRR10002845 Unassigned 853111 0.45420669518976237 +SRR10002845 Bacterial 206685 0.11004161333671238 +SRR10002845 Archaeal 1107 5.893802935081917e-4 +SRR10002845 Viral 138 7.347288211755235e-5 +SRR10002845 Human 322267 0.17157887899548727 +SRR10002846 Filtered 256889 0.05736034647222113 +SRR10002846 Duplicate 426704 0.09527807450331717 +SRR10002846 Ribosomal 9581 0.0021393266334889804 +SRR10002846 Unassigned 1326994 0.2963024325936829 +SRR10002846 Bacterial 1179550 0.26337989046361826 +SRR10002846 Archaeal 1002 2.2373502627658472e-4 +SRR10002846 Viral 920 2.0542537342760274e-4 +SRR10002846 Human 1276872 0.28511076893396736 +SRR10002847 Filtered 215831 0.053109014833938824 +SRR10002847 Duplicate 385264 0.09480098545149959 +SRR10002847 Ribosomal 10634 0.0026166827922963127 +SRR10002847 Unassigned 1097101 0.26996100320773714 +SRR10002847 Bacterial 852878 0.20986563725109034 +SRR10002847 Archaeal 798 1.9636193983942613e-4 +SRR10002847 Viral 957 2.3548668725104113e-4 +SRR10002847 Human 1500461 0.36921482783634735 +SRR10002848 Filtered 518850 0.10544370797671439 +SRR10002848 Duplicate 314677 0.06395048606531474 +SRR10002848 Ribosomal 12061 0.002451106412078929 +SRR10002848 Unassigned 1988754 0.40416612896506243 +SRR10002848 Bacterial 785516 0.1596371200058529 +SRR10002848 Archaeal 802 1.6298709414536944e-4 +SRR10002848 Viral 727 1.4774515890733614e-4 +SRR10002848 Human 1299248 0.2640407183219239 +SRR10002849 Filtered 373876 0.10631184268818177 +SRR10002849 Duplicate 212125 0.060317858408217045 +SRR10002849 Ribosomal 8625 0.0024525234120017537 +SRR10002849 Unassigned 1461762 0.41565281481443567 +SRR10002849 Bacterial 412775 0.11737279436394481 +SRR10002849 Archaeal 617 1.7544428350203852e-4 +SRR10002849 Viral 401 1.1402456674929893e-4 +SRR10002849 Human 
1046605 0.2976026974629676 +SRR10002850 Filtered 557590 0.21156270085726925 +SRR10002850 Duplicate 139538 0.05294398420384447 +SRR10002850 Ribosomal 7654 0.0029041068031376797 +SRR10002850 Unassigned 1440135 0.5464209368874683 +SRR10002850 Bacterial 207745 0.07882331693465343 +SRR10002850 Archaeal 431 1.6353149100500916e-4 +SRR10002850 Viral 110 4.173657543051278e-5 +SRR10002850 Human 282375 0.10713968624719132 +SRR10002851 Filtered 717455 0.15762446028086816 +SRR10002851 Duplicate 249914 0.05490596534505005 +SRR10002851 Ribosomal 15610 0.0034295082269750047 +SRR10002851 Unassigned 2570973 0.5648413231794112 +SRR10002851 Bacterial 560171 0.12306925387654166 +SRR10002851 Archaeal 1118 2.456239716693181e-4 +SRR10002851 Viral 226 4.96520729850321e-5 +SRR10002851 Human 436206 0.09583421304649961 +SRR10002852 Filtered 535985 0.11773004842662346 +SRR10002852 Duplicate 262665 0.05769482946347202 +SRR10002852 Ribosomal 14382 0.0031590316081078736 +SRR10002852 Unassigned 2183704 0.47965442627948796 +SRR10002852 Bacterial 689349 0.151416720902347 +SRR10002852 Archaeal 1743 3.828530171695191e-4 +SRR10002852 Viral 465 1.0213806826381318e-4 +SRR10002852 Human 864368 0.18985995223452834 +SRR10002853 Filtered 517300 0.11026636799039598 +SRR10002853 Duplicate 264989 0.056484389304865726 +SRR10002853 Ribosomal 13364 0.0028486366553714514 +SRR10002853 Unassigned 2439734 0.5200475682247839 +SRR10002853 Bacterial 669892 0.14279249523646306 +SRR10002853 Archaeal 2381 5.075279763872662e-4 +SRR10002853 Viral 535 1.1403925550910854e-4 +SRR10002853 Human 783172 0.16693897535622346 +SRR10002854 Filtered 508837 0.12048911994639003 +SRR10002854 Duplicate 225478 0.053391647594951096 +SRR10002854 Ribosomal 19643 0.00465132799522625 +SRR10002854 Unassigned 2255073 0.5339858563446951 +SRR10002854 Bacterial 799890 0.18940847885259507 +SRR10002854 Archaeal 1221 2.89124445460024e-4 +SRR10002854 Viral 298 7.056436097222535e-5 +SRR10002854 Human 412655 0.09771388045971023 +SRR10002855 Filtered 477609 
0.11461162724566525 +SRR10002855 Duplicate 264137 0.06338484280193271 +SRR10002855 Ribosomal 15724 0.0037732815478997263 +SRR10002855 Unassigned 2293239 0.5503075809987293 +SRR10002855 Bacterial 582190 0.1397078850401769 +SRR10002855 Archaeal 1454 3.489157574819513e-4 +SRR10002855 Viral 269 6.455181482987957e-5 +SRR10002855 Human 532573 0.12780131479328422 +SRR10002856 Filtered 442497 0.11350355133858146 +SRR10002856 Duplicate 250207 0.06417983188535166 +SRR10002856 Ribosomal 14518 0.003723967751947529 +SRR10002856 Unassigned 2654314 0.6808499613957055 +SRR10002856 Bacterial 382508 0.09811595652720384 +SRR10002856 Archaeal 635 1.6288190676998766e-4 +SRR10002856 Viral 268 6.874385986512865e-5 +SRR10002856 Human 153583 0.039395105334574826 +SRR10002857 Filtered 420584 0.09903170806997848 +SRR10002857 Duplicate 243389 0.057308952303092825 +SRR10002857 Ribosomal 12221 0.0028775857006524425 +SRR10002857 Unassigned 2446199 0.5759878294206943 +SRR10002857 Bacterial 552796 0.1301626597641656 +SRR10002857 Archaeal 908 2.1379983767223779e-4 +SRR10002857 Viral 368 8.665015447509197e-5 +SRR10002857 Human 570498 0.13433081474926906 +SRR10002858 Filtered 258735 0.07291507852880827 +SRR10002858 Duplicate 304042 0.08568321373627813 +SRR10002858 Ribosomal 6500 0.0018317893228100325 +SRR10002858 Unassigned 758549 0.2137695321581888 +SRR10002858 Bacterial 724888 0.20428339978971058 +SRR10002858 Archaeal 1106 3.116859986196763e-4 +SRR10002858 Viral 533 1.5020672447042265e-4 +SRR10002858 Human 1494090 0.42105509374111405 +SRR10002859 Filtered 312555 0.09802119833270977 +SRR10002859 Duplicate 181548 0.05693574735616705 +SRR10002859 Ribosomal 8855 0.002777039916930284 +SRR10002859 Unassigned 878691 0.27556860323516524 +SRR10002859 Bacterial 433653 0.13599906167098458 +SRR10002859 Archaeal 287 9.000682734714756e-5 +SRR10002859 Viral 363 1.1384138789900543e-4 +SRR10002859 Human 1372695 0.43049450127279687 +SRR10002860 Filtered 404273 0.07558807636719225 +SRR10002860 Duplicate 273470 
0.05113146622241917 +SRR10002860 Ribosomal 14809 0.0027688809861696182 +SRR10002860 Unassigned 2813212 0.5259942748912285 +SRR10002860 Bacterial 942610 0.1762424813541322 +SRR10002860 Archaeal 1372 2.565267548804589e-4 +SRR10002860 Viral 729 1.3630321013692023e-4 +SRR10002860 Human 897895 0.16788199021384084 +SRR10002861 Filtered 468962 0.08708400555005898 +SRR10002861 Duplicate 352729 0.06550009210483312 +SRR10002861 Ribosomal 16055 0.002981336886797218 +SRR10002861 Unassigned 2703709 0.5020658594123711 +SRR10002861 Bacterial 917483 0.17037221494296928 +SRR10002861 Archaeal 1213 2.2524831165898632e-4 +SRR10002861 Viral 690 1.281297073740318e-4 +SRR10002861 Human 924327 0.1716431130839372 +SRR10002862 Filtered 538226 0.06877303184719886 +SRR10002862 Duplicate 456851 0.05837515908266165 +SRR10002862 Ribosomal 20212 0.00258263353999172 +SRR10002862 Unassigned 3970247 0.5073071969251686 +SRR10002862 Bacterial 1605422 0.20513638942413354 +SRR10002862 Archaeal 1520 1.9422140217630193e-4 +SRR10002862 Viral 1339 1.7109372204872914e-4 +SRR10002862 Human 1232303 0.15746027405662064 +SRR10002863 Filtered 406530 0.07873029128251767 +SRR10002863 Duplicate 260696 0.0504874720591032 +SRR10002863 Ribosomal 11571 0.0022408880044031483 +SRR10002863 Unassigned 2065991 0.4001084131972055 +SRR10002863 Bacterial 699530 0.1354738903915076 +SRR10002863 Archaeal 3743 7.248849538052878e-4 +SRR10002863 Viral 365 7.068741868526049e-5 +SRR10002863 Human 1715152 0.33216347269277235 +SRR10002864 Filtered 469628 0.06714884363888429 +SRR10002864 Duplicate 425994 0.06090992125065558 +SRR10002864 Ribosomal 21801 0.0031171734653200334 +SRR10002864 Unassigned 3985827 0.5699056998190979 +SRR10002864 Bacterial 1489527 0.21297711298920935 +SRR10002864 Archaeal 5693 8.140025016314366e-4 +SRR10002864 Viral 662 9.465477886527508e-5 +SRR10002864 Human 594704 0.08503259155633618 +SRR10002865 Filtered 367763 0.07060842632580754 +SRR10002865 Duplicate 274898 0.052778868945793464 +SRR10002865 Ribosomal 17297 
0.00332092665699783 +SRR10002865 Unassigned 2680587 0.5146576183558907 +SRR10002865 Bacterial 1057490 0.20303212872224288 +SRR10002865 Archaeal 2412 4.630904258934362e-4 +SRR10002865 Viral 785 1.5071558222485383e-4 +SRR10002865 Human 807254 0.15498822498514925 +SRR10002866 Filtered 254879 0.07326648257988257 +SRR10002866 Duplicate 152690 0.043891647507728254 +SRR10002866 Ribosomal 9370 0.0026934621595874893 +SRR10002866 Unassigned 1338208 0.38467583881080625 +SRR10002866 Bacterial 692312 0.19900919686535046 +SRR10002866 Archaeal 646 1.8569653736323565e-4 +SRR10002866 Viral 875 2.5152394766692136e-4 +SRR10002866 Human 1029814 0.2960261515916148 +SRR10002867 Filtered 313414 0.08945300815829792 +SRR10002867 Duplicate 151400 0.043211807497962135 +SRR10002867 Ribosomal 9999 0.0028538630328409737 +SRR10002867 Unassigned 1610162 0.45956413728225703 +SRR10002867 Bacterial 744184 0.21240116083925664 +SRR10002867 Archaeal 969 2.7656698458074843e-4 +SRR10002867 Viral 636 1.8152384127281322e-4 +SRR10002867 Human 672908 0.19205793236353175 +SRR10002868 Filtered 393403 0.08700801533664923 +SRR10002868 Duplicate 253353 0.05603348655090604 +SRR10002868 Ribosomal 12487 0.002761720392342557 +SRR10002868 Unassigned 2293467 0.5072406969700254 +SRR10002868 Bacterial 900264 0.19910926942355087 +SRR10002868 Archaeal 1570 3.472332038101877e-4 +SRR10002868 Viral 624 1.380086109411192e-4 +SRR10002868 Human 666289 0.14736156951177462 +SRR10002869 Filtered 460920 0.1017919373859887 +SRR10002869 Duplicate 235143 0.05193018643745887 +SRR10002869 Ribosomal 12039 0.00265875452180404 +SRR10002869 Unassigned 2384609 0.5266292849476376 +SRR10002869 Bacterial 805148 0.17781301484520964 +SRR10002869 Archaeal 3391 7.488858363184234e-4 +SRR10002869 Viral 557 1.230107374902276e-4 +SRR10002869 Human 626253 0.13830492528809246 +SRR10002870 Filtered 343499 0.061443912902865744 +SRR10002870 Duplicate 525797 0.09405274854537597 +SRR10002870 Ribosomal 19093 0.003415289794306288 +SRR10002870 Unassigned 1704152 
0.30483281482986696 +SRR10002870 Bacterial 1257169 0.22487804197445357 +SRR10002870 Archaeal 9400 0.0016814394839197145 +SRR10002870 Viral 1326 2.3719029315718526e-4 +SRR10002870 Human 1730012 0.3094585621760546 +SRR10002871 Filtered 265967 0.0824282661139362 +SRR10002871 Duplicate 185477 0.05748287386786535 +SRR10002871 Ribosomal 12578 0.00389816304722424 +SRR10002871 Unassigned 2236516 0.6931391338627579 +SRR10002871 Bacterial 376235 0.11660243075786389 +SRR10002871 Archaeal 832 2.5785273137943774e-4 +SRR10002871 Viral 156 4.8347387133644576e-5 +SRR10002871 Human 148887 0.04614293223183936 +SRR10002872 Filtered 291962 0.07842470463346184 +SRR10002872 Duplicate 217706 0.05847859908800612 +SRR10002872 Ribosomal 14401 0.0038682916661294414 +SRR10002872 Unassigned 2387232 0.6412408617955363 +SRR10002872 Bacterial 534579 0.14359471499116802 +SRR10002872 Archaeal 1097 2.9466814511103375e-4 +SRR10002872 Viral 278 7.467433394791921e-5 +SRR10002872 Human 275577 0.07402348534663933 +SRR10002873 Filtered 179035 0.053335148558504665 +SRR10002873 Duplicate 235026 0.07001506199967111 +SRR10002873 Ribosomal 6153 0.0018330000786465172 +SRR10002873 Unassigned 549609 0.16373043072075957 +SRR10002873 Bacterial 258417 0.07698332217188315 +SRR10002873 Archaeal 197 5.868698447803737e-5 +SRR10002873 Viral 292 8.698781455627874e-5 +SRR10002873 Human 2128063 0.6339573616715006 +SRR10002874 Filtered 287495 0.07708777433669844 +SRR10002874 Duplicate 220277 0.05906420517770717 +SRR10002874 Ribosomal 12588 0.0033752966255077826 +SRR10002874 Unassigned 2494295 0.6688104144042687 +SRR10002874 Bacterial 654638 0.17553204896164315 +SRR10002874 Archaeal 1283 3.440185550148145e-4 +SRR10002874 Viral 201 5.38953464988135e-5 +SRR10002874 Human 58673 0.015732346592661116 diff --git a/data/2024-04-12_leung/3/adapters.fasta b/data/2024-04-12_leung/3/adapters.fasta new file mode 100644 index 0000000..686a505 --- /dev/null +++ b/data/2024-04-12_leung/3/adapters.fasta @@ -0,0 +1,41 @@ +>0 
+CTGTCTCTTATACACATCTGACGCTGCCGACGA +>1 +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>2 +TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG +>3 +TGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>4 +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>5 +ACACTCTTTCCCTACACGACGCTCTTCCGATCT +>6 +GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +>7 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>8 +CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +>9 +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>10 +CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT +>11 +CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC +T +>12 +unspecified +>13 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC +>14 +CAAGCAGAAGACGGCATACGAGAT +>15 +GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +>16 +GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG +>17 +heifigepsna +>18 +GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG +>19 +AGATCGGAAGAGCACACGTCTGAACTCCAGTCA diff --git a/data/2024-04-12_leung/3/bracken_counts.tsv b/data/2024-04-12_leung/3/bracken_counts.tsv new file mode 100644 index 0000000..02a1a90 --- /dev/null +++ b/data/2024-04-12_leung/3/bracken_counts.tsv @@ -0,0 +1,373 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads sample +Bacteria 2 D 440771 300 441071 0.88255 SRR10002879 +Eukaryota 2759 D 57232 393 57625 0.1153 SRR10002879 +Archaea 2157 D 775 1 776 0.00155 SRR10002879 +Viruses 10239 D 297 1 298 6e-4 SRR10002879 +Eukaryota 2759 D 893040 992 894032 0.5822 SRR10002886 +Bacteria 2 D 640190 212 640402 0.41704 SRR10002886 +Archaea 2157 D 637 0 637 4.1e-4 SRR10002886 +Viruses 10239 D 503 32 535 3.5e-4 SRR10002886 +Bacteria 2 D 701099 319 701418 0.51216 SRR10002938 +Eukaryota 2759 D 645329 1399 646728 0.47222 SRR10002938 +Archaea 2157 D 20967 9 20976 0.01532 SRR10002938 +Viruses 10239 D 386 29 415 3e-4 SRR10002938 +Eukaryota 2759 D 2042633 631 2043264 0.79601 SRR10002891 +Bacteria 2 D 522309 77 522386 0.20351 SRR10002891 +Archaea 2157 D 838 0 838 3.3e-4 SRR10002891 +Viruses 10239 D 359 27 386 1.5e-4 SRR10002891 +Eukaryota 2759 D 700913 612 701525 0.5893 SRR10002966 +Bacteria 2 D 
487325 352 487677 0.40967 SRR10002966 +Archaea 2157 D 588 0 588 4.9e-4 SRR10002966 +Viruses 10239 D 611 27 638 5.4e-4 SRR10002966 +Bacteria 2 D 1325931 556 1326487 0.59184 SRR10002939 +Eukaryota 2759 D 911519 1363 912882 0.4073 SRR10002939 +Archaea 2157 D 1252 0 1252 5.6e-4 SRR10002939 +Viruses 10239 D 663 16 679 3e-4 SRR10002939 +Bacteria 2 D 1564240 338 1564578 0.63339 SRR10002945 +Eukaryota 2759 D 900786 910 901696 0.36503 SRR10002945 +Archaea 2157 D 2901 0 2901 0.00117 SRR10002945 +Viruses 10239 D 985 6 991 4e-4 SRR10002945 +Eukaryota 2759 D 863452 1001 864453 0.6407 SRR10002894 +Bacteria 2 D 483481 181 483662 0.35847 SRR10002894 +Archaea 2157 D 593 0 593 4.4e-4 SRR10002894 +Viruses 10239 D 503 15 518 3.8e-4 SRR10002894 +Bacteria 2 D 1405923 414 1406337 0.71129 SRR10002911 +Eukaryota 2759 D 565677 1803 567480 0.28702 SRR10002911 +Archaea 2157 D 2944 2 2946 0.00149 SRR10002911 +Viruses 10239 D 364 43 407 2.1e-4 SRR10002911 +Bacteria 2 D 1499966 358 1500324 0.69893 SRR10002955 +Eukaryota 2759 D 642060 1403 643463 0.29976 SRR10002955 +Archaea 2157 D 2141 1 2142 0.001 SRR10002955 +Viruses 10239 D 658 9 667 3.1e-4 SRR10002955 +Bacteria 2 D 774010 462 774472 0.5527 SRR10002909 +Eukaryota 2759 D 624316 836 625152 0.44614 SRR10002909 +Archaea 2157 D 1162 0 1162 8.3e-4 SRR10002909 +Viruses 10239 D 428 36 464 3.3e-4 SRR10002909 +Eukaryota 2759 D 1014367 555 1014922 0.56598 SRR10002896 +Bacteria 2 D 776900 474 777374 0.43351 SRR10002896 +Archaea 2157 D 460 0 460 2.6e-4 SRR10002896 +Viruses 10239 D 414 40 454 2.5e-4 SRR10002896 +Bacteria 2 D 1217832 396 1218228 0.73002 SRR10002907 +Eukaryota 2759 D 447721 906 448627 0.26884 SRR10002907 +Archaea 2157 D 1128 0 1128 6.8e-4 SRR10002907 +Viruses 10239 D 750 16 766 4.6e-4 SRR10002907 +Eukaryota 2759 D 1748056 678 1748734 0.7776 SRR10002913 +Bacteria 2 D 499145 139 499284 0.22201 SRR10002913 +Archaea 2157 D 432 0 432 1.9e-4 SRR10002913 +Viruses 10239 D 424 1 425 1.9e-4 SRR10002913 +Eukaryota 2759 D 767421 721 768142 0.58801 
SRR10002958 +Bacteria 2 D 536916 134 537050 0.41111 SRR10002958 +Archaea 2157 D 777 0 777 5.9e-4 SRR10002958 +Viruses 10239 D 369 4 373 2.9e-4 SRR10002958 +Eukaryota 2759 D 1030481 781 1031262 0.71634 SRR10002962 +Bacteria 2 D 406668 111 406779 0.28256 SRR10002962 +Archaea 2157 D 1351 0 1351 9.4e-4 SRR10002962 +Viruses 10239 D 231 9 240 1.7e-4 SRR10002962 +Bacteria 2 D 897579 287 897866 0.69108 SRR10002905 +Eukaryota 2759 D 398928 783 399711 0.30765 SRR10002905 +Archaea 2157 D 1259 0 1259 9.7e-4 SRR10002905 +Viruses 10239 D 378 5 383 2.9e-4 SRR10002905 +Eukaryota 2759 D 1639045 1546 1640591 0.6823 SRR10002927 +Bacteria 2 D 761996 259 762255 0.31701 SRR10002927 +Archaea 2157 D 831 0 831 3.5e-4 SRR10002927 +Viruses 10239 D 807 22 829 3.4e-4 SRR10002927 +Eukaryota 2759 D 455648 635 456283 0.52109 SRR10002937 +Bacteria 2 D 416967 183 417150 0.4764 SRR10002937 +Archaea 2157 D 1832 0 1832 0.00209 SRR10002937 +Viruses 10239 D 331 35 366 4.2e-4 SRR10002937 +Bacteria 2 D 1688795 598 1689393 0.93543 SRR10002908 +Eukaryota 2759 D 113158 980 114138 0.0632 SRR10002908 +Archaea 2157 D 2045 3 2048 0.00113 SRR10002908 +Viruses 10239 D 356 70 426 2.4e-4 SRR10002908 +Eukaryota 2759 D 1811280 926 1812206 0.58898 SRR10002895 +Bacteria 2 D 1262208 231 1262439 0.4103 SRR10002895 +Archaea 2157 D 1235 0 1235 4e-4 SRR10002895 +Viruses 10239 D 990 5 995 3.2e-4 SRR10002895 +Bacteria 2 D 618893 228 619121 0.51652 SRR10002917 +Eukaryota 2759 D 576792 1007 577799 0.48204 SRR10002917 +Archaea 2157 D 1291 0 1291 0.00108 SRR10002917 +Viruses 10239 D 419 17 436 3.6e-4 SRR10002917 +Eukaryota 2759 D 885248 911 886159 0.6275 SRR10002960 +Bacteria 2 D 524032 217 524249 0.37123 SRR10002960 +Archaea 2157 D 1337 0 1337 9.5e-4 SRR10002960 +Viruses 10239 D 420 32 452 3.2e-4 SRR10002960 +Eukaryota 2759 D 864153 1192 865345 0.50394 SRR10002885 +Bacteria 2 D 849988 189 850177 0.49511 SRR10002885 +Archaea 2157 D 1087 0 1087 6.3e-4 SRR10002885 +Viruses 10239 D 517 34 551 3.2e-4 SRR10002885 +Eukaryota 2759 D 
701305 821 702126 0.51021 SRR10002943 +Bacteria 2 D 672280 272 672552 0.48872 SRR10002943 +Archaea 2157 D 871 0 871 6.3e-4 SRR10002943 +Viruses 10239 D 602 9 611 4.4e-4 SRR10002943 +Eukaryota 2759 D 1851432 1087 1852519 0.58778 SRR10002892 +Bacteria 2 D 1295627 334 1295961 0.41119 SRR10002892 +Archaea 2157 D 1742 0 1742 5.5e-4 SRR10002892 +Viruses 10239 D 1511 5 1516 4.8e-4 SRR10002892 +Eukaryota 2759 D 1711727 764 1712491 0.67187 SRR10002952 +Bacteria 2 D 835136 161 835297 0.32772 SRR10002952 +Archaea 2157 D 620 0 620 2.4e-4 SRR10002952 +Viruses 10239 D 413 6 419 1.6e-4 SRR10002952 +Eukaryota 2759 D 1307444 656 1308100 0.698 SRR10002880 +Bacteria 2 D 564725 197 564922 0.30144 SRR10002880 +Archaea 2157 D 473 0 473 2.5e-4 SRR10002880 +Viruses 10239 D 559 9 568 3e-4 SRR10002880 +Bacteria 2 D 698557 299 698856 0.55298 SRR10002918 +Eukaryota 2759 D 560990 1001 561991 0.44468 SRR10002918 +Archaea 2157 D 2532 0 2532 0.002 SRR10002918 +Viruses 10239 D 353 71 424 3.4e-4 SRR10002918 +Bacteria 2 D 915769 8467 924236 0.98602 SRR10002934 +Eukaryota 2759 D 12496 9 12505 0.01334 SRR10002934 +Archaea 2157 D 18 0 18 2e-5 SRR10002934 +Viruses 10239 D 16 568 584 6.2e-4 SRR10002934 +Eukaryota 2759 D 1951313 1291 1952604 0.53388 SRR10002925 +Bacteria 2 D 1700269 497 1700766 0.46503 SRR10002925 +Archaea 2157 D 1666 0 1666 4.6e-4 SRR10002925 +Viruses 10239 D 2307 14 2321 6.3e-4 SRR10002925 +Bacteria 2 D 1217828 313 1218141 0.72434 SRR10002898 +Eukaryota 2759 D 459891 1429 461320 0.27432 SRR10002898 +Archaea 2157 D 1915 1 1916 0.00114 SRR10002898 +Viruses 10239 D 327 10 337 2e-4 SRR10002898 +Bacteria 2 D 1692623 452 1693075 0.6769 SRR10002889 +Eukaryota 2759 D 795390 1767 797157 0.31871 SRR10002889 +Archaea 2157 D 10474 5 10479 0.00419 SRR10002889 +Viruses 10239 D 498 21 519 2.1e-4 SRR10002889 +Bacteria 2 D 1418906 314 1419220 0.56242 SRR10002946 +Eukaryota 2759 D 1100466 973 1101439 0.43649 SRR10002946 +Archaea 2157 D 1750 0 1750 6.9e-4 SRR10002946 +Viruses 10239 D 991 14 1005 4e-4 
SRR10002946 +Eukaryota 2759 D 1826300 1755 1828055 0.66431 SRR10002932 +Bacteria 2 D 921176 352 921528 0.33488 SRR10002932 +Archaea 2157 D 774 0 774 2.8e-4 SRR10002932 +Viruses 10239 D 1417 37 1454 5.3e-4 SRR10002932 +Eukaryota 2759 D 704128 767 704895 0.56567 SRR10002963 +Bacteria 2 D 539648 159 539807 0.43319 SRR10002963 +Archaea 2157 D 934 0 934 7.5e-4 SRR10002963 +Viruses 10239 D 473 13 486 3.9e-4 SRR10002963 +Eukaryota 2759 D 797857 569 798426 0.62332 SRR10002877 +Bacteria 2 D 481435 172 481607 0.37599 SRR10002877 +Archaea 2157 D 383 0 383 3e-4 SRR10002877 +Viruses 10239 D 498 4 502 3.9e-4 SRR10002877 +Eukaryota 2759 D 1001050 766 1001816 0.68095 SRR10002959 +Bacteria 2 D 468173 170 468343 0.31834 SRR10002959 +Archaea 2157 D 723 0 723 4.9e-4 SRR10002959 +Viruses 10239 D 320 3 323 2.2e-4 SRR10002959 +Eukaryota 2759 D 594865 712 595577 0.60786 SRR10002926 +Bacteria 2 D 382980 113 383093 0.391 SRR10002926 +Archaea 2157 D 794 0 794 8.1e-4 SRR10002926 +Viruses 10239 D 312 7 319 3.3e-4 SRR10002926 +Bacteria 2 D 945092 183 945275 0.52225 SRR10002876 +Eukaryota 2759 D 862262 886 863148 0.47687 SRR10002876 +Archaea 2157 D 1128 0 1128 6.2e-4 SRR10002876 +Viruses 10239 D 466 3 469 2.6e-4 SRR10002876 +Eukaryota 2759 D 1384581 1134 1385715 0.52302 SRR10002950 +Bacteria 2 D 1259830 322 1260152 0.47563 SRR10002950 +Archaea 2157 D 3142 0 3142 0.00119 SRR10002950 +Viruses 10239 D 439 9 448 1.7e-4 SRR10002950 +Bacteria 2 D 836785 409 837194 0.54869 SRR10002910 +Eukaryota 2759 D 686719 592 687311 0.45046 SRR10002910 +Archaea 2157 D 621 0 621 4.1e-4 SRR10002910 +Viruses 10239 D 636 50 686 4.5e-4 SRR10002910 +Bacteria 2 D 885110 323 885433 0.50696 SRR10002921 +Eukaryota 2759 D 856935 1277 858212 0.49137 SRR10002921 +Archaea 2157 D 1006 0 1006 5.8e-4 SRR10002921 +Viruses 10239 D 1875 43 1918 0.0011 SRR10002921 +Bacteria 2 D 1008937 338 1009275 0.51024 SRR10002935 +Eukaryota 2759 D 964946 1212 966158 0.48844 SRR10002935 +Archaea 2157 D 1973 0 1973 0.001 SRR10002935 +Viruses 10239 D 
617 14 631 3.2e-4 SRR10002935 +Eukaryota 2759 D 1072798 1121 1073919 0.61842 SRR10002919 +Bacteria 2 D 661121 208 661329 0.38083 SRR10002919 +Archaea 2157 D 551 0 551 3.2e-4 SRR10002919 +Viruses 10239 D 744 20 764 4.4e-4 SRR10002919 +Eukaryota 2759 D 1762230 1326 1763556 0.59023 SRR10002929 +Bacteria 2 D 1221874 258 1222132 0.40902 SRR10002929 +Archaea 2157 D 1613 0 1613 5.4e-4 SRR10002929 +Viruses 10239 D 628 5 633 2.1e-4 SRR10002929 +Bacteria 2 D 882259 206 882465 0.59834 SRR10002928 +Eukaryota 2759 D 589884 893 590777 0.40056 SRR10002928 +Archaea 2157 D 968 0 968 6.6e-4 SRR10002928 +Viruses 10239 D 645 4 649 4.4e-4 SRR10002928 +Bacteria 2 D 1522257 386 1522643 0.65746 SRR10002956 +Eukaryota 2759 D 790362 1035 791397 0.34172 SRR10002956 +Archaea 2157 D 1341 0 1341 5.8e-4 SRR10002956 +Viruses 10239 D 555 9 564 2.4e-4 SRR10002956 +Eukaryota 2759 D 1800903 1216 1802119 0.50746 SRR10002914 +Bacteria 2 D 1746305 377 1746682 0.49185 SRR10002914 +Archaea 2157 D 1249 0 1249 3.5e-4 SRR10002914 +Viruses 10239 D 1200 15 1215 3.4e-4 SRR10002914 +Eukaryota 2759 D 1282468 747 1283215 0.55091 SRR10002899 +Bacteria 2 D 1043633 266 1043899 0.44817 SRR10002899 +Archaea 2157 D 1228 0 1228 5.3e-4 SRR10002899 +Viruses 10239 D 900 5 905 3.9e-4 SRR10002899 +Bacteria 2 D 664561 240 664801 0.51122 SRR10002915 +Eukaryota 2759 D 628950 1029 629979 0.48444 SRR10002915 +Archaea 2157 D 5183 1 5184 0.00399 SRR10002915 +Viruses 10239 D 423 26 449 3.5e-4 SRR10002915 +Bacteria 2 D 959699 237 959936 0.62276 SRR10002900 +Eukaryota 2759 D 574352 1211 575563 0.3734 SRR10002900 +Archaea 2157 D 5572 2 5574 0.00362 SRR10002900 +Viruses 10239 D 324 14 338 2.2e-4 SRR10002900 +Eukaryota 2759 D 1531511 1555 1533066 0.64978 SRR10002916 +Bacteria 2 D 824103 277 824380 0.34941 SRR10002916 +Archaea 2157 D 1189 0 1189 5e-4 SRR10002916 +Viruses 10239 D 714 15 729 3.1e-4 SRR10002916 +Bacteria 2 D 578696 487 579183 0.86628 SRR10002947 +Eukaryota 2759 D 87998 476 88474 0.13233 SRR10002947 +Archaea 2157 D 725 0 725 
0.00108 SRR10002947 +Viruses 10239 D 156 47 203 3e-4 SRR10002947 +Bacteria 2 D 1236534 442 1236976 0.64533 SRR10002906 +Eukaryota 2759 D 676312 1021 677333 0.35336 SRR10002906 +Archaea 2157 D 1926 0 1926 0.001 SRR10002906 +Viruses 10239 D 543 38 581 3e-4 SRR10002906 +Eukaryota 2759 D 1371205 924 1372129 0.5179 SRR10002949 +Bacteria 2 D 1274715 277 1274992 0.48123 SRR10002949 +Archaea 2157 D 1727 0 1727 6.5e-4 SRR10002949 +Viruses 10239 D 561 7 568 2.1e-4 SRR10002949 +Eukaryota 2759 D 1829365 825 1830190 0.62171 SRR10002923 +Bacteria 2 D 1111099 224 1111323 0.37751 SRR10002923 +Archaea 2157 D 1754 0 1754 6e-4 SRR10002923 +Viruses 10239 D 538 16 554 1.9e-4 SRR10002923 +Eukaryota 2759 D 1381001 1086 1382087 0.51916 SRR10002875 +Bacteria 2 D 1275053 259 1275312 0.47905 SRR10002875 +Archaea 2157 D 3906 0 3906 0.00147 SRR10002875 +Viruses 10239 D 818 38 856 3.2e-4 SRR10002875 +Eukaryota 2759 D 801905 1034 802939 0.53004 SRR10002881 +Bacteria 2 D 709798 313 710111 0.46876 SRR10002881 +Archaea 2157 D 1377 0 1377 9.1e-4 SRR10002881 +Viruses 10239 D 404 47 451 3e-4 SRR10002881 +Eukaryota 2759 D 1025919 626 1026545 0.50427 SRR10002897 +Bacteria 2 D 1006859 276 1007135 0.49474 SRR10002897 +Archaea 2157 D 1087 0 1087 5.3e-4 SRR10002897 +Viruses 10239 D 930 2 932 4.6e-4 SRR10002897 +Bacteria 2 D 1419325 408 1419733 0.73872 SRR10002954 +Eukaryota 2759 D 498799 1083 499882 0.2601 SRR10002954 +Archaea 2157 D 1699 0 1699 8.8e-4 SRR10002954 +Viruses 10239 D 540 18 558 2.9e-4 SRR10002954 +Bacteria 2 D 54120 39 54159 0.76854 SRR10002890 +Eukaryota 2759 D 16252 12 16264 0.23079 SRR10002890 +Archaea 2157 D 8 0 8 1.1e-4 SRR10002890 +Viruses 10239 D 23 14 37 5.3e-4 SRR10002890 +Eukaryota 2759 D 697282 1069 698351 0.57326 SRR10002887 +Bacteria 2 D 516390 150 516540 0.42401 SRR10002887 +Archaea 2157 D 2890 0 2890 0.00237 SRR10002887 +Viruses 10239 D 412 25 437 3.6e-4 SRR10002887 +Bacteria 2 D 1013555 300 1013855 0.67599 SRR10002953 +Eukaryota 2759 D 483623 782 484405 0.32298 SRR10002953 
+Archaea 2157 D 950 0 950 6.3e-4 SRR10002953 +Viruses 10239 D 575 25 600 4e-4 SRR10002953 +Eukaryota 2759 D 1073177 1524 1074701 0.52335 SRR10002922 +Bacteria 2 D 976828 313 977141 0.47584 SRR10002922 +Archaea 2157 D 1007 0 1007 4.9e-4 SRR10002922 +Viruses 10239 D 635 20 655 3.2e-4 SRR10002922 +Bacteria 2 D 1213519 338 1213857 0.52603 SRR10002942 +Eukaryota 2759 D 1089327 1331 1090658 0.47264 SRR10002942 +Archaea 2157 D 2386 0 2386 0.00103 SRR10002942 +Viruses 10239 D 655 15 670 2.9e-4 SRR10002942 +Eukaryota 2759 D 1537857 888 1538745 0.80753 SRR10002965 +Bacteria 2 D 365946 80 366026 0.19209 SRR10002965 +Archaea 2157 D 438 0 438 2.3e-4 SRR10002965 +Viruses 10239 D 273 4 277 1.5e-4 SRR10002965 +Bacteria 2 D 1508861 312 1509173 0.56118 SRR10002924 +Eukaryota 2759 D 1176622 1013 1177635 0.4379 SRR10002924 +Archaea 2157 D 1627 0 1627 6e-4 SRR10002924 +Viruses 10239 D 844 7 851 3.2e-4 SRR10002924 +Eukaryota 2759 D 614101 864 614965 0.50155 SRR10002893 +Bacteria 2 D 609876 199 610075 0.49757 SRR10002893 +Archaea 2157 D 602 0 602 4.9e-4 SRR10002893 +Viruses 10239 D 459 15 474 3.9e-4 SRR10002893 +Eukaryota 2759 D 1425074 1648 1426722 0.59419 SRR10002883 +Bacteria 2 D 972226 329 972555 0.40504 SRR10002883 +Archaea 2157 D 926 0 926 3.9e-4 SRR10002883 +Viruses 10239 D 902 35 937 3.9e-4 SRR10002883 +Eukaryota 2759 D 687098 992 688090 0.51012 SRR10002882 +Bacteria 2 D 659281 209 659490 0.48892 SRR10002882 +Archaea 2157 D 644 0 644 4.8e-4 SRR10002882 +Viruses 10239 D 627 25 652 4.8e-4 SRR10002882 +Eukaryota 2759 D 1229925 1178 1231103 0.55928 SRR10002941 +Bacteria 2 D 967753 368 968121 0.43981 SRR10002941 +Archaea 2157 D 1033 0 1033 4.7e-4 SRR10002941 +Viruses 10239 D 948 23 971 4.4e-4 SRR10002941 +Bacteria 2 D 461966 277 462243 0.81007 SRR10002964 +Eukaryota 2759 D 107134 474 107608 0.18858 SRR10002964 +Archaea 2157 D 567 0 567 9.9e-4 SRR10002964 +Viruses 10239 D 177 24 201 3.5e-4 SRR10002964 +Bacteria 2 D 729337 248 729585 0.59731 SRR10002944 +Eukaryota 2759 D 489729 734 
490463 0.40154 SRR10002944 +Archaea 2157 D 932 0 932 7.6e-4 SRR10002944 +Viruses 10239 D 457 2 459 3.8e-4 SRR10002944 +Bacteria 2 D 1657868 301 1658169 0.50933 SRR10002951 +Eukaryota 2759 D 1593950 1034 1594984 0.48992 SRR10002951 +Archaea 2157 D 1308 0 1308 4e-4 SRR10002951 +Viruses 10239 D 1145 9 1154 3.5e-4 SRR10002951 +Eukaryota 2759 D 1746378 558 1746936 0.87698 SRR10002904 +Bacteria 2 D 244509 41 244550 0.12277 SRR10002904 +Archaea 2157 D 390 0 390 2e-4 SRR10002904 +Viruses 10239 D 121 0 121 6e-5 SRR10002904 +Eukaryota 2759 D 1625888 1353 1627241 0.7188 SRR10002933 +Bacteria 2 D 634003 249 634252 0.28017 SRR10002933 +Archaea 2157 D 1332 0 1332 5.9e-4 SRR10002933 +Viruses 10239 D 968 30 998 4.4e-4 SRR10002933 +Bacteria 2 D 907459 280 907739 0.5395 SRR10002936 +Eukaryota 2759 D 771908 945 772853 0.45933 SRR10002936 +Archaea 2157 D 1404 0 1404 8.3e-4 SRR10002936 +Viruses 10239 D 470 81 551 3.3e-4 SRR10002936 +Eukaryota 2759 D 1067585 1184 1068769 0.6058 SRR10002920 +Bacteria 2 D 693783 229 694012 0.39338 SRR10002920 +Archaea 2157 D 842 0 842 4.8e-4 SRR10002920 +Viruses 10239 D 588 22 610 3.5e-4 SRR10002920 +Bacteria 2 D 570820 170 570990 0.65342 SRR10002930 +Eukaryota 2759 D 300993 728 301721 0.34528 SRR10002930 +Archaea 2157 D 790 0 790 9e-4 SRR10002930 +Viruses 10239 D 349 3 352 4e-4 SRR10002930 +Bacteria 2 D 450306 166 450472 0.553 SRR10002957 +Eukaryota 2759 D 361661 648 362309 0.44477 SRR10002957 +Archaea 2157 D 1586 0 1586 0.00195 SRR10002957 +Viruses 10239 D 232 4 236 2.9e-4 SRR10002957 +Bacteria 2 D 821467 213 821680 0.51001 SRR10002902 +Eukaryota 2759 D 785450 862 786312 0.48805 SRR10002902 +Archaea 2157 D 2823 0 2823 0.00175 SRR10002902 +Viruses 10239 D 299 4 303 1.9e-4 SRR10002902 +Eukaryota 2759 D 1480327 1477 1481804 0.66177 SRR10002888 +Bacteria 2 D 755453 275 755728 0.33751 SRR10002888 +Archaea 2157 D 868 0 868 3.9e-4 SRR10002888 +Viruses 10239 D 741 16 757 3.4e-4 SRR10002888 +Bacteria 2 D 1161403 524 1161927 0.60116 SRR10002912 +Eukaryota 2759 D 
762970 1516 764486 0.39553 SRR10002912 +Archaea 2157 D 5818 2 5820 0.00301 SRR10002912 +Viruses 10239 D 503 66 569 2.9e-4 SRR10002912 +Bacteria 2 D 605185 171 605356 0.6208 SRR10002967 +Eukaryota 2759 D 367109 1109 368218 0.37761 SRR10002967 +Archaea 2157 D 1256 0 1256 0.00129 SRR10002967 +Viruses 10239 D 287 7 294 3e-4 SRR10002967 +Eukaryota 2759 D 925159 859 926018 0.63067 SRR10002884 +Bacteria 2 D 540682 225 540907 0.36839 SRR10002884 +Archaea 2157 D 888 0 888 6e-4 SRR10002884 +Viruses 10239 D 470 26 496 3.4e-4 SRR10002884 +Bacteria 2 D 693632 402 694034 0.64886 SRR10002903 +Eukaryota 2759 D 372944 761 373705 0.34938 SRR10002903 +Archaea 2157 D 1191 0 1191 0.00111 SRR10002903 +Viruses 10239 D 660 27 687 6.4e-4 SRR10002903 +Eukaryota 2759 D 939859 898 940757 0.59124 SRR10002961 +Bacteria 2 D 648622 217 648839 0.40778 SRR10002961 +Archaea 2157 D 988 0 988 6.2e-4 SRR10002961 +Viruses 10239 D 565 12 577 3.6e-4 SRR10002961 +Bacteria 2 D 664767 166 664933 0.55513 SRR10002931 +Eukaryota 2759 D 527663 781 528444 0.44118 SRR10002931 +Archaea 2157 D 4121 1 4122 0.00344 SRR10002931 +Viruses 10239 D 291 5 296 2.5e-4 SRR10002931 +Bacteria 2 D 35774 24 35798 0.7267 SRR10002878 +Eukaryota 2759 D 13409 17 13426 0.27255 SRR10002878 +Archaea 2157 D 15 0 15 3e-4 SRR10002878 +Viruses 10239 D 17 3 20 4.1e-4 SRR10002878 +Bacteria 2 D 508161 215 508376 0.53959 SRR10002901 +Eukaryota 2759 D 431912 511 432423 0.45898 SRR10002901 +Archaea 2157 D 686 0 686 7.3e-4 SRR10002901 +Viruses 10239 D 660 3 663 7e-4 SRR10002901 +Bacteria 2 D 905468 346 905814 0.61705 SRR10002940 +Eukaryota 2759 D 559799 986 560785 0.38201 SRR10002940 +Archaea 2157 D 887 0 887 6e-4 SRR10002940 +Viruses 10239 D 471 13 484 3.3e-4 SRR10002940 +Bacteria 2 D 250572 179 250751 0.76417 SRR10002948 +Eukaryota 2759 D 76255 512 76767 0.23395 SRR10002948 +Archaea 2157 D 451 0 451 0.00137 SRR10002948 +Viruses 10239 D 165 0 165 5e-4 SRR10002948 diff --git a/data/2024-04-12_leung/3/hv_clade_counts.tsv.gz 
b/data/2024-04-12_leung/3/hv_clade_counts.tsv.gz new file mode 100644 index 0000000..5e842ab Binary files /dev/null and b/data/2024-04-12_leung/3/hv_clade_counts.tsv.gz differ diff --git a/data/2024-04-12_leung/3/hv_hits_putative_filtered.tsv.gz b/data/2024-04-12_leung/3/hv_hits_putative_filtered.tsv.gz new file mode 100644 index 0000000..0dafad2 Binary files /dev/null and b/data/2024-04-12_leung/3/hv_hits_putative_filtered.tsv.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002875.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002875.report.gz new file mode 100644 index 0000000..cad958e Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002875.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002876.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002876.report.gz new file mode 100644 index 0000000..1eecb3b Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002876.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002877.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002877.report.gz new file mode 100644 index 0000000..a1c9e99 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002877.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002878.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002878.report.gz new file mode 100644 index 0000000..20f3992 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002878.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002879.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002879.report.gz new file mode 100644 index 0000000..4e87a4a Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002879.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002880.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002880.report.gz new file mode 100644 index 0000000..98ef2be Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002880.report.gz differ diff --git 
a/data/2024-04-12_leung/3/kraken/SRR10002881.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002881.report.gz new file mode 100644 index 0000000..b8ea324 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002881.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002882.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002882.report.gz new file mode 100644 index 0000000..07fafe0 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002882.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002883.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002883.report.gz new file mode 100644 index 0000000..39541f1 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002883.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002884.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002884.report.gz new file mode 100644 index 0000000..8e5c7ec Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002884.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002885.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002885.report.gz new file mode 100644 index 0000000..6ac8435 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002885.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002886.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002886.report.gz new file mode 100644 index 0000000..143fe4c Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002886.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002887.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002887.report.gz new file mode 100644 index 0000000..2ba0d9a Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002887.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002888.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002888.report.gz new file mode 100644 index 0000000..072895b Binary files /dev/null and 
b/data/2024-04-12_leung/3/kraken/SRR10002888.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002889.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002889.report.gz new file mode 100644 index 0000000..546f0cb Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002889.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002890.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002890.report.gz new file mode 100644 index 0000000..2b93f52 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002890.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002891.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002891.report.gz new file mode 100644 index 0000000..455adbf Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002891.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002892.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002892.report.gz new file mode 100644 index 0000000..d4bbd18 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002892.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002893.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002893.report.gz new file mode 100644 index 0000000..5857ae5 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002893.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002894.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002894.report.gz new file mode 100644 index 0000000..b5b2433 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002894.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002895.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002895.report.gz new file mode 100644 index 0000000..10192a5 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002895.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002896.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002896.report.gz new file mode 100644 index 
0000000..7bcb70f Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002896.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002897.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002897.report.gz new file mode 100644 index 0000000..5399a1b Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002897.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002898.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002898.report.gz new file mode 100644 index 0000000..ce9579b Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002898.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002899.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002899.report.gz new file mode 100644 index 0000000..39fa983 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002899.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002900.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002900.report.gz new file mode 100644 index 0000000..fab7a05 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002900.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002901.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002901.report.gz new file mode 100644 index 0000000..a085804 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002901.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002902.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002902.report.gz new file mode 100644 index 0000000..be5bef0 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002902.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002903.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002903.report.gz new file mode 100644 index 0000000..abed0a4 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002903.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002904.report.gz 
b/data/2024-04-12_leung/3/kraken/SRR10002904.report.gz new file mode 100644 index 0000000..ffd07b9 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002904.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002905.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002905.report.gz new file mode 100644 index 0000000..5167f4a Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002905.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002906.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002906.report.gz new file mode 100644 index 0000000..4eb407c Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002906.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002907.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002907.report.gz new file mode 100644 index 0000000..890dbba Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002907.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002908.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002908.report.gz new file mode 100644 index 0000000..6e7190c Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002908.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002909.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002909.report.gz new file mode 100644 index 0000000..52fdb75 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002909.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002910.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002910.report.gz new file mode 100644 index 0000000..3aa6917 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002910.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002911.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002911.report.gz new file mode 100644 index 0000000..17f2bb8 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002911.report.gz differ diff --git 
a/data/2024-04-12_leung/3/kraken/SRR10002912.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002912.report.gz new file mode 100644 index 0000000..22a3164 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002912.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002913.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002913.report.gz new file mode 100644 index 0000000..7c796f0 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002913.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002914.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002914.report.gz new file mode 100644 index 0000000..9a5a5c0 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002914.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002915.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002915.report.gz new file mode 100644 index 0000000..cdaffa3 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002915.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002916.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002916.report.gz new file mode 100644 index 0000000..70a7e4c Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002916.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002917.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002917.report.gz new file mode 100644 index 0000000..3a42bab Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002917.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002918.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002918.report.gz new file mode 100644 index 0000000..b2e4d89 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002918.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002919.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002919.report.gz new file mode 100644 index 0000000..6ed65f3 Binary files /dev/null and 
b/data/2024-04-12_leung/3/kraken/SRR10002919.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002920.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002920.report.gz new file mode 100644 index 0000000..f5fa96c Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002920.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002921.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002921.report.gz new file mode 100644 index 0000000..5d88ce4 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002921.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002922.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002922.report.gz new file mode 100644 index 0000000..91696c6 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002922.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002923.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002923.report.gz new file mode 100644 index 0000000..70670aa Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002923.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002924.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002924.report.gz new file mode 100644 index 0000000..da511eb Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002924.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002925.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002925.report.gz new file mode 100644 index 0000000..d8b6f2b Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002925.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002926.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002926.report.gz new file mode 100644 index 0000000..68716c9 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002926.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002927.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002927.report.gz new file mode 100644 index 
0000000..e1dbd03 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002927.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002928.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002928.report.gz new file mode 100644 index 0000000..e1c15f9 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002928.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002929.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002929.report.gz new file mode 100644 index 0000000..abad17b Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002929.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002930.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002930.report.gz new file mode 100644 index 0000000..06a1b53 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002930.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002931.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002931.report.gz new file mode 100644 index 0000000..769c4a1 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002931.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002932.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002932.report.gz new file mode 100644 index 0000000..8d6d08f Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002932.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002933.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002933.report.gz new file mode 100644 index 0000000..3106543 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002933.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002934.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002934.report.gz new file mode 100644 index 0000000..ac693b1 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002934.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002935.report.gz 
b/data/2024-04-12_leung/3/kraken/SRR10002935.report.gz new file mode 100644 index 0000000..b42b9b7 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002935.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002936.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002936.report.gz new file mode 100644 index 0000000..cb92a82 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002936.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002937.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002937.report.gz new file mode 100644 index 0000000..1bfe6bf Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002937.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002938.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002938.report.gz new file mode 100644 index 0000000..f634675 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002938.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002939.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002939.report.gz new file mode 100644 index 0000000..923f640 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002939.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002940.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002940.report.gz new file mode 100644 index 0000000..fd73d8f Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002940.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002941.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002941.report.gz new file mode 100644 index 0000000..d720b08 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002941.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002942.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002942.report.gz new file mode 100644 index 0000000..ee5af1d Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002942.report.gz differ diff --git 
a/data/2024-04-12_leung/3/kraken/SRR10002943.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002943.report.gz new file mode 100644 index 0000000..d84d584 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002943.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002944.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002944.report.gz new file mode 100644 index 0000000..9642324 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002944.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002945.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002945.report.gz new file mode 100644 index 0000000..7897331 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002945.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002946.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002946.report.gz new file mode 100644 index 0000000..3a376ac Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002946.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002947.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002947.report.gz new file mode 100644 index 0000000..4f87f59 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002947.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002948.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002948.report.gz new file mode 100644 index 0000000..02ffab0 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002948.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002949.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002949.report.gz new file mode 100644 index 0000000..fffa203 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002949.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002950.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002950.report.gz new file mode 100644 index 0000000..523b297 Binary files /dev/null and 
b/data/2024-04-12_leung/3/kraken/SRR10002950.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002951.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002951.report.gz new file mode 100644 index 0000000..a25dce7 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002951.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002952.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002952.report.gz new file mode 100644 index 0000000..cabf81f Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002952.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002953.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002953.report.gz new file mode 100644 index 0000000..871d310 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002953.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002954.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002954.report.gz new file mode 100644 index 0000000..e718841 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002954.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002955.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002955.report.gz new file mode 100644 index 0000000..6773a88 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002955.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002956.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002956.report.gz new file mode 100644 index 0000000..3d77eec Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002956.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002957.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002957.report.gz new file mode 100644 index 0000000..bbc44e2 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002957.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002958.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002958.report.gz new file mode 100644 index 
0000000..8c77a17 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002958.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002959.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002959.report.gz new file mode 100644 index 0000000..c004229 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002959.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002960.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002960.report.gz new file mode 100644 index 0000000..fbe8cde Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002960.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002961.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002961.report.gz new file mode 100644 index 0000000..f8ce0ec Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002961.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002962.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002962.report.gz new file mode 100644 index 0000000..c41c66a Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002962.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002963.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002963.report.gz new file mode 100644 index 0000000..e10b108 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002963.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002964.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002964.report.gz new file mode 100644 index 0000000..c556967 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002964.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002965.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002965.report.gz new file mode 100644 index 0000000..b0a5965 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002965.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002966.report.gz 
b/data/2024-04-12_leung/3/kraken/SRR10002966.report.gz new file mode 100644 index 0000000..3748eb3 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002966.report.gz differ diff --git a/data/2024-04-12_leung/3/kraken/SRR10002967.report.gz b/data/2024-04-12_leung/3/kraken/SRR10002967.report.gz new file mode 100644 index 0000000..7087592 Binary files /dev/null and b/data/2024-04-12_leung/3/kraken/SRR10002967.report.gz differ diff --git a/data/2024-04-12_leung/3/qc_adapter_stats.tsv.gz b/data/2024-04-12_leung/3/qc_adapter_stats.tsv.gz new file mode 100644 index 0000000..799e8aa Binary files /dev/null and b/data/2024-04-12_leung/3/qc_adapter_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/3/qc_basic_stats.tsv.gz b/data/2024-04-12_leung/3/qc_basic_stats.tsv.gz new file mode 100644 index 0000000..89643d8 Binary files /dev/null and b/data/2024-04-12_leung/3/qc_basic_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/3/qc_quality_base_stats.tsv.gz b/data/2024-04-12_leung/3/qc_quality_base_stats.tsv.gz new file mode 100644 index 0000000..a727664 Binary files /dev/null and b/data/2024-04-12_leung/3/qc_quality_base_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/3/qc_quality_sequence_stats.tsv.gz b/data/2024-04-12_leung/3/qc_quality_sequence_stats.tsv.gz new file mode 100644 index 0000000..64d2b47 Binary files /dev/null and b/data/2024-04-12_leung/3/qc_quality_sequence_stats.tsv.gz differ diff --git a/data/2024-04-12_leung/3/sample-metadata.csv b/data/2024-04-12_leung/3/sample-metadata.csv new file mode 100644 index 0000000..9ee2e03 --- /dev/null +++ b/data/2024-04-12_leung/3/sample-metadata.csv @@ -0,0 +1,94 @@ +sample,library,country,region,city,location,instrument,date +SRR10002875,SRR10002875,Hong Kong,Asia,Hong Kong,"Hong Kong, Yau Tong",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002876,SRR10002876,Hong Kong,Asia,Hong Kong,"Hong Kong, Kowloon Tong",SASS 3100 electret filter air sampler,2017-07-13 
+SRR10002877,SRR10002877,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002878,SRR10002878,uncalculated,uncalculated,,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002879,SRR10002879,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002880,SRR10002880,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002881,SRR10002881,Sweden,Europe,Stockholm,"Stockholm, Fridhemsplan",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002882,SRR10002882,Sweden,Europe,Stockholm,"Stockholm, Skanstull",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002883,SRR10002883,Sweden,Europe,Stockholm,"Stockholm, Skanstull",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002884,SRR10002884,Sweden,Europe,Stockholm,"Stockholm, Fridhemsplan",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002885,SRR10002885,Sweden,Europe,Stockholm,"Stockholm, Slussen",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002886,SRR10002886,Sweden,Europe,Stockholm,"Stockholm, Medborgarplatsen",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002887,SRR10002887,Sweden,Europe,Stockholm,"Stockholm, Tekniska hoegskolan",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002888,SRR10002888,Sweden,Europe,Stockholm,"Stockholm, T-centralen",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002889,SRR10002889,Norway,Europe,Oslo,"Oslo, Groenland",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002890,SRR10002890,uncalculated,uncalculated,,"Oslo, Groenland",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002891,SRR10002891,Hong Kong,Asia,Hong Kong,"Hong Kong, Cheung Sha Wan",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002892,SRR10002892,Hong Kong,Asia,Hong Kong,"Hong Kong, Ocean Park",SASS 3100 electret filter air sampler,2017-07-05 
+SRR10002893,SRR10002893,Sweden,Europe,Stockholm,"Stockholm, Medborgarplatsen",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002894,SRR10002894,Sweden,Europe,Stockholm,"Stockholm, Slussen",SASS 3100 electret filter air sampler,2017-09-28 +SRR10002895,SRR10002895,Hong Kong,Asia,Hong Kong,"Hong Kong, Sham Shui Po",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002896,SRR10002896,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002897,SRR10002897,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsing Yi",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002898,SRR10002898,Hong Kong,Asia,Hong Kong,"Hong Kong, Tai Wai",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002899,SRR10002899,Hong Kong,Asia,Hong Kong,"Hong Kong, Lai King",SASS 3100 electret filter air sampler,2017-07-04 +SRR10002900,SRR10002900,Norway,Europe,Oslo,"Oslo, Nationaltheateret",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002901,SRR10002901,USA,North America,New York City,"New York City, 68th St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002902,SRR10002902,USA,North America,New York City,"New York City, 63rd St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002903,SRR10002903,Norway,Europe,Oslo,"Oslo, Ellingsrudaasen",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002904,SRR10002904,Norway,Europe,Oslo,"Oslo, Lindeberg",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002905,SRR10002905,USA,North America,New York City,"New York City, 68th St.",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002906,SRR10002906,USA,North America,New York City,"New York City, 57th St",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002907,SRR10002907,USA,North America,New York City,"New York City, 72nd St.",SASS 3100 electret filter air sampler,2017-06-22 +SRR10002908,SRR10002908,USA,North America,New York City,"New York City, 77th St.",SASS 3100 electret filter air sampler,2017-06-21 
+SRR10002909,SRR10002909,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-29 +SRR10002910,SRR10002910,USA,North America,Denver,"Denver, Denver Airport",SASS 3100 electret filter air sampler,2017-06-29 +SRR10002911,SRR10002911,Norway,Europe,Oslo,"Oslo, Vestli",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002912,SRR10002912,Norway,Europe,Oslo,"Oslo, Romsaas",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002913,SRR10002913,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002914,SRR10002914,Hong Kong,Asia,Hong Kong,"Hong Kong, Admiralty",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002915,SRR10002915,Sweden,Europe,Stockholm,"Stockholm, Tekniska hoegskolan",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002916,SRR10002916,Sweden,Europe,Stockholm,"Stockholm, T-centralen",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002917,SRR10002917,Sweden,Europe,Stockholm,"Stockholm, Danderyd",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002918,SRR10002918,Sweden,Europe,Stockholm,"Stockholm, Universitetet",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002919,SRR10002919,Sweden,Europe,Stockholm,"Stockholm, Skanstull",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002920,SRR10002920,Sweden,Europe,Stockholm,"Stockholm, Fridhemsplan",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002921,SRR10002921,Sweden,Europe,Stockholm,"Stockholm, Slussen",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002922,SRR10002922,Sweden,Europe,Stockholm,"Stockholm, Medborgarplatsen",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002923,SRR10002923,Hong Kong,Asia,Hong Kong,"Hong Kong, Shek Kip Mei",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002924,SRR10002924,Hong Kong,Asia,Hong Kong,"Hong Kong, Prince Edward",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002925,SRR10002925,Hong Kong,Asia,Hong 
Kong,"Hong Kong, North Point",SASS 3100 electret filter air sampler,2017-07-05 +SRR10002926,SRR10002926,Sweden,Europe,Stockholm,"Stockholm, Universitetet",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002927,SRR10002927,Sweden,Europe,Stockholm,"Stockholm, Danderyd",SASS 3100 electret filter air sampler,2017-09-27 +SRR10002928,SRR10002928,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002929,SRR10002929,Hong Kong,Asia,Hong Kong,"Hong Kong, Sha Tin Wai",SASS 3100 electret filter air sampler,2017-07-13 +SRR10002930,SRR10002930,USA,North America,Denver,"Denver, Blake & 38th",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002931,SRR10002931,Norway,Europe,Oslo,"Oslo, Jernbanetorget",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002932,SRR10002932,Sweden,Europe,Stockholm,"Stockholm, Skanstull",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002933,SRR10002933,Sweden,Europe,Stockholm,"Stockholm, Fridhemsplan",SASS 3100 electret filter air sampler,2017-09-26 +SRR10002934,SRR10002934,uncalculated,uncalculated,,"Stockholm, Fridhemsplan",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002935,SRR10002935,United Kingdom,Europe,London,"London, Euston",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002936,SRR10002936,United Kingdom,Europe,London,"London, Euston",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002937,SRR10002937,Sweden,Europe,Stockholm,"Stockholm, Danderyd",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002938,SRR10002938,Sweden,Europe,Stockholm,"Stockholm, Universitetet",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002939,SRR10002939,United Kingdom,Europe,London,"London, Brixton",SASS 3100 electret filter air sampler,2017-07-23 +SRR10002940,SRR10002940,United Kingdom,Europe,London,"London, Brixton",SASS 3100 electret filter air sampler,2017-08-21 +SRR10002941,SRR10002941,United Kingdom,Europe,London,"London, Oxford Circus",SASS 3100 
electret filter air sampler,2017-08-18 +SRR10002942,SRR10002942,United Kingdom,Europe,London,"London, Oxford Circus",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002943,SRR10002943,United Kingdom,Europe,London,"London, Pimlico",SASS 3100 electret filter air sampler,2017-08-18 +SRR10002944,SRR10002944,United Kingdom,Europe,London,"London, Pimlico",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002945,SRR10002945,Hong Kong,Asia,Hong Kong,"Hong Kong, Shek Kip Mei",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002946,SRR10002946,Hong Kong,Asia,Hong Kong,"Hong Kong, Prince Edward",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002947,SRR10002947,Norway,Europe,Oslo,"Oslo, Montebello",SASS 3100 electret filter air sampler,2017-06-21 +SRR10002948,SRR10002948,USA,North America,Denver,"Denver, Union Station",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002949,SRR10002949,Hong Kong,Asia,Hong Kong,"Hong Kong, Hung Hom",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002950,SRR10002950,Hong Kong,Asia,Hong Kong,"Hong Kong, Mong Kok East",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002951,SRR10002951,Hong Kong,Asia,Hong Kong,"Hong Kong, Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002952,SRR10002952,Hong Kong,Asia,Hong Kong,"Hong Kong, East Tsim Sha Tsui",SASS 3100 electret filter air sampler,2017-07-07 +SRR10002953,SRR10002953,Hong Kong,Asia,Hong Kong,"Hong Kong, Che Kung Temple",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002954,SRR10002954,Hong Kong,Asia,Hong Kong,"Hong Kong, Sha Tin Wai",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002955,SRR10002955,Hong Kong,Asia,Hong Kong,"Hong Kong, Kowloon Tong",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002956,SRR10002956,Hong Kong,Asia,Hong Kong,"Hong Kong, Tai Wai",SASS 3100 electret filter air sampler,2017-07-11 +SRR10002957,SRR10002957,Norway,Europe,Oslo,"Oslo, Romsaas",SASS 3100 electret filter air 
sampler,2017-06-27 +SRR10002958,SRR10002958,Norway,Europe,Oslo,"Oslo, Vestli",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002959,SRR10002959,USA,North America,Denver,"Denver, Boulder Central",SASS 3100 electret filter air sampler,2017-06-27 +SRR10002960,SRR10002960,Norway,Europe,Oslo,"Oslo, Lindeberg",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002961,SRR10002961,Norway,Europe,Oslo,"Oslo, Ellingsrudaasen",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002962,SRR10002962,Norway,Europe,Oslo,"Oslo, Toeyen",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002963,SRR10002963,Norway,Europe,Oslo,"Oslo, Helsfyr",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002964,SRR10002964,Norway,Europe,Oslo,"Oslo, Forskningsparken",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002965,SRR10002965,Norway,Europe,Oslo,"Oslo, Majorstua",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002966,SRR10002966,Norway,Europe,Oslo,"Oslo, Loeren",SASS 3100 electret filter air sampler,2017-06-26 +SRR10002967,SRR10002967,Norway,Europe,Oslo,"Oslo, Nydalen",SASS 3100 electret filter air sampler,2017-06-26 diff --git a/data/2024-04-12_leung/3/taxonomic_composition.tsv b/data/2024-04-12_leung/3/taxonomic_composition.tsv new file mode 100644 index 0000000..62bf5c7 --- /dev/null +++ b/data/2024-04-12_leung/3/taxonomic_composition.tsv @@ -0,0 +1,745 @@ +sample classification n_reads p_reads +SRR10002875 Filtered 368155 0.06675752505662481 +SRR10002875 Duplicate 515777 0.0935258138586486 +SRR10002875 Ribosomal 11500 0.002085294341109547 +SRR10002875 Unassigned 1957216 0.35490186514165767 +SRR10002875 Bacterial 1275312 0.23125225189122597 +SRR10002875 Archaeal 3906 7.082747562064253e-4 +SRR10002875 Viral 856 1.5521843095563236e-4 +SRR10002875 Human 1382087 0.2506137565235714 +SRR10002876 Filtered 341697 0.07645533230840074 +SRR10002876 Duplicate 451638 0.10105483329704824 +SRR10002876 Ribosomal 10493 0.0023478280520813732 +SRR10002876 Unassigned 
1855389 0.415146701774822 +SRR10002876 Bacterial 945275 0.21150702010208902 +SRR10002876 Archaeal 1128 2.5239207497834644e-4 +SRR10002876 Viral 469 1.049396127347912e-4 +SRR10002876 Human 863148 0.19313095277784553 +SRR10002877 Filtered 230163 0.06481728415852542 +SRR10002877 Duplicate 236430 0.06658216348240233 +SRR10002877 Ribosomal 15734 0.004430925687231392 +SRR10002877 Unassigned 1787706 0.5034442885863534 +SRR10002877 Bacterial 481607 0.13562761074427668 +SRR10002877 Archaeal 383 1.0785843003747447e-4 +SRR10002877 Viral 502 1.4137057931804748e-4 +SRR10002877 Human 798426 0.22484849833185533 +SRR10002878 Filtered 308735 0.7826438550283032 +SRR10002878 Duplicate 7557 0.019157010421393388 +SRR10002878 Ribosomal 510 0.0012928510407450879 +SRR10002878 Unassigned 28416 0.07203461798786748 +SRR10002878 Bacterial 35798 0.09074800305214245 +SRR10002878 Archaeal 15 3.8025030610149644e-5 +SRR10002878 Viral 20 5.070004081353286e-5 +SRR10002878 Human 13426 0.034034937398124604 +SRR10002879 Filtered 375141 0.08798707756276868 +SRR10002879 Duplicate 234914 0.055097673510973846 +SRR10002879 Ribosomal 32051 0.007517370329994052 +SRR10002879 Unassigned 3121716 0.7321798145788809 +SRR10002879 Bacterial 441071 0.10345056468817841 +SRR10002879 Archaeal 776 1.8200615818774404e-4 +SRR10002879 Viral 298 6.989411744838624e-5 +SRR10002879 Human 57625 0.013515599053567978 +SRR10002880 Filtered 299773 0.06738081478789203 +SRR10002880 Duplicate 289019 0.06496360816078088 +SRR10002880 Ribosomal 19468 0.004375876754379754 +SRR10002880 Unassigned 1966614 0.44204132357909315 +SRR10002880 Bacterial 564922 0.12697909635492702 +SRR10002880 Archaeal 473 1.0631753158113949e-4 +SRR10002880 Viral 568 1.2767094701498358e-4 +SRR10002880 Human 1308100 0.294025291884331 +SRR10002881 Filtered 426429 0.09457795130675706 +SRR10002881 Duplicate 227691 0.05049972753022618 +SRR10002881 Ribosomal 14223 0.0031545279552657197 +SRR10002881 Unassigned 2325536 0.51578206587758 +SRR10002881 Bacterial 710111 
0.15749595731151622 +SRR10002881 Archaeal 1377 3.0540568054565815e-4 +SRR10002881 Viral 451 1.0002756857377766e-4 +SRR10002881 Human 802939 0.17808433676953536 +SRR10002882 Filtered 295345 0.07610052602625364 +SRR10002882 Duplicate 217922 0.056151209035850434 +SRR10002882 Ribosomal 10364 0.0026704560826697346 +SRR10002882 Unassigned 2008478 0.5175175889625958 +SRR10002882 Bacterial 659490 0.16992851041681428 +SRR10002882 Archaeal 644 1.6593725561938529e-4 +SRR10002882 Viral 652 1.6799858798732795e-4 +SRR10002882 Human 688090 0.17729777363220936 +SRR10002883 Filtered 478341 0.0759836360875292 +SRR10002883 Duplicate 368515 0.058537966958290895 +SRR10002883 Ribosomal 15717 0.002496618120520082 +SRR10002883 Unassigned 3031603 0.4815648650520482 +SRR10002883 Bacterial 972555 0.15448867062431815 +SRR10002883 Archaeal 926 1.4709348982640426e-4 +SRR10002883 Viral 937 1.4884082069907214e-4 +SRR10002883 Human 1426722 0.22663230884676797 +SRR10002884 Filtered 343008 0.08889047834142011 +SRR10002884 Duplicate 250089 0.06481053164336521 +SRR10002884 Ribosomal 14589 0.00378073744205085 +SRR10002884 Unassigned 1782776 0.4620061672485877 +SRR10002884 Bacterial 540907 0.14017597831019254 +SRR10002884 Archaeal 888 2.3012508386737642e-4 +SRR10002884 Viral 496 1.2853833513312918e-4 +SRR10002884 Human 926018 0.2399774435953831 +SRR10002885 Filtered 417525 0.08088661924831286 +SRR10002885 Duplicate 278810 0.05401352808244323 +SRR10002885 Ribosomal 13577 0.002630255983556299 +SRR10002885 Unassigned 2734783 0.5298062421358213 +SRR10002885 Bacterial 850177 0.16470377412771184 +SRR10002885 Archaeal 1087 2.105832108805846e-4 +SRR10002885 Viral 551 1.0674457147672687e-4 +SRR10002885 Human 865345 0.16764225263979712 +SRR10002886 Filtered 278980 0.0694535382268311 +SRR10002886 Duplicate 236314 0.05883161313547697 +SRR10002886 Ribosomal 9121 0.002270720919660644 +SRR10002886 Unassigned 1956765 0.4871469378752067 +SRR10002886 Bacterial 640402 0.15943144593712485 +SRR10002886 Archaeal 637 
1.585845001451409e-4 +SRR10002886 Viral 535 1.3319106370117802e-4 +SRR10002886 Human 894032 0.22257396834185342 +SRR10002887 Filtered 342376 0.08195532690714485 +SRR10002887 Duplicate 236216 0.056543564679469735 +SRR10002887 Ribosomal 10041 0.002403537156443914 +SRR10002887 Unassigned 2370742 0.5674899397811132 +SRR10002887 Bacterial 516540 0.12364536229355037 +SRR10002887 Archaeal 2890 6.91785915956868e-4 +SRR10002887 Viral 437 1.0460569040593471e-4 +SRR10002887 Human 698351 0.16716587757591514 +SRR10002888 Filtered 391314 0.06712359854697103 +SRR10002888 Duplicate 299387 0.05135500594965173 +SRR10002888 Ribosomal 14493 0.0024860401461262597 +SRR10002888 Unassigned 2885402 0.49494412542006494 +SRR10002888 Bacterial 755728 0.12963293642114854 +SRR10002888 Archaeal 868 1.488913852782442e-4 +SRR10002888 Viral 757 1.298511274834457e-4 +SRR10002888 Human 1481804 0.25417955100327577 +SRR10002889 Filtered 625176 0.07938447496493146 +SRR10002889 Duplicate 745528 0.09466670001992307 +SRR10002889 Ribosomal 24499 0.0031108683829287366 +SRR10002889 Unassigned 3978860 0.5052332656067526 +SRR10002889 Bacterial 1693075 0.2149856519624095 +SRR10002889 Archaeal 10479 0.0013306171592599792 +SRR10002889 Viral 519 6.59023099204055e-5 +SRR10002889 Human 797157 0.10122251959387416 +SRR10002890 Filtered 820411 0.8870276646423015 +SRR10002890 Duplicate 10661 0.011526663992500803 +SRR10002890 Ribosomal 778 8.41172928071065e-4 +SRR10002890 Unassigned 22581 0.02441455769765131 +SRR10002890 Bacterial 54159 0.058556664024936776 +SRR10002890 Archaeal 8 8.649593090705038e-6 +SRR10002890 Viral 37 4.000436804451081e-5 +SRR10002890 Human 16264 0.017584622753403344 +SRR10002891 Filtered 241025 0.061437352380845564 +SRR10002891 Duplicate 329525 0.08399603171164043 +SRR10002891 Ribosomal 8476 0.002160535209127879 +SRR10002891 Unassigned 777202 0.1981090473813834 +SRR10002891 Bacterial 522386 0.13315636452990517 +SRR10002891 Archaeal 838 2.1360647773114235e-4 +SRR10002891 Viral 386 
9.839152792866461e-5 +SRR10002891 Human 2043264 0.5208286707814378 +SRR10002892 Filtered 444299 0.07934420784193269 +SRR10002892 Duplicate 489961 0.08749866062818323 +SRR10002892 Ribosomal 11872 0.0021201362944760736 +SRR10002892 Unassigned 1501770 0.26819045510068507 +SRR10002892 Bacterial 1295961 0.23143648520262017 +SRR10002892 Archaeal 1742 3.110914273060411e-4 +SRR10002892 Viral 1516 0.00027073168989435033 +SRR10002892 Human 1852519 0.3308282318149024 +SRR10002893 Filtered 405115 0.10871327004170721 +SRR10002893 Duplicate 334907 0.08987283889724655 +SRR10002893 Ribosomal 8603 0.002308629061300636 +SRR10002893 Unassigned 1751713 0.4700750364824039 +SRR10002893 Bacterial 610075 0.16371461985039934 +SRR10002893 Archaeal 602 1.6154768044902742e-4 +SRR10002893 Viral 474 1.271986719814601e-4 +SRR10002893 Human 614965 0.16502685931451186 +SRR10002894 Filtered 512823 0.11766005569788235 +SRR10002894 Duplicate 480245 0.11018548982520189 +SRR10002894 Ribosomal 10536 0.002417337652236519 +SRR10002894 Unassigned 2005684 0.46017610589297175 +SRR10002894 Bacterial 483662 0.11096947262300867 +SRR10002894 Archaeal 593 1.360555455368504e-4 +SRR10002894 Viral 518 1.188478458483786e-4 +SRR10002894 Human 864453 0.19833663491731357 +SRR10002895 Filtered 303666 0.05398355992552741 +SRR10002895 Duplicate 533309 0.09480784269665718 +SRR10002895 Ribosomal 15457 0.0027478344159994113 +SRR10002895 Unassigned 1695850 0.3014760299134762 +SRR10002895 Bacterial 1262439 0.224427335983689 +SRR10002895 Archaeal 1235 2.19549427687085e-4 +SRR10002895 Viral 995 1.7688395186125471e-4 +SRR10002895 Human 1812206 0.3221609636851025 +SRR10002896 Filtered 320427 0.08970181698060124 +SRR10002896 Duplicate 230731 0.0645919037214439 +SRR10002896 Ribosomal 10251 0.002869712370893037 +SRR10002896 Unassigned 1217516 0.34083706242905154 +SRR10002896 Bacterial 777374 0.2176216744327972 +SRR10002896 Archaeal 460 1.2877452839828282e-4 +SRR10002896 Viral 454 1.270948606365661e-4 +SRR10002896 Human 1014922 
0.28412196067617823 +SRR10002897 Filtered 246400 0.06358325716588985 +SRR10002897 Duplicate 378640 0.09770764810589502 +SRR10002897 Ribosomal 11063 0.0028547953491324654 +SRR10002897 Unassigned 1203432 0.31054434390284563 +SRR10002897 Bacterial 1007135 0.2598901124422422 +SRR10002897 Archaeal 1087 2.804991905004962e-4 +SRR10002897 Viral 932 2.4050160583851193e-4 +SRR10002897 Human 1026545 0.26489884223765586 +SRR10002898 Filtered 381478 0.07668055170567283 +SRR10002898 Duplicate 548167 0.11018655856128938 +SRR10002898 Ribosomal 10189 0.0020480817801527226 +SRR10002898 Unassigned 2353351 0.47304498041065757 +SRR10002898 Bacterial 1218141 0.24485743328658532 +SRR10002898 Archaeal 1916 3.8513344693027937e-4 +SRR10002898 Viral 337 6.774006869285186e-5 +SRR10002898 Human 461320 0.09272952074001904 +SRR10002899 Filtered 307634 0.06638172002870757 +SRR10002899 Duplicate 482336 0.10407917626714437 +SRR10002899 Ribosomal 12601 0.002719062438097688 +SRR10002899 Unassigned 1502500 0.32421167472754353 +SRR10002899 Bacterial 1043899 0.22525407190443125 +SRR10002899 Archaeal 1228 2.6497965827981595e-4 +SRR10002899 Viral 905 1.9528224001891972e-4 +SRR10002899 Human 1283215 0.2768940327357769 +SRR10002900 Filtered 354543 0.06876146814790708 +SRR10002900 Duplicate 474744 0.09207372429976053 +SRR10002900 Ribosomal 15789 0.00306218095008872 +SRR10002900 Unassigned 2769642 0.537155296153374 +SRR10002900 Bacterial 959936 0.1861737749385246 +SRR10002900 Archaeal 5574 0.001081043550306829 +SRR10002900 Viral 338 6.555305346316975e-5 +SRR10002900 Human 575563 0.11162695890657506 +SRR10002901 Filtered 273454 0.0982579015696241 +SRR10002901 Duplicate 161194 0.05792046993503108 +SRR10002901 Ribosomal 9016 0.003239642647581425 +SRR10002901 Unassigned 1397211 0.502047952891514 +SRR10002901 Bacterial 508376 0.18267042708594217 +SRR10002901 Archaeal 686 2.464945492724997e-4 +SRR10002901 Viral 663 2.3823015476336345e-4 +SRR10002901 Human 432423 0.15537888116627135 +SRR10002902 Filtered 404286 
0.09510987064327364 +SRR10002902 Duplicate 231120 0.05437188847269855 +SRR10002902 Ribosomal 11537 0.0027141245989508616 +SRR10002902 Unassigned 1992665 0.4687822738986234 +SRR10002902 Bacterial 821680 0.19330344981069117 +SRR10002902 Archaeal 2823 6.641218464798719e-4 +SRR10002902 Viral 303 7.128194101431143e-5 +SRR10002902 Human 786312 0.18498298878826816 +SRR10002903 Filtered 356592 0.08781410376382746 +SRR10002903 Duplicate 394870 0.09724041804982318 +SRR10002903 Ribosomal 13473 0.0033178518306917915 +SRR10002903 Unassigned 2226208 0.5482244702962007 +SRR10002903 Bacterial 694034 0.170912341532127 +SRR10002903 Archaeal 1191 2.9329485121011833e-4 +SRR10002903 Viral 687 1.691801534688088e-4 +SRR10002903 Human 373705 0.09202833952265094 +SRR10002904 Filtered 238480 0.06718425891519735 +SRR10002904 Duplicate 293916 0.08280161289550127 +SRR10002904 Ribosomal 8062 0.0022712155961687394 +SRR10002904 Unassigned 1017186 0.2865602465150701 +SRR10002904 Bacterial 244550 0.06889429100013213 +SRR10002904 Archaeal 390 1.0987026575363537e-4 +SRR10002904 Viral 121 3.408795424664072e-5 +SRR10002904 Human 1746936 0.49214441685793014 +SRR10002905 Filtered 358440 0.08407372424034239 +SRR10002905 Duplicate 205246 0.048141378209556177 +SRR10002905 Ribosomal 15194 0.0035638214655388974 +SRR10002905 Unassigned 2385302 0.5594833795835766 +SRR10002905 Bacterial 897866 0.21059853389348082 +SRR10002905 Archaeal 1259 2.95304148026423e-4 +SRR10002905 Viral 383 8.983438339485308e-5 +SRR10002905 Human 399711 0.09375402407608385 +SRR10002906 Filtered 441970 0.07880384053941146 +SRR10002906 Duplicate 305462 0.05446428205274047 +SRR10002906 Ribosomal 20011 0.0035679879924749706 +SRR10002906 Unassigned 2924224 0.52139303979347 +SRR10002906 Bacterial 1236976 0.22055447079005142 +SRR10002906 Archaeal 1926 3.4340836907234985e-4 +SRR10002906 Viral 581 1.0359307499015331e-4 +SRR10002906 Human 677333 0.12076937738778917 +SRR10002907 Filtered 449083 0.0914626720005279 +SRR10002907 Duplicate 254037 
0.05173854901432053 +SRR10002907 Ribosomal 13120 0.0026720901406798434 +SRR10002907 Unassigned 2525025 0.5142602444718081 +SRR10002907 Bacterial 1218228 0.24811090151677775 +SRR10002907 Archaeal 1128 2.2973457916820604e-4 +SRR10002907 Viral 766 1.5600770181103352e-4 +SRR10002907 Human 448627 0.0913698005749067 +SRR10002908 Filtered 513627 0.08069457154864633 +SRR10002908 Duplicate 341063 0.05358350058718868 +SRR10002908 Ribosomal 9718 0.001526769126836683 +SRR10002908 Unassigned 3694662 0.5804585177708039 +SRR10002908 Bacterial 1689393 0.2654160398738428 +SRR10002908 Archaeal 2048 3.2175583162806406e-4 +SRR10002908 Viral 426 6.692772669607192e-5 +SRR10002908 Human 114138 0.01793191753435741 +SRR10002909 Filtered 355749 0.08770484346582903 +SRR10002909 Duplicate 234389 0.057785265890029774 +SRR10002909 Ribosomal 15425 0.0038028138110308475 +SRR10002909 Unassigned 2049394 0.5052488692021881 +SRR10002909 Bacterial 774472 0.19093502871031975 +SRR10002909 Archaeal 1162 2.864745315019672e-4 +SRR10002909 Viral 464 1.1439258400767022e-4 +SRR10002909 Human 625152 0.15412231180509278 +SRR10002910 Filtered 411090 0.10255531348486742 +SRR10002910 Duplicate 318191 0.07937964375942846 +SRR10002910 Ribosomal 15573 0.003885022493614149 +SRR10002910 Unassigned 1737805 0.433533135202924 +SRR10002910 Bacterial 837194 0.2088561947934761 +SRR10002910 Archaeal 621 1.5492191411638003e-4 +SRR10002910 Viral 686 1.7113757340392384e-4 +SRR10002910 Human 687311 0.17146463077816954 +SRR10002911 Filtered 537777 0.07454250663987567 +SRR10002911 Duplicate 675995 0.09370122146544527 +SRR10002911 Ribosomal 18760 0.0026003667404222715 +SRR10002911 Unassigned 4004665 0.5550958247618952 +SRR10002911 Bacterial 1406337 0.19493560557703815 +SRR10002911 Archaeal 2946 4.0835183461002193e-4 +SRR10002911 Viral 407 5.641520593560045e-5 +SRR10002911 Human 567480 0.07865970777477775 +SRR10002912 Filtered 653552 0.09457796620152288 +SRR10002912 Duplicate 715881 0.1035978300461362 +SRR10002912 Ribosomal 22971 
0.0033242197432112246 +SRR10002912 Unassigned 3584987 0.518796942429828 +SRR10002912 Bacterial 1161927 0.1681468231060985 +SRR10002912 Archaeal 5820 8.422340736358594e-4 +SRR10002912 Viral 569 8.234212850494913e-5 +SRR10002912 Human 764486 0.11063164227106247 +SRR10002913 Filtered 429873 0.10074774568912481 +SRR10002913 Duplicate 241266 0.056544620414476804 +SRR10002913 Ribosomal 11886 0.0027856778752350987 +SRR10002913 Unassigned 1334925 0.3128614367826194 +SRR10002913 Bacterial 499284 0.11701534513367667 +SRR10002913 Archaeal 432 1.0124624281520803e-4 +SRR10002913 Viral 425 9.960567869551716e-5 +SRR10002913 Human 1748734 0.4098443221833565 +SRR10002914 Filtered 452490 0.07114177446128371 +SRR10002914 Duplicate 589373 0.09266291197500534 +SRR10002914 Ribosomal 13759 0.0021632294079710107 +SRR10002914 Unassigned 1753511 0.27569202430413947 +SRR10002914 Bacterial 1746682 0.2746183493548674 +SRR10002914 Archaeal 1249 1.963713591507953e-4 +SRR10002914 Viral 1215 1.9102578171994897e-4 +SRR10002914 Human 1802119 0.28333431335586234 +SRR10002915 Filtered 486379 0.08997775803262834 +SRR10002915 Duplicate 293634 0.054320867064887236 +SRR10002915 Ribosomal 22660 0.004191990190816951 +SRR10002915 Unassigned 3302461 0.6109392814455226 +SRR10002915 Bacterial 664801 0.1229849634088835 +SRR10002915 Archaeal 5184 9.59014878605255e-4 +SRR10002915 Viral 449 8.306282416932088e-5 +SRR10002915 Human 629979 0.11654306215448686 +SRR10002916 Filtered 416420 0.07181152071181734 +SRR10002916 Duplicate 324743 0.05600184590201647 +SRR10002916 Ribosomal 13334 0.0022994448325521647 +SRR10002916 Unassigned 2684930 0.46301548029580647 +SRR10002916 Bacterial 824380 0.14216411662362033 +SRR10002916 Archaeal 1189 2.05042740805799e-4 +SRR10002916 Viral 729 1.2571586042676826e-4 +SRR10002916 Human 1533066 0.2643768330329546 +SRR10002917 Filtered 408928 0.08059258792100536 +SRR10002917 Duplicate 252449 0.04975330187238311 +SRR10002917 Ribosomal 16331 0.003218555719681554 +SRR10002917 Unassigned 
3197660 0.6302031034594892 +SRR10002917 Bacterial 619121 0.12201796801940869 +SRR10002917 Archaeal 1291 2.544336191359308e-4 +SRR10002917 Viral 436 8.592800770198748e-5 +SRR10002917 Human 577799 0.11387412138119418 +SRR10002918 Filtered 460344 0.10031715758465126 +SRR10002918 Duplicate 238707 0.05201850732399977 +SRR10002918 Ribosomal 14105 0.003073730748595629 +SRR10002918 Unassigned 2611927 0.5691854188576487 +SRR10002918 Bacterial 698856 0.15229317093516814 +SRR10002918 Archaeal 2532 5.517679018393571e-4 +SRR10002918 Viral 424 9.23971525986917e-5 +SRR10002918 Human 561991 0.12246784949549847 +SRR10002919 Filtered 410129 0.08378559921160078 +SRR10002919 Duplicate 280921 0.05738958794945518 +SRR10002919 Ribosomal 14580 0.0029785604931744387 +SRR10002919 Unassigned 2452789 0.501082332887026 +SRR10002919 Bacterial 661329 0.1351034590116981 +SRR10002919 Archaeal 551 1.1256425457744277e-4 +SRR10002919 Viral 764 1.560782041690858e-4 +SRR10002919 Human 1073919 0.21939181798829904 +SRR10002920 Filtered 344098 0.07656274231937285 +SRR10002920 Duplicate 278880 0.06205155966621921 +SRR10002920 Ribosomal 12644 0.0028133244421244827 +SRR10002920 Unassigned 2094472 0.46602572531994224 +SRR10002920 Bacterial 694012 0.15441956048146918 +SRR10002920 Archaeal 842 1.8734729359924188e-4 +SRR10002920 Viral 610 1.3572666163365506e-4 +SRR10002920 Human 1068769 0.23780401381563915 +SRR10002921 Filtered 374987 0.06128053633277367 +SRR10002921 Duplicate 370618 0.060566552479365716 +SRR10002921 Ribosomal 14899 0.0024348009686255656 +SRR10002921 Unassigned 3612113 0.590293055318142 +SRR10002921 Bacterial 885433 0.1446978405297698 +SRR10002921 Archaeal 1006 1.6440095136836827e-4 +SRR10002921 Viral 1918 3.1344038242995067e-4 +SRR10002921 Human 858212 0.14024937303752494 +SRR10002922 Filtered 544174 0.08599686938739712 +SRR10002922 Duplicate 385948 0.06099210867540004 +SRR10002922 Ribosomal 14716 0.0023255979335744374 +SRR10002922 Unassigned 3329493 0.5261662164073494 +SRR10002922 Bacterial 
977141 0.15441948154463572 +SRR10002922 Archaeal 1007 1.5913815704739456e-4 +SRR10002922 Viral 655 1.0351091645088723e-4 +SRR10002922 Human 1074701 0.16983707697814499 +SRR10002923 Filtered 334124 0.06962663134669612 +SRR10002923 Duplicate 425576 0.08868391154781324 +SRR10002923 Ribosomal 8891 0.0018527563997302657 +SRR10002923 Unassigned 1086384 0.22638678535199247 +SRR10002923 Bacterial 1111323 0.23158371391490698 +SRR10002923 Archaeal 1754 3.6550834834404297e-4 +SRR10002923 Viral 554 1.1544562427742292e-4 +SRR10002923 Human 1830190 0.3813852474662395 +SRR10002924 Filtered 315088 0.060901127477234654 +SRR10002924 Duplicate 475817 0.09196729730372265 +SRR10002924 Ribosomal 11396 0.0022026521122053716 +SRR10002924 Unassigned 1682176 0.3251358827221115 +SRR10002924 Bacterial 1509173 0.291697358383057 +SRR10002924 Archaeal 1627 3.144713045417813e-4 +SRR10002924 Viral 851 1.644837616257258e-4 +SRR10002924 Human 1177635 0.2276167269355013 +SRR10002925 Filtered 548664 0.08104980790954443 +SRR10002925 Duplicate 589008 0.08700950902042953 +SRR10002925 Ribosomal 16526 0.0024412557148147706 +SRR10002925 Unassigned 1957912 0.2892269066382922 +SRR10002925 Bacterial 1700766 0.2512407549959251 +SRR10002925 Archaeal 1666 2.461050478567958e-4 +SRR10002925 Viral 2321 3.428630348593176e-4 +SRR10002925 Human 1952604 0.28844279763827785 +SRR10002926 Filtered 350157 0.11391694688515033 +SRR10002926 Duplicate 157511 0.05124322010077455 +SRR10002926 Ribosomal 10647 0.003463799762638461 +SRR10002926 Unassigned 1575694 0.5126221943449655 +SRR10002926 Bacterial 383093 0.12463205057466478 +SRR10002926 Archaeal 794 2.583128591654868e-4 +SRR10002926 Viral 319 1.037806071458316e-4 +SRR10002926 Human 595577 0.19375969486549513 +SRR10002927 Filtered 428564 0.07151349517610468 +SRR10002927 Duplicate 354935 0.0592271922287703 +SRR10002927 Ribosomal 14782 0.0024666385550190387 +SRR10002927 Unassigned 2789984 0.4655582534356811 +SRR10002927 Bacterial 762255 0.12719574967907166 +SRR10002927 Archaeal 
831 1.3866707070902593e-4 +SRR10002927 Viral 829 1.3833333528012333e-4 +SRR10002927 Human 1640591 0.27376167051936406 +SRR10002928 Filtered 423704 0.0854564809365474 +SRR10002928 Duplicate 371813 0.07499063154102982 +SRR10002928 Ribosomal 18608 0.003753030883039277 +SRR10002928 Unassigned 2669142 0.538336863564984 +SRR10002928 Bacterial 882465 0.1779835768594828 +SRR10002928 Archaeal 968 1.9523505453471736e-4 +SRR10002928 Viral 649 1.308962297448673e-4 +SRR10002928 Human 590777 0.1191532849306371 +SRR10002929 Filtered 487921 0.08144298268455498 +SRR10002929 Duplicate 533186 0.088998543136383 +SRR10002929 Ribosomal 12079 0.0020162071069839984 +SRR10002929 Unassigned 1969832 0.3288011654908936 +SRR10002929 Bacterial 1222132 0.20399629307662623 +SRR10002929 Archaeal 1613 2.6923934626750474e-4 +SRR10002929 Viral 633 1.0565933427608834e-4 +SRR10002929 Human 1763556 0.2943699098240146 +SRR10002930 Filtered 513170 0.11614808524391732 +SRR10002930 Duplicate 293199 0.06636105470980633 +SRR10002930 Ribosomal 20457 0.004630125260312989 +SRR10002930 Unassigned 2717560 0.6150776361351208 +SRR10002930 Bacterial 570990 0.12923474714699681 +SRR10002930 Archaeal 790 1.788042702081078e-4 +SRR10002930 Viral 352 7.966975077627082e-5 +SRR10002930 Human 301721 0.06828987748286139 +SRR10002931 Filtered 390406 0.1041114871820035 +SRR10002931 Duplicate 206432 0.055050236220640425 +SRR10002931 Ribosomal 11782 0.00314196385808201 +SRR10002931 Unassigned 1943469 0.5182744319557618 +SRR10002931 Bacterial 664933 0.17732095179477553 +SRR10002931 Archaeal 4122 0.0010992340029718252 +SRR10002931 Viral 296 7.89357750799758e-5 +SRR10002931 Human 528444 0.14092275921068492 +SRR10002932 Filtered 486220 0.06534619560206745 +SRR10002932 Duplicate 464326 0.06240372181137257 +SRR10002932 Ribosomal 22105 0.002970831421545187 +SRR10002932 Unassigned 3716216 0.4994458838294037 +SRR10002932 Bacterial 921528 0.12385000399157174 +SRR10002932 Archaeal 774 1.040227785693723e-4 +SRR10002932 Viral 1454 
1.9541229979311026e-4 +SRR10002932 Human 1828055 0.24568392826567687 +SRR10002933 Filtered 365978 0.06995283824899431 +SRR10002933 Duplicate 297614 0.05688578002676717 +SRR10002933 Ribosomal 14429 0.002757951306075062 +SRR10002933 Unassigned 2289938 0.43769751874218 +SRR10002933 Bacterial 634252 0.12123058644263082 +SRR10002933 Archaeal 1332 2.545977642034779e-4 +SRR10002933 Viral 998 1.907571836899932e-4 +SRR10002933 Human 1627241 0.31102997028545915 +SRR10002934 Filtered 199587 0.1505318358613418 +SRR10002934 Duplicate 43468 0.032784288762398375 +SRR10002934 Ribosomal 12597 0.009500866972023842 +SRR10002934 Unassigned 132884 0.10022332354611545 +SRR10002934 Bacterial 924236 0.6970741674013994 +SRR10002934 Archaeal 18 1.3575899459905467e-5 +SRR10002934 Viral 584 4.4046251581026627e-4 +SRR10002934 Human 12505 0.009431479041450992 +SRR10002935 Filtered 338403 0.06333942951220649 +SRR10002935 Duplicate 332895 0.06230848836288679 +SRR10002935 Ribosomal 13827 0.002588021654256254 +SRR10002935 Unassigned 2679529 0.5015317187537142 +SRR10002935 Bacterial 1009275 0.1889076122875158 +SRR10002935 Archaeal 1973 3.6928955838920875e-4 +SRR10002935 Viral 631 1.1810527691008146e-4 +SRR10002935 Human 966158 0.1808373345941212 +SRR10002936 Filtered 298429 0.06703020580810605 +SRR10002936 Duplicate 233791 0.052511849874117195 +SRR10002936 Ribosomal 11377 0.0025553905668645556 +SRR10002936 Unassigned 2226013 0.49998528803004927 +SRR10002936 Bacterial 907739 0.20388746398655752 +SRR10002936 Archaeal 1404 3.1535276047093576e-4 +SRR10002936 Viral 551 1.237602357688644e-4 +SRR10002936 Human 772853 0.17359068873806563 +SRR10002937 Filtered 478281 0.15309802952479787 +SRR10002937 Duplicate 156974 0.050247469764898925 +SRR10002937 Ribosomal 11145 0.0035675210578172086 +SRR10002937 Unassigned 1601987 0.5127969813234111 +SRR10002937 Bacterial 417150 0.1335299604547733 +SRR10002937 Archaeal 1832 5.864242779651077e-4 +SRR10002937 Viral 366 1.1715681535765799e-4 +SRR10002937 Human 456283 
0.14605645678097887 +SRR10002938 Filtered 458932 0.09844844569988982 +SRR10002938 Duplicate 255435 0.05479499953664455 +SRR10002938 Ribosomal 15294 0.0032808139953939036 +SRR10002938 Unassigned 2562450 0.5496875782984901 +SRR10002938 Bacterial 701418 0.15046567222578797 +SRR10002938 Archaeal 20976 0.004499696244761509 +SRR10002938 Viral 415 8.902431071586701e-5 +SRR10002938 Human 646728 0.13873376968831624 +SRR10002939 Filtered 507485 0.0658842972631568 +SRR10002939 Duplicate 573818 0.07449598645664425 +SRR10002939 Ribosomal 29620 0.0038454198349403518 +SRR10002939 Unassigned 4350447 0.5647972715954338 +SRR10002939 Bacterial 1326487 0.17221132412527085 +SRR10002939 Archaeal 1252 1.6254104096371778e-4 +SRR10002939 Viral 679 8.815125144917282e-5 +SRR10002939 Human 912882 0.11851500843214106 +SRR10002940 Filtered 361677 0.06801271582059072 +SRR10002940 Duplicate 341441 0.06420737205434217 +SRR10002940 Ribosomal 19310 0.003631211115154148 +SRR10002940 Unassigned 3127387 0.5880995564882747 +SRR10002940 Bacterial 905814 0.1703367097391113 +SRR10002940 Archaeal 887 1.6679877054074206e-4 +SRR10002940 Viral 484 9.10153381530092e-5 +SRR10002940 Human 560785 0.10545462067383318 +SRR10002941 Filtered 338303 0.06553484619400506 +SRR10002941 Duplicate 301823 0.05846807117528721 +SRR10002941 Ribosomal 13670 0.002648103467814501 +SRR10002941 Unassigned 2307161 0.4469349703662306 +SRR10002941 Bacterial 968121 0.1875409347011004 +SRR10002941 Archaeal 1033 2.0010906234472418e-4 +SRR10002941 Viral 971 1.8809864427563134e-4 +SRR10002941 Human 1231103 0.2384848663889419 +SRR10002942 Filtered 426379 0.07101843688783348 +SRR10002942 Duplicate 310259 0.051677285256502614 +SRR10002942 Ribosomal 16736 0.0027875776240264675 +SRR10002942 Unassigned 2942834 0.4901636119517391 +SRR10002942 Bacterial 1213857 0.20218215893689626 +SRR10002942 Archaeal 2386 3.9741636059555156e-4 +SRR10002942 Viral 670 1.1159637954694869e-4 +SRR10002942 Human 1090658 0.18166191660285963 +SRR10002943 Filtered 248693 
0.06953911076910688 +SRR10002943 Duplicate 193072 0.053986462000993204 +SRR10002943 Ribosomal 9934 0.002777728067860003 +SRR10002943 Unassigned 1748445 0.488897196658897 +SRR10002943 Bacterial 672552 0.18805783848352936 +SRR10002943 Archaeal 871 2.4354752839803327e-4 +SRR10002943 Viral 611 1.708467736523517e-4 +SRR10002943 Human 702126 0.19632726971756315 +SRR10002944 Filtered 315874 0.08496457769478771 +SRR10002944 Duplicate 204890 0.0551118240940535 +SRR10002944 Ribosomal 11453 0.0030806565540006576 +SRR10002944 Unassigned 1964058 0.5282972278125752 +SRR10002944 Bacterial 729585 0.19624559608404518 +SRR10002944 Archaeal 932 2.506916884945964e-4 +SRR10002944 Viral 459 1.2346296675860488e-4 +SRR10002944 Human 490463 0.1319259631052846 +SRR10002945 Filtered 422150 0.0818990570598675 +SRR10002945 Duplicate 446137 0.08655264626203507 +SRR10002945 Ribosomal 15432 0.0029938795417455296 +SRR10002945 Unassigned 1800631 0.3493307616078794 +SRR10002945 Bacterial 1564578 0.3035353852815667 +SRR10002945 Archaeal 2901 5.628074488467976e-4 +SRR10002945 Viral 991 1.922585942113673e-4 +SRR10002945 Human 901696 0.17493320420384764 +SRR10002946 Filtered 285468 0.05871925429912205 +SRR10002946 Duplicate 455073 0.0936061037022166 +SRR10002946 Ribosomal 14179 0.00291654513538208 +SRR10002946 Unassigned 1583440 0.32570521399036606 +SRR10002946 Bacterial 1419220 0.29192603054072613 +SRR10002946 Archaeal 1750 3.599657230353791e-4 +SRR10002946 Viral 1005 2.0672317237174626e-4 +SRR10002946 Human 1101439 0.22656016343677993 +SRR10002947 Filtered 568934 0.11977288090563012 +SRR10002947 Duplicate 383671 0.08077102263170072 +SRR10002947 Ribosomal 27927 0.005879235983526266 +SRR10002947 Unassigned 3100990 0.6528252942512663 +SRR10002947 Bacterial 579183 0.1219305165125754 +SRR10002947 Archaeal 725 1.5262814079767045e-4 +SRR10002947 Viral 203 4.2735879423347725e-5 +SRR10002947 Human 88474 0.018625685695080132 +SRR10002948 Filtered 273541 0.0742424201301151 +SRR10002948 Duplicate 223711 
0.06071794008842616 +SRR10002948 Ribosomal 33726 0.00915365470371265 +SRR10002948 Unassigned 2825318 0.766826347630434 +SRR10002948 Bacterial 250751 0.06805693146565411 +SRR10002948 Archaeal 451 1.224069937548006e-4 +SRR10002948 Viral 165 4.478304649565876e-5 +SRR10002948 Human 76767 0.02083551594140749 +SRR10002949 Filtered 329194 0.06721634522225392 +SRR10002949 Duplicate 446208 0.09110880201015654 +SRR10002949 Ribosomal 9611 0.0019624181908876907 +SRR10002949 Unassigned 1463100 0.29874248830379563 +SRR10002949 Bacterial 1274992 0.2603337315613649 +SRR10002949 Archaeal 1727 3.526268042516951e-4 +SRR10002949 Viral 568 1.159768528170022e-4 +SRR10002949 Human 1372129 0.2801676110544726 +SRR10002950 Filtered 304665 0.05768804733727811 +SRR10002950 Duplicate 522823 0.09899607100591716 +SRR10002950 Ribosomal 10484 0.001985136094674556 +SRR10002950 Unassigned 1793821 0.3396584142011834 +SRR10002950 Bacterial 1260152 0.2386086627218935 +SRR10002950 Archaeal 3142 5.949349112426036e-4 +SRR10002950 Viral 448 8.48284023668639e-5 +SRR10002950 Human 1385715 0.2623839053254438 +SRR10002951 Filtered 311564 0.05351714945308635 +SRR10002951 Duplicate 559440 0.09609465178914968 +SRR10002951 Ribosomal 14980 0.0025731050404001537 +SRR10002951 Unassigned 1680161 0.288600182762601 +SRR10002951 Bacterial 1658169 0.2848226309569615 +SRR10002951 Archaeal 1308 2.2467432528994668e-4 +SRR10002951 Viral 1154 1.982218435662068e-4 +SRR10002951 Human 1594984 0.2739693838289452 +SRR10002952 Filtered 249270 0.05941646155628367 +SRR10002952 Duplicate 367726 0.08765185438378453 +SRR10002952 Ribosomal 9332 0.0022243929042533766 +SRR10002952 Unassigned 1020147 0.24316413931583472 +SRR10002952 Bacterial 835297 0.19910294896529498 +SRR10002952 Archaeal 620 1.4778435497611375e-4 +SRR10002952 Viral 419 9.987362054030913e-5 +SRR10002952 Human 1712491 0.4081925448990323 +SRR10002953 Filtered 389665 0.09628687714623255 +SRR10002953 Duplicate 370288 0.09149878784269606 +SRR10002953 Ribosomal 12398 
0.003063566660744463 +SRR10002953 Unassigned 1774756 0.43854519378578805 +SRR10002953 Bacterial 1013855 0.2505252764017646 +SRR10002953 Archaeal 950 2.347465984600129e-4 +SRR10002953 Viral 600 1.4826100955369235e-4 +SRR10002953 Human 484405 0.11969729055476057 +SRR10002954 Filtered 355083 0.07048603749854099 +SRR10002954 Duplicate 462894 0.09188714706660028 +SRR10002954 Ribosomal 15311 0.0030393224123378506 +SRR10002954 Unassigned 2282476 0.4530847405409998 +SRR10002954 Bacterial 1419733 0.2818252450157177 +SRR10002954 Archaeal 1699 3.372613662440081e-4 +SRR10002954 Viral 558 1.1076624035559536e-4 +SRR10002954 Human 499882 0.0992294798592038 +SRR10002955 Filtered 442367 0.07324959891031438 +SRR10002955 Duplicate 557428 0.09230204201800478 +SRR10002955 Ribosomal 16003 0.00264986613233302 +SRR10002955 Unassigned 2876779 0.47635313643109745 +SRR10002955 Bacterial 1500324 0.24843202869002096 +SRR10002955 Archaeal 2142 3.5468432515511645e-4 +SRR10002955 Viral 667 1.1044558584428696e-4 +SRR10002955 Human 643463 0.10654819790723001 +SRR10002956 Filtered 309622 0.06113870648298713 +SRR10002956 Duplicate 500423 0.09881473188060239 +SRR10002956 Ribosomal 10948 0.0021618184708313464 +SRR10002956 Unassigned 1927317 0.38057266073687046 +SRR10002956 Bacterial 1522643 0.3006647572051565 +SRR10002956 Archaeal 1341 2.6479709256346687e-4 +SRR10002956 Viral 564 1.1136879955689435e-4 +SRR10002956 Human 791397 0.15627115933143176 +SRR10002957 Filtered 676383 0.1736845835394028 +SRR10002957 Duplicate 227877 0.05851525222131321 +SRR10002957 Ribosomal 11457 0.0029419785441250563 +SRR10002957 Unassigned 2163998 0.5556808663288412 +SRR10002957 Bacterial 450472 0.11567416939243277 +SRR10002957 Archaeal 1586 4.0726001317817394e-4 +SRR10002957 Viral 236 6.060111167090104e-5 +SRR10002957 Human 362309 0.09303528884903596 +SRR10002958 Filtered 449284 0.11183165264593455 +SRR10002958 Duplicate 231352 0.05758601798181607 +SRR10002958 Ribosomal 11517 0.002866706011171616 +SRR10002958 Unassigned 
2019008 0.5025529539119199 +SRR10002958 Bacterial 537050 0.133677560415014 +SRR10002958 Archaeal 777 1.9340371369977818e-4 +SRR10002958 Viral 373 9.284373900903123e-5 +SRR10002958 Human 768142 0.19119886158143504 +SRR10002959 Filtered 303016 0.08348840784716532 +SRR10002959 Duplicate 221998 0.061165943597879344 +SRR10002959 Ribosomal 10915 0.0030073526534962164 +SRR10002959 Unassigned 1622304 0.44698490510101013 +SRR10002959 Bacterial 468343 0.1290400883001721 +SRR10002959 Archaeal 723 1.99204394729983e-4 +SRR10002959 Viral 323 8.899449446443223e-5 +SRR10002959 Human 1001816 0.2760251036110825 +SRR10002960 Filtered 485411 0.10519938450869057 +SRR10002960 Duplicate 252485 0.05471912790949677 +SRR10002960 Ribosomal 24147 0.0052331931862511375 +SRR10002960 Unassigned 2439960 0.5287937237224221 +SRR10002960 Bacterial 524249 0.11361644488752114 +SRR10002960 Archaeal 1337 2.8975770447748256e-4 +SRR10002960 Viral 452 9.795847600884227e-5 +SRR10002960 Human 886159 0.19205040960513198 +SRR10002961 Filtered 521944 0.11426719938700673 +SRR10002961 Duplicate 297989 0.06523758962289967 +SRR10002961 Ribosomal 14154 0.0030986809698429203 +SRR10002961 Unassigned 2142502 0.4690497509714849 +SRR10002961 Bacterial 648839 0.1420478353675223 +SRR10002961 Archaeal 988 2.1629905314432708e-4 +SRR10002961 Viral 577 1.263203984456242e-4 +SRR10002961 Human 940757 0.20595632422965354 +SRR10002962 Filtered 402515 0.10899327543320843 +SRR10002962 Duplicate 202259 0.0547678245428004 +SRR10002962 Ribosomal 11272 0.003052239545565073 +SRR10002962 Unassigned 1637348 0.4433621642522961 +SRR10002962 Bacterial 406779 0.11014788414703823 +SRR10002962 Archaeal 1351 3.658246651932589e-4 +SRR10002962 Viral 240 6.498735725120809e-5 +SRR10002962 Human 1031262 0.2792458000566473 +SRR10002963 Filtered 401895 0.09492217890561562 +SRR10002963 Duplicate 256670 0.06062199245998174 +SRR10002963 Ribosomal 14916 0.003522958037686865 +SRR10002963 Unassigned 2314339 0.5466156598271776 +SRR10002963 Bacterial 539807 
0.12749513337688612 +SRR10002963 Archaeal 934 2.2059820375432634e-4 +SRR10002963 Viral 486 1.147866456366195e-4 +SRR10002963 Human 704895 0.1664866925432611 +SRR10002964 Filtered 416227 0.10617791938450392 +SRR10002964 Duplicate 210521 0.053703103755270924 +SRR10002964 Ribosomal 23156 0.005907007237078741 +SRR10002964 Unassigned 2699567 0.6886492401960159 +SRR10002964 Bacterial 462243 0.11791642538819262 +SRR10002964 Archaeal 567 1.4463953633717593e-4 +SRR10002964 Viral 201 5.127433298725285e-5 +SRR10002964 Human 107608 0.027450390169613453 +SRR10002965 Filtered 322445 0.07941249848228528 +SRR10002965 Duplicate 269236 0.06630806320884666 +SRR10002965 Ribosomal 12279 0.003024100447716606 +SRR10002965 Unassigned 1550935 0.3819678498150789 +SRR10002965 Bacterial 366026 0.09014572770387803 +SRR10002965 Archaeal 438 1.0787165046826886e-4 +SRR10002965 Viral 277 6.822019904043487e-5 +SRR10002965 Human 1538745 0.37896566849268576 +SRR10002966 Filtered 976156 0.23406008244503823 +SRR10002966 Duplicate 232668 0.055788512555700276 +SRR10002966 Ribosomal 13495 0.0032357951112279093 +SRR10002966 Unassigned 1757789 0.421477958708425 +SRR10002966 Bacterial 487677 0.11693389051191501 +SRR10002966 Archaeal 588 1.4098907190826312e-4 +SRR10002966 Viral 638 1.5297793856712901e-4 +SRR10002966 Human 701525 0.16820979365721817 +SRR10002967 Filtered 388074 0.10369533919545623 +SRR10002967 Duplicate 207678 0.05549261391753624 +SRR10002967 Ribosomal 7671 0.0020497300694412527 +SRR10002967 Unassigned 2163897 0.5782042430027009 +SRR10002967 Bacterial 605356 0.16175419057706675 +SRR10002967 Archaeal 1256 3.3560956423128843e-4 +SRR10002967 Viral 294 7.855828971655956e-5 +SRR10002967 Human 368218 0.09838971538385077 diff --git a/data/2024-04-12_leung/blast/putative-viral-best.blast.gz b/data/2024-04-12_leung/blast/putative-viral-best.blast.gz new file mode 100644 index 0000000..58ca54e Binary files /dev/null and b/data/2024-04-12_leung/blast/putative-viral-best.blast.gz differ diff --git 
a/data/2024-04-12_leung/taxid-names.tsv.gz b/data/2024-04-12_leung/taxid-names.tsv.gz new file mode 120000 index 0000000..626546b --- /dev/null +++ b/data/2024-04-12_leung/taxid-names.tsv.gz @@ -0,0 +1 @@ +../2024-04-01_spurbeck/taxid-names.tsv.gz \ No newline at end of file diff --git a/data/2024-04-12_leung/viral-taxids.tsv.gz b/data/2024-04-12_leung/viral-taxids.tsv.gz new file mode 120000 index 0000000..349083e --- /dev/null +++ b/data/2024-04-12_leung/viral-taxids.tsv.gz @@ -0,0 +1 @@ +../2024-03-19_brumfield/viral-taxids.tsv.gz \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 84cd8a9..41646c0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -164,7 +164,7 @@
-
+

diff --git a/docs/notebooks/2024-04-12_leung.html b/docs/notebooks/2024-04-12_leung.html new file mode 100644 index 0000000..bb7063b --- /dev/null +++ b/docs/notebooks/2024-04-12_leung.html @@ -0,0 +1,3999 @@ + + + + + + + + +Will’s Public NAO Notebook - Workflow analysis of Leung et al. (2021) + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+

Workflow analysis of Leung et al. (2021)

+

Air sampling from public transit systems in six cities.

+
+
+ + +
+ +
+
Author
+
+

Will Bradshaw

+
+
+ +
+
Published
+
+

April 12, 2024

+
+
+ + +
+ + +
+ + + + +

The last in our current run of air sampling datasets is Leung et al. (2021), a study of active air samples collected in public transit systems from six cities (Denver, Hong Kong, London, NYC, Oslo, Stockholm) from June to September 2017.

+

Samples from Denver originated from their rail and bus system; all other samples originated from metro systems. Collection took place during working days and working hours. Air samples were collected with the SASS 3100 Dry Air Samplers (filtration) for 30 min at a flowrate of 300 L/min using electret microfibrous filters. Filters were stationed at 1.5m above floor level, facing downward (to avoid direct deposition).

+

This was a DNA-sequencing study, focused on the bacterial microbiome and resistome. Sample processing followed an idiosyncratic protocol, where samples were pelleted and the pellet and supernatant were processed separately before being recombined for NA extraction and sequencing; I don’t have a great understanding of how this is expected to affect the viral fraction. Samples were sequenced with Illumina HiSeqX 2x150bp.

+

The raw data

+

In total, the Leung dataset comprised 293 samples:

+
+
Code
# The samples are split across three pipeline runs, so every input file exists once per run directory
+data_dir_base <- "../data/2024-04-12_leung"
+data_dirs <- file.path(data_dir_base, c(1,2,3))
+
+# City -> country/region lookup, used below to fill gaps in the sample metadata
+geo <- tribble(
+  ~region,         ~country,         ~city,
+  "Asia",          "Hong Kong",      "Hong Kong",
+  "Europe",        "Norway",         "Oslo",
+  "Europe",        "Sweden",         "Stockholm",
+  "Europe",        "United Kingdom", "London",
+  "North America", "USA",            "New York City",
+  "North America", "USA",            "Denver"
+)
+
+# One input path per run directory for each file type
+libraries_paths <- file.path(data_dirs, "sample-metadata.csv")
+basic_stats_paths <- file.path(data_dirs, "qc_basic_stats.tsv.gz")
+adapter_stats_paths <- file.path(data_dirs, "qc_adapter_stats.tsv.gz")
+quality_base_stats_paths <- file.path(data_dirs, "qc_quality_base_stats.tsv.gz")
+quality_seq_stats_paths <- file.path(data_dirs, "qc_quality_sequence_stats.tsv.gz")
+
+# Read the per-run metadata tables and stack them into one table
+libraries_raw <- bind_rows(
+  lapply(libraries_paths, read_csv, show_col_types = FALSE)
+)
+
+# Repair incomplete metadata, then give every sample a readable alias
+libraries <- libraries_raw %>%
+  # A missing city is taken from the part of `location` before the first ", "
+  mutate(city = ifelse(is.na(city), sub(", .*", "", location), city)) %>%
+  # Entries marked "uncalculated" are replaced by the value looked up from the city
+  left_join(geo, by="city", suffix = c("", "_new")) %>%
+  mutate(region = ifelse(region == "uncalculated", region_new, region),
+         country = ifelse(country == "uncalculated", country_new, country)) %>%
+  select(-c(country_new, region_new)) %>%
+  # Number the samples within each city and date, and build aliases from that number
+  arrange(city, date, location) %>%
+  group_by(city, date) %>%
+  mutate(sample_count = row_number()) %>%
+  mutate(date_alias = paste(as.character(date), sample_count, sep="_")) %>%
+  mutate(sample_alias = paste(city, date_alias, sep="_"))
+
+# Number of samples per city
+count_city <- count(group_by(libraries, region, country, city), name="n_samples")
+count_city
+
+
+ +
+
+
+
+
Code
# Import QC data
+stages <- c("raw_concat", "cleaned", "dedup", "ribo_initial", "ribo_secondary")
+
+# Read one QC table per run, stack them, attach the sample metadata (samples
+# without metadata are dropped), and turn `stage` and `sample` into ordered factors
+import_basic <- function(paths){
+  qc_tabs <- lapply(paths, function(p) read_tsv(p, show_col_types = FALSE))
+  qc_all <- inner_join(bind_rows(qc_tabs), libraries, by="sample")
+  qc_sorted <- arrange(qc_all, sample)
+  mutate(qc_sorted,
+         stage = factor(stage, levels = stages),
+         sample = fct_inorder(sample))
+}
+
+# As import_basic(), but additionally sorted by read pair, with `read_pair` as a factor
+import_basic_paired <- function(paths){
+  qc_by_pair <- arrange(import_basic(paths), read_pair)
+  mutate(qc_by_pair, read_pair = fct_inorder(as.character(read_pair)))
+}
+
+basic_stats <- import_basic(basic_stats_paths)
+adapter_stats <- import_basic_paired(adapter_stats_paths)
+quality_base_stats <- import_basic_paired(quality_base_stats_paths)
+quality_seq_stats <- import_basic_paired(quality_seq_stats_paths)
+
+# Keep only the rows for the raw (pre-processing) stage
+basic_stats_raw <- filter(basic_stats, stage == "raw_concat")
+adapter_stats_raw <- filter(adapter_stats, stage == "raw_concat")
+quality_base_stats_raw <- filter(quality_base_stats, stage == "raw_concat")
+quality_seq_stats_raw <- filter(quality_seq_stats, stage == "raw_concat")
+
+# Headline numbers quoted in the text: read-pair range/mean/total, total bases, duplication range/mean
+raw_read_counts <- summarize(
+  ungroup(basic_stats_raw),
+  rmin = min(n_read_pairs),
+  rmax = max(n_read_pairs),
+  rmean = mean(n_read_pairs),
+  rtot = sum(n_read_pairs),
+  btot = sum(n_bases_approx),
+  dmin = min(percent_duplicates),
+  dmax = max(percent_duplicates),
+  dmean = mean(percent_duplicates),
+  .groups = "drop"
+)
+
+
+

These 293 samples yielded 0.39M-7.86M (mean 4.57M) reads per sample, for a total of 1.34B read pairs (402 gigabases of sequence). Read qualities were high at the 5’ end but dropped off significantly in some samples, indicating a definite need for cleaning. Adapter levels were high. With the exception of a couple of early samples, inferred duplication levels were low (mean 9.4%).

+
+
Code
# Reshape the raw-stage summary to long format: one row per sample and metric
+basic_stats_raw_metrics <- basic_stats_raw %>%
+  select(sample, city, date, n_read_pairs, n_bases_approx, percent_duplicates) %>%
+  # Display names for the three metrics
+  rename(`# Read pairs` = n_read_pairs,
+         `Total base pairs\n(approx)` = n_bases_approx,
+         `% Duplicates\n(FASTQC)` = percent_duplicates) %>%
+  pivot_longer(-c(sample, city, date), names_to = "metric", values_to = "value") %>%
+  # Keep the metrics in the order selected above rather than alphabetical order
+  mutate(metric = fct_inorder(metric))
+
+# Set up plot templates: scale constructors with their standard arguments pre-filled
+scale_fill_city <- purrr::partial(scale_fill_brewer, palette="Set1",
+                                  name="City")  # Brewer "Set1" fill scale with legend title "City"
+scale_x_cdate <- purrr::partial(scale_x_date, name="Collection Date",
+                                date_breaks = "1 month", date_labels = "%Y-%m-%d")  # date axis with monthly breaks
+g_basic <- ggplot(basic_stats_raw_metrics, 
+                  aes(x=date, y=value, fill=city, group=interaction(city,sample))) +  # one bar per sample, filled by city
+  geom_col(position = "dodge") +
+  scale_x_cdate() +
+  scale_y_continuous(expand=c(0,0)) +
+  expand_limits(y=c(0,100)) +  # make every panel's y-axis include 0 and 100
+  scale_fill_city() + 
+  facet_grid(metric~., scales = "free", space="free_x", switch="y") +  # one row of panels per metric
+  theme_rotate + theme(  # theme_rotate is defined outside this chunk
+    axis.title.y = element_blank(),
+    strip.text.y = element_text(face="plain")
+  )
+g_basic
+
+
+

+
+
+
+
+
+
Code
# Set up plotting templates shared by the adapter and quality plots
+scale_color_city <- purrr::partial(scale_color_brewer, palette="Set1",
+                                   name="City")  # Brewer "Set1" colour scale with legend title "City"
+g_qual_raw <- ggplot(mapping=aes(color=city, linetype=read_pair,  # no data here: each derived plot passes its own data to geom_line
+                         group=interaction(sample,read_pair))) +  # one line per sample and read pair
+  scale_color_city() + scale_linetype_discrete(name = "Read Pair") +
+  guides(color=guide_legend(nrow=2,byrow=TRUE),
+         linetype = guide_legend(nrow=2,byrow=TRUE)) +  # both legends laid out in two rows
+  theme_base  # theme_base is defined outside this chunk
+
+# Visualize adapters: % adapter content by read position, one panel per adapter
+g_adapters_raw <- g_qual_raw + 
+  geom_line(aes(x=position, y=pc_adapters), data=adapter_stats_raw) +
+  scale_y_continuous(name="% Adapters", limits=c(0,NA),
+                     breaks = seq(0,100,10), expand=c(0,0)) +  # lower limit fixed at 0, upper limit taken from the data
+  scale_x_continuous(name="Position", limits=c(0,NA),
+                     breaks=seq(0,140,20), expand=c(0,0)) +
+  facet_grid(.~adapter)  # adapters laid out as columns
+g_adapters_raw
+
+
+

+
+
+
+
Code
# Visualize quality: mean Phred score by read position for the raw reads
+g_quality_base_raw <- g_qual_raw +
+  geom_hline(yintercept=25, linetype="dashed", color="red") +  # reference lines at Phred 25 and 30
+  geom_hline(yintercept=30, linetype="dashed", color="red") +
+  geom_line(aes(x=position, y=mean_phred_score), data=quality_base_stats_raw) +
+  scale_y_continuous(name="Mean Phred score", expand=c(0,0), limits=c(10,45)) +
+  scale_x_continuous(name="Position", limits=c(0,NA),
+                     breaks=seq(0,140,20), expand=c(0,0))
+g_quality_base_raw
+
+
+

+
+
+
+
Code
# Number of raw sequences at each mean Phred score, one line per sample and
# read pair (grouping comes from g_qual_raw)
g_quality_seq_raw <- g_qual_raw +
+  geom_vline(xintercept=25, linetype="dashed", color="red") +  # reference lines at Phred 25 and 30
+  geom_vline(xintercept=30, linetype="dashed", color="red") +
+  geom_line(aes(x=mean_phred_score, y=n_sequences), data=quality_seq_stats_raw) +
+  scale_x_continuous(name="Mean Phred score", expand=c(0,0)) +
+  scale_y_continuous(name="# Sequences", expand=c(0,0))
+g_quality_seq_raw
+
+
+

+
+
+
+
+

Preprocessing

+

The average fraction of reads lost at each stage in the preprocessing pipeline is shown in the following table. Read loss during cleaning was highly variable but averaged 11%, with a further ~7% lost during deduplication and ~0.3% during ribodepletion.

+
+
Code
# Per-sample read retention and loss across preprocessing stages, followed by
# a per-stage summary table (min-max and mean of the losses, in percent).
n_reads_rel <- basic_stats %>% 
+  select(sample, date, city, stage, 
+         percent_duplicates, n_read_pairs) %>%
+  group_by(sample) %>% arrange(sample, stage) %>%  # rows ordered by stage within each sample
+  mutate(p_reads_retained = replace_na(n_read_pairs / lag(n_read_pairs), 0),  # relative to the previous stage; the first row has no predecessor, so NA becomes 0
+         p_reads_lost = 1 - p_reads_retained,
+         p_reads_retained_abs = n_read_pairs / n_read_pairs[1],  # relative to the sample's first stage
+         p_reads_lost_abs = 1-p_reads_retained_abs,
+         p_reads_lost_abs_marginal = replace_na(p_reads_lost_abs - lag(p_reads_lost_abs), 0))  # extra loss added by this stage
+n_reads_rel_display <- n_reads_rel %>% 
+  rename(Stage=stage, City=city) %>% 
+  group_by(Stage) %>% 
+  summarize(`% Total Reads Lost (Cumulative)` = paste0(round(min(p_reads_lost_abs*100),1), "-", round(max(p_reads_lost_abs*100),1), " (mean ", round(mean(p_reads_lost_abs*100),1), ")"),
+            `% Total Reads Lost (Marginal)` = paste0(round(min(p_reads_lost_abs_marginal*100),1), "-", round(max(p_reads_lost_abs_marginal*100),1), " (mean ", round(mean(p_reads_lost_abs_marginal*100),1), ")"), .groups="drop") %>% 
+  filter(Stage != "raw_concat") %>%  # the raw stage has no loss to report
+  mutate(Stage = Stage %>% as.numeric %>% factor(labels=c("Trimming & filtering", "Deduplication", "Initial ribodepletion", "Secondary ribodepletion")))  # assumes `stage` is a factor whose four remaining levels map, in order, to these labels - TODO confirm
+n_reads_rel_display
+
+
+ +
+
+
+
+
Code
# Template for per-sample traces across preprocessing stages, one panel per
# city; it defines no y aesthetic, so each plot adds its own geom_line(aes(y=...)).
g_stage_trace <- ggplot(basic_stats, aes(x=stage, color=city, group=sample)) +
+  scale_color_city() +
+  facet_wrap(~city, scales="free", ncol=3) +
+  theme_kit + theme(legend.position = "none")
+
+# Plot reads over preprocessing
+g_reads_stages <- g_stage_trace +
+  geom_line(aes(y=n_read_pairs)) +
+  scale_y_continuous("# Read pairs", expand=c(0,0), limits=c(0,NA))
+g_reads_stages
+
+
+

+
+
+
+
Code
# Plot relative read losses during preprocessing: marginal loss at each stage
# as a share of the sample's first-stage read count, one line per sample
+g_reads_rel <- ggplot(n_reads_rel, aes(x=stage, color=city, group=sample)) +
+  geom_line(aes(y=p_reads_lost_abs_marginal)) +
+  scale_y_continuous("% Total Reads Lost", expand=c(0,0), 
+                     labels = function(x) x*100) +  # fractions displayed as percentages
+  scale_color_city() +
+  facet_wrap(~city, scales="free", ncol=3) +
+  theme_kit + theme(legend.position = "none")
+g_reads_rel
+
+
+

+
+
+
+
+

Data cleaning was very successful at removing adapters and improving read qualities:

+
+
Code
# Data-free plot template (shared aesthetics, scales, legends, theme) for the
# adapter and quality plots below; each layer supplies its own `data=`.
g_qual <- ggplot(mapping=aes(color=city, linetype=read_pair, 
+                         group=interaction(sample,read_pair))) + 
+  scale_color_city() + scale_linetype_discrete(name = "Read Pair") +
+  guides(color=guide_legend(nrow=2,byrow=TRUE),
+         linetype = guide_legend(nrow=2,byrow=TRUE)) +
+  theme_base
+
+# Visualize adapters: one row of panels per stage, one column per adapter
+g_adapters <- g_qual + 
+  geom_line(aes(x=position, y=pc_adapters), data=adapter_stats) +
+  scale_y_continuous(name="% Adapters", limits=c(0,20),
+                     breaks = seq(0,50,10), expand=c(0,0)) +
+  scale_x_continuous(name="Position", limits=c(0,NA),
+                     breaks=seq(0,140,20), expand=c(0,0)) +
+  facet_grid(stage~adapter)
+g_adapters
+
+
+

+
+
+
+
Code
# Visualize quality: mean Phred score by read position, one panel per stage
+g_quality_base <- g_qual +
+  geom_hline(yintercept=25, linetype="dashed", color="red") +  # reference lines at Phred 25 and 30
+  geom_hline(yintercept=30, linetype="dashed", color="red") +
+  geom_line(aes(x=position, y=mean_phred_score), data=quality_base_stats) +
+  scale_y_continuous(name="Mean Phred score", expand=c(0,0), limits=c(10,45)) +
+  scale_x_continuous(name="Position", limits=c(0,NA),
+                     breaks=seq(0,140,20), expand=c(0,0)) +
+  facet_grid(stage~.)
+g_quality_base
+
+
+

+
+
+
+
Code
# Number of sequences at each mean Phred score, one panel per stage
g_quality_seq <- g_qual +
+  geom_vline(xintercept=25, linetype="dashed", color="red") +  # reference lines at Phred 25 and 30
+  geom_vline(xintercept=30, linetype="dashed", color="red") +
+  geom_line(aes(x=mean_phred_score, y=n_sequences), data=quality_seq_stats) +
+  scale_x_continuous(name="Mean Phred score", expand=c(0,0)) +
+  scale_y_continuous(name="# Sequences", expand=c(0,0)) +
+  facet_grid(stage~.)
+g_quality_seq
+
+
+

+
+
+
+
+

According to FASTQC, cleaning + deduplication was very effective at reducing measured duplicate levels, which fell from an average of 9.4% to 1.7% for DNA reads:

+
+
Code
# Min / max / mean duplicate percentage per stage (not used by the plot below)
stage_dup <- basic_stats %>% group_by(stage) %>% 
+  summarize(dmin = min(percent_duplicates), dmax=max(percent_duplicates),
+            dmean=mean(percent_duplicates), .groups = "drop")
+
+g_dup_stages <- g_stage_trace +
+  geom_line(aes(y=percent_duplicates)) +  # per-sample duplicate level across stages
+  scale_y_continuous("% Duplicates", limits=c(0,NA), expand=c(0,0))
+g_dup_stages
+
+
+

+
+
+
+
Code
# Per-sample mean read length across preprocessing stages
g_readlen_stages <- g_stage_trace + geom_line(aes(y=mean_seq_len)) +
+  scale_y_continuous("Mean read length (nt)", expand=c(0,0), limits=c(0,NA))
+g_readlen_stages
+
+
+

+
+
+
+
+

High-level composition

+

As before, to assess the high-level composition of the reads, I ran the ribodepleted files through Kraken (using the Standard 16 database) and summarized the results with Bracken. Combining these results with the read counts above gives us a breakdown of the inferred composition of the samples:

+
+
Code
# Import Bracken data (one bracken_counts.tsv per entry in data_dirs)
+bracken_paths <- file.path(data_dirs, "bracken_counts.tsv")
+bracken <- lapply(bracken_paths, read_tsv, show_col_types = FALSE) %>% bind_rows
+total_assigned <- bracken %>% group_by(sample) %>% summarize(  # per-sample totals over all taxa
+  name = "Total",
+  kraken_assigned_reads = sum(kraken_assigned_reads),
+  added_reads = sum(added_reads),
+  new_est_reads = sum(new_est_reads),
+  fraction_total_reads = sum(fraction_total_reads)
+)
+bracken_spread <- bracken %>% select(name, sample, new_est_reads) %>%
+  mutate(name = tolower(name)) %>%  # lower-case taxon names become the column names
+  pivot_wider(id_cols = "sample", names_from = "name", 
+              values_from = "new_est_reads")
+
+# Count reads: one column of read-pair counts per preprocessing stage
+read_counts_preproc <- basic_stats %>% 
+  select(sample, date, date_alias, city, stage, n_read_pairs) %>%
+  pivot_wider(id_cols = c("sample", "date", "date_alias", "city"),
+              names_from="stage", values_from="n_read_pairs")
+read_counts <- read_counts_preproc %>%
+  inner_join(total_assigned %>% select(sample, new_est_reads), by = "sample") %>%
+  rename(assigned = new_est_reads) %>%
+  inner_join(bracken_spread, by="sample")
+
+# Assess composition: reads removed at each stage, then Bracken estimates
+read_comp <- transmute(read_counts, sample, date, date_alias, city,
+                       n_filtered = raw_concat-cleaned,
+                       n_duplicate = cleaned-dedup,
+                       n_ribosomal = (dedup-ribo_initial) + (ribo_initial-ribo_secondary),  # both ribodepletion steps combined
+                       n_unassigned = ribo_secondary-assigned,
+                       n_bacterial = bacteria,
+                       n_archaeal = archaea,
+                       n_viral = viruses,
+                       n_human = eukaryota)  # NOTE(review): all Eukaryota reads are labelled human - confirm the Kraken database holds no other eukaryotes
+read_comp_long <- pivot_longer(read_comp, -(sample:city), 
+                               names_to = "classification",
+                               names_prefix = "n_", values_to = "n_reads") %>%
+  mutate(classification = fct_inorder(str_to_sentence(classification))) %>%
+  group_by(sample) %>% mutate(p_reads = n_reads/sum(n_reads))  # share of each class within a sample
+
+# Summarize composition: pooled read counts and shares per city
+read_comp_summ <- read_comp_long %>% 
+  group_by(city, classification) %>%
+  summarize(n_reads = sum(n_reads), .groups = "drop_last") %>%  # result stays grouped by city
+  mutate(n_reads = replace_na(n_reads,0),
+    p_reads = n_reads/sum(n_reads),
+    pc_reads = p_reads*100)
+
+
+
+
Code
# Prepare plotting templates: a data-free stacked-composition base (one panel
# per city, x-axis tick labels hidden) and a percentage y scale
+g_comp_base <- ggplot(mapping=aes(x=date_alias, y=p_reads, fill=classification)) +
+  scale_x_discrete(name="Collection Date") +
+  facet_wrap(~city, scales = "free") +
+  theme_kit + theme(axis.text.x = element_blank())
+scale_y_pc_reads <- purrr::partial(scale_y_continuous, name = "% Reads",
+                                   expand = c(0,0), labels = function(y) y*100)  # fractions displayed as percentages
+
+# Plot overall composition
+g_comp <- g_comp_base + geom_col(data = read_comp_long, position = "stack") +
+  scale_y_pc_reads(limits = c(0,1.01), breaks = seq(0,1,0.2)) +
+  scale_fill_brewer(palette = "Set1", name = "Classification")
+g_comp
+
+
+

+
+
+
+
Code
# Plot composition of minor components
# NOTE(review): no "Other" classification is created when read_comp is built
# in this document, so only "Archaeal" and "Viral" are expected to match.
+read_comp_minor <- read_comp_long %>% 
+  filter(classification %in% c("Archaeal", "Viral", "Other"))
+palette_minor <- brewer.pal(9, "Set1")[c(6,7,9)]  # Set1 colours 6, 7 and 9
+g_comp_minor <- g_comp_base + geom_col(data=read_comp_minor, position = "stack") +
+  scale_y_pc_reads() +
+  scale_fill_manual(values=palette_minor, name = "Classification")
+g_comp_minor
+
+
+

+
+
+
+
+
+
Code
# Table of read-fraction ranges per city and classification, with Filtered,
# Duplicate and Unassigned reads pooled into a single "Excluded" class.
p_reads_summ_group <- read_comp_long %>%
+  mutate(classification = ifelse(classification %in% c("Filtered", "Duplicate", "Unassigned"), "Excluded", as.character(classification)),
+         classification = fct_inorder(classification)) %>%
+  group_by(classification, sample, city) %>%
+  summarize(p_reads = sum(p_reads), .groups = "drop") %>%  # per-sample share of each (pooled) class
+  group_by(classification, city) %>%
+  summarize(pc_min = min(p_reads)*100, pc_max = max(p_reads)*100, 
+            pc_mean = mean(p_reads)*100, .groups = "drop")
+p_reads_summ_prep <- p_reads_summ_group %>%
+  mutate(classification = fct_inorder(classification),
+         pc_min = pc_min %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2),  # 2 significant figures, no scientific notation
+         pc_max = pc_max %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2),
+         pc_mean = pc_mean %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2),
+         display = paste0(pc_min, "-", pc_max, "% (mean ", pc_mean, "%)"))
+p_reads_summ <- p_reads_summ_prep %>%
+  select(city, classification, read_fraction=display) %>%
+  arrange(city, classification)
+p_reads_summ
+
+
+ +
+
+
+

In many respects, these resemble the Prussin data: high human fraction (mean 19.6%), high bacterial fraction (mean 18.2%), high unclassified fraction (mean 43.9%), low viral fraction (mean 0.01%). One notable difference is that archaeal reads are more abundant (0.034% compared to 0.016% for Prussin).

+

As in Prussin, viral DNA reads were dominated by Caudoviricetes phages. Other viral classes that are prominent in at least some samples include Herviviricetes (herpesviruses), Papovaviricetes (polyomaviruses and papillomaviruses), Revtraviricetes (retroviruses + Hep B), and Naldaviricetes (mainly arthropod viruses). I’ll investigate the first three of this latter group in more depth, restricting in each case to samples where that class makes up at least 5% of viral reads.

+
+
Code
# Get viral taxonomy
+viral_taxa_path <- file.path(data_dir_base, "viral-taxids.tsv.gz")
+viral_taxa <- read_tsv(viral_taxa_path, show_col_types = FALSE)
+
+# Get paths to Kraken reports
+samples <- as.character(basic_stats_raw$sample)
+report_dirs <- file.path(data_dirs, "kraken")
+report_paths <- lapply(report_dirs, list.files, full.names = TRUE) %>% unlist
+names(report_paths) <- str_extract(report_paths, "SRR\\d*")  # name each report by the SRR accession found in its path
+
+# Extract viral taxa
+col_names <- c("pc_reads_total", "n_reads_clade", "n_reads_direct",
+               "rank", "taxid", "name")
+kraken_reports_raw <- lapply(report_paths, read_tsv, col_names = col_names,
+                             show_col_types = FALSE)
+kraken_reports <- lapply(names(kraken_reports_raw), 
+                         function(x) kraken_reports_raw[[x]] %>% 
+                           mutate(sample = x)) %>% bind_rows  # tag every row with its sample before stacking
+kraken_reports_viral <- filter(kraken_reports, taxid %in% viral_taxa$taxid) %>%
+  group_by(sample) %>%
+  mutate(p_reads_viral = n_reads_clade/n_reads_clade[1])  # relative to the first viral row of each sample; presumably the root Viruses taxon - confirm report ordering
+kraken_reports_viral_cleaned <- kraken_reports_viral %>%  # still grouped by sample from the step above
+  inner_join(libraries, by="sample") %>%
+  select(-pc_reads_total, -n_reads_direct) %>%
+  select(name, taxid, p_reads_viral, n_reads_clade, everything())
+
+viral_classes <- kraken_reports_viral_cleaned %>% filter(rank == "C")
+viral_families <- kraken_reports_viral_cleaned %>% filter(rank == "F")
+
+
+
+
Code
# Minimum share of viral reads a class must reach in at least one sample to
# be shown by name; all remaining classes are pooled into "Other".
major_threshold <- 0.05
+
+# Identify major viral classes
+viral_classes_major_tab <- viral_classes %>% 
+  group_by(name, taxid) %>%
+  summarize(p_reads_viral_max = max(p_reads_viral), .groups="drop") %>%  # highest share across samples
+  filter(p_reads_viral_max >= major_threshold)
+viral_classes_major_list <- viral_classes_major_tab %>% pull(name)
+viral_classes_major <- viral_classes %>% 
+  filter(name %in% viral_classes_major_list) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_viral)
+viral_classes_minor <- viral_classes_major %>% 
+  group_by(sample, date_alias, city) %>%
+  summarize(p_reads_viral_major = sum(p_reads_viral), .groups = "drop") %>%
+  mutate(name = "Other", taxid=NA, p_reads_viral = 1-p_reads_viral_major) %>%  # remainder after the major classes
+  select(name, taxid, sample, date_alias, city, p_reads_viral)
+viral_classes_display <- bind_rows(viral_classes_major, viral_classes_minor) %>%
+  arrange(desc(p_reads_viral)) %>% 
+  mutate(name = factor(name, levels=c(viral_classes_major_list, "Other")),
+         p_reads_viral = pmax(p_reads_viral, 0)) %>%  # floor negative remainders at zero
+  rename(p_reads = p_reads_viral, classification=name)  # column names expected by g_comp_base
+
+palette_viral <- c(brewer.pal(12, "Set3"), brewer.pal(8, "Dark2"))  # 20 colours in total
+g_classes <- g_comp_base + 
+  geom_col(data=viral_classes_display, position = "stack") +
+  scale_y_continuous(name="% Viral Reads", limits=c(0,1.01), breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral class")
+  
+g_classes
+
+
+

+
+
+
+
+

Papovaviricetes are quite heterogeneous across samples, and frequently diverse within samples. Alphapolyomavirus and Alphapapillomavirus are the most abundant genera overall, but Betapapillomavirus, Gammapapillomavirus, Mupapillomavirus and others all have strong showings.

+
+
Code
# Get samples
+papova_taxid <- 2732421
+papova_threshold <- 0.05
+# Keep samples in which the class makes up at least `papova_threshold` of
+# viral reads; the declared threshold replaces a repeated 0.05 literal.
+papova_samples <- viral_classes %>% filter(taxid == papova_taxid) %>% filter(p_reads_viral >= papova_threshold) %>% pull(sample) %>% unique
+
+# Get all taxa in class: keep adding children of the current set until no
+# new taxids appear
+papova_desc_taxids_old <- papova_taxid
+papova_desc_taxids_new <- unique(c(papova_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% papova_desc_taxids_old) %>% pull(taxid)))
+while (length(papova_desc_taxids_new) > length(papova_desc_taxids_old)){
+  papova_desc_taxids_old <- papova_desc_taxids_new
+  papova_desc_taxids_new <- unique(c(papova_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% papova_desc_taxids_old) %>% pull(taxid)))
+}
+
+# Get read counts
+# Group explicitly by sample so the denominator is that sample's first
+# matching row, rather than relying on grouping inherited from upstream.
+# Presumably the first row is the class itself - confirm Kraken report order.
+papova_counts <- kraken_reports_viral_cleaned %>%
+  filter(taxid %in% papova_desc_taxids_new,
+         sample %in% papova_samples) %>%
+  group_by(sample) %>%
+  mutate(p_reads_papova = n_reads_clade/n_reads_clade[1])
+
+# Get genus composition
+papova_genera <- papova_counts %>% filter(rank == "G")
+papova_genera_major_tab <- papova_genera %>% 
+  group_by(name, taxid) %>%
+  summarize(p_reads_papova_max = max(p_reads_papova), .groups="drop") %>%
+  filter(p_reads_papova_max >= papova_threshold)
+papova_genera_major_list <- papova_genera_major_tab %>% pull(name)
+papova_genera_major <- papova_genera %>% 
+  filter(name %in% papova_genera_major_list) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_papova)
+# Pool everything not in a major genus into "Other" (remainder to 1)
+papova_genera_minor <- papova_genera_major %>% 
+  group_by(sample, date_alias, city) %>%
+  summarize(p_reads_papova_major = sum(p_reads_papova), .groups = "drop") %>%
+  mutate(name = "Other", taxid=NA, p_reads_papova = 1-p_reads_papova_major) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_papova)
+papova_genera_display <- bind_rows(papova_genera_major, papova_genera_minor) %>%
+  arrange(desc(p_reads_papova)) %>% 
+  mutate(name = factor(name, levels=c(papova_genera_major_list, "Other"))) %>%
+  rename(p_reads = p_reads_papova, classification=name)
+
+# Plot
+g_papova_genera <- g_comp_base + 
+  geom_col(data=papova_genera_display, position = "stack") +
+  scale_y_continuous(name="% Papovaviricetes Reads", limits=c(0,1.02), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral genus") +
+  guides(fill=guide_legend(ncol=3))
+g_papova_genera
+
+
+

+
+
+
+
+

Only a few samples showed at least 5% prevalence of Herviviricetes, but those that did were typically dominated by one or a small number of species that varied between samples. Of these, human alphaherpesvirus 1 appeared in the most samples, but several other species were prominent in at least one sample:

+
+
Code
# Get samples
+hervi_taxid <- 2731363
+hervi_threshold <- 0.05
+# Keep samples in which the class makes up at least `hervi_threshold` of
+# viral reads; the declared threshold replaces a repeated 0.05 literal.
+hervi_samples <- viral_classes %>% filter(taxid == hervi_taxid) %>% filter(p_reads_viral >= hervi_threshold) %>% pull(sample) %>% unique
+
+# Get all taxa in class: keep adding children of the current set until no
+# new taxids appear
+hervi_desc_taxids_old <- hervi_taxid
+hervi_desc_taxids_new <- unique(c(hervi_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% hervi_desc_taxids_old) %>% pull(taxid)))
+while (length(hervi_desc_taxids_new) > length(hervi_desc_taxids_old)){
+  hervi_desc_taxids_old <- hervi_desc_taxids_new
+  hervi_desc_taxids_new <- unique(c(hervi_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% hervi_desc_taxids_old) %>% pull(taxid)))
+}
+
+# Get read counts
+# Group explicitly by sample so the denominator is that sample's first
+# matching row, rather than relying on grouping inherited from upstream.
+# Presumably the first row is the class itself - confirm Kraken report order.
+hervi_counts <- kraken_reports_viral_cleaned %>%
+  filter(taxid %in% hervi_desc_taxids_new,
+         sample %in% hervi_samples) %>%
+  group_by(sample) %>%
+  mutate(p_reads_hervi = n_reads_clade/n_reads_clade[1])
+
+# Get species composition (rank "S"); object names keep their original
+# *_genera suffix
+hervi_genera <- hervi_counts %>% filter(rank == "S")
+hervi_genera_major_tab <- hervi_genera %>% 
+  group_by(name, taxid) %>%
+  summarize(p_reads_hervi_max = max(p_reads_hervi), .groups="drop") %>%
+  filter(p_reads_hervi_max >= hervi_threshold)
+hervi_genera_major_list <- hervi_genera_major_tab %>% pull(name)
+hervi_genera_major <- hervi_genera %>% 
+  filter(name %in% hervi_genera_major_list) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_hervi)
+# Pool everything not in a major species into "Other" (remainder to 1)
+hervi_genera_minor <- hervi_genera_major %>% 
+  group_by(sample, date_alias, city) %>%
+  summarize(p_reads_hervi_major = sum(p_reads_hervi), .groups = "drop") %>%
+  mutate(name = "Other", taxid=NA, p_reads_hervi = 1-p_reads_hervi_major) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_hervi)
+hervi_genera_display <- bind_rows(hervi_genera_major, hervi_genera_minor) %>%
+  arrange(desc(p_reads_hervi)) %>% 
+  mutate(name = factor(name, levels=c(hervi_genera_major_list, "Other"))) %>%
+  rename(p_reads = p_reads_hervi, classification=name)
+
+# Plot (legend says species because the rows are filtered on rank "S")
+g_hervi_genera <- g_comp_base + 
+  geom_col(data=hervi_genera_display, position = "stack") +
+  scale_y_continuous(name="% Herviviricetes Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  guides(fill=guide_legend(ncol=3))
+g_hervi_genera
+
+
+

+
+
+
+
+

Finally, for Revtraviricetes, most samples were dominated by porcine type-C oncovirus, while one was dominated by an avian retrovirus. The last showed significant levels of two murine viruses plus HIV. I’m suspicious of many of these.

+
+
Code
# Get samples
+revtra_taxid <- 2732514
+revtra_threshold <- 0.05
+# Keep samples in which the class makes up at least `revtra_threshold` of
+# viral reads; the declared threshold replaces a repeated 0.05 literal.
+revtra_samples <- viral_classes %>% filter(taxid == revtra_taxid) %>% filter(p_reads_viral >= revtra_threshold) %>% pull(sample) %>% unique
+
+# Get all taxa in class: keep adding children of the current set until no
+# new taxids appear
+revtra_desc_taxids_old <- revtra_taxid
+revtra_desc_taxids_new <- unique(c(revtra_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% revtra_desc_taxids_old) %>% pull(taxid)))
+while (length(revtra_desc_taxids_new) > length(revtra_desc_taxids_old)){
+  revtra_desc_taxids_old <- revtra_desc_taxids_new
+  revtra_desc_taxids_new <- unique(c(revtra_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% revtra_desc_taxids_old) %>% pull(taxid)))
+}
+
+# Get read counts
+# Group explicitly by sample so the denominator is that sample's first
+# matching row, rather than relying on grouping inherited from upstream.
+# Presumably the first row is the class itself - confirm Kraken report order.
+revtra_counts <- kraken_reports_viral_cleaned %>%
+  filter(taxid %in% revtra_desc_taxids_new,
+         sample %in% revtra_samples) %>%
+  group_by(sample) %>%
+  mutate(p_reads_revtra = n_reads_clade/n_reads_clade[1])
+
+# Get species composition (rank "S"); object names keep their original
+# *_genera suffix
+revtra_genera <- revtra_counts %>% filter(rank == "S")
+revtra_genera_major_tab <- revtra_genera %>% 
+  group_by(name, taxid) %>%
+  summarize(p_reads_revtra_max = max(p_reads_revtra), .groups="drop") %>%
+  filter(p_reads_revtra_max >= revtra_threshold)
+revtra_genera_major_list <- revtra_genera_major_tab %>% pull(name)
+revtra_genera_major <- revtra_genera %>% 
+  filter(name %in% revtra_genera_major_list) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_revtra)
+# Pool everything not in a major species into "Other" (remainder to 1)
+revtra_genera_minor <- revtra_genera_major %>% 
+  group_by(sample, date_alias, city) %>%
+  summarize(p_reads_revtra_major = sum(p_reads_revtra), .groups = "drop") %>%
+  mutate(name = "Other", taxid=NA, p_reads_revtra = 1-p_reads_revtra_major) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_revtra)
+revtra_genera_display <- bind_rows(revtra_genera_major, revtra_genera_minor) %>%
+  arrange(desc(p_reads_revtra)) %>% 
+  mutate(name = factor(name, levels=c(revtra_genera_major_list, "Other"))) %>%
+  rename(p_reads = p_reads_revtra, classification=name)
+
+# Plot (legend says species because the rows are filtered on rank "S")
+g_revtra_genera <- g_comp_base + 
+  geom_col(data=revtra_genera_display, position = "stack") +
+  scale_y_continuous(name="% Revtraviricetes Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  guides(fill=guide_legend(ncol=3))
+g_revtra_genera
+
+
+

+
+
+
+
+

Human-infecting virus reads: validation

+

Next, I investigated the human-infecting virus read content of these unenriched samples. Using the same workflow I used for Prussin et al, I identified 24,278 read pairs as putatively human viral: 0.002% of reads surviving to that stage in the pipeline.

+
+
Code
# Import HV read data (one hv_hits_putative_filtered.tsv.gz per entry in
# data_dirs), annotated with library metadata
+hv_reads_filtered_paths <- file.path(data_dirs, "hv_hits_putative_filtered.tsv.gz")
+hv_reads_filtered <- lapply(hv_reads_filtered_paths, read_tsv,
+                            show_col_types = FALSE) %>%
+  bind_rows() %>%
+  inner_join(libraries, by="sample")
+
+# Count reads
+n_hv_filtered <- hv_reads_filtered %>%
+  group_by(sample, date, date_alias, city, seq_id) %>% count %>%  # one row per distinct seq_id
+  group_by(sample, date, date_alias, city) %>% count %>%  # number of distinct seq_ids per sample
+  inner_join(basic_stats %>% filter(stage == "ribo_initial") %>% 
+               select(sample, n_read_pairs), by="sample") %>%  # denominator: read pairs at the ribo_initial stage
+  rename(n_putative = n, n_total = n_read_pairs) %>% 
+  mutate(p_reads = n_putative/n_total, pc_reads = p_reads * 100)
+n_hv_filtered_summ <- n_hv_filtered %>% ungroup %>%
+  summarize(n_putative = sum(n_putative), n_total = sum(n_total), 
+            .groups="drop") %>%  # totals over all samples
+  mutate(p_reads = n_putative/n_total, pc_reads = p_reads*100)
+
+
+
+
Code
# Collapse multi-entry sequences: merge all rows sharing a seq_id into a
# single row per read pair, then label each pair and plot score histograms.
+rmax <- purrr::partial(max, na.rm = TRUE)  # max that ignores NA
+collapse <- function(x) ifelse(all(x == x[1]), x[1], paste(x, collapse="/"))  # the single value if all entries agree, else "a/b/..."; NOTE(review): can return NA when x contains NA
+mrg <- hv_reads_filtered %>% 
+  mutate(adj_score_max = pmax(adj_score_fwd, adj_score_rev, na.rm = TRUE)) %>%
+  arrange(desc(adj_score_max)) %>%  # best-scoring row first, so the [1] picks below take the top-scoring entry
+  group_by(seq_id) %>%
+  summarize(sample = collapse(sample),
+            genome_id = collapse(genome_id),
+            taxid_best = taxid[1],
+            taxid = collapse(as.character(taxid)),
+            best_alignment_score_fwd = rmax(best_alignment_score_fwd),
+            best_alignment_score_rev = rmax(best_alignment_score_rev),
+            query_len_fwd = rmax(query_len_fwd),
+            query_len_rev = rmax(query_len_rev),
+            query_seq_fwd = query_seq_fwd[!is.na(query_seq_fwd)][1],  # first non-missing sequence
+            query_seq_rev = query_seq_rev[!is.na(query_seq_rev)][1],
+            classified = rmax(classified),
+            assigned_name = collapse(assigned_name),
+            assigned_taxid_best = assigned_taxid[1],
+            assigned_taxid = collapse(as.character(assigned_taxid)),
+            assigned_hv = rmax(assigned_hv),
+            hit_hv = rmax(hit_hv),
+            encoded_hits = collapse(encoded_hits),
+            adj_score_fwd = rmax(adj_score_fwd),
+            adj_score_rev = rmax(adj_score_rev)
+            ) %>%
+  inner_join(libraries, by="sample") %>%
+  mutate(kraken_label = ifelse(assigned_hv, "Kraken2 HV\nassignment",
+                               ifelse(hit_hv, "Kraken2 HV\nhit",
+                                      "No hit or\nassignment"))) %>%
+  mutate(adj_score_max = pmax(adj_score_fwd, adj_score_rev),
+         highscore = adj_score_max >= 20)  # same cut-off as the dashed line in the histogram
+
+g_hist_0 <- ggplot(mrg, aes(x=adj_score_max)) + 
+  geom_histogram(binwidth=5,boundary=0) +
+  geom_vline(xintercept=20, linetype="dashed", color="red") +
+  facet_wrap(~kraken_label, labeller = labeller(kit = label_wrap_gen(20)), scales = "free_y") +  # NOTE(review): the labeller targets `kit`, but the facet variable is kraken_label
+  scale_x_continuous(name = "Maximum adjusted alignment score") + 
+  scale_y_continuous(name="# Read pairs") + 
+  theme_base 
+g_hist_0
+
+
+

+
+
+
+
+

As previously described, I ran BLASTN on these reads via a dedicated EC2 instance, using the same parameters I’ve used for previous datasets.

+
+
Code
# Write the putative viral reads to a FASTA file for BLAST: one record per
# mate, with headers "<seq_id>_1" (forward) and "<seq_id>_2" (reverse).
mrg_fasta <-  mrg %>%
+  mutate(seq_head = paste0(">", seq_id)) %>%
+  ungroup %>%
+  select(header1=seq_head, seq1=query_seq_fwd, 
+         header2=seq_head, seq2=query_seq_rev) %>%
+  mutate(header1=paste0(header1, "_1"), header2=paste0(header2, "_2"))
+mrg_fasta_sep <- bind_rows(select(mrg_fasta, header=header1, seq=seq1),
+                           select(mrg_fasta, header=header2, seq=seq2)) %>%
+  filter(!is.na(seq))  # skip mates with no sequence
+mrg_fasta_out <- do.call(paste, c(mrg_fasta_sep, sep="\n")) %>%  # header and sequence on consecutive lines
+  paste(collapse="\n")
+blast_dir <- file.path(data_dir_base, "blast")
+dir.create(blast_dir, showWarnings = FALSE)  # no warning if the directory already exists
+write(mrg_fasta_out, file.path(blast_dir, "putative-viral.fasta"))
+
+
+
+
Code
# Import BLAST results
# The commented-out lines read the full BLAST output; the active code reads
# the already-filtered "best" table instead.
+# blast_results_path <- file.path(data_dir_base, "blast/putative-viral.blast.gz")
+# blast_cols <- c("qseqid", "sseqid", "sgi", "staxid", "qlen", "evalue", "bitscore", "qcovs", "length", "pident", "mismatch", "gapopen", "sstrand", "qstart", "qend", "sstart", "send")
+# blast_results <- read_tsv(blast_results_path, show_col_types = FALSE,
+#                           col_names = blast_cols, col_types = cols(.default="c"))
+blast_results_path <- file.path(data_dir_base, "blast/putative-viral-best.blast.gz")
+blast_results <- read_tsv(blast_results_path, show_col_types = FALSE)
+
+# Filter for best hit for each query/subject combination
+blast_results_best <- blast_results %>% group_by(qseqid, staxid) %>% 
+  filter(bitscore == max(bitscore)) %>%
+  filter(length == max(length)) %>% filter(row_number() == 1)  # ties broken by length, then by row order
+write_tsv(blast_results_best, file.path(data_dir_base, "blast/putative-viral-best.blast.gz"))  # NOTE(review): overwrites the same file that was read above
+
+# Rank hits for each query and filter for high-ranking hits
+blast_results_ranked <- blast_results_best %>% 
+  group_by(qseqid) %>% mutate(rank = dense_rank(desc(bitscore)))
+blast_results_highrank <- blast_results_ranked %>% filter(rank <= 5) %>%
+    mutate(read_pair = str_split(qseqid, "_") %>% sapply(nth, n=-1),  # text after the last "_"
+         seq_id = str_split(qseqid, "_") %>% sapply(nth, n=1)) %>%  # text before the first "_"; assumes seq_id itself has no "_" - TODO confirm
+    mutate(bitscore = as.numeric(bitscore))
+
+# Summarize by read pair and taxid
+blast_results_paired <- blast_results_highrank %>%
+  group_by(seq_id, staxid) %>%
+  summarize(bitscore_max = max(bitscore), bitscore_min = min(bitscore),
+            n_reads = n(), .groups = "drop")  # n_reads: rows (mates) hitting this taxid
+
+# Add viral status
+blast_results_viral <- mutate(blast_results_paired, viral = staxid %in% viral_taxa$taxid) %>%
+  mutate(viral_full = viral & n_reads == 2)  # viral taxid supported by two rows for the pair
+
+# Compare to Kraken & Bowtie assignments
+# Test, element by element, whether taxid_1[i] appears as one of the
+# "/"-separated entries of taxid_2[i].
+#   taxid_1: vector of single taxids.
+#   taxid_2: vector of taxid strings, either one ID or several joined by "/".
+# Returns an unnamed logical vector with one element per input pair.
+match_taxid <- function(taxid_1, taxid_2){
+  p1 <- mapply(grepl, paste0("/", taxid_1, "$"), taxid_2)  # last of several entries
+  p2 <- mapply(grepl, paste0("^", taxid_1, "/"), taxid_2)  # first of several entries
+  p3 <- mapply(grepl, paste0("^", taxid_1, "$"), taxid_2)  # the only entry
+  # Interior entry of a list with three or more IDs; without this pattern
+  # an ID in the middle of "a/b/c" was never matched.
+  p4 <- mapply(grepl, paste0("/", taxid_1, "/"), taxid_2)
+  out <- setNames(p1|p2|p3|p4, NULL)  # drop the names mapply takes from the patterns
+  return(out)
+}
+mrg_assign <- mrg %>% select(sample, seq_id, taxid, assigned_taxid, adj_score_max)
+blast_results_assign <- inner_join(blast_results_viral, mrg_assign, by="seq_id") %>%
+    mutate(taxid_match_bowtie = match_taxid(staxid, taxid),  # BLAST taxid vs. the collapsed `taxid` column
+           taxid_match_kraken = match_taxid(staxid, assigned_taxid),  # BLAST taxid vs. the collapsed `assigned_taxid` column
+           taxid_match_any = taxid_match_bowtie | taxid_match_kraken)
+blast_results_out <- blast_results_assign %>%
+  group_by(seq_id) %>%
+  summarize(viral_status = ifelse(any(viral_full), 2,  # 2: any viral_full row or any taxid match
+                                  ifelse(any(taxid_match_any), 2,
+                                             ifelse(any(viral), 1, 0))),  # 1: some viral hit only; 0: none
+            .groups = "drop")
+
+
+
+
Code
# Merge BLAST results with unenriched read data
+mrg_blast <- full_join(mrg, blast_results_out, by="seq_id") %>%
+  mutate(viral_status = replace_na(viral_status, 0),  # read pairs absent from the BLAST table count as status 0
+         viral_status_out = ifelse(viral_status == 0, FALSE, TRUE))
+
+# Plot adjusted-score histograms, filled by BLAST-derived viral status
+g_hist_1 <- ggplot(mrg_blast, aes(x=adj_score_max, fill=viral_status_out)) + 
+  geom_histogram(binwidth=5,boundary=0) + 
+  geom_vline(xintercept=20, linetype="dashed", color="red") +
+  facet_wrap(~kraken_label, labeller = labeller(kit = label_wrap_gen(20)), scales = "free_y") +  # NOTE(review): the labeller targets `kit`, but the facet variable is kraken_label
+  scale_x_continuous(name = "Maximum adjusted alignment score") + 
+  scale_y_continuous(name="# Read pairs") + 
+  scale_fill_brewer(palette = "Set1", name = "Viral status") +
+  theme_base
+g_hist_1
+
+
+

+
+
+
+
+

For a disjunctive score threshold of 20, the workflow achieves a measured F1 score of 98.0%.

+
+
Code
# Score the retention rule "Kraken assignment OR Kraken hit OR either mate's
# adjusted score above `score_threshold`" against viral_status_out.
#   tab: table with adj_score_fwd, adj_score_rev, assigned_hv, hit_hv and
#        viral_status_out columns.
#   score_threshold: adjusted-score cut-off (strictly greater-than).
# Returns a one-row tibble: threshold, sensitivity, specificity, precision, f1.
test_sens_spec <- function(tab, score_threshold){
+  # Confusion table: row counts per (truth, retained) combination
+  confusion <- tab %>% 
+    mutate(retain_score = (adj_score_fwd > score_threshold | adj_score_rev > score_threshold),
+           retain = assigned_hv | hit_hv | retain_score) %>%
+    group_by(viral_status_out, retain) %>% count
+  # Total count in the cell with the given truth value and retention decision
+  cell_total <- function(truth, kept){
+    confusion %>% 
+      filter((viral_status_out == "TRUE") == truth, retain == kept) %>% 
+      pull(n) %>% sum
+  }
+  pos_tru <- cell_total(TRUE, TRUE)
+  pos_fls <- cell_total(FALSE, TRUE)
+  neg_tru <- cell_total(FALSE, FALSE)
+  neg_fls <- cell_total(TRUE, FALSE)
+  sensitivity <- pos_tru / (pos_tru + neg_fls)
+  specificity <- neg_tru / (neg_tru + pos_fls)
+  precision   <- pos_tru / (pos_tru + pos_fls)
+  f1 <- 2 * precision * sensitivity / (precision + sensitivity)
+  tibble(threshold=score_threshold, sensitivity=sensitivity, 
+         specificity=specificity, precision=precision, f1=f1)
+}
+# Evaluate test_sens_spec on `intab` at every threshold in `inrange` and
+# return the metrics in long form (columns: threshold, metric, value).
+range_f1 <- function(intab, inrange=15:45){
+  per_threshold <- lapply(inrange, function(thr) test_sens_spec(tab=intab, thr))
+  stats <- pivot_longer(bind_rows(per_threshold), !threshold,
+                        names_to="metric", values_to="value")
+  return(stats)
+}
+stats_0 <- range_f1(mrg_blast)  # metrics for each threshold in the default range 15:45
+g_stats_0 <- ggplot(stats_0, aes(x=threshold, y=value, color=metric)) +
+  geom_vline(xintercept=20, color = "red", linetype = "dashed") +  # marks the threshold of 20
+  geom_line() +
+  scale_y_continuous(name = "Value", limits=c(0,1), breaks = seq(0,1,0.2), expand = c(0,0)) +
+  scale_x_continuous(name = "Adjusted Score Threshold", expand = c(0,0)) +
+  scale_color_brewer(palette="Dark2") +
+  theme_base
+g_stats_0
+
+
+

+
+
+
+
Code
# Show the metrics at the chosen cutoff (threshold 20) with display-friendly column names
stats_0 %>% filter(threshold == 20) %>% 
+  select(Threshold=threshold, Metric=metric, Value=value)
+
+
+ +
+
+
+

Looking into the composition of different read groups, the notable observation for me is the high prevalence of Pigeon torque teno virus among high-scoring false positives, with 77 such read pairs. BLAST maps these not to viruses but to their most common hosts, i.e. assorted species of pigeon. That said, the number of false positive PTTV reads is substantially exceeded by the number of true-positive PTTV reads (1883), which do map to appropriate viruses according to BLAST, so the presence of a comparatively small number of false positives seems unlikely to cause too much distortion.

+
+
Code
# Taxonomic composition of read pairs, split by BLAST viral status and by
# whether they pass the alignment-score cutoff. Taxa that make up more than
# major_threshold of any (status, score) group are shown by name; the rest are
# pooled into "Other".
+major_threshold <- 0.04
+
+# Add missing viral taxa
+viral_taxa$name[viral_taxa$taxid == 211787] <- "Human papillomavirus type 92"
+viral_taxa$name[viral_taxa$taxid == 509154] <- "Porcine endogenous retrovirus C"
+viral_taxa$name[viral_taxa$taxid == 493803] <- "Merkel cell polyomavirus"
+viral_taxa$name[viral_taxa$taxid == 427343] <- "Human papillomavirus 107"
+viral_taxa$name[viral_taxa$taxid == 194958] <- "Porcine endogenous retrovirus A"
+viral_taxa$name[viral_taxa$taxid == 340907] <- "Papiine alphaherpesvirus 2"
+viral_taxa$name[viral_taxa$taxid == 194959] <- "Porcine endogenous retrovirus B"
+
+
+# Prepare data: share (p) of each best-hit taxon within its (status, score) group
+fp <- mrg_blast %>% 
+  group_by(viral_status_out, highscore, taxid_best) %>% count %>% 
+  group_by(viral_status_out, highscore) %>% mutate(p=n/sum(n)) %>% 
+  rename(taxid = taxid_best) %>%
+  left_join(viral_taxa, by="taxid") %>%
+  arrange(desc(p))
+fp_major_tab <- fp %>% filter(p > major_threshold) %>% arrange(desc(p))
+# Legend order: major taxa alphabetically, then "Other"
+fp_major_list <- fp_major_tab %>% pull(name) %>% sort %>% unique %>% c(., "Other")
+fp_major <- fp %>% mutate(major = p > major_threshold) %>% 
+  mutate(name_display = ifelse(major, name, "Other")) %>%
+  group_by(viral_status_out, highscore, name_display) %>% 
+  summarize(n=sum(n), p=sum(p), .groups = "drop")  %>%
+  mutate(name_display = factor(name_display, levels = fp_major_list),
+         score_display = ifelse(highscore, "S >= 20", "S < 20"),
+         status_display = ifelse(viral_status_out, "True positive", "False positive"))
+
+# Plot
+# The x axis carries the score group (the true/false-positive split is the
+# facet), and p is a fraction, so tick labels are scaled to match the "%" title.
+g_fp <- ggplot(fp_major, aes(x=score_display, y=p, fill=name_display)) +
+  geom_col(position="stack") +
+  scale_x_discrete(name = "Alignment score (S)") +
+  scale_y_continuous(name = "% reads", limits = c(0,1.01), 
+                     breaks = seq(0,1,0.2), expand = c(0,0),
+                     labels = function(y) y*100) +
+  scale_fill_manual(values = palette_viral, name = "Viral\ntaxon") +
+  facet_grid(.~status_display) +
+  guides(fill=guide_legend(ncol=3)) +
+  theme_kit
+g_fp
+
+
+

+
+
+
+
+
+
Code
# Configure
+# For reads whose best viral hit is ref_taxid_ptt, show which BLAST subject
+# taxa they map to, split by viral status and score group. Subject taxa are
+# shown only when they account for more than p_threshold of a group's reads.
+ref_taxid_ptt <- 2233536
+p_threshold <- 0.3
+
+# Get taxon names
+tax_names_path <- file.path(data_dir_base, "taxid-names.tsv.gz")
+tax_names <- read_tsv(tax_names_path, show_col_types = FALSE)
+
+# Add missing names
+tax_names_new <- tribble(~staxid, ~name,
+                         3050295, "Cytomegalovirus humanbeta5",
+                         459231, "FLAG-tagging vector pFLAG97-TSR",
+                         3082113, "Rangifer tarandus platyrhynchus",
+                         3119969, "Bubalus kerabau",
+                         177155, "Streptopelia turtur",
+                         187126, "Nesoenas mayeri"
+                         )
+# Only add names the reference table lacks, so no staxid is added twice
+tax_names <- tax_names_new %>% filter(! staxid %in% tax_names$staxid) %>%
+  bind_rows(tax_names) %>% arrange(staxid)
+ref_name_ptt <- tax_names %>% filter(staxid == ref_taxid_ptt) %>% pull(name)
+
+# Get major matches
+# n_seq = number of read pairs in each (score, status) group for this taxon
+mrg_staxid <- mrg_blast %>% filter(taxid_best == ref_taxid_ptt) %>%
+    group_by(highscore, viral_status_out) %>% mutate(n_seq = n())
+fp_staxid <- mrg_staxid %>%
+  left_join(blast_results_paired, by="seq_id") %>%
+  mutate(staxid = as.integer(staxid)) %>%
+  left_join(tax_names, by="staxid") %>% rename(sname=name) %>%
+  left_join(tax_names %>% rename(taxid_best=staxid), by="taxid_best")
+# p = share of the group's read pairs that hit each subject taxon
+fp_staxid_count <- fp_staxid %>%
+  group_by(viral_status_out, highscore, 
+           taxid_best, name, staxid, sname, n_seq) %>%
+  count %>%
+  group_by(viral_status_out, highscore, taxid_best, name) %>%
+  mutate(p=n/n_seq)
+fp_staxid_count_major <- fp_staxid_count %>%
+  filter(n>1, p>p_threshold, !is.na(staxid)) %>%
+  mutate(score_display = ifelse(highscore, "S >= 20", "S < 20"),
+         status_display = ifelse(viral_status_out, 
+                                 "True positive", "False positive"))
+
+# Plot
+# p is a fraction; tick labels are scaled by 100 to match the "%" axis title
+g <- ggplot(fp_staxid_count_major, aes(x=p, y=sname)) + 
+  geom_col() + 
+  facet_grid(status_display~score_display, scales="free",
+             labeller = label_wrap_gen(multi_line = FALSE)) +
+  scale_x_continuous(name="% mapped reads", limits=c(0,1), breaks=seq(0,1,0.2),
+                     expand=c(0,0), labels = function(x) x*100) +
+  labs(title=paste0(ref_name_ptt, " (taxid ", ref_taxid_ptt, ")")) +
+  theme_base + theme(
+    axis.title.y = element_blank(),
+    plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+g
+
+
+

+
+
+
+
+

Human-infecting viruses: overall relative abundance

+
+
Code
# Get raw read counts
+read_counts_raw <- basic_stats_raw %>%
+  select(sample, date_alias, city, n_reads_raw = n_read_pairs)
+
+# Get HV read counts
+# A read pair counts as human-viral (HV) if any of the three flags is set
+mrg_hv <- mrg %>% mutate(hv_status = assigned_hv | hit_hv | highscore) %>%
+  rename(taxid_all = taxid, taxid = taxid_best)  # from here on, `taxid` is the best-hit taxid
+read_counts_hv <- mrg_hv %>% filter(hv_status) %>% group_by(sample) %>% 
+  count(name="n_reads_hv")
+read_counts <- read_counts_raw %>% left_join(read_counts_hv, by="sample") %>%
+  mutate(n_reads_hv = replace_na(n_reads_hv, 0))  # samples with no HV reads get 0 rather than NA
+
+# Aggregate
+# Per-city totals first, then a grand total built from the per-city rows
+read_counts_city <- read_counts %>% group_by(city) %>%
+  summarize(n_reads_raw = sum(n_reads_raw),
+            n_reads_hv = sum(n_reads_hv), .groups="drop") %>%
+  mutate(sample= "All samples", date_alias = "All dates")
+read_counts_total <- read_counts_city %>% group_by(sample, date_alias) %>%
+  summarize(n_reads_raw = sum(n_reads_raw),
+            n_reads_hv = sum(n_reads_hv), .groups="drop") %>%
+  mutate(city = "All cities")
+read_counts_agg <- read_counts_city %>% arrange(city) %>%
+  bind_rows(read_counts_total) %>%
+  mutate(p_reads_hv = n_reads_hv/n_reads_raw,  # relative abundance = HV read pairs / raw read pairs
+         city = fct_inorder(city))  # keep sorted cities first and "All cities" last
+
+
+

Applying a disjunctive cutoff at S=20 identifies 23,191 read pairs as human-viral. This gives an overall relative HV abundance of \(1.73 \times 10^{-5}\).

+
+
Code
# Visualize
+# One point per city (plus the "All cities" total) on a log10 y axis.
+# The x aesthetic is city, so the axis is titled accordingly.
+g_phv_agg <- ggplot(read_counts_agg, aes(x=city, color=city)) +
+  geom_point(aes(y=p_reads_hv)) +
+  scale_y_log10("Relative abundance of human virus reads") +
+  scale_x_discrete(name="City") +
+  scale_color_city() + theme_rotate
+g_phv_agg
+
+
+

+
+
+
+
+

This is lower than for DNA reads from other air-sampling datasets I’ve analyzed, but not drastically so:

+
+
Code
# Collate past RA values
+# Hand-entered relative abundances (ra) of human-virus reads, one row per
+# dataset and nucleic-acid type; panel_enriched flags panel-enriched libraries
+ra_past <- tribble(~dataset, ~ra, ~na_type, ~panel_enriched,
+                   "Brumfield", 5e-5, "RNA", FALSE,
+                   "Brumfield", 3.66e-7, "DNA", FALSE,
+                   "Spurbeck", 5.44e-6, "RNA", FALSE,
+                   "Yang", 3.62e-4, "RNA", FALSE,
+                   "Rothman (unenriched)", 1.87e-5, "RNA", FALSE,
+                   "Rothman (panel-enriched)", 3.3e-5, "RNA", TRUE,
+                   "Crits-Christoph (unenriched)", 1.37e-5, "RNA", FALSE,
+                   "Crits-Christoph (panel-enriched)", 1.26e-2, "RNA", TRUE,
+                   "Prussin (non-control)", 1.63e-5, "RNA", FALSE,
+                   "Prussin (non-control)", 4.16e-5, "DNA", FALSE,
+                   "Rosario (non-control)", 1.21e-5, "RNA", FALSE,
+                   "Rosario (non-control)", 1.50e-4, "DNA", FALSE
+)
+
+# Collate new RA values
+# NOTE(review): hard-coded rather than read from read_counts_agg; keep in sync with the value quoted in the text
+ra_new <- tribble(~dataset, ~ra, ~na_type, ~panel_enriched,
+                  "Leung", 1.73e-5, "DNA", FALSE)
+
+
+# Plot
+scale_color_na <- purrr::partial(scale_color_brewer, palette="Set1",
+                                 name="Nucleic acid type")
+ra_comp <- bind_rows(ra_past, ra_new) %>% mutate(dataset = fct_inorder(dataset))  # factor levels follow table order
+g_ra_comp <- ggplot(ra_comp, aes(y=dataset, x=ra, color=na_type)) +
+  geom_point() +
+  scale_color_na() +
+  scale_x_log10(name="Relative abundance of human virus reads") +
+  theme_base + theme(axis.title.y = element_blank())
+g_ra_comp
+
+
+

+
+
+
+
+

Human-infecting viruses: taxonomy and composition

+

At the family level, most samples across all cities are dominated by Papillomaviridae, Herpesviridae, Anelloviridae, Polyomaviridae, and to a lesser extent Poxviridae:

+
+
Code
# Get viral taxon names for putative HV reads
+# Names to write into viral_taxa, keyed by taxid; taxids that are absent from
+# viral_taxa are simply ignored.
+viral_taxa <- local({
+  name_patches <- tribble(~taxid, ~name,
+                          249588, "Mamastrovirus",
+                          194960, "Kobuvirus",
+                          688449, "Salivirus",
+                          585893, "Picobirnaviridae",
+                          333922, "Betapapillomavirus",
+                          334207, "Betapapillomavirus 3",
+                          369960, "Porcine type-C oncovirus",
+                          333924, "Betapapillomavirus 2",
+                          687329, "Anelloviridae",
+                          325455, "Gammapapillomavirus",
+                          333750, "Alphapapillomavirus",
+                          694002, "Betacoronavirus",
+                          334202, "Mupapillomavirus",
+                          197911, "Alphainfluenzavirus",
+                          186938, "Respirovirus",
+                          333926, "Gammapapillomavirus 1",
+                          337051, "Betapapillomavirus 1",
+                          337043, "Alphapapillomavirus 4",
+                          694003, "Betacoronavirus 1",
+                          334204, "Mupapillomavirus 2",
+                          334208, "Betapapillomavirus 4",
+                          333928, "Gammapapillomavirus 2",
+                          337039, "Alphapapillomavirus 2",
+                          333929, "Gammapapillomavirus 3",
+                          337042, "Alphapapillomavirus 7",
+                          334203, "Mupapillomavirus 1",
+                          333757, "Alphapapillomavirus 8",
+                          337050, "Alphapapillomavirus 6",
+                          333767, "Alphapapillomavirus 3",
+                          333754, "Alphapapillomavirus 10",
+                          687363, "Torque teno virus 24",
+                          687342, "Torque teno virus 3",
+                          687359, "Torque teno virus 20",
+                          194441, "Primate T-lymphotropic virus 2",
+                          334209, "Betapapillomavirus 5")
+  # Row of name_patches matching each viral_taxa row (NA where there is none)
+  patch_row <- match(viral_taxa$taxid, name_patches$taxid)
+  patched <- !is.na(patch_row)
+  viral_taxa$name[patched] <- name_patches$name[patch_row[patched]]
+  viral_taxa
+})
+
+
+mrg_hv_named <- left_join(mrg_hv, viral_taxa, by="taxid")
+
+# Discover viral species & genera for HV reads
+raise_rank <- function(read_db, taxid_db, out_rank = "species", verbose = FALSE){
+  # Re-assign each read to its ancestor at taxonomic rank out_rank.
+  #
+  # read_db:  one row per read; must contain seq_id, taxid, parent_taxid,
+  #           rank and name (the last three are dropped and re-joined).
+  # taxid_db: taxonomy table joined on taxid; must supply parent_taxid, rank
+  #           and name.
+  # out_rank: a single target rank taken from the `ranks` vector below.
+  # verbose:  currently unused; kept so existing calls keep working.
+  #
+  # Returns read_db's rows with taxid/parent_taxid/rank/name describing the
+  # out_rank ancestor where one was found. Reads that start above out_rank, or
+  # whose lineage never reaches it, are returned with their original taxid.
+  ranks <- c("subspecies", "species", "subgenus", "genus", "subfamily", "family", "suborder", "order", "class", "subphylum", "phylum", "kingdom", "superkingdom")
+  # Fail loudly on an unknown rank instead of silently treating it as the
+  # first entry of `ranks`
+  rank_match <- match(out_rank, ranks)
+  if (length(out_rank) != 1 || is.na(rank_match)) {
+    stop("out_rank must be one of: ", paste(ranks, collapse = ", "), call. = FALSE)
+  }
+  high_ranks <- ranks[rank_match:length(ranks)]
+  # Reads still below the target rank that can be moved one step up. A node
+  # that is its own parent can never progress, so it is excluded here to
+  # guarantee that the loop below terminates.
+  needs_promotion <- function(tab) {
+    tab %>% filter(rank != out_rank, !rank %in% high_ranks, !is.na(taxid),
+                   is.na(parent_taxid) | parent_taxid != taxid)
+  }
+  # Merge read DB and taxid DB
+  reads <- read_db %>% select(-parent_taxid, -rank, -name) %>%
+    left_join(taxid_db, by="taxid")
+  # Extract sequences that are already at appropriate rank
+  reads_rank <- filter(reads, rank == out_rank)
+  # Drop sequences at a higher rank and return unclassified sequences
+  reads_norank <- needs_promotion(reads)
+  while(nrow(reads_norank) > 0){ # As long as there are unclassified sequences...
+    # Promote read taxids and re-merge with taxid DB, then re-classify and filter
+    reads_remaining <- reads_norank %>% mutate(taxid = parent_taxid) %>%
+      select(-parent_taxid, -rank, -name) %>%
+      left_join(taxid_db, by="taxid")
+    reads_rank <- reads_remaining %>% filter(rank == out_rank) %>%
+      bind_rows(reads_rank)
+    reads_norank <- needs_promotion(reads_remaining)
+  }
+  # Finally, extract and append reads that were excluded during the process
+  reads_dropped <- reads %>% filter(!seq_id %in% reads_rank$seq_id)
+  reads_out <- reads_rank %>% bind_rows(reads_dropped) %>%
+    select(-parent_taxid, -rank, -name) %>%
+    left_join(taxid_db, by="taxid")
+  return(reads_out)
+}
+hv_reads_species <- raise_rank(mrg_hv_named, viral_taxa, "species")  # HV reads re-assigned to species where possible
+hv_reads_genus <- raise_rank(mrg_hv_named, viral_taxa, "genus")  # ... to genus
+hv_reads_family <- raise_rank(mrg_hv_named, viral_taxa, "family")  # ... to family
+
+
+
+
Code
# Family-level composition of HV reads per sample. A family is shown by name
# if it exceeds threshold_major_family of HV reads in at least one sample;
# all other families are pooled into "Other".
threshold_major_family <- 0.05
+
+# Count reads for each human-viral family
+# NOTE(review): raise_rank() also returns reads it could not lift to family rank, so some `name` values may not be families
+hv_family_counts <- hv_reads_family %>% 
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_hv = n_reads_hv/sum(n_reads_hv))  # share of the sample's HV reads
+
+# Identify high-ranking families and group others
+# (one row per family, taken from the sample where its share is highest)
+hv_family_major_tab <- hv_family_counts %>% group_by(name) %>% 
+  filter(p_reads_hv == max(p_reads_hv)) %>% filter(row_number() == 1) %>%
+  arrange(desc(p_reads_hv)) %>% filter(p_reads_hv > threshold_major_family)
+hv_family_counts_major <- hv_family_counts %>%
+  mutate(name_display = ifelse(name %in% hv_family_major_tab$name, name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_hv = sum(n_reads_hv), p_reads_hv = sum(p_reads_hv), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(hv_family_major_tab$name, "Other")))  # legend order: descending peak share, "Other" last
+hv_family_counts_display <- hv_family_counts_major %>%
+  rename(p_reads = p_reads_hv, classification = name_display)  # presumably the column names g_comp_base's aesthetics expect -- confirm
+
+# Plot
+g_hv_family <- g_comp_base + 
+  geom_col(data=hv_family_counts_display, position = "stack") +
+  scale_y_continuous(name="% HV Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral family") +
+  labs(title="Family composition of human-viral reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+g_hv_family
+
+
+

+
+
+
+
+

In investigating individual viral families, to avoid distortions from a few rare reads, I restricted myself to samples where that family made up at least 10% of human-viral reads.

+

As usual, Papillomaviridae reads are divided among many different viral species. In this case, Betapapillomavirus 1 and 2 are the most prevalent across samples, but many other alpha-, beta-, gamma-, and mupapillomaviruses are highly prevalent in at least some samples.

+
+
Code
# Species composition of Papillomaviridae reads, restricted to samples where
# the family makes up at least 10% of HV reads. A species is shown by name if
# it exceeds threshold_major_species of the family's reads in some sample.
threshold_major_species <- 0.4
+taxid_papilloma <- 151340  # NOTE(review): hard-coded, unlike the name lookups used for the other families; presumably the Papillomaviridae taxid -- confirm
+
+# Get set of Papillomaviridae reads
+papilloma_samples <- hv_family_counts %>% filter(taxid == taxid_papilloma) %>%
+  filter(p_reads_hv >= 0.1) %>%
+  pull(sample)
+papilloma_ids <- hv_reads_family %>% 
+  filter(taxid == taxid_papilloma, sample %in% papilloma_samples) %>%
+  pull(seq_id)
+
+# Count reads for each Papillomaviridae species
+papilloma_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% papilloma_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_papilloma = n_reads_hv/sum(n_reads_hv))  # share of the sample's Papillomaviridae reads
+
+# Identify high-ranking species and group others
+papilloma_species_major_tab <- papilloma_species_counts %>% group_by(name) %>% 
+  filter(p_reads_papilloma == max(p_reads_papilloma)) %>% 
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_papilloma)) %>% 
+  filter(p_reads_papilloma > threshold_major_species)
+papilloma_species_counts_major <- papilloma_species_counts %>%
+  mutate(name_display = ifelse(name %in% papilloma_species_major_tab$name, 
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_papilloma = sum(n_reads_hv),
+            p_reads_papilloma = sum(p_reads_papilloma), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(papilloma_species_major_tab$name, "Other")))
+papilloma_species_counts_display <- papilloma_species_counts_major %>%
+  rename(p_reads = p_reads_papilloma, classification = name_display)
+
+# Plot
+g_papilloma_species <- g_comp_base + 
+  geom_col(data=papilloma_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Papillomaviridae Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Papillomaviridae reads") +
+  guides(fill=guide_legend(ncol=3)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_papilloma_species
+
+
+

+
+
+
+
Code
# Get most prominent species for text
+papilloma_species_collate <- papilloma_species_counts %>% group_by(name, taxid) %>% 
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_papilloma), .groups="drop") %>% 
+  arrange(desc(n_reads_tot))
+
+
+

In terms of total reads across samples, herpesviruses are dominated by Epstein-Barr virus (Human gammaherpesvirus 4), HSV-1 (Human alphaherpesvirus 1), and human cytomegalovirus (Human betaherpesvirus 5). However, numerous other herpesviruses are also present.

+
+
Code
# Species composition of Herpesviridae reads, restricted to samples where the
# family makes up at least 10% of HV reads. A species is shown by name if it
# exceeds threshold_major_species of the family's reads in some sample.
threshold_major_species <- 0.4
+taxid_herpes <- viral_taxa %>% filter(name == "Herpesviridae") %>% pull(taxid)  # assumes exactly one row matches -- TODO confirm
+
+# Get set of herpesviridae reads
+herpes_samples <- hv_family_counts %>% filter(taxid == taxid_herpes) %>%
+  filter(p_reads_hv >= 0.1) %>%
+  pull(sample)
+herpes_ids <- hv_reads_family %>% 
+  filter(taxid == taxid_herpes, sample %in% herpes_samples) %>%
+  pull(seq_id)
+
+# Count reads for each herpesviridae species
+herpes_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% herpes_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_herpes = n_reads_hv/sum(n_reads_hv))  # share of the sample's Herpesviridae reads
+
+# Identify high-ranking species and group others
+herpes_species_major_tab <- herpes_species_counts %>% group_by(name) %>% 
+  filter(p_reads_herpes == max(p_reads_herpes)) %>% 
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_herpes)) %>% 
+  filter(p_reads_herpes > threshold_major_species)
+herpes_species_counts_major <- herpes_species_counts %>%
+  mutate(name_display = ifelse(name %in% herpes_species_major_tab$name, 
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_herpes = sum(n_reads_hv),
+            p_reads_herpes = sum(p_reads_herpes), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(herpes_species_major_tab$name, "Other")))
+herpes_species_counts_display <- herpes_species_counts_major %>%
+  rename(p_reads = p_reads_herpes, classification = name_display)
+
+# Plot
+g_herpes_species <- g_comp_base + 
+  geom_col(data=herpes_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% herpesviridae Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Herpesviridae reads") +
+  guides(fill=guide_legend(ncol=3)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_herpes_species
+
+
+

+
+
+
+
Code
# Get most prominent species for text
+herpes_species_collate <- herpes_species_counts %>% group_by(name, taxid) %>% 
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_herpes), .groups="drop") %>% 
+  arrange(desc(n_reads_tot))
+
+
+

In sharp contrast to the above, my pipeline classifies the great majority of anellovirus reads in all samples into a single species, torque teno virus. From what I can find online, there are many “torque teno viruses” within Anelloviridae – for example, Wikipedia says that the genus Alphatorquevirus contains >20 numbered torque teno viruses – so I’m not sure exactly which virus this refers to.

+
+
Code
# Species composition of Anelloviridae reads, restricted to samples where the
# family makes up at least 10% of HV reads. A species is shown by name if it
# exceeds threshold_major_species of the family's reads in some sample.
threshold_major_species <- 0.1
+taxid_anello <- viral_taxa %>% filter(name == "Anelloviridae") %>% pull(taxid)  # assumes exactly one row matches -- TODO confirm
+
+# Get set of anelloviridae reads
+anello_samples <- hv_family_counts %>% filter(taxid == taxid_anello) %>%
+  filter(p_reads_hv >= 0.1) %>%
+  pull(sample)
+anello_ids <- hv_reads_family %>% 
+  filter(taxid == taxid_anello, sample %in% anello_samples) %>%
+  pull(seq_id)
+
+# Count reads for each anelloviridae species
+anello_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% anello_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_anello = n_reads_hv/sum(n_reads_hv))  # share of the sample's Anelloviridae reads
+
+# Identify high-ranking species and group others
+anello_species_major_tab <- anello_species_counts %>% group_by(name) %>% 
+  filter(p_reads_anello == max(p_reads_anello)) %>% 
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_anello)) %>% 
+  filter(p_reads_anello > threshold_major_species)
+anello_species_counts_major <- anello_species_counts %>%
+  mutate(name_display = ifelse(name %in% anello_species_major_tab$name, 
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_anello = sum(n_reads_hv),
+            p_reads_anello = sum(p_reads_anello), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(anello_species_major_tab$name, "Other")))
+anello_species_counts_display <- anello_species_counts_major %>%
+  rename(p_reads = p_reads_anello, classification = name_display)
+
+# Plot
+g_anello_species <- g_comp_base + 
+  geom_col(data=anello_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Anelloviridae Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Anelloviridae reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_anello_species
+
+
+

+
+
+
+
Code
# Get most prominent species for text
+anello_species_collate <- anello_species_counts %>% group_by(name, taxid) %>% 
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_anello), .groups="drop") %>% 
+  arrange(desc(n_reads_tot))
+
+
+

Polyomaviruses are intermediate; most samples are dominated by a single species, Alphapolyomavirus quintihominis, but several other viruses in the family are also present.

+
+
Code
# Species composition of Polyomaviridae reads. A species is shown by name if
# it exceeds threshold_major_species of the family's reads in some sample.
threshold_major_species <- 0.1
+taxid_polyoma <- viral_taxa %>% filter(name == "Polyomaviridae") %>% pull(taxid)  # assumes exactly one row matches -- TODO confirm
+
+# Get set of polyomaviridae reads
+# (only from samples where the family makes up at least 10% of HV reads)
+polyoma_samples <- hv_family_counts %>% filter(taxid == taxid_polyoma) %>%
+  filter(p_reads_hv >= 0.1) %>%
+  pull(sample)
+polyoma_ids <- hv_reads_family %>% 
+  filter(taxid == taxid_polyoma, sample %in% polyoma_samples) %>%
+  pull(seq_id)
+
+# Count reads for each polyomaviridae species
+polyoma_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% polyoma_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_polyoma = n_reads_hv/sum(n_reads_hv))  # share of the sample's Polyomaviridae reads
+
+# Identify high-ranking species and group others
+polyoma_species_major_tab <- polyoma_species_counts %>% group_by(name) %>% 
+  filter(p_reads_polyoma == max(p_reads_polyoma)) %>% 
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_polyoma)) %>% 
+  filter(p_reads_polyoma > threshold_major_species)
+polyoma_species_counts_major <- polyoma_species_counts %>%
+  mutate(name_display = ifelse(name %in% polyoma_species_major_tab$name, 
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_polyoma = sum(n_reads_hv),
+            p_reads_polyoma = sum(p_reads_polyoma), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(polyoma_species_major_tab$name, "Other")))
+polyoma_species_counts_display <- polyoma_species_counts_major %>%
+  rename(p_reads = p_reads_polyoma, classification = name_display)
+
+# Plot
+g_polyoma_species <- g_comp_base + 
+  geom_col(data=polyoma_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Polyomaviridae Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Polyomaviridae reads") +
+  guides(fill=guide_legend(ncol=2)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_polyoma_species
+
+
+

+
+
+
+
Code
# Get most prominent species for text
+polyoma_species_collate <- polyoma_species_counts %>% group_by(name, taxid) %>% 
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_polyoma), .groups="drop") %>% 
+  arrange(desc(n_reads_tot))
+
+
+

Finally, poxvirus reads in most samples are dominated by molluscum contagiosum virus (which I expect to be real), followed by Orf virus (which I expect to be fake). These expectations are borne out by BLAST alignments (below).

+
+
Code
# Species composition of Poxviridae reads. A species is shown by name if it
# exceeds threshold_major_species of the family's reads in some sample.
threshold_major_species <- 0.1
+taxid_pox <- viral_taxa %>% filter(name == "Poxviridae") %>% pull(taxid)  # assumes exactly one row matches -- TODO confirm
+
+# Get set of poxviridae reads
+# (only from samples where the family makes up at least 10% of HV reads)
+pox_samples <- hv_family_counts %>% filter(taxid == taxid_pox) %>%
+  filter(p_reads_hv >= 0.1) %>%
+  pull(sample)
+pox_ids <- hv_reads_family %>% 
+  filter(taxid == taxid_pox, sample %in% pox_samples) %>%
+  pull(seq_id)
+
+# Count reads for each poxviridae species
+pox_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% pox_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_pox = n_reads_hv/sum(n_reads_hv))  # share of the sample's Poxviridae reads
+
+# Identify high-ranking species and group others
+pox_species_major_tab <- pox_species_counts %>% group_by(name) %>% 
+  filter(p_reads_pox == max(p_reads_pox)) %>% 
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_pox)) %>% 
+  filter(p_reads_pox > threshold_major_species)
+pox_species_counts_major <- pox_species_counts %>%
+  mutate(name_display = ifelse(name %in% pox_species_major_tab$name, 
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_pox = sum(n_reads_hv),
+            p_reads_pox = sum(p_reads_pox), 
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display, 
+                               levels = c(pox_species_major_tab$name, "Other")))
+pox_species_counts_display <- pox_species_counts_major %>%
+  rename(p_reads = p_reads_pox, classification = name_display)
+
+# Plot
+g_pox_species <- g_comp_base + 
+  geom_col(data=pox_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Poxviridae Reads", limits=c(0,1.01), 
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Poxviridae reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_pox_species
+
+
+

+
+
+
+
Code
# Get most prominent species for text
+pox_species_collate <- pox_species_counts %>% group_by(name, taxid) %>% 
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_pox), .groups="drop") %>% 
+  arrange(desc(n_reads_tot))
+
+
+
+
Code
# Configure
+ref_taxids_hv <- c(10279, 10258)
+ref_names_hv <- sapply(ref_taxids_hv, function(x) viral_taxa %>% filter(taxid == x) %>% pull(name) %>% first)
+p_threshold <- 0.1
+
+# Get taxon names
+tax_names_path <- file.path(data_dir_base, "taxid-names.tsv.gz")
+tax_names <- read_tsv(tax_names_path, show_col_types = FALSE)
+
+# Add missing names
+tax_names_new <- tribble(~staxid, ~name,
+                         3050295, "Cytomegalovirus humanbeta5",
+                         459231, "FLAG-tagging vector pFLAG97-TSR",
+                         257877, "Macaca thibetana thibetana",
+                         256321, "Lentiviral transfer vector pHsCXW",
+                         419242, "Shuttle vector pLvCmvMYOCDHA",
+                         419243, "Shuttle vector pLvCmvLacZ",
+                         421868, "Cloning vector pLvCmvLacZ.Gfp",
+                         421869, "Cloning vector pLvCmvMyocardin.Gfp",
+                         426303, "Lentiviral vector pNL-GFP-RRE(SA)",
+                         436015, "Lentiviral transfer vector pFTMGW",
+                         454257, "Shuttle vector pLvCmvMYOCD2aHA",
+                         476184, "Shuttle vector pLV.mMyoD::ERT2.eGFP",
+                         476185, "Shuttle vector pLV.hMyoD.eGFP",
+                         591936, "Piliocolobus tephrosceles",
+                         627481, "Lentiviral transfer vector pFTM3GW",
+                         680261, "Self-inactivating lentivirus vector pLV.C-EF1a.cyt-bGal.dCpG",
+                         2952778, "Expression vector pLV[Exp]-EGFP:T2A:Puro-EF1A",
+                         3022699, "Vector PAS_122122",
+                         3025913, "Vector pSIN-WP-mPGK-GDNF",
+                         3105863, "Vector pLKO.1-ZsGreen1",
+                         3105864, "Vector pLKO.1-ZsGreen1 mouse Wfs1 shRNA",
+                         3108001, "Cloning vector pLVSIN-CMV_Neo_v4.0",
+                         3109234, "Vector pTwist+Kan+High",
+                         3117662, "Cloning vector pLV[Exp]-CBA>P301L",
+                         3117663, "Cloning vector pLV[Exp]-CBA>P301L:T2A:mRuby3",
+                         3117664, "Cloning vector pLV[Exp]-CBA>hMAPT[NM_005910.6](ns):T2A:mRuby3",
+                         3117665, "Cloning vector pLV[Exp]-CBA>mRuby3",
+                         3117666, "Cloning vector pLV[Exp]-CBA>mRuby3/NFAT3 fusion protein",
+                         3117667, "Cloning vector pLV[Exp]-Neo-mPGK>{EGFP-hSEPT6}",
+                         438045, "Xenotropic MuLV-related virus",
+                         447135, "Myodes glareolus",
+                         590745, "Mus musculus mobilized endogenous polytropic provirus",
+                         181858, "Murine AIDS virus-related provirus",
+                         356663, "Xenotropic MuLV-related virus VP35",
+                         356664, "Xenotropic MuLV-related virus VP42",
+                         373193, "Xenotropic MuLV-related virus VP62",
+                         286419, "Canis lupus dingo",
+                         415978, "Sus scrofa scrofa",
+                         494514, "Vulpes lagopus",
+                         3082113, "Rangifer tarandus platyrhynchus",
+                         3119969, "Bubalus kerabau")
+tax_names <- bind_rows(tax_names, tax_names_new)
+
+# Get matches
+# Keep species-level reads assigned to the reference HV taxa, record the number
+# of such rows per taxon (n_seq; presumably one row per read -- confirm), then
+# attach the paired BLAST results by seq_id and the subject-taxon names
+# (tax_names$name, exposed here as sname) by staxid.
+hv_blast_staxids <- hv_reads_species %>% filter(taxid %in% ref_taxids_hv) %>%
+  group_by(taxid) %>% mutate(n_seq = n()) %>%
+  left_join(blast_results_paired, by="seq_id") %>%
+  mutate(staxid = as.integer(staxid)) %>%
+  left_join(tax_names %>% rename(sname=name), by="staxid")
+
+# Count matches
+# n = joined rows per (query taxon, subject taxon) pair; p = n / n_seq, i.e. a
+# proportion rather than a percentage.
+hv_blast_counts <- hv_blast_staxids %>%
+  group_by(taxid, name, staxid, sname, n_seq) %>%
+  count %>% mutate(p=n/n_seq)
+
+# Subset to major matches
+# Drop singletons, pairs at or below p_threshold and rows with no BLAST subject,
+# then keep at most the 25 highest-p subjects per query taxon. The result stays
+# grouped by taxid, as before.
+hv_blast_counts_major <- hv_blast_counts %>%
+  filter(n>1, p>p_threshold, !is.na(staxid)) %>%
+  arrange(desc(p)) %>% group_by(taxid) %>%
+  filter(row_number() <= 25) %>%
+  mutate(name_display = ifelse(name == ref_names_hv[1], "MCV", name))
+
+# Plot
+# p is a proportion in [0, 1] but the axis is titled "% mapped reads", so the
+# tick labels are rendered as percentages (0, 20, ..., 100) to match the title.
+# Limits and breaks stay on the proportion scale of the data.
+g_hv_blast <- ggplot(hv_blast_counts_major, mapping=aes(x=p, y=sname)) +
+  geom_col() +
+  facet_grid(name_display~., scales="free_y", space="free_y") +
+  scale_x_continuous(name="% mapped reads", limits=c(0,1),
+                     breaks=seq(0,1,0.2),
+                     labels=function(x) round(100 * x),
+                     expand=c(0,0)) +
+  theme_base + theme(axis.title.y = element_blank())
+g_hv_blast
+
+
+

+
+
+
+
+

Finally, here again are the overall relative abundances of the specific viral genera I picked out manually in my last entry:

+
+
Code
# Define reference genera
+# Genera of interest, split by genome type and joined to viral_taxa by name.
+path_genera_rna <- c(
+  "Mamastrovirus", "Enterovirus", "Salivirus", "Kobuvirus", "Norovirus",
+  "Sapovirus", "Rotavirus", "Alphacoronavirus", "Betacoronavirus",
+  "Alphainfluenzavirus", "Betainfluenzavirus", "Lentivirus"
+)
+path_genera_dna <- c(
+  "Mastadenovirus", "Alphapolyomavirus", "Betapolyomavirus",
+  "Alphapapillomavirus", "Betapapillomavirus", "Gammapapillomavirus",
+  "Orthopoxvirus", "Simplexvirus", "Lymphocryptovirus", "Cytomegalovirus",
+  "Dependoparvovirus"
+)
+path_genera <- tibble(
+  name = c(path_genera_rna, path_genera_dna),
+  genome_type = rep(
+    c("RNA genome", "DNA genome"),
+    times = c(length(path_genera_rna), length(path_genera_dna))
+  )
+) %>%
+  left_join(viral_taxa, by = "name")
+
+# Count in each sample
+# Rows per sample/genus, restricted to the reference genera and expressed as a
+# fraction of n_reads_raw.
+n_path_genera <- hv_reads_genus %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_viral") %>%
+  inner_join(path_genera, by = c("name", "taxid")) %>%
+  left_join(read_counts_raw, by = c("sample", "date_alias", "city")) %>%
+  mutate(p_reads_viral = n_reads_viral / n_reads_raw)
+
+# Pivot out and back to add zero lines
+# Widening with values_fill = 0 and lengthening again yields an explicit zero
+# row for every sample/genus pair that had no reads.
+n_path_genera_out <- n_path_genera %>%
+  ungroup() %>%
+  select(sample, name, n_reads_viral) %>%
+  pivot_wider(
+    names_from = "name",
+    values_from = "n_reads_viral",
+    values_fill = 0
+  ) %>%
+  pivot_longer(
+    cols = !sample,
+    names_to = "name",
+    values_to = "n_reads_viral"
+  ) %>%
+  left_join(read_counts_raw, by = "sample") %>%
+  left_join(path_genera, by = "name") %>%
+  mutate(p_reads_viral = n_reads_viral / n_reads_raw)
+
+## Aggregate across dates
+# Sum raw and viral read counts within each genus/city, then recompute the
+# relative abundance from the pooled totals.
+n_path_genera_stype <- n_path_genera_out %>%
+  group_by(name, taxid, genome_type, city) %>%
+  summarise(
+    n_reads_raw = sum(n_reads_raw),
+    n_reads_viral = sum(n_reads_viral),
+    .groups = "drop"
+  ) %>%
+  mutate(
+    sample = "All samples",
+    date = "All dates",
+    p_reads_viral = n_reads_viral / n_reads_raw,
+    na_type = "DNA"
+  )
+
+# Plot
+# One point per genus and city on a log10 abundance axis, faceted by genome type.
+g_path_genera <- ggplot(
+  n_path_genera_stype,
+  mapping = aes(x = p_reads_viral, y = name, colour = city)
+) +
+  geom_point() +
+  scale_x_log10(name = "Relative abundance") +
+  scale_color_city() +
+  facet_grid(rows = vars(genome_type), scales = "free_y") +
+  theme_base +
+  theme(axis.title.y = element_blank())
+g_path_genera
+
+
Warning: Transformation introduced infinite values in continuous x-axis
+
+
+
+

+
+
+
+
+

Conclusion

+

This is the third, largest, and final of this tranche of air-sampling datasets that I’ve run through this pipeline. Many of the high-level findings were similar to Prussin and Rosario, including high relative abundance of human reads, low total viral reads, an absence of enteric viruses, and high abundance of papillomaviruses among human-infecting viruses.

+

In the future, I’ll do a more in-depth comparative analysis across different datasets to compare the abundance of different viruses. For now, though, there are some major updates to the pipeline I want to make before I do any more public analyses.

+ + + + +
+
+ + + + \ No newline at end of file diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/fp-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/fp-1.png new file mode 100644 index 0000000..0c7fd92 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/fp-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/herviviricetes-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/herviviricetes-1.png new file mode 100644 index 0000000..c63ec1d Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/herviviricetes-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-blast-hits-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-blast-hits-1.png new file mode 100644 index 0000000..6593293 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-blast-hits-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-family-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-family-1.png new file mode 100644 index 0000000..a27aec8 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-family-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-anello-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-anello-1.png new file mode 100644 index 0000000..c46fb23 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-anello-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-herpes-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-herpes-1.png new file mode 100644 index 0000000..202bc03 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-herpes-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-papilloma-1.png 
b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-papilloma-1.png new file mode 100644 index 0000000..1f12568 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-papilloma-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-polyoma-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-polyoma-1.png new file mode 100644 index 0000000..57d64fa Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-polyoma-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-pox-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-pox-1.png new file mode 100644 index 0000000..0e5d38f Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/hv-species-pox-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/papovaviricetes-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/papovaviricetes-1.png new file mode 100644 index 0000000..f2aa20b Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/papovaviricetes-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-basic-stats-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-basic-stats-1.png new file mode 100644 index 0000000..94241f9 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-basic-stats-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-blast-results-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-blast-results-1.png new file mode 100644 index 0000000..7bdfab3 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-blast-results-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-1.png new file mode 100644 
index 0000000..67a742e Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-2.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-2.png new file mode 100644 index 0000000..efed440 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-composition-all-2.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-f1-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-f1-1.png new file mode 100644 index 0000000..1239390 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-f1-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-ra-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-ra-1.png new file mode 100644 index 0000000..1d89b5e Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-ra-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-scores-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-scores-1.png new file mode 100644 index 0000000..66b59cd Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-hv-scores-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-1.png new file mode 100644 index 0000000..078f830 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-2.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-2.png new file mode 100644 index 0000000..e35fdd6 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-2.png differ diff --git 
a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-3.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-3.png new file mode 100644 index 0000000..3793311 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-quality-3.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-1.png new file mode 100644 index 0000000..4690fc6 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-2.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-2.png new file mode 100644 index 0000000..345edea Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-2.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-3.png b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-3.png new file mode 100644 index 0000000..f852eef Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/plot-raw-quality-3.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-1.png new file mode 100644 index 0000000..090e787 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-2.png b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-2.png new file mode 100644 index 0000000..ad99079 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-dedup-2.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-1.png 
b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-1.png new file mode 100644 index 0000000..f32f1d0 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-2.png b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-2.png new file mode 100644 index 0000000..807bd88 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/preproc-figures-2.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/ptt-blast-hits-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/ptt-blast-hits-1.png new file mode 100644 index 0000000..b0aaebe Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/ptt-blast-hits-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/ra-genera-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/ra-genera-1.png new file mode 100644 index 0000000..6b2b506 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/ra-genera-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/ra-hv-past-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/ra-hv-past-1.png new file mode 100644 index 0000000..544c22d Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/ra-hv-past-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/revtraviricetes-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/revtraviricetes-1.png new file mode 100644 index 0000000..177f687 Binary files /dev/null and b/docs/notebooks/2024-04-12_leung_files/figure-html/revtraviricetes-1.png differ diff --git a/docs/notebooks/2024-04-12_leung_files/figure-html/viral-class-composition-1.png b/docs/notebooks/2024-04-12_leung_files/figure-html/viral-class-composition-1.png new file mode 100644 index 0000000..5612736 Binary files /dev/null and 
b/docs/notebooks/2024-04-12_leung_files/figure-html/viral-class-composition-1.png differ diff --git a/docs/notebooks/2024-04-12_rosario.html b/docs/notebooks/2024-04-12_rosario.html index c6719dc..b395e19 100644 --- a/docs/notebooks/2024-04-12_rosario.html +++ b/docs/notebooks/2024-04-12_rosario.html @@ -176,7 +176,7 @@ -

Continuing our look at air sampling datasets, we turn to Rosario et al. (2018), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. As in Prussin, samples were eluted from filters (in this case MERV-8, so less stringent than Prussin’s MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads.

+

Continuing our look at air sampling datasets, we turn to Rosario et al. (2018), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. As in Prussin, samples were eluted from filters (in this case MERV-8, so less stringent than Prussin’s MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads.

The raw data

The Rosario dataset comprised sequencing data from 12 individual dormitory rooms at the sampling site, as well as a pooled sample of eight additional rooms and a negative control.

@@ -2018,7 +2018,7 @@ tnl <- theme(legend.position = "none") ``` -Continuing our look at air sampling datasets, we turn to Rosario et al. (2018), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. As in [Prussin](https://data.securebio.org/wills-public-notebook/notebooks/2024-04-12_prussin.html), samples were eluted from filters (in this case MERV-8, so less stringent than Prussin's MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads. +Continuing our look at air sampling datasets, we turn to [Rosario et al. (2018)](https://pubs.acs.org/doi/10.1021/acs.est.7b04203), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. As in [Prussin](https://data.securebio.org/wills-public-notebook/notebooks/2024-04-12_prussin.html), samples were eluted from filters (in this case MERV-8, so less stringent than Prussin's MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads. # The raw data diff --git a/notebooks/2024-04-12_leung.qmd b/notebooks/2024-04-12_leung.qmd new file mode 100644 index 0000000..e55ffc4 --- /dev/null +++ b/notebooks/2024-04-12_leung.qmd @@ -0,0 +1,1673 @@ +--- +title: "Workflow analysis of Leung et al. (2021)" +subtitle: "Air sampling from public transit systems in six cities." 
+author: "Will Bradshaw" +date: 2024-04-12 +format: + html: + code-fold: true + code-tools: true + code-link: true + df-print: paged +editor: visual +title-block-banner: black +draft: true +--- + +```{r} +#| label: load-packages +#| include: false +library(tidyverse) +library(cowplot) +library(patchwork) +library(fastqcr) +library(RColorBrewer) +source("../scripts/aux_plot-theme.R") +theme_base <- theme_base + theme(aspect.ratio = NULL) +theme_rotate <- theme_base + theme( + axis.text.x = element_text(hjust = 1, angle = 45), +) +theme_kit <- theme_rotate + theme( + axis.title.x = element_blank(), +) +tnl <- theme(legend.position = "none") +``` + +The last in our current run of air sampling datasets is [Leung et al.](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-021-01044-7) (2021), a study of active air samples collected in public transit systems from six cities (Denver, Hong Kong, London, NYC, Oslo, Stockholm) from June to September 2017. + +Samples from Denver originated from their rail and bus system; all other samples originated from metro systems. Collection took place during working days and working hours. Air samples were collected with the SASS 3100 Dry Air Samplers (filtration) for 30 min at a flowrate of 300 L/min using electret microfibrous filters. Filters were stationed at 1.5m above floor level, facing downward (to avoid direct deposition). + +This was a DNA-sequencing study, focused on the bacterial microbiome and resistome. Sample processing followed an idiosyncratic protocol, where samples were pelleted and the pellet and supernatant were processed separately before being recombined for nucleic acid extraction and sequencing; I don't have a great understanding of how this is expected to affect the viral fraction. Samples were sequenced with Illumina HiSeqX 2x150bp. 
+ +# The raw data + +In total, the Leung dataset comprised 293 samples: + +```{r} +#| warning: false +#| label: import-qc-data + +# Importing the data is a bit more complicated this time as the samples are split across three pipeline runs +data_dir_base <- "../data/2024-04-12_leung" +data_dirs <- paste(data_dir_base, c(1,2,3), sep="/") + +# Define geo relationships for filling in +geo <- tribble(~region, ~country, ~city, + "Asia", "Hong Kong", "Hong Kong", + "Europe", "Norway", "Oslo", + "Europe", "Sweden", "Stockholm", + "Europe", "United Kingdom", "London", + "North America", "USA", "New York City", + "North America", "USA", "Denver") + +# Data input paths +libraries_paths <- file.path(data_dirs, "sample-metadata.csv") +basic_stats_paths <- file.path(data_dirs, "qc_basic_stats.tsv.gz") +adapter_stats_paths <- file.path(data_dirs, "qc_adapter_stats.tsv.gz") +quality_base_stats_paths <- file.path(data_dirs, "qc_quality_base_stats.tsv.gz") +quality_seq_stats_paths <- file.path(data_dirs, "qc_quality_sequence_stats.tsv.gz") + +# Import libraries and extract metadata from sample names +libraries_raw <- lapply(libraries_paths, read_csv, show_col_types = FALSE) %>% + bind_rows +libraries <- libraries_raw %>% + # Fix missing entries + mutate(city = ifelse(is.na(city), sub(", .*", "", location), city)) %>% + left_join(geo, by="city", suffix = c("", "_new")) %>% + mutate(region = ifelse(region == "uncalculated", region_new, region), + country = ifelse(country == "uncalculated", country_new, country)) %>% + select(-country_new, -region_new) %>% + # Add sample aliases + arrange(city, date, location) %>% + group_by(city, date) %>% + mutate(sample_count = row_number(), + date_alias = paste(as.character(date), sample_count, sep="_"), + sample_alias = paste(city, date_alias, sep="_")) + +count_city <- libraries %>% group_by(region, country, city) %>% + count(name="n_samples") +count_city +``` + +```{r} +#| label: process-qc-data + +# Import QC data +stages <- c("raw_concat", 
"cleaned", "dedup", "ribo_initial", "ribo_secondary") +import_basic <- function(paths){ + lapply(paths, read_tsv, show_col_types = FALSE) %>% bind_rows %>% + inner_join(libraries, by="sample") %>% arrange(sample) %>% + mutate(stage = factor(stage, levels = stages), + sample = fct_inorder(sample)) +} +import_basic_paired <- function(paths){ + import_basic(paths) %>% arrange(read_pair) %>% + mutate(read_pair = fct_inorder(as.character(read_pair))) +} +basic_stats <- import_basic(basic_stats_paths) +adapter_stats <- import_basic_paired(adapter_stats_paths) +quality_base_stats <- import_basic_paired(quality_base_stats_paths) +quality_seq_stats <- import_basic_paired(quality_seq_stats_paths) + +# Filter to raw data +basic_stats_raw <- basic_stats %>% filter(stage == "raw_concat") +adapter_stats_raw <- adapter_stats %>% filter(stage == "raw_concat") +quality_base_stats_raw <- quality_base_stats %>% filter(stage == "raw_concat") +quality_seq_stats_raw <- quality_seq_stats %>% filter(stage == "raw_concat") + +# Get key values for readout +raw_read_counts <- basic_stats_raw %>% ungroup %>% + summarize(rmin = min(n_read_pairs), rmax=max(n_read_pairs), + rmean=mean(n_read_pairs), + rtot = sum(n_read_pairs), + btot = sum(n_bases_approx), + dmin = min(percent_duplicates), dmax=max(percent_duplicates), + dmean=mean(percent_duplicates), .groups = "drop") +``` + +These 293 samples yielded 0.39M-7.86M (mean 4.57M) reads per sample, for a total of 1.34B read pairs (402 gigabases of sequence). Read qualities were high at the 5' end but dropped off significantly in some samples, in definite need of cleaning. Adapter levels were high. With the exception of a couple of early samples, inferred duplication levels were low (mean 9.4%). 
+ +```{r} +#| fig-width: 9 +#| warning: false +#| label: plot-basic-stats + +# Prepare data +basic_stats_raw_metrics <- basic_stats_raw %>% + select(sample, city, date, + `# Read pairs` = n_read_pairs, + `Total base pairs\n(approx)` = n_bases_approx, + `% Duplicates\n(FASTQC)` = percent_duplicates) %>% + pivot_longer(-(sample:date), names_to = "metric", values_to = "value") %>% + mutate(metric = fct_inorder(metric)) + +# Set up plot templates +scale_fill_city <- purrr::partial(scale_fill_brewer, palette="Set1", + name="City") +scale_x_cdate <- purrr::partial(scale_x_date, name="Collection Date", + date_breaks = "1 month", date_labels = "%Y-%m-%d") +g_basic <- ggplot(basic_stats_raw_metrics, + aes(x=date, y=value, fill=city, group=interaction(city,sample))) + + geom_col(position = "dodge") + + scale_x_cdate() + + scale_y_continuous(expand=c(0,0)) + + expand_limits(y=c(0,100)) + + scale_fill_city() + + facet_grid(metric~., scales = "free", space="free_x", switch="y") + + theme_rotate + theme( + axis.title.y = element_blank(), + strip.text.y = element_text(face="plain") + ) +g_basic +``` + +```{r} +#| label: plot-raw-quality + +# Set up plotting templates +scale_color_city <- purrr::partial(scale_color_brewer, palette="Set1", + name="City") +g_qual_raw <- ggplot(mapping=aes(color=city, linetype=read_pair, + group=interaction(sample,read_pair))) + + scale_color_city() + scale_linetype_discrete(name = "Read Pair") + + guides(color=guide_legend(nrow=2,byrow=TRUE), + linetype = guide_legend(nrow=2,byrow=TRUE)) + + theme_base + +# Visualize adapters +g_adapters_raw <- g_qual_raw + + geom_line(aes(x=position, y=pc_adapters), data=adapter_stats_raw) + + scale_y_continuous(name="% Adapters", limits=c(0,NA), + breaks = seq(0,100,10), expand=c(0,0)) + + scale_x_continuous(name="Position", limits=c(0,NA), + breaks=seq(0,140,20), expand=c(0,0)) + + facet_grid(.~adapter) +g_adapters_raw + +# Visualize quality +g_quality_base_raw <- g_qual_raw + + geom_hline(yintercept=25, 
linetype="dashed", color="red") + + geom_hline(yintercept=30, linetype="dashed", color="red") + + geom_line(aes(x=position, y=mean_phred_score), data=quality_base_stats_raw) + + scale_y_continuous(name="Mean Phred score", expand=c(0,0), limits=c(10,45)) + + scale_x_continuous(name="Position", limits=c(0,NA), + breaks=seq(0,140,20), expand=c(0,0)) +g_quality_base_raw + +g_quality_seq_raw <- g_qual_raw + + geom_vline(xintercept=25, linetype="dashed", color="red") + + geom_vline(xintercept=30, linetype="dashed", color="red") + + geom_line(aes(x=mean_phred_score, y=n_sequences), data=quality_seq_stats_raw) + + scale_x_continuous(name="Mean Phred score", expand=c(0,0)) + + scale_y_continuous(name="# Sequences", expand=c(0,0)) +g_quality_seq_raw +``` + +# Preprocessing + +The average fraction of reads lost at each stage in the preprocessing pipeline is shown in the following table. Read loss during cleaning was highly variable but averaged 11%, with a further \~7% lost during deduplication and \~0.3% during ribodepletion. 
+ +```{r} +#| label: preproc-table +n_reads_rel <- basic_stats %>% + select(sample, date, city, stage, + percent_duplicates, n_read_pairs) %>% + group_by(sample) %>% arrange(sample, stage) %>% + mutate(p_reads_retained = replace_na(n_read_pairs / lag(n_read_pairs), 0), + p_reads_lost = 1 - p_reads_retained, + p_reads_retained_abs = n_read_pairs / n_read_pairs[1], + p_reads_lost_abs = 1-p_reads_retained_abs, + p_reads_lost_abs_marginal = replace_na(p_reads_lost_abs - lag(p_reads_lost_abs), 0)) +n_reads_rel_display <- n_reads_rel %>% + rename(Stage=stage, City=city) %>% + group_by(Stage) %>% + summarize(`% Total Reads Lost (Cumulative)` = paste0(round(min(p_reads_lost_abs*100),1), "-", round(max(p_reads_lost_abs*100),1), " (mean ", round(mean(p_reads_lost_abs*100),1), ")"), + `% Total Reads Lost (Marginal)` = paste0(round(min(p_reads_lost_abs_marginal*100),1), "-", round(max(p_reads_lost_abs_marginal*100),1), " (mean ", round(mean(p_reads_lost_abs_marginal*100),1), ")"), .groups="drop") %>% + filter(Stage != "raw_concat") %>% + mutate(Stage = Stage %>% as.numeric %>% factor(labels=c("Trimming & filtering", "Deduplication", "Initial ribodepletion", "Secondary ribodepletion"))) +n_reads_rel_display +``` + +```{r} +#| label: preproc-figures +#| warning: false +#| fig-height: 6 +#| fig-width: 6 + +g_stage_trace <- ggplot(basic_stats, aes(x=stage, color=city, group=sample)) + + scale_color_city() + + facet_wrap(~city, scales="free", ncol=3) + + theme_kit + theme(legend.position = "none") + +# Plot reads over preprocessing +g_reads_stages <- g_stage_trace + + geom_line(aes(y=n_read_pairs)) + + scale_y_continuous("# Read pairs", expand=c(0,0), limits=c(0,NA)) +g_reads_stages + +# Plot relative read losses during preprocessing +g_reads_rel <- ggplot(n_reads_rel, aes(x=stage, color=city, group=sample)) + + geom_line(aes(y=p_reads_lost_abs_marginal)) + + scale_y_continuous("% Total Reads Lost", expand=c(0,0), + labels = function(x) x*100) + + scale_color_city() + + 
facet_wrap(~city, scales="free", ncol=3) + + theme_kit + theme(legend.position = "none") +g_reads_rel +``` + +Data cleaning was very successful at removing adapters and improving read qualities: + +```{r} +#| warning: false +#| label: plot-quality +#| fig-height: 7 + +g_qual <- ggplot(mapping=aes(color=city, linetype=read_pair, + group=interaction(sample,read_pair))) + + scale_color_city() + scale_linetype_discrete(name = "Read Pair") + + guides(color=guide_legend(nrow=2,byrow=TRUE), + linetype = guide_legend(nrow=2,byrow=TRUE)) + + theme_base + +# Visualize adapters +g_adapters <- g_qual + + geom_line(aes(x=position, y=pc_adapters), data=adapter_stats) + + scale_y_continuous(name="% Adapters", limits=c(0,20), + breaks = seq(0,50,10), expand=c(0,0)) + + scale_x_continuous(name="Position", limits=c(0,NA), + breaks=seq(0,140,20), expand=c(0,0)) + + facet_grid(stage~adapter) +g_adapters + +# Visualize quality +g_quality_base <- g_qual + + geom_hline(yintercept=25, linetype="dashed", color="red") + + geom_hline(yintercept=30, linetype="dashed", color="red") + + geom_line(aes(x=position, y=mean_phred_score), data=quality_base_stats) + + scale_y_continuous(name="Mean Phred score", expand=c(0,0), limits=c(10,45)) + + scale_x_continuous(name="Position", limits=c(0,NA), + breaks=seq(0,140,20), expand=c(0,0)) + + facet_grid(stage~.) +g_quality_base + +g_quality_seq <- g_qual + + geom_vline(xintercept=25, linetype="dashed", color="red") + + geom_vline(xintercept=30, linetype="dashed", color="red") + + geom_line(aes(x=mean_phred_score, y=n_sequences), data=quality_seq_stats) + + scale_x_continuous(name="Mean Phred score", expand=c(0,0)) + + scale_y_continuous(name="# Sequences", expand=c(0,0)) + + facet_grid(stage~.) 
+g_quality_seq +``` + +According to FASTQC, cleaning + deduplication was very effective at reducing measured duplicate levels, which fell from an average of 9.4% to 1.7% for DNA reads: + +```{r} +#| label: preproc-dedup +#| fig-height: 6 +#| fig-width: 6 + +stage_dup <- basic_stats %>% group_by(stage) %>% + summarize(dmin = min(percent_duplicates), dmax=max(percent_duplicates), + dmean=mean(percent_duplicates), .groups = "drop") + +g_dup_stages <- g_stage_trace + + geom_line(aes(y=percent_duplicates)) + + scale_y_continuous("% Duplicates", limits=c(0,NA), expand=c(0,0)) +g_dup_stages + +g_readlen_stages <- g_stage_trace + geom_line(aes(y=mean_seq_len)) + + scale_y_continuous("Mean read length (nt)", expand=c(0,0), limits=c(0,NA)) +g_readlen_stages +``` + +# High-level composition + +As before, to assess the high-level composition of the reads, I ran the ribodepleted files through Kraken (using the Standard 16 database) and summarized the results with Bracken. Combining these results with the read counts above gives us a breakdown of the inferred composition of the samples: + +```{r} +#| label: prepare-composition + +# Import Bracken data +bracken_paths <- file.path(data_dirs, "bracken_counts.tsv") +bracken <- lapply(bracken_paths, read_tsv, show_col_types = FALSE) %>% bind_rows +total_assigned <- bracken %>% group_by(sample) %>% summarize( + name = "Total", + kraken_assigned_reads = sum(kraken_assigned_reads), + added_reads = sum(added_reads), + new_est_reads = sum(new_est_reads), + fraction_total_reads = sum(fraction_total_reads) +) +bracken_spread <- bracken %>% select(name, sample, new_est_reads) %>% + mutate(name = tolower(name)) %>% + pivot_wider(id_cols = "sample", names_from = "name", + values_from = "new_est_reads") + +# Count reads +read_counts_preproc <- basic_stats %>% + select(sample, date, date_alias, city, stage, n_read_pairs) %>% + pivot_wider(id_cols = c("sample", "date", "date_alias", "city"), + names_from="stage", values_from="n_read_pairs") 
+read_counts <- read_counts_preproc %>% + inner_join(total_assigned %>% select(sample, new_est_reads), by = "sample") %>% + rename(assigned = new_est_reads) %>% + inner_join(bracken_spread, by="sample") + +# Assess composition +read_comp <- transmute(read_counts, sample, date, date_alias, city, + n_filtered = raw_concat-cleaned, + n_duplicate = cleaned-dedup, + n_ribosomal = (dedup-ribo_initial) + (ribo_initial-ribo_secondary), + n_unassigned = ribo_secondary-assigned, + n_bacterial = bacteria, + n_archaeal = archaea, + n_viral = viruses, + n_human = eukaryota) +read_comp_long <- pivot_longer(read_comp, -(sample:city), + names_to = "classification", + names_prefix = "n_", values_to = "n_reads") %>% + mutate(classification = fct_inorder(str_to_sentence(classification))) %>% + group_by(sample) %>% mutate(p_reads = n_reads/sum(n_reads)) + +# Summarize composition +read_comp_summ <- read_comp_long %>% + group_by(city, classification) %>% + summarize(n_reads = sum(n_reads), .groups = "drop_last") %>% + mutate(n_reads = replace_na(n_reads,0), + p_reads = n_reads/sum(n_reads), + pc_reads = p_reads*100) +``` + +```{r} +#| label: plot-composition-all +#| fig-height: 7 +#| fig-width: 8 + +# Prepare plotting templates +g_comp_base <- ggplot(mapping=aes(x=date_alias, y=p_reads, fill=classification)) + + scale_x_discrete(name="Collection Date") + + facet_wrap(~city, scales = "free") + + theme_kit + theme(axis.text.x = element_blank()) +scale_y_pc_reads <- purrr::partial(scale_y_continuous, name = "% Reads", + expand = c(0,0), labels = function(y) y*100) + +# Plot overall composition +g_comp <- g_comp_base + geom_col(data = read_comp_long, position = "stack") + + scale_y_pc_reads(limits = c(0,1.01), breaks = seq(0,1,0.2)) + + scale_fill_brewer(palette = "Set1", name = "Classification") +g_comp + +# Plot composition of minor components +read_comp_minor <- read_comp_long %>% + filter(classification %in% c("Archaeal", "Viral", "Other")) +palette_minor <- brewer.pal(9, 
"Set1")[c(6,7,9)] +g_comp_minor <- g_comp_base + geom_col(data=read_comp_minor, position = "stack") + + scale_y_pc_reads() + + scale_fill_manual(values=palette_minor, name = "Classification") +g_comp_minor + +``` + +```{r} +#| label: composition-summary + +p_reads_summ_group <- read_comp_long %>% + mutate(classification = ifelse(classification %in% c("Filtered", "Duplicate", "Unassigned"), "Excluded", as.character(classification)), + classification = fct_inorder(classification)) %>% + group_by(classification, sample, city) %>% + summarize(p_reads = sum(p_reads), .groups = "drop") %>% + group_by(classification, city) %>% + summarize(pc_min = min(p_reads)*100, pc_max = max(p_reads)*100, + pc_mean = mean(p_reads)*100, .groups = "drop") +p_reads_summ_prep <- p_reads_summ_group %>% + mutate(classification = fct_inorder(classification), + pc_min = pc_min %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2), + pc_max = pc_max %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2), + pc_mean = pc_mean %>% signif(digits=2) %>% sapply(format, scientific=FALSE, trim=TRUE, digits=2), + display = paste0(pc_min, "-", pc_max, "% (mean ", pc_mean, "%)")) +p_reads_summ <- p_reads_summ_prep %>% + select(city, classification, read_fraction=display) %>% + arrange(city, classification) +p_reads_summ +``` + +In many respects, these resemble the Prussin data: high human fraction (mean 19.6%), high bacterial fraction (mean 18.2%), high unclassified fraction (mean 43.9%), low viral fraction (mean 0.01%). One notable difference is that archaeal reads are more abundant (0.034% compared to 0.016% for Prussin). + +As in Prussin, viral DNA reads were dominated by *Caudoviricetes* phages. Other viral classes that are prominent in at least some samples include *Herviviricetes* (herpesviruses), *Papovaviricetes* (polyomaviruses and papillomaviruses), *Revtraviricetes* (retroviruses + Hep B), and *Naldaviricetes* (mainly arthropod viruses). 
I'll investigate the first three of this latter group in more depth, restricting in each case to samples where that class makes up at least 5% of viral reads. + +```{r} +#| label: extract-viral-taxa + +# Get viral taxonomy +viral_taxa_path <- file.path(data_dir_base, "viral-taxids.tsv.gz") +viral_taxa <- read_tsv(viral_taxa_path, show_col_types = FALSE) + +# Get paths to Kraken reports +samples <- as.character(basic_stats_raw$sample) +report_dirs <- file.path(data_dirs, "kraken") +report_paths <- lapply(report_dirs, list.files, full.names = TRUE) %>% unlist +names(report_paths) <- str_extract(report_paths, "SRR\\d*") + +# Extract viral taxa +col_names <- c("pc_reads_total", "n_reads_clade", "n_reads_direct", + "rank", "taxid", "name") +kraken_reports_raw <- lapply(report_paths, read_tsv, col_names = col_names, + show_col_types = FALSE) +kraken_reports <- lapply(names(kraken_reports_raw), + function(x) kraken_reports_raw[[x]] %>% + mutate(sample = x)) %>% bind_rows +kraken_reports_viral <- filter(kraken_reports, taxid %in% viral_taxa$taxid) %>% + group_by(sample) %>% + mutate(p_reads_viral = n_reads_clade/n_reads_clade[1]) +kraken_reports_viral_cleaned <- kraken_reports_viral %>% + inner_join(libraries, by="sample") %>% + select(-pc_reads_total, -n_reads_direct) %>% + select(name, taxid, p_reads_viral, n_reads_clade, everything()) + +viral_classes <- kraken_reports_viral_cleaned %>% filter(rank == "C") +viral_families <- kraken_reports_viral_cleaned %>% filter(rank == "F") + +``` + +```{r} +#| label: viral-class-composition + +major_threshold <- 0.05 + +# Identify major viral classes +viral_classes_major_tab <- viral_classes %>% + group_by(name, taxid) %>% + summarize(p_reads_viral_max = max(p_reads_viral), .groups="drop") %>% + filter(p_reads_viral_max >= major_threshold) +viral_classes_major_list <- viral_classes_major_tab %>% pull(name) +viral_classes_major <- viral_classes %>% + filter(name %in% viral_classes_major_list) %>% + select(name, taxid, sample, 
date_alias, city, p_reads_viral) +viral_classes_minor <- viral_classes_major %>% + group_by(sample, date_alias, city) %>% + summarize(p_reads_viral_major = sum(p_reads_viral), .groups = "drop") %>% + mutate(name = "Other", taxid=NA, p_reads_viral = 1-p_reads_viral_major) %>% + select(name, taxid, sample, date_alias, city, p_reads_viral) +viral_classes_display <- bind_rows(viral_classes_major, viral_classes_minor) %>% + arrange(desc(p_reads_viral)) %>% + mutate(name = factor(name, levels=c(viral_classes_major_list, "Other")), + p_reads_viral = pmax(p_reads_viral, 0)) %>% + rename(p_reads = p_reads_viral, classification=name) + +palette_viral <- c(brewer.pal(12, "Set3"), brewer.pal(8, "Dark2")) +g_classes <- g_comp_base + + geom_col(data=viral_classes_display, position = "stack") + + scale_y_continuous(name="% Viral Reads", limits=c(0,1.01), breaks = seq(0,1,0.2), + expand=c(0,0), labels = function(y) y*100) + + scale_fill_manual(values=palette_viral, name = "Viral class") + +g_classes + +``` + +*Papovaviricetes* are quite heterogeneous across samples, and frequently diverse within samples. *Alphapolyomavirus* and *Alphapapillomavirus* are the most abundant genera overall, but *Betapapillomavirus, Gammapapillomavirus, Mupapillomavirus* and others all have strong showings. 
+ +```{r} +#| label: papovaviricetes +#| fig-height: 6 +#| warning: false + +# Get samples +papova_taxid <- 2732421 +papova_threshold <- 0.05 +papova_samples <- viral_classes %>% filter(taxid == papova_taxid) %>% filter(p_reads_viral > 0.05) %>% pull(sample) %>% unique + +# Get all taxa in class +papova_desc_taxids_old <- papova_taxid +papova_desc_taxids_new <- unique(c(papova_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% papova_desc_taxids_old) %>% pull(taxid))) +while (length(papova_desc_taxids_new) > length(papova_desc_taxids_old)){ + papova_desc_taxids_old <- papova_desc_taxids_new + papova_desc_taxids_new <- unique(c(papova_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% papova_desc_taxids_old) %>% pull(taxid))) +} + +# Get read counts +papova_counts <- kraken_reports_viral_cleaned %>% + filter(taxid %in% papova_desc_taxids_new, + sample %in% papova_samples) %>% + mutate(p_reads_papova = n_reads_clade/n_reads_clade[1]) + +# Get genus composition +papova_genera <- papova_counts %>% filter(rank == "G") +papova_genera_major_tab <- papova_genera %>% + group_by(name, taxid) %>% + summarize(p_reads_papova_max = max(p_reads_papova), .groups="drop") %>% + filter(p_reads_papova_max >= papova_threshold) +papova_genera_major_list <- papova_genera_major_tab %>% pull(name) +papova_genera_major <- papova_genera %>% + filter(name %in% papova_genera_major_list) %>% + select(name, taxid, sample, date_alias, city, p_reads_papova) +papova_genera_minor <- papova_genera_major %>% + group_by(sample, date_alias, city) %>% + summarize(p_reads_papova_major = sum(p_reads_papova), .groups = "drop") %>% + mutate(name = "Other", taxid=NA, p_reads_papova = 1-p_reads_papova_major) %>% + select(name, taxid, sample, date_alias, city, p_reads_papova) +papova_genera_display <- bind_rows(papova_genera_major, papova_genera_minor) %>% + arrange(desc(p_reads_papova)) %>% + mutate(name = factor(name, levels=c(papova_genera_major_list, "Other"))) %>% + rename(p_reads = 
p_reads_papova, classification=name) + +# Plot +g_papova_genera <- g_comp_base + + geom_col(data=papova_genera_display, position = "stack") + + scale_y_continuous(name="% Papovaviricetes Reads", limits=c(0,1.02), + breaks = seq(0,1,0.2), + expand=c(0,0), labels = function(y) y*100) + + scale_fill_manual(values=palette_viral, name = "Viral genus") + + guides(fill=guide_legend(ncol=3)) +g_papova_genera + +``` + +Only a few samples showed at least 5% prevalence of *Herviviricetes*, but those that did were typically dominated by one or a small number of species that varied between samples. Of these, human alphaherpesvirus 1 appeared in the most samples, but several other species were prominent in at least one sample: + +```{r} +#| label: herviviricetes +#| fig-height: 6 +#| fig-width: 9 + +# Get samples +hervi_taxid <- 2731363 +hervi_threshold <- 0.05 +hervi_samples <- viral_classes %>% filter(taxid == hervi_taxid) %>% filter(p_reads_viral > 0.05) %>% pull(sample) %>% unique + +# Get all taxa in class +hervi_desc_taxids_old <- hervi_taxid +hervi_desc_taxids_new <- unique(c(hervi_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% hervi_desc_taxids_old) %>% pull(taxid))) +while (length(hervi_desc_taxids_new) > length(hervi_desc_taxids_old)){ + hervi_desc_taxids_old <- hervi_desc_taxids_new + hervi_desc_taxids_new <- unique(c(hervi_desc_taxids_old, viral_taxa %>% filter(parent_taxid %in% hervi_desc_taxids_old) %>% pull(taxid))) +} + +# Get read counts +hervi_counts <- kraken_reports_viral_cleaned %>% + filter(taxid %in% hervi_desc_taxids_new, + sample %in% hervi_samples) %>% + mutate(p_reads_hervi = n_reads_clade/n_reads_clade[1]) + +# Get genus composition +hervi_genera <- hervi_counts %>% filter(rank == "S") +hervi_genera_major_tab <- hervi_genera %>% + group_by(name, taxid) %>% + summarize(p_reads_hervi_max = max(p_reads_hervi), .groups="drop") %>% + filter(p_reads_hervi_max >= hervi_threshold) +hervi_genera_major_list <- hervi_genera_major_tab %>% pull(name) 
+hervi_genera_major <- hervi_genera %>% + filter(name %in% hervi_genera_major_list) %>% + select(name, taxid, sample, date_alias, city, p_reads_hervi) +hervi_genera_minor <- hervi_genera_major %>% + group_by(sample, date_alias, city) %>% + summarize(p_reads_hervi_major = sum(p_reads_hervi), .groups = "drop") %>% + mutate(name = "Other", taxid=NA, p_reads_hervi = 1-p_reads_hervi_major) %>% + select(name, taxid, sample, date_alias, city, p_reads_hervi) +hervi_genera_display <- bind_rows(hervi_genera_major, hervi_genera_minor) %>% + arrange(desc(p_reads_hervi)) %>% + mutate(name = factor(name, levels=c(hervi_genera_major_list, "Other"))) %>% + rename(p_reads = p_reads_hervi, classification=name) + +# Plot +g_hervi_genera <- g_comp_base + + geom_col(data=hervi_genera_display, position = "stack") + + scale_y_continuous(name="% Herviviricetes Reads", limits=c(0,1.01), + breaks = seq(0,1,0.2), + expand=c(0,0), labels = function(y) y*100) + + scale_fill_manual(values=palette_viral, name = "Viral genus") + + guides(fill=guide_legend(ncol=3)) +g_hervi_genera + +``` + +Finally, for *Revtraviricetes*, most samples were dominated by porcine type-C oncovirus, while one was dominated by an avian retrovirus. The last showed significant levels of two murine viruses plus HIV. I'm suspicious of many of these. 
+
+```{r}
+#| label: revtraviricetes
+#| fig-height: 6
+#| fig-width: 9
+
+# Composition of reads within the viral class with taxid 2732514, restricted
+# to samples where that class has p_reads_viral > 0.05 in `viral_classes`.
+
+# Get samples
+revtra_taxid <- 2732514
+revtra_threshold <- 0.05
+revtra_samples <- viral_classes %>%
+  filter(taxid == revtra_taxid) %>%
+  filter(p_reads_viral > 0.05) %>%
+  pull(sample) %>%
+  unique
+
+# Get all taxa in class: add one generation of children per pass and stop as
+# soon as a pass adds no new taxids.
+revtra_desc_taxids_old <- revtra_taxid
+revtra_desc_taxids_new <- unique(c(
+  revtra_desc_taxids_old,
+  viral_taxa %>% filter(parent_taxid %in% revtra_desc_taxids_old) %>% pull(taxid)
+))
+while (length(revtra_desc_taxids_new) > length(revtra_desc_taxids_old)){
+  revtra_desc_taxids_old <- revtra_desc_taxids_new
+  revtra_desc_taxids_new <- unique(c(
+    revtra_desc_taxids_old,
+    viral_taxa %>% filter(parent_taxid %in% revtra_desc_taxids_old) %>% pull(taxid)
+  ))
+}
+
+# Get read counts
+# NOTE(review): the denominator is the first `n_reads_clade` of the filtered
+# table; it is only per-sample if `kraken_reports_viral_cleaned` is still
+# grouped by sample at this point -- confirm.
+revtra_counts <- kraken_reports_viral_cleaned %>%
+  filter(taxid %in% revtra_desc_taxids_new,
+         sample %in% revtra_samples) %>%
+  mutate(p_reads_revtra = n_reads_clade/n_reads_clade[1])
+
+# Get genus composition
+# NOTE(review): the filter keeps rank "S" although the object names and the
+# legend title say "genus" -- confirm which rank is intended.
+revtra_genera <- revtra_counts %>% filter(rank == "S")
+# Taxa whose maximum share in any sample reaches the display threshold
+revtra_genera_major_tab <- revtra_genera %>%
+  group_by(name, taxid) %>%
+  summarize(p_reads_revtra_max = max(p_reads_revtra), .groups="drop") %>%
+  filter(p_reads_revtra_max >= revtra_threshold)
+revtra_genera_major_list <- revtra_genera_major_tab %>% pull(name)
+revtra_genera_major <- revtra_genera %>%
+  filter(name %in% revtra_genera_major_list) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_revtra)
+# Everything not covered by a major taxon is shown as "Other"
+revtra_genera_minor <- revtra_genera_major %>%
+  group_by(sample, date_alias, city) %>%
+  summarize(p_reads_revtra_major = sum(p_reads_revtra), .groups = "drop") %>%
+  mutate(name = "Other", taxid=NA, p_reads_revtra = 1-p_reads_revtra_major) %>%
+  select(name, taxid, sample, date_alias, city, p_reads_revtra)
+revtra_genera_display <- bind_rows(revtra_genera_major, revtra_genera_minor) %>%
+  arrange(desc(p_reads_revtra)) %>%
+  mutate(name = factor(name, levels=c(revtra_genera_major_list, "Other"))) %>%
+  rename(p_reads = p_reads_revtra, classification=name)
+
+# Plot
+g_revtra_genera <- g_comp_base +
+  geom_col(data=revtra_genera_display, position = "stack") +
+  scale_y_continuous(name="% revtraviricetes Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral genus") +
+  guides(fill=guide_legend(ncol=3))
+g_revtra_genera
+
+```
+
+# Human-infecting virus reads: validation
+
+Next, I investigated the human-infecting virus read content of these unenriched samples. Using the same workflow I used for Prussin et al., I identified 24,278 read pairs as putatively human viral: 0.002% of reads surviving to that stage in the pipeline.
+
+```{r}
+#| label: hv-read-counts
+
+# Import HV read data (one table per data directory, stacked and annotated
+# with the library metadata)
+hv_reads_filtered_paths <- file.path(data_dirs, "hv_hits_putative_filtered.tsv.gz")
+hv_reads_filtered <- lapply(hv_reads_filtered_paths, read_tsv,
+                            show_col_types = FALSE) %>%
+  bind_rows() %>%
+  inner_join(libraries, by="sample")
+
+# Count reads: the first count collapses multiple rows per seq_id, the second
+# counts distinct seq_ids per sample; totals come from the "ribo_initial" stage
+n_hv_filtered <- hv_reads_filtered %>%
+  group_by(sample, date, date_alias, city, seq_id) %>% count %>%
+  group_by(sample, date, date_alias, city) %>% count %>%
+  inner_join(basic_stats %>% filter(stage == "ribo_initial") %>%
+               select(sample, n_read_pairs), by="sample") %>%
+  rename(n_putative = n, n_total = n_read_pairs) %>%
+  mutate(p_reads = n_putative/n_total, pc_reads = p_reads * 100)
+n_hv_filtered_summ <- n_hv_filtered %>% ungroup %>%
+  summarize(n_putative = sum(n_putative), n_total = sum(n_total),
+            .groups="drop") %>%
+  mutate(p_reads = n_putative/n_total, pc_reads = p_reads*100)
+```
+
+```{r}
+#| label: plot-hv-scores
+#| warning: false
+#| fig-width: 8
+
+# Collapse multi-entry sequences
+rmax <- purrr::partial(max, na.rm = TRUE)
+# Return the single shared value of `x`, or every entry joined by "/" when the
+# entries differ. A scalar `if` on the number of distinct values replaces the
+# vectorised ifelse(), so a missing entry no longer turns the result into NA.
+collapse <- function(x) {
+  if (length(unique(x)) <= 1) x[1] else paste(x, collapse = "/")
+}
+# One row per seq_id; rows are sorted by score first so that `taxid[1]` and
+# `assigned_taxid[1]` belong to the highest-scoring entry.
+mrg <- hv_reads_filtered %>%
+  mutate(adj_score_max = pmax(adj_score_fwd, adj_score_rev, na.rm = TRUE)) %>%
+  arrange(desc(adj_score_max)) %>%
+  group_by(seq_id) %>%
+  summarize(sample = collapse(sample),
+            genome_id = collapse(genome_id),
+            taxid_best = taxid[1],
+            taxid = collapse(as.character(taxid)),
+            best_alignment_score_fwd = rmax(best_alignment_score_fwd),
+            best_alignment_score_rev = rmax(best_alignment_score_rev),
+            query_len_fwd = rmax(query_len_fwd),
+            query_len_rev = rmax(query_len_rev),
+            query_seq_fwd = query_seq_fwd[!is.na(query_seq_fwd)][1],
+            query_seq_rev = query_seq_rev[!is.na(query_seq_rev)][1],
+            classified = rmax(classified),
+            assigned_name = collapse(assigned_name),
+            assigned_taxid_best = assigned_taxid[1],
+            assigned_taxid = collapse(as.character(assigned_taxid)),
+            assigned_hv = rmax(assigned_hv),
+            hit_hv = rmax(hit_hv),
+            encoded_hits = collapse(encoded_hits),
+            adj_score_fwd = rmax(adj_score_fwd),
+            adj_score_rev = rmax(adj_score_rev)
+            ) %>%
+  inner_join(libraries, by="sample") %>%
+  mutate(kraken_label = ifelse(assigned_hv, "Kraken2 HV\nassignment",
+                               ifelse(hit_hv, "Kraken2 HV\nhit",
+                                      "No hit or\nassignment"))) %>%
+  mutate(adj_score_max = pmax(adj_score_fwd, adj_score_rev),
+         highscore = adj_score_max >= 20)
+
+# NOTE(review): `labeller(kit = ...)` names a variable that is not a facet
+# here, so it has no effect; the labels are wrapped by their embedded "\n".
+g_hist_0 <- ggplot(mrg, aes(x=adj_score_max)) +
+  geom_histogram(binwidth=5,boundary=0) +
+  geom_vline(xintercept=20, linetype="dashed", color="red") +
+  facet_wrap(~kraken_label, labeller = labeller(kit = label_wrap_gen(20)), scales = "free_y") +
+  scale_x_continuous(name = "Maximum adjusted alignment score") +
+  scale_y_continuous(name="# Read pairs") +
+  theme_base
+g_hist_0
+```
+
+As previously described, I ran BLASTN on these reads via a dedicated EC2 instance, using the same parameters I've used for previous datasets.
+
+```{r}
+#| label: make-blast-fasta
+# Write every non-missing mate of each putative HV read pair to a FASTA file;
+# headers are "><seq_id>_1" (forward) and "><seq_id>_2" (reverse).
+mrg_fasta <-  mrg %>%
+  mutate(seq_head = paste0(">", seq_id)) %>%
+  ungroup %>%
+  select(header1=seq_head, seq1=query_seq_fwd,
+         header2=seq_head, seq2=query_seq_rev) %>%
+  mutate(header1=paste0(header1, "_1"), header2=paste0(header2, "_2"))
+mrg_fasta_sep <- bind_rows(select(mrg_fasta, header=header1, seq=seq1),
+                           select(mrg_fasta, header=header2, seq=seq2)) %>%
+  filter(!is.na(seq))
+mrg_fasta_out <- do.call(paste, c(mrg_fasta_sep, sep="\n")) %>%
+  paste(collapse="\n")
+blast_dir <- file.path(data_dir_base, "blast")
+dir.create(blast_dir, showWarnings = FALSE)
+write(mrg_fasta_out, file.path(blast_dir, "putative-viral.fasta"))
+```
+
+```{r}
+#| label: process-blast-data
+#| warning: false
+
+# Import BLAST results
+# blast_results_path <- file.path(data_dir_base, "blast/putative-viral.blast.gz")
+# blast_cols <- c("qseqid", "sseqid", "sgi", "staxid", "qlen", "evalue", "bitscore", "qcovs", "length", "pident", "mismatch", "gapopen", "sstrand", "qstart", "qend", "sstart", "send")
+# blast_results <- read_tsv(blast_results_path, show_col_types = FALSE,
+#   col_names = blast_cols, col_types = cols(.default="c"))
+blast_results_path <- file.path(data_dir_base, "blast/putative-viral-best.blast.gz")
+blast_results <- read_tsv(blast_results_path, show_col_types = FALSE)
+
+# Filter for best hit for each query/subject combination
+blast_results_best <- blast_results %>% group_by(qseqid, staxid) %>%
+  filter(bitscore == max(bitscore)) %>%
+  filter(length == max(length)) %>% filter(row_number() == 1)
+# This writes back to the same path that was read above (the raw import is
+# commented out), so the file on disk is replaced by its filtered version.
+write_tsv(blast_results_best, file.path(data_dir_base, "blast/putative-viral-best.blast.gz"))
+
+# Rank hits for each query and filter for high-ranking hits
+blast_results_ranked <- blast_results_best %>%
+  group_by(qseqid) %>% mutate(rank = dense_rank(desc(bitscore)))
+# Split the query ID at its LAST underscore only: the suffix is the mate
+# number appended in `make-blast-fasta`, and everything before it is the
+# sequence ID, which therefore stays intact even if it contains underscores.
+blast_results_highrank <- blast_results_ranked %>% filter(rank <= 5) %>%
+  mutate(read_pair = str_extract(qseqid, "[^_]*$"),
+         seq_id = str_remove(qseqid, "_[^_]*$")) %>%
+  mutate(bitscore = as.numeric(bitscore))
+
+# Summarize by read pair and taxid
+blast_results_paired <- blast_results_highrank %>%
+  group_by(seq_id, staxid) %>%
+  summarize(bitscore_max = max(bitscore), bitscore_min = min(bitscore),
+            n_reads = n(), .groups = "drop")
+
+# Add viral status (`viral_full`: viral subject taxid hit by both mates)
+blast_results_viral <- mutate(blast_results_paired, viral = staxid %in% viral_taxa$taxid) %>%
+  mutate(viral_full = viral & n_reads == 2)
+
+# Compare to Kraken & Bowtie assignments
+# Test, element by element, whether `taxid_1` is one of the "/"-separated
+# entries of `taxid_2` (the format produced by `collapse()`).
+#   taxid_1: vector of taxids (numeric or character)
+#   taxid_2: character vector of single or "/"-joined taxids
+# Returns an unnamed logical vector; NA entries in `taxid_2` give FALSE.
+match_taxid <- function(taxid_1, taxid_2){
+  # Avoid scientific notation ("3e+06") when numeric taxids are pasted
+  if (is.numeric(taxid_1)) {
+    taxid_1 <- format(taxid_1, scientific = FALSE, trim = TRUE)
+  }
+  # One pattern covers first, middle, last and only position in the string
+  patterns <- paste0("(^|/)", taxid_1, "(/|$)")
+  hits <- mapply(grepl, patterns, taxid_2, USE.NAMES = FALSE)
+  # as.logical() also turns the empty list returned for zero-length input
+  # into logical(0)
+  out <- as.logical(hits)
+  return(out)
+}
+mrg_assign <- mrg %>% select(sample, seq_id, taxid, assigned_taxid, adj_score_max)
+blast_results_assign <- inner_join(blast_results_viral, mrg_assign, by="seq_id") %>%
+    mutate(taxid_match_bowtie = match_taxid(staxid, taxid),
+           taxid_match_kraken = match_taxid(staxid, assigned_taxid),
+           taxid_match_any = taxid_match_bowtie | taxid_match_kraken)
+# viral_status per read pair: 2 = viral hit on both mates or BLAST taxid
+# agreeing with the Bowtie/Kraken taxid, 1 = some viral hit, 0 = none
+blast_results_out <- blast_results_assign %>%
+  group_by(seq_id) %>%
+  summarize(viral_status = ifelse(any(viral_full), 2,
+                                  ifelse(any(taxid_match_any), 2,
+                                             ifelse(any(viral), 1, 0))),
+            .groups = "drop")
+```
+
+```{r}
+#| label: plot-blast-results
+#| fig-height: 6
+#| warning: false
+
+# Merge BLAST results with unenriched read data; read pairs without any
+# BLAST result get viral_status 0
+mrg_blast <- full_join(mrg, blast_results_out, by="seq_id") %>%
+  mutate(viral_status = replace_na(viral_status, 0),
+         viral_status_out = ifelse(viral_status == 0, FALSE, TRUE))
+
+# Plot
+g_hist_1 <- ggplot(mrg_blast, aes(x=adj_score_max, fill=viral_status_out)) +
+  geom_histogram(binwidth=5,boundary=0) +
+  geom_vline(xintercept=20, linetype="dashed", color="red") +
+  facet_wrap(~kraken_label, labeller = labeller(kit = label_wrap_gen(20)), scales = "free_y") +
+  scale_x_continuous(name = "Maximum adjusted alignment score") +
+  scale_y_continuous(name="# Read pairs") +
+  scale_fill_brewer(palette = "Set1", name = "Viral status") +
+  theme_base
+g_hist_1
+```
+
+For a disjunctive score threshold of 20, the workflow achieves a measured F1 score of 98.0%.
+
+```{r}
+#| label: plot-f1
+# Score the retention rule "Kraken assignment OR Kraken hit OR either mate's
+# adjusted score above `score_threshold`" against the BLAST-derived status.
+#   tab: read-pair table with adj_score_fwd, adj_score_rev, assigned_hv,
+#        hit_hv and the logical viral_status_out
+#   score_threshold: numeric cutoff for the adjusted alignment score
+# Returns a one-row tibble: threshold, sensitivity, specificity, precision, f1.
+# NOTE(review): the comparison here is strict (`>`), whereas `highscore` is
+# defined with `>=` -- confirm which boundary is intended.
+test_sens_spec <- function(tab, score_threshold){
+  tab_retained <- tab %>%
+    mutate(retain_score = (adj_score_fwd > score_threshold | adj_score_rev > score_threshold),
+           retain = assigned_hv | hit_hv | retain_score) %>%
+    group_by(viral_status_out, retain) %>% count
+  # Confusion-matrix cells; `viral_status_out` is logical, so it is used
+  # directly instead of being compared with the string "TRUE"
+  pos_tru <- tab_retained %>% filter(viral_status_out, retain) %>% pull(n) %>% sum
+  pos_fls <- tab_retained %>% filter(!viral_status_out, retain) %>% pull(n) %>% sum
+  neg_tru <- tab_retained %>% filter(!viral_status_out, !retain) %>% pull(n) %>% sum
+  neg_fls <- tab_retained %>% filter(viral_status_out, !retain) %>% pull(n) %>% sum
+  sensitivity <- pos_tru / (pos_tru + neg_fls)
+  specificity <- neg_tru / (neg_tru + pos_fls)
+  precision   <- pos_tru / (pos_tru + pos_fls)
+  f1 <- 2 * precision * sensitivity / (precision + sensitivity)
+  out <- tibble(threshold=score_threshold, sensitivity=sensitivity,
+                specificity=specificity, precision=precision, f1=f1)
+  return(out)
+}
+# Evaluate test_sens_spec() at every threshold in `inrange` and return the
+# metrics in long format (threshold, metric, value).
+range_f1 <- function(intab, inrange=15:45){
+  tss <- purrr::partial(test_sens_spec, tab=intab)
+  stats <- lapply(inrange, tss) %>% bind_rows %>%
+    pivot_longer(!threshold, names_to="metric", values_to="value")
+  return(stats)
+}
+stats_0 <- range_f1(mrg_blast)
+g_stats_0 <- ggplot(stats_0, aes(x=threshold, y=value, color=metric)) +
+  geom_vline(xintercept=20, color = "red", linetype = "dashed") +
+  geom_line() +
+  scale_y_continuous(name = "Value", limits=c(0,1), breaks = seq(0,1,0.2), expand = c(0,0)) +
+  scale_x_continuous(name = "Adjusted Score Threshold", expand = c(0,0)) +
+  scale_color_brewer(palette="Dark2") +
+  theme_base
+g_stats_0
+stats_0 %>% filter(threshold == 20) %>%
+  select(Threshold=threshold, Metric=metric, Value=value)
+```
+
+Looking into the composition of different read groups, the notable observation for me is the high prevalence of Pigeon torque teno virus among high-scoring false positives, with 77 such read pairs. BLAST maps these not to viruses but to their most common hosts, i.e. assorted species of pigeon. That said, the number of false-positive PTTV reads is substantially exceeded by the number of true-positive PTTV reads (1883), which do map to appropriate viruses according to BLAST, so the presence of a comparatively small number of false positives seems unlikely to cause too much distortion.
+
+```{r}
+#| label: fp
+#| fig-height: 5
+
+# Taxa above this share of their (status, score) group are shown by name
+major_threshold <- 0.04
+
+# Add missing viral taxa
+viral_taxa$name[viral_taxa$taxid == 211787] <- "Human papillomavirus type 92"
+viral_taxa$name[viral_taxa$taxid == 509154] <- "Porcine endogenous retrovirus C"
+viral_taxa$name[viral_taxa$taxid == 493803] <- "Merkel cell polyomavirus"
+viral_taxa$name[viral_taxa$taxid == 427343] <- "Human papillomavirus 107"
+viral_taxa$name[viral_taxa$taxid == 194958] <- "Porcine endogenous retrovirus A"
+viral_taxa$name[viral_taxa$taxid == 340907] <- "Papiine alphaherpesvirus 2"
+viral_taxa$name[viral_taxa$taxid == 194959] <- "Porcine endogenous retrovirus B"
+
+
+# Prepare data: share of each best-match taxon within every combination of
+# BLAST status and high/low score
+fp <- mrg_blast %>%
+  group_by(viral_status_out, highscore, taxid_best) %>% count %>%
+  group_by(viral_status_out, highscore) %>% mutate(p=n/sum(n)) %>%
+  rename(taxid = taxid_best) %>%
+  left_join(viral_taxa, by="taxid") %>%
+  arrange(desc(p))
+fp_major_tab <- fp %>% filter(p > major_threshold) %>% arrange(desc(p))
+fp_major_list <- fp_major_tab %>% pull(name) %>% sort %>% unique %>% c(., "Other")
+fp_major <- fp %>% mutate(major = p > major_threshold) %>%
+  mutate(name_display = ifelse(major, name, "Other")) %>%
+  group_by(viral_status_out, highscore, name_display) %>%
+  summarize(n=sum(n), p=sum(p), .groups = "drop")  %>%
+  mutate(name_display = factor(name_display, levels = fp_major_list),
+         score_display = ifelse(highscore, "S >= 20", "S < 20"),
+         status_display = ifelse(viral_status_out, "True positive", "False positive"))
+
+# Plot
+# The x axis carries the score bin (true/false positive is the facet), and the
+# y axis is a proportion, so it is labelled in percent like the other
+# composition plots.
+g_fp <- ggplot(fp_major, aes(x=score_display, y=p, fill=name_display)) +
+  geom_col(position="stack") +
+  scale_x_discrete(name = "Maximum adjusted alignment score (S)") +
+  scale_y_continuous(name = "% reads", limits = c(0,1.01),
+                     breaks = seq(0,1,0.2), expand = c(0,0),
+                     labels = function(y) y*100) +
+  scale_fill_manual(values = palette_viral, name = "Viral\ntaxon") +
+  facet_grid(.~status_display) +
+  guides(fill=guide_legend(ncol=3)) +
+  theme_kit
+g_fp
+
+```
+
+```{r}
+#| label: ptt-blast-hits
+
+# Configure
+ref_taxid_ptt <- 2233536
+p_threshold <- 0.3
+
+# Get taxon names
+tax_names_path <- file.path(data_dir_base, "taxid-names.tsv.gz")
+tax_names <- read_tsv(tax_names_path, show_col_types = FALSE)
+
+# Add missing names (only those not already present in the names file)
+tax_names_new <- tribble(~staxid, ~name,
+                         3050295, "Cytomegalovirus humanbeta5",
+                         459231, "FLAG-tagging vector pFLAG97-TSR",
+                         3082113, "Rangifer tarandus platyrhynchus",
+                         3119969, "Bubalus kerabau",
+                         177155, "Streptopelia turtur",
+                         187126, "Nesoenas mayeri"
+                         )
+tax_names <- tax_names_new %>% filter(!
staxid %in% tax_names$staxid) %>%
+  bind_rows(tax_names) %>% arrange(staxid)
+ref_name_ptt <- tax_names %>% filter(staxid == ref_taxid_ptt) %>% pull(name)
+
+# Get major matches: for read pairs whose best taxid is the reference taxon,
+# the share of pairs (per score/status group) hitting each BLAST subject taxid
+mrg_staxid <- mrg_blast %>% filter(taxid_best == ref_taxid_ptt) %>%
+  group_by(highscore, viral_status_out) %>% mutate(n_seq = n())
+fp_staxid <- mrg_staxid %>%
+  left_join(blast_results_paired, by="seq_id") %>%
+  mutate(staxid = as.integer(staxid)) %>%
+  left_join(tax_names, by="staxid") %>% rename(sname=name) %>%
+  left_join(tax_names %>% rename(taxid_best=staxid), by="taxid_best")
+fp_staxid_count <- fp_staxid %>%
+  group_by(viral_status_out, highscore,
+           taxid_best, name, staxid, sname, n_seq) %>%
+  count %>%
+  group_by(viral_status_out, highscore, taxid_best, name) %>%
+  mutate(p=n/n_seq)
+fp_staxid_count_major <- fp_staxid_count %>%
+  filter(n>1, p>p_threshold, !is.na(staxid)) %>%
+  mutate(score_display = ifelse(highscore, "S >= 20", "S < 20"),
+         status_display = ifelse(viral_status_out,
+                                 "True positive", "False positive"))
+
+# Plot (x is a proportion, so tick labels are converted to percent)
+g <- ggplot(fp_staxid_count_major, aes(x=p, y=sname)) +
+  geom_col() +
+  facet_grid(status_display~score_display, scales="free",
+             labeller = label_wrap_gen(multi_line = FALSE)) +
+  scale_x_continuous(name="% mapped reads", limits=c(0,1), breaks=seq(0,1,0.2),
+                     expand=c(0,0), labels = function(x) x*100) +
+  labs(title=paste0(ref_name_ptt, " (taxid ", ref_taxid_ptt, ")")) +
+  theme_base + theme(
+    axis.title.y = element_blank(),
+    plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+g
+```
+
+# Human-infecting viruses: overall relative abundance
+
+```{r}
+#| label: count-hv-reads
+
+# Get raw read counts
+read_counts_raw <- basic_stats_raw %>%
+  select(sample, date_alias, city, n_reads_raw = n_read_pairs)
+
+# Get HV read counts (a read pair counts as HV if Kraken assigned it, Kraken
+# hit it, or its maximum adjusted score is at least 20)
+mrg_hv <- mrg %>% mutate(hv_status = assigned_hv | hit_hv | highscore) %>%
+  rename(taxid_all = taxid, taxid = taxid_best)
+read_counts_hv <- mrg_hv %>% filter(hv_status) %>% group_by(sample) %>%
+  count(name="n_reads_hv")
+read_counts <- read_counts_raw %>% left_join(read_counts_hv, by="sample") %>%
+  mutate(n_reads_hv = replace_na(n_reads_hv, 0))
+
+# Aggregate per city, then across all cities
+read_counts_city <- read_counts %>% group_by(city) %>%
+  summarize(n_reads_raw = sum(n_reads_raw),
+            n_reads_hv = sum(n_reads_hv), .groups="drop") %>%
+  mutate(sample= "All samples", date_alias = "All dates")
+read_counts_total <- read_counts_city %>% group_by(sample, date_alias) %>%
+  summarize(n_reads_raw = sum(n_reads_raw),
+            n_reads_hv = sum(n_reads_hv), .groups="drop") %>%
+  mutate(city = "All cities")
+read_counts_agg <- read_counts_city %>% arrange(city) %>%
+  bind_rows(read_counts_total) %>%
+  mutate(p_reads_hv = n_reads_hv/n_reads_raw,
+         city = fct_inorder(city))
+```
+
+Applying a disjunctive cutoff at S=20 identifies 23,191 read pairs as human-viral. This gives an overall relative HV abundance of $1.73 \times 10^{-5}$.
+
+```{r}
+#| label: plot-hv-ra
+#| warning: false
+# Visualize (the x axis is the city, including the "All cities" aggregate)
+g_phv_agg <- ggplot(read_counts_agg, aes(x=city, color=city)) +
+  geom_point(aes(y=p_reads_hv)) +
+  scale_y_log10("Relative abundance of human virus reads") +
+  scale_x_discrete(name="City") +
+  #facet_grid(.~sample_type, scales = "free", space = "free_x") +
+  scale_color_city() + theme_rotate
+g_phv_agg
+```
+
+This is lower than for DNA reads from other air-sampling datasets I've analyzed, but not drastically so:
+
+```{r}
+#| label: ra-hv-past
+
+# Collate past RA values
+ra_past <- tribble(~dataset, ~ra, ~na_type, ~panel_enriched,
+                   "Brumfield", 5e-5, "RNA", FALSE,
+                   "Brumfield", 3.66e-7, "DNA", FALSE,
+                   "Spurbeck", 5.44e-6, "RNA", FALSE,
+                   "Yang", 3.62e-4, "RNA", FALSE,
+                   "Rothman (unenriched)", 1.87e-5, "RNA", FALSE,
+                   "Rothman (panel-enriched)", 3.3e-5, "RNA", TRUE,
+                   "Crits-Christoph (unenriched)", 1.37e-5, "RNA", FALSE,
+                   "Crits-Christoph (panel-enriched)", 1.26e-2, "RNA", TRUE,
+                   "Prussin (non-control)", 1.63e-5, "RNA", FALSE,
+                   "Prussin (non-control)", 4.16e-5, "DNA", FALSE,
+                   "Rosario (non-control)", 1.21e-5, "RNA", FALSE,
+                   "Rosario (non-control)", 1.50e-4, "DNA", FALSE
+)
+
+# Collate new RA values (the figure quoted in the text above)
+ra_new <- tribble(~dataset, ~ra, ~na_type, ~panel_enriched,
+                  "Leung", 1.73e-5, "DNA", FALSE)
+
+
+# Plot
+scale_color_na <- purrr::partial(scale_color_brewer, palette="Set1",
+                                 name="Nucleic acid type")
+ra_comp <- bind_rows(ra_past, ra_new) %>% mutate(dataset = fct_inorder(dataset))
+g_ra_comp <- ggplot(ra_comp, aes(y=dataset, x=ra, color=na_type)) +
+  geom_point() +
+  scale_color_na() +
+  scale_x_log10(name="Relative abundance of human virus reads") +
+  theme_base + theme(axis.title.y = element_blank())
+g_ra_comp
+```
+
+# Human-infecting viruses: taxonomy and composition
+
+At the family level, most samples across all cities are dominated by *Papillomaviridae*, *Herpesviridae*, *Anelloviridae*, *Polyomaviridae*, and to a lesser extent *Poxviridae*:
+
+```{r}
+#| label: raise-hv-taxa
+
+# Get viral taxon names for putative HV reads
+viral_taxa$name[viral_taxa$taxid == 249588] <- "Mamastrovirus"
+viral_taxa$name[viral_taxa$taxid == 194960] <- "Kobuvirus"
+viral_taxa$name[viral_taxa$taxid == 688449] <- "Salivirus"
+viral_taxa$name[viral_taxa$taxid == 585893] <- "Picobirnaviridae"
+viral_taxa$name[viral_taxa$taxid == 333922] <- "Betapapillomavirus"
+viral_taxa$name[viral_taxa$taxid == 334207] <- "Betapapillomavirus 3"
+viral_taxa$name[viral_taxa$taxid == 369960] <- "Porcine type-C oncovirus"
+viral_taxa$name[viral_taxa$taxid == 333924] <- "Betapapillomavirus 2"
+viral_taxa$name[viral_taxa$taxid == 687329] <- "Anelloviridae"
+viral_taxa$name[viral_taxa$taxid == 325455] <- "Gammapapillomavirus"
+viral_taxa$name[viral_taxa$taxid == 333750] <- "Alphapapillomavirus"
+viral_taxa$name[viral_taxa$taxid == 694002] <- "Betacoronavirus"
+viral_taxa$name[viral_taxa$taxid == 334202] <- "Mupapillomavirus"
+viral_taxa$name[viral_taxa$taxid == 197911] <- "Alphainfluenzavirus"
+viral_taxa$name[viral_taxa$taxid == 186938] <- "Respirovirus"
+viral_taxa$name[viral_taxa$taxid == 333926] <- "Gammapapillomavirus 1"
+viral_taxa$name[viral_taxa$taxid == 337051] <- "Betapapillomavirus 1"
+viral_taxa$name[viral_taxa$taxid == 337043] <- "Alphapapillomavirus 4"
+viral_taxa$name[viral_taxa$taxid == 694003] <- "Betacoronavirus 1"
+viral_taxa$name[viral_taxa$taxid == 334204] <- "Mupapillomavirus 2"
+viral_taxa$name[viral_taxa$taxid == 334208] <- "Betapapillomavirus 4"
+viral_taxa$name[viral_taxa$taxid == 333928] <- "Gammapapillomavirus 2"
+viral_taxa$name[viral_taxa$taxid == 337039] <- "Alphapapillomavirus 2"
+viral_taxa$name[viral_taxa$taxid == 333929] <- "Gammapapillomavirus 3"
+viral_taxa$name[viral_taxa$taxid == 337042] <- "Alphapapillomavirus 7"
+viral_taxa$name[viral_taxa$taxid == 334203] <- "Mupapillomavirus 1"
+viral_taxa$name[viral_taxa$taxid == 333757] <- "Alphapapillomavirus 8"
+viral_taxa$name[viral_taxa$taxid == 337050] <- "Alphapapillomavirus 6"
+viral_taxa$name[viral_taxa$taxid == 333767] <- "Alphapapillomavirus 3"
+viral_taxa$name[viral_taxa$taxid == 333754] <- "Alphapapillomavirus 10"
+viral_taxa$name[viral_taxa$taxid == 687363] <- "Torque teno virus 24"
+viral_taxa$name[viral_taxa$taxid == 687342] <- "Torque teno virus 3"
+viral_taxa$name[viral_taxa$taxid == 687359] <- "Torque teno virus 20"
+viral_taxa$name[viral_taxa$taxid == 194441] <- "Primate T-lymphotropic virus 2"
+viral_taxa$name[viral_taxa$taxid == 334209] <- "Betapapillomavirus 5"
+
+
+mrg_hv_named <- mrg_hv %>% left_join(viral_taxa, by="taxid")
+
+# Discover viral species & genera for HV reads
+# Re-assign each read to its ancestor taxon at rank `out_rank`.
+#   read_db:  read table with seq_id, taxid, parent_taxid, rank and name
+#   taxid_db: taxonomy table joined on `taxid` (supplies parent_taxid, rank
+#             and name)
+#   out_rank: target rank; must be one of the names in `ranks` below
+#   verbose:  currently unused
+# Returns the reads with taxid/parent_taxid/rank/name raised to `out_rank`
+# where possible. Reads already above that rank, or whose lineage ends before
+# reaching it, are returned with their original taxid.
+raise_rank <- function(read_db, taxid_db, out_rank = "species", verbose = FALSE){
+  # Get higher ranks than search rank
+  ranks <- c("subspecies", "species", "subgenus", "genus", "subfamily", "family", "suborder", "order", "class", "subphylum", "phylum", "kingdom", "superkingdom")
+  rank_match <- match(out_rank, ranks)
+  # Fail loudly on an unknown rank; which.max() over an all-FALSE comparison
+  # would otherwise silently select "subspecies"
+  if (length(out_rank) != 1 || is.na(rank_match)) {
+    stop("`out_rank` must be one of: ", paste(ranks, collapse = ", "),
+         call. = FALSE)
+  }
+  high_ranks <- ranks[rank_match:length(ranks)]
+  # Merge read DB and taxid DB
+  reads <- read_db %>% select(-parent_taxid, -rank, -name) %>%
+    left_join(taxid_db, by="taxid")
+  # Extract sequences that are already at appropriate rank
+  reads_rank <- filter(reads, rank == out_rank)
+  # Drop sequences at a higher rank and return unclassified sequences
+  reads_norank <- reads %>% filter(rank != out_rank, !rank %in% high_ranks, !is.na(taxid))
+  while(nrow(reads_norank) > 0){ # As long as there are unclassified sequences...
+    # Promote read taxids and re-merge with taxid DB, then re-classify and filter
+    reads_remaining <- reads_norank %>% mutate(taxid = parent_taxid) %>%
+      select(-parent_taxid, -rank, -name) %>%
+      left_join(taxid_db, by="taxid")
+    reads_rank <- reads_remaining %>% filter(rank == out_rank) %>%
+      bind_rows(reads_rank)
+    reads_norank <- reads_remaining %>%
+      filter(rank != out_rank, !rank %in% high_ranks, !is.na(taxid))
+  }
+  # Finally, extract and append reads that were excluded during the process
+  reads_dropped <- reads %>% filter(!seq_id %in% reads_rank$seq_id)
+  reads_out <- reads_rank %>% bind_rows(reads_dropped) %>%
+    select(-parent_taxid, -rank, -name) %>%
+    left_join(taxid_db, by="taxid")
+  return(reads_out)
+}
+hv_reads_species <- raise_rank(mrg_hv_named, viral_taxa, "species")
+hv_reads_genus <- raise_rank(mrg_hv_named, viral_taxa, "genus")
+hv_reads_family <- raise_rank(mrg_hv_named, viral_taxa, "family")
+```
+
+```{r}
+#| label: hv-family
+#| fig-height: 6
+#| fig-width: 9
+
+# Families whose share of HV reads exceeds this in at least one sample are
+# shown by name; the rest are pooled as "Other"
+threshold_major_family <- 0.05
+
+# Count reads for each human-viral family
+hv_family_counts <- hv_reads_family %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_hv = n_reads_hv/sum(n_reads_hv))
+
+# Identify high-ranking families and group others
+hv_family_major_tab <- hv_family_counts %>% group_by(name) %>%
+  filter(p_reads_hv == max(p_reads_hv)) %>% filter(row_number() == 1) %>%
+  arrange(desc(p_reads_hv)) %>% filter(p_reads_hv > threshold_major_family)
+hv_family_counts_major <- hv_family_counts %>%
+  mutate(name_display = ifelse(name %in% hv_family_major_tab$name, name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_hv = sum(n_reads_hv), p_reads_hv = sum(p_reads_hv),
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display,
+                               levels = c(hv_family_major_tab$name, "Other")))
+hv_family_counts_display <- hv_family_counts_major %>%
+  rename(p_reads = p_reads_hv, classification = name_display)
+
+# Plot
+g_hv_family <- g_comp_base +
+  geom_col(data=hv_family_counts_display, position = "stack") +
+  scale_y_continuous(name="% HV Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral family") +
+  labs(title="Family composition of human-viral reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+g_hv_family
+```
+
+In investigating individual viral families, to avoid distortions from a few rare reads, I restricted myself to samples where that family made up at least 10% of human-viral reads.
+
+As usual, *Papillomaviridae* reads are divided among many different viral species. In this case, Betapapillomavirus 1 and 2 are the most prevalent across samples, but many other alpha-, beta-, gamma-, and mupapillomaviruses are highly prevalent in at least some samples.
+
+```{r}
+#| label: hv-species-papilloma
+#| fig-height: 6
+#| fig-width: 9
+
+
+# Species with a share above this in at least one sample are shown by name
+threshold_major_species <- 0.4
+taxid_papilloma <- 151340
+
+# Samples where the family reaches 10% of HV reads, and the reads assigned to
+# the family in those samples
+papilloma_samples <- hv_family_counts %>%
+  filter(taxid == taxid_papilloma, p_reads_hv >= 0.1) %>%
+  pull(sample)
+papilloma_ids <- hv_reads_family %>%
+  filter(taxid == taxid_papilloma, sample %in% papilloma_samples) %>%
+  pull(seq_id)
+
+# Per-sample read counts and shares for each species among those reads
+papilloma_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% papilloma_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_papilloma = n_reads_hv/sum(n_reads_hv))
+
+# Keep each species' best sample, retain the species above the threshold
+# (largest share first) and pool every other species as "Other"
+papilloma_species_major_tab <- papilloma_species_counts %>%
+  group_by(name) %>%
+  filter(p_reads_papilloma == max(p_reads_papilloma)) %>%
+  filter(row_number() == 1) %>%
+  filter(p_reads_papilloma > threshold_major_species) %>%
+  arrange(desc(p_reads_papilloma))
+papilloma_species_counts_major <- papilloma_species_counts %>%
+  mutate(name_display = ifelse(name %in% papilloma_species_major_tab$name,
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_papilloma = sum(n_reads_hv),
+            p_reads_papilloma = sum(p_reads_papilloma),
+            .groups="drop") %>%
+  mutate(name_display = factor(
+    name_display,
+    levels = c(papilloma_species_major_tab$name, "Other")
+  ))
+papilloma_species_counts_display <- papilloma_species_counts_major %>%
+  rename(p_reads = p_reads_papilloma, classification = name_display)
+
+# Stacked composition plot
+g_papilloma_species <- g_comp_base +
+  geom_col(data=papilloma_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Papillomaviridae Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Papillomaviridae reads") +
+  guides(fill=guide_legend(ncol=3)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_papilloma_species
+
+# Totals and mean share per species, for the text
+papilloma_species_collate <- papilloma_species_counts %>%
+  group_by(name, taxid) %>%
+  summarize(n_reads_tot = sum(n_reads_hv),
+            p_reads_mean = mean(p_reads_papilloma),
+            .groups="drop") %>%
+  arrange(desc(n_reads_tot))
+```
+
+In terms of total reads across samples, herpesviruses are dominated by Epstein-Barr virus (Human gammaherpesvirus 4), HSV-1 (Human alphaherpesvirus 1), and human cytomegalovirus (Human betaherpesvirus 5). However, numerous other herpesviruses are also present.
+
+```{r}
+#| label: hv-species-herpes
+#| fig-height: 6
+#| fig-width: 9
+
+# Species with a share above this in at least one sample are shown by name
+threshold_major_species <- 0.4
+taxid_herpes <- viral_taxa %>% filter(name == "Herpesviridae") %>% pull(taxid)
+
+# Samples where the family reaches 10% of HV reads, and the reads assigned to
+# the family in those samples
+herpes_samples <- hv_family_counts %>%
+  filter(taxid == taxid_herpes, p_reads_hv >= 0.1) %>%
+  pull(sample)
+herpes_ids <- hv_reads_family %>%
+  filter(taxid == taxid_herpes, sample %in% herpes_samples) %>%
+  pull(seq_id)
+
+# Per-sample read counts and shares for each species among those reads
+herpes_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% herpes_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_herpes = n_reads_hv/sum(n_reads_hv))
+
+# Keep each species' best sample, retain the species above the threshold
+# (largest share first) and pool every other species as "Other"
+herpes_species_major_tab <- herpes_species_counts %>%
+  group_by(name) %>%
+  filter(p_reads_herpes == max(p_reads_herpes)) %>%
+  filter(row_number() == 1) %>%
+  filter(p_reads_herpes > threshold_major_species) %>%
+  arrange(desc(p_reads_herpes))
+herpes_species_counts_major <- herpes_species_counts %>%
+  mutate(name_display = ifelse(name %in% herpes_species_major_tab$name,
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_herpes = sum(n_reads_hv),
+            p_reads_herpes = sum(p_reads_herpes),
+            .groups="drop") %>%
+  mutate(name_display = factor(
+    name_display,
+    levels = c(herpes_species_major_tab$name, "Other")
+  ))
+herpes_species_counts_display <- herpes_species_counts_major %>%
+  rename(p_reads = p_reads_herpes, classification = name_display)
+
+# Stacked composition plot
+g_herpes_species <- g_comp_base +
+  geom_col(data=herpes_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% herpesviridae Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Herpesviridae reads") +
+  guides(fill=guide_legend(ncol=3)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_herpes_species
+
+# Totals and mean share per species, for the text
+herpes_species_collate <- herpes_species_counts %>%
+  group_by(name, taxid) %>%
+  summarize(n_reads_tot = sum(n_reads_hv),
+            p_reads_mean = mean(p_reads_herpes),
+            .groups="drop") %>%
+  arrange(desc(n_reads_tot))
+```
+
+In sharp contrast to the above, my pipeline classifies the great majority of anellovirus reads in all samples into a single species, torque teno virus. Looking online, it looks like there are a lot of "torque teno viruses" within *Anelloviridae* – for example, Wikipedia says that the genus *Alphatorquevirus* contains \>20 numbered torque teno viruses – so I'm not sure exactly which virus this refers to.
+
+```{r}
+#| label: hv-species-anello
+#| fig-height: 6
+
+# Species with a share above this in at least one sample are shown by name
+threshold_major_species <- 0.1
+taxid_anello <- viral_taxa %>% filter(name == "Anelloviridae") %>% pull(taxid)
+
+# Samples where the family reaches 10% of HV reads, and the reads assigned to
+# the family in those samples
+anello_samples <- hv_family_counts %>%
+  filter(taxid == taxid_anello, p_reads_hv >= 0.1) %>%
+  pull(sample)
+anello_ids <- hv_reads_family %>%
+  filter(taxid == taxid_anello, sample %in% anello_samples) %>%
+  pull(seq_id)
+
+# Per-sample read counts and shares for each species among those reads
+anello_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% anello_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_anello = n_reads_hv/sum(n_reads_hv))
+
+# Keep each species' best sample, retain the species above the threshold
+# (largest share first) and pool every other species as "Other"
+anello_species_major_tab <- anello_species_counts %>%
+  group_by(name) %>%
+  filter(p_reads_anello == max(p_reads_anello)) %>%
+  filter(row_number() == 1) %>%
+  filter(p_reads_anello > threshold_major_species) %>%
+  arrange(desc(p_reads_anello))
+anello_species_counts_major <- anello_species_counts %>%
+  mutate(name_display = ifelse(name %in% anello_species_major_tab$name,
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_anello = sum(n_reads_hv),
+            p_reads_anello = sum(p_reads_anello),
+            .groups="drop") %>%
+  mutate(name_display = factor(
+    name_display,
+    levels = c(anello_species_major_tab$name, "Other")
+  ))
+anello_species_counts_display <- anello_species_counts_major %>%
+  rename(p_reads = p_reads_anello, classification = name_display)
+
+# Stacked composition plot
+g_anello_species <- g_comp_base +
+  geom_col(data=anello_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Anelloviridae Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Anelloviridae reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_anello_species
+
+# Totals and mean share per species, for the text
+anello_species_collate <- anello_species_counts %>%
+  group_by(name, taxid) %>%
+  summarize(n_reads_tot = sum(n_reads_hv),
+            p_reads_mean = mean(p_reads_anello),
+            .groups="drop") %>%
+  arrange(desc(n_reads_tot))
+```
+
+Polyomaviruses are intermediate; most samples are dominated by a single species, *Alphapolyomavirus quintihominis*, but several other viruses in the family are also present.
+
+```{r}
+#| label: hv-species-polyoma
+#| fig-height: 6
+#| fig-width: 9
+
+# Species with a share above this in at least one sample are shown by name
+threshold_major_species <- 0.1
+taxid_polyoma <- viral_taxa %>% filter(name == "Polyomaviridae") %>% pull(taxid)
+
+# Samples where the family reaches 10% of HV reads, and the reads assigned to
+# the family in those samples
+polyoma_samples <- hv_family_counts %>%
+  filter(taxid == taxid_polyoma, p_reads_hv >= 0.1) %>%
+  pull(sample)
+polyoma_ids <- hv_reads_family %>%
+  filter(taxid == taxid_polyoma, sample %in% polyoma_samples) %>%
+  pull(seq_id)
+
+# Per-sample read counts and shares for each species among those reads
+polyoma_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% polyoma_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_polyoma = n_reads_hv/sum(n_reads_hv))
+
+# Keep each species' best sample, retain the species above the threshold
+# (largest share first) and pool every other species as "Other"
+polyoma_species_major_tab <- polyoma_species_counts %>%
+  group_by(name) %>%
+  filter(p_reads_polyoma == max(p_reads_polyoma)) %>%
+  filter(row_number() == 1) %>%
+  filter(p_reads_polyoma > threshold_major_species) %>%
+  arrange(desc(p_reads_polyoma))
+polyoma_species_counts_major <- polyoma_species_counts %>%
+  mutate(name_display = ifelse(name %in% polyoma_species_major_tab$name,
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_polyoma = sum(n_reads_hv),
+            p_reads_polyoma = sum(p_reads_polyoma),
+            .groups="drop") %>%
+  mutate(name_display = factor(
+    name_display,
+    levels = c(polyoma_species_major_tab$name, "Other")
+  ))
+polyoma_species_counts_display <- polyoma_species_counts_major %>%
+  rename(p_reads = p_reads_polyoma, classification = name_display)
+
+# Stacked composition plot
+g_polyoma_species <- g_comp_base +
+  geom_col(data=polyoma_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Polyomaviridae Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Polyomaviridae reads") +
+  guides(fill=guide_legend(ncol=2)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_polyoma_species
+
+# Totals and mean share per species, for the text
+polyoma_species_collate <- polyoma_species_counts %>%
+  group_by(name, taxid) %>%
+  summarize(n_reads_tot = sum(n_reads_hv),
+            p_reads_mean = mean(p_reads_polyoma),
+            .groups="drop") %>%
+  arrange(desc(n_reads_tot))
+```
+
+Finally, poxvirus reads in most samples are dominated by molluscum contagiosum virus (which I expect to be real), followed by Orf virus (which I expect to be fake). These expectations are borne out by BLAST alignments (below).
+
+```{r}
+#| label: hv-species-pox
+#| fig-height: 6
+
+# Species-level composition of Poxviridae reads: pick the samples that pass
+# the family-level cutoff, count reads per species within each sample, lump
+# minor species into "Other", and plot stacked proportions.
+
+# A species is shown by name if its share of poxvirus reads exceeds this
+# value in at least one sample; everything else is grouped as "Other".
+threshold_major_species <- 0.1
+# Minimum p_reads_hv a sample needs for Poxviridae to be included
+# (presumably the family's share of human-viral reads -- confirm against
+# the definition of hv_family_counts).
+threshold_pox_sample <- 0.1
+taxid_pox <- viral_taxa %>% filter(name == "Poxviridae") %>% pull(taxid)
+
+# Get set of poxviridae reads (only from samples passing the cutoff)
+pox_samples <- hv_family_counts %>% filter(taxid == taxid_pox) %>%
+  filter(p_reads_hv >= threshold_pox_sample) %>%
+  pull(sample)
+pox_ids <- hv_reads_family %>%
+  filter(taxid == taxid_pox, sample %in% pox_samples) %>%
+  pull(seq_id)
+
+# Count reads for each poxviridae species
+# (p_reads_pox is each species' fraction of the sample's counted reads)
+pox_species_counts <- hv_reads_species %>%
+  filter(seq_id %in% pox_ids) %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name = "n_reads_hv") %>%
+  group_by(sample, date_alias, city) %>%
+  mutate(p_reads_pox = n_reads_hv/sum(n_reads_hv))
+
+# Identify high-ranking species and group others
+# (one row per species: its maximum per-sample share, kept only if that
+# maximum exceeds threshold_major_species)
+pox_species_major_tab <- pox_species_counts %>% group_by(name) %>%
+  filter(p_reads_pox == max(p_reads_pox)) %>%
+  filter(row_number() == 1) %>%
+  arrange(desc(p_reads_pox)) %>%
+  filter(p_reads_pox > threshold_major_species)
+pox_species_counts_major <- pox_species_counts %>%
+  mutate(name_display = ifelse(name %in% pox_species_major_tab$name,
+                               name, "Other")) %>%
+  group_by(sample, date_alias, city, name_display) %>%
+  summarize(n_reads_pox = sum(n_reads_hv),
+            p_reads_pox = sum(p_reads_pox),
+            .groups="drop") %>%
+  mutate(name_display = factor(name_display,
+                               levels = c(pox_species_major_tab$name, "Other")))
+# Rename to the generic column names used by the shared composition plot
+pox_species_counts_display <- pox_species_counts_major %>%
+  rename(p_reads = p_reads_pox, classification = name_display)
+
+# Plot
+g_pox_species <- g_comp_base +
+  geom_col(data=pox_species_counts_display, position = "stack") +
+  scale_y_continuous(name="% Poxviridae Reads", limits=c(0,1.01),
+                     breaks = seq(0,1,0.2),
+                     expand=c(0,0), labels = function(y) y*100) +
+  scale_fill_manual(values=palette_viral, name = "Viral species") +
+  labs(title="Species composition of Poxviridae reads") +
+  guides(fill=guide_legend(ncol=4)) +
+  theme(plot.title = element_text(size=rel(1.4), hjust=0, face="plain"))
+
+g_pox_species
+
+# Get most prominent species for text
+pox_species_collate <- pox_species_counts %>% group_by(name, taxid) %>%
+  summarize(n_reads_tot = sum(n_reads_hv), p_reads_mean = mean(p_reads_pox), .groups="drop") %>%
+  arrange(desc(n_reads_tot))
+```
+
+```{r}
+#| label: hv-blast-hits
+#| fig-width: 6
+
+# For each reference species, tabulate which subject taxa its reads hit in
+# the BLAST results and plot the fraction of reads matching each taxon.
+
+# Configure
+# Reference taxids; the first is relabelled "MCV" for display below
+# (presumably molluscum contagiosum virus and Orf virus, per the text above
+# -- confirm against viral_taxa).
+ref_taxids_hv <- c(10279, 10258)
+# vapply() pins the result to one character name per taxid
+ref_names_hv <- vapply(ref_taxids_hv,
+                       function(x) viral_taxa %>% filter(taxid == x) %>% pull(name) %>% first,
+                       character(1))
+# Minimum fraction of a species' reads a subject taxon must match to be shown
+p_threshold <- 0.1
+
+# Get taxon names
+tax_names_path <- file.path(data_dir_base, "taxid-names.tsv.gz")
+tax_names <- read_tsv(tax_names_path, show_col_types = FALSE)
+
+# Add missing names
+tax_names_new <- tribble(~staxid, ~name,
+                         3050295, "Cytomegalovirus humanbeta5",
+                         459231, "FLAG-tagging vector pFLAG97-TSR",
+                         257877, "Macaca thibetana thibetana",
+                         256321, "Lentiviral transfer vector pHsCXW",
+                         419242, "Shuttle vector pLvCmvMYOCDHA",
+                         419243, "Shuttle vector pLvCmvLacZ",
+                         421868, "Cloning vector pLvCmvLacZ.Gfp",
+                         421869, "Cloning vector pLvCmvMyocardin.Gfp",
+                         426303, "Lentiviral vector pNL-GFP-RRE(SA)",
+                         436015, "Lentiviral transfer vector pFTMGW",
+                         454257, "Shuttle vector pLvCmvMYOCD2aHA",
+                         476184, "Shuttle vector pLV.mMyoD::ERT2.eGFP",
+                         476185, "Shuttle vector pLV.hMyoD.eGFP",
+                         591936, "Piliocolobus tephrosceles",
+                         627481, "Lentiviral transfer vector pFTM3GW",
+                         680261, "Self-inactivating lentivirus vector pLV.C-EF1a.cyt-bGal.dCpG",
+                         2952778, "Expression vector pLV[Exp]-EGFP:T2A:Puro-EF1A",
+                         3022699, "Vector PAS_122122",
+                         3025913, "Vector pSIN-WP-mPGK-GDNF",
+                         3105863, "Vector pLKO.1-ZsGreen1",
+                         3105864, "Vector pLKO.1-ZsGreen1 mouse Wfs1 shRNA",
+                         3108001, "Cloning vector pLVSIN-CMV_Neo_v4.0",
+                         3109234, "Vector pTwist+Kan+High",
+                         3117662, "Cloning vector pLV[Exp]-CBA>P301L",
+                         3117663, "Cloning vector pLV[Exp]-CBA>P301L:T2A:mRuby3",
+                         3117664, "Cloning vector pLV[Exp]-CBA>hMAPT[NM_005910.6](ns):T2A:mRuby3",
+                         3117665, "Cloning vector pLV[Exp]-CBA>mRuby3",
+                         3117666, "Cloning vector pLV[Exp]-CBA>mRuby3/NFAT3 fusion protein",
+                         3117667, "Cloning vector pLV[Exp]-Neo-mPGK>{EGFP-hSEPT6}",
+                         438045, "Xenotropic MuLV-related virus",
+                         447135, "Myodes glareolus",
+                         590745, "Mus musculus mobilized endogenous polytropic provirus",
+                         181858, "Murine AIDS virus-related provirus",
+                         356663, "Xenotropic MuLV-related virus VP35",
+                         356664, "Xenotropic MuLV-related virus VP42",
+                         373193, "Xenotropic MuLV-related virus VP62",
+                         286419, "Canis lupus dingo",
+                         415978, "Sus scrofa scrofa",
+                         494514, "Vulpes lagopus",
+                         3082113, "Rangifer tarandus platyrhynchus",
+                         3119969, "Bubalus kerabau")
+tax_names <- bind_rows(tax_names, tax_names_new)
+
+# Get matches
+# (n_seq = number of reads per reference species, used as the denominator;
+# sname is the name of the BLAST subject taxon)
+hv_blast_staxids <- hv_reads_species %>% filter(taxid %in% ref_taxids_hv) %>%
+  group_by(taxid) %>% mutate(n_seq = n()) %>%
+  left_join(blast_results_paired, by="seq_id") %>%
+  mutate(staxid = as.integer(staxid)) %>%
+  left_join(tax_names %>% rename(sname=name), by="staxid")
+
+# Count matches
+hv_blast_counts <- hv_blast_staxids %>%
+  group_by(taxid, name, staxid, sname, n_seq) %>%
+  count %>% mutate(p=n/n_seq)
+
+# Subset to major matches (at most 25 subject taxa per reference species)
+hv_blast_counts_major <- hv_blast_counts %>%
+  filter(n>1, p>p_threshold, !is.na(staxid)) %>%
+  arrange(desc(p)) %>% group_by(taxid) %>%
+  filter(row_number() <= 25) %>%
+  mutate(name_display = ifelse(name == ref_names_hv[1], "MCV", name))
+
+# Plot
+# p is a proportion in [0, 1]; scale the tick labels by 100 so they agree
+# with the "%" axis title, as the composition plots above do.
+g_hv_blast <- ggplot(hv_blast_counts_major, mapping=aes(x=p, y=sname)) +
+  geom_col() +
+  facet_grid(name_display~., scales="free_y", space="free_y") +
+  scale_x_continuous(name="% mapped reads", limits=c(0,1),
+                     breaks=seq(0,1,0.2), expand=c(0,0),
+                     labels = function(x) x*100) +
+  theme_base + theme(axis.title.y = element_blank())
+g_hv_blast
+```
+
+Finally, here again are the overall relative abundances of the specific viral genera I picked out manually in my last entry:
+
+```{r}
+#| fig-height: 5
+#| label: ra-genera
+
+# Relative abundance (reads assigned to a genus / raw reads) of a hand-picked
+# list of viral genera, summed per city.
+
+# Define reference genera
+path_genera_rna <- c("Mamastrovirus", "Enterovirus", "Salivirus", "Kobuvirus", "Norovirus", "Sapovirus", "Rotavirus", "Alphacoronavirus", "Betacoronavirus", "Alphainfluenzavirus", "Betainfluenzavirus", "Lentivirus")
+path_genera_dna <- c("Mastadenovirus", "Alphapolyomavirus", "Betapolyomavirus", "Alphapapillomavirus", "Betapapillomavirus", "Gammapapillomavirus", "Orthopoxvirus", "Simplexvirus",
+                     "Lymphocryptovirus", "Cytomegalovirus", "Dependoparvovirus")
+path_genera <- bind_rows(tibble(name=path_genera_rna, genome_type="RNA genome"),
+                         tibble(name=path_genera_dna, genome_type="DNA genome")) %>%
+  left_join(viral_taxa, by="name")
+
+# Count in each sample
+n_path_genera <- hv_reads_genus %>%
+  group_by(sample, date_alias, city, name, taxid) %>%
+  count(name="n_reads_viral") %>%
+  inner_join(path_genera, by=c("name", "taxid")) %>%
+  left_join(read_counts_raw, by=c("sample", "date_alias", "city")) %>%
+  mutate(p_reads_viral = n_reads_viral/n_reads_raw)
+
+# Pivot out and back to add zero lines
+# (values_fill=0 creates an explicit zero count for every sample/genus pair
+# that had no reads; the joins then re-attach sample and genus metadata)
+n_path_genera_out <- n_path_genera %>% ungroup %>% select(sample, name, n_reads_viral) %>%
+  pivot_wider(names_from="name", values_from="n_reads_viral", values_fill=0) %>%
+  pivot_longer(-sample, names_to="name", values_to="n_reads_viral") %>%
+  left_join(read_counts_raw, by="sample") %>%
+  left_join(path_genera, by="name") %>%
+  mutate(p_reads_viral = n_reads_viral/n_reads_raw)
+
+# Aggregate across samples within each city (one row per genus per city)
+# NOTE(review): na_type is hard-coded to "DNA" for every row -- confirm this
+# matches the dataset.
+n_path_genera_stype <- n_path_genera_out %>%
+  group_by(name, taxid, genome_type, city) %>%
+  summarize(n_reads_raw = sum(n_reads_raw),
+            n_reads_viral = sum(n_reads_viral), .groups = "drop") %>%
+  mutate(sample="All samples", date="All dates",
+         p_reads_viral = n_reads_viral/n_reads_raw,
+         na_type = "DNA")
+
+# Plot
+g_path_genera <- ggplot(n_path_genera_stype,
+                        aes(y=name, x=p_reads_viral, color=city)) +
+  geom_point() +
+  scale_x_log10(name="Relative abundance") +
+  scale_color_city() +
+  facet_grid(genome_type~., scales="free_y") +
+  theme_base + theme(axis.title.y = element_blank())
+g_path_genera
+```
+ +# Conclusion + +This is the third, largest, and final of this tranche of air-sampling datasets that I've run through this pipeline. Many of the high-level findings were similar to Prussin and Rosario, including high relative abundance of human reads, low total viral reads, an absence of enteric viruses, and high abundance of papillomaviruses among human-infecting viruses. + +In the future, I'll do a more in-depth comparative analysis across different datasets to compare the abundance of different viruses. For now, though, there are some major updates to the pipeline I want to make before I do any more public analyses. diff --git a/notebooks/2024-04-12_rosario.qmd b/notebooks/2024-04-12_rosario.qmd index 14078c2..241e65c 100644 --- a/notebooks/2024-04-12_rosario.qmd +++ b/notebooks/2024-04-12_rosario.qmd @@ -32,7 +32,7 @@ theme_kit <- theme_rotate + theme( tnl <- theme(legend.position = "none") ``` -Continuing our look at air sampling datasets, we turn to Rosario et al. (2018), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. As in [Prussin](https://data.securebio.org/wills-public-notebook/notebooks/2024-04-12_prussin.html), samples were eluted from filters (in this case MERV-8, so less stringent than Prussin's MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads. +Continuing our look at air sampling datasets, we turn to [Rosario et al. (2018)](https://pubs.acs.org/doi/10.1021/acs.est.7b04203), another study of air filters, this time from HVAC filters from an undergraduate dorm building at the University of Colorado campus in Boulder. 
As in [Prussin](https://data.securebio.org/wills-public-notebook/notebooks/2024-04-12_prussin.html), samples were eluted from filters (in this case MERV-8, so less stringent than Prussin's MERV-14 filters) and underwent both RNA and DNA sequencing – this time on an Illumina MiSeq with 2x250bp reads. # The raw data