diff --git a/io/slow5/data/example.slow5 b/io/slow5/data/example.slow5 new file mode 100644 index 00000000..ef6cdb8f --- /dev/null +++ b/io/slow5/data/example.slow5 @@ -0,0 +1,51 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +@asic_id_eeprom 5910407 +@asic_temp 31.649540 +@asic_version IA02D +@auto_update 0 +@auto_update_source https://mirror.oxfordnanoportal.com/software/MinKNOW/ +@barcoding_enabled 0 +@basecall_config_filename dna_r9.4.1_450bps_sup.cfg +@bream_is_standard 0 +@configuration_version 4.3.11 +@device_id MN33517 +@device_type minion +@distribution_status stable +@distribution_version 21.06.10 +@exp_script_name sequencing/sequencing_MIN106_DNA:FLO-FLG001:SQK-LSK109 +@exp_script_purpose sequencing_run +@exp_start_time 2021-10-25T16:34:06.237443-07:00 +@experiment_duration_set 1440 +@experiment_type genomic_dna +@file_type multi-read +@file_version 2.3 +@flongle_adapter_id FA-02253 +@flow_cell_id AEI279 +@flow_cell_product_code FLO-FLG001 +@guppy_version 5.0.13+bbad52987 +@heatsink_temp 35.003906 +@host_product_code unknown +@host_product_serial_number . 
+@hostname trilo +@installation_type nc +@local_basecalling 1 +@local_firmware_file 1 +@operating_system ubuntu 18.04 +@package bream4 +@package_version 6.2.6 +@pore_type not_set +@protocol_group_id op_1 +@protocol_run_id ebe410af-7ff2-4117-9bff-f1bbae5be87d +@protocol_start_time 2021-10-25T16:33:39.151434-07:00 +@protocols_version 6.2.6 +@run_id 521e7966d4fd3d8fd57e3c264b9babd253db4c96 +@sample_frequency 4000 +@sample_id no_sample +@sequencing_kit sqk-lsk109 +@usb_config MinION_fx3_1.1.1_ONT#MinION_fpga_1.1.0#bulk#Auto +@version 4.3.12 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463,467,454,465,463,450,450,449,449,391,402,412,420,407,414,423,423,412,401,408,407,432,393,417,411,404,414,408,412,405,411,422,427,372,376,404,411,423,395,419,418,411,405,408,408,419,427,411,419,405,409,439,416,424,402,439,416,411,426,391,393,414,402,413,396,404,420,419,426,393,415,423,407,423,406,412,426,412,390,410,408,421,434,407,417,417,420,410,434,427,425,429,421,424,438,412,433,414,406,415,403,415,420,409,418,413,441,411,406,413,412,394,421,417,417,442,411,417,418,413,410,421,408,428,410,434,403,419,413,422,412,435,434,421,427,432,418,425,417,410,414,413,412,414,424,420,416,408,430,406,424,420,419,431,421,439,435,416,424,401,443,427,406,415,423,430,418,418,417,421,425,434,427,434,405,428,428,423,427,409,411,403,412,406,423,409,427,421,437,424,411,422,408,414,421,409,405,416,425,431,437,427,423,426,428,425,427,430,411,420,414,416,432,426,414,405,415,404,400,410,420,409,408,432,417,412,423,421,424,411,423,413,430,420,427,421,430,405,417,415,430,416,419,430,425,415,421,433,418,415,413
,417,409,422,417,412,420,426,427,433,430,429,432,414,409,434,438,422,416,405,424,411,425,431,434,434,415,423,421,426,414,421,411,420,416,411,437,437,423,423,420,414,436,410,423,419,427,417,411,411,430,423,423,421,407,414,420,419,413,413,413,407,421,432,434,440,432,426,427,432,409,417,417,423,417,406,412,426,421,426,430,421,418,420,420,422,419,423,415,424,413,414,432,430,405,398,411,411,409,408,424,425,409,417,420,411,439,428,436,433,411,399,402,409,424,408,415,416,414,400,408,416,419,402,424,435,410,401,414,429,411,406,418,423,433,410,420,422,428,428,410,406,409,430,397,442,401,422,414,399,419,428,433,424,417,413,416,410,420,427,422,428,435,416,431,426,420,432,413,390,432,418,413,433,421,402,404,414,420,427,423,413,420,410,424,413,418,407,420,441,418,419,412,414,435,418,433,439,414,420,438,423,417,434,431,424,439,438,423,420,422,421,445,415,424,421,425,437,415,430,416,425,435,426,432,414,424,413,425,415,401,429,413,432,431,425,419,425,434,428,431,415,411,411,412,409,424,437,406,418,420,415,397,410,411,410,418,407,408,414,408,407,416,426,419,413,407,411,435,397,431,415,408,418,430,409,418,414,415,427,419,419,420,426,428,426,426,430,417,427,410,422,419,409,429,410,416,412,410,412,422,428,425,432,419,412,430,433,425,425,406,408,413,411,422,418,432,411,421,411,417,430,415,405,412,421,426,425,410,424,419,407,430,417,431,417,421,430,402,413,420,426,419,414,429,430,425,428,424,417,420,403,416,420,426,412,416,425,424,414,414,412,430,406,432,409,427,397,413,431,414,415,426,407,435,425,425,431,437,434,404,421,432,414,434,424,429,425,424,428,411,419,430,425,414,437,431,425,448,406,410,399,407,409,431,407,425,418,413,411,432,415,423,430,415,437,410,420,431,440,422,424,427,409,434,425,434,434,419,423,440,425,416,444,436,423,427,419,422,420,437,425,410,416,422,420,429,427,410,427,415,435,397,402,422,415,407,419,412,420,411,432,427,415,419,425,430,425,411,428,424,414,434,420,417,413,429,400,411,412,420,410,428,412,426,424,432,432,420,415,421,428,415,428,432,414,420,428,436,416,427
,424,420,433,414,420,419,414,443,412,427,411,417,413,426,425,417,416,423,445,431,435,425,414,421,421,422,420,436,414,414,422,418,414,433,429,412,430,415,408,428,418,428,427,405,425,435,416,415,416,421,434,425,442,407,427,430,433,423,418,419,407,417,406,423,417,421,408,423,431,432,414,425,421,409,391,402,421,410,413,424,425,415,435,415,450,425,426,416,437,427,424,411,434,438,415,434,422,425,414,432,422,421,438,423,430,432,434,421,425,435,423,432,423,431,423,409,425,407,423,419,430,414,440,406,429,417,440,420,444,434,415,419,430,417,414,427,430,426,403,420,441,422,429,436,410,424,445,427,435,411,439,422,425,430,429,434,430,421,434,425,417,432,431,439,418,423,429,430,416,435,437,413,421,430,419,426,422,415,432,429,437,415,422,418,420,413,416,433,409,417,417,431,430,423,432,415,437,418,429,414,399,422,416,426,422,418,414,412,420,407,436,433,419,435,431,430,426,418,410,423,426,420,409,434,425,435,435,421,410,416,425,425,426,442,434,423,419,431,427,421,436,431,422,426,418,420,416,420,417,424,405,432,399,425,418,421,433,428,426,421,415,419,430,422,438,419,428,433,437,431,419,430,434,416,431,425,409,431,431,428,436,439,425,426,418,425,406,416,411,421,434,421,415,416,416,417,411,421,413,418,406,403,436,409,431,418,436,432,421,433,423,431,424,408,418,421,414,430,426,402,407,424,444,425,432,426,424,437,437,424,415,425,421,426,412,444,425,423,420,417,431,427,404,409,411,423,404,421,419,425,407,409,413,425,423,411,413,395,417,417,433,419,432,413,422,408,404,398,423,416,426,418,427,434,428,414,428,413,433,417,415,413,435,416,421,416,399,414,423,415,409,424,408,411,418,424,426,418,438,412,420,421,407,416,430,417,426,412,424,407,423,412,405,417,408,422,416,408,406,406,424,418,415,408,425,430,416,430,396,409,410,410,411,412,422,396,415,412,409,408,407,424,423,410,410,427,422,427,396,409,414,431,421,403,422,410,422,408,417,421,411,406,415,418,405,403,427,435,402,399,407,420,401,408,422,445,406,407,426,400,402,398,422,414,413,417,421,389,414,418,409,425,419,431,417,413,427,418,405,409
,405,410,411,387,388,383,387,383,386,409,389,387,381,396,429,452,474,462,469,474,448,455,450,462,456,431,495,489,620,675,660,664,680,683,674,689,680,703,675,668,672,671,672,668,692,677,675,689,673,661,690,695,717,713,673,700,684,687,686,669,674,690,694,675,658,648,669,689,666,670,672,674,706,675,667,674,666,674,678,679,683,679,622,652,674,663,628,631,626,627,644,623,627,621,631,589,559,589,631,633,619,620,629,629,625,613,624,626,620,633,633,632,634,618,624,630,563,524,513,487,504,367,328,343,327,353,336,356,347,341,331,358,343,357,338,369,349,354,344,348,340,347,349,343,360,340,338,355,348,339,347,324,333,343,339,351,344,344,346,363,366,354,350,350,348,357,344,328,340,340,333,338,341,343,345,345,346,335,348,365,350,359,343,347,339,341,328,351,358,349,333,347,332,331,320,320,319,333,348,342,327,329,316,321,324,320,305,331,332,321,322,307,468,462,473,462,468,444,447,457,444,456,468,439,459,364,356,357,374,373,474,482,479,484,490,466,475,479,485,461,473,475,482,486,468,491,480,497,505,479,484,492,501,487,489,485,485,491,489,496,485,484,463,493,491,491,486,508,469,491,488,474,488,484,487,495,470,506,490,455,481,488,473,440,466,442,416,403,398,409,403,409,414,424,427,403,395,415,420,414,396,397,411,388,375,375,381,365,371,390,436,449,460,455,458,479,450,466,452,459,443,459,472,435,456,471,473,454,460,456,460,454,452,465,446,451,465,476,464,462,468,458,447,461,432,447,489,504,501,490,486,497,511,491,492,500,502,508,493,500,499,499,501,500,501,503,489,509,480,510,450,449,415,433,439,426,457,446,448,442,455,443,451,430,447,443,440,426,453,433,437,434,437,437,426,443,429,439,454,449,462,450,459,481,455,438,431,463,512,469,481,459,438,435,450,455,437,462,435,449,448,432,444,449,444,434,434,450,441,435,446,444,453,456,449,446,447,437,437,455,441,451,455,432,459,466,459,454,444,453,437,457,455,458,454,453,439,448,432,309,266,279,268,266,243,276,270,272,257,288,297,279,282,288,248,263,269,271,279,272,265,268,276,265,268,265,268,275,266,263,265,270,277,269,266,272,298,462,421,447
,430,428,445,447,416,426,437,438,434,443,428,436,462,521,522,520,506,510,496,513,505,520,529,524,518,513,500,530,519,518,517,491,444,462,471,464,469,475,457,466,464,460,454,464,465,455,458,462,453,454,458,457,455,459,462,467,460,468,459,469,452,444,457,471,365,356,336,347,351,354,349,349,359,364,354,363,341,347,354,356,368,347,355,354,354,355,357,353,346,356,354,358,353,345,349,347,355,346,353,355,329,357,364,316,263,272,266,297,472,473,463,484,471,476,462,465,477,473,492,488,457,495,480,485,463,477,481,483,464,497,451,466,488,457,453,461,472,468,472,463,466,470,465,457,478,453,460,479,438,418,418,418,418,435,432,422,438,420,425,444,444,444,448,458,454,455,458,468,451,470,459,448,456,443,450,458,444,434,449,453,444,451,440,464,439,441,418,449,460,431,437,445,458,449,457,455,419,443,454,454,457,439,452,433,373,350,334,338,352,379,417,492,519,520,496,480,516,485,463,514,494,505,507,505,477,488,485,470,483,457,461,445,455,460,450,458,356,351,352,354,354,355,357,352,354,357,341,356,362,362,353,359,371,345,354,358,341,335,475,467,473,476,463,459,492,480,479,463,473,474,473,453,475,479,472,501,477,483,471,475,486,487,480,464,470,470,432,399,387,393,387,389,385,396,428,524,533,527,525,524,529,523,504,531,537,513,550,522,511,550,539,483,489,495,473,371,378,382,373,372,382,394,382,379,362,385,384,394,396,389,395,378,375,368,378,382,374,422,385,388,372,349,348,334,339,351,342,352,347,352,346,335,338,348,347,346,339,335,349,367,563,578,569,588,570,549,499,512,475,485,478,480,498,489,488,472,376,369,375,363,367,383,359,483,479,489,476,419,357,375,364,338,336,274,269,247,267,260,263,270,255,238,260,249,241,246,254,258,256,257,252,262,277,344,292,265,252,246,255,264,258,251,262,266,255,272,242,269,253,246,393,428,421,442,415,416,434,418,418,438,410,402,405,427,430,410,416,415,427,418,418,430,404,439,406,413,368,426,446,464,460,450,449,438,441,439,426,438,445,442,449,450,438,438,423,440,444,438,441,444,428,450,437,450,435,444,443,452,442,409,378,367,358,361,365,372,360,366,420,438
,459,440,451,445,431,441,448,438,447,420,434,421,417,447,444,446,431,450,432,453,467,451,441,451,437,457,432,438,443,443,454,479,478,484,492,472,464,464,451,469,477,464,473,477,461,490,485,469,480,481,476,471,472,461,469,459,477,471,454,485,490,478,479,464,489,490,480,480,443,382,374,381,384,388,391,389,376,374,389,371,370,374,370,383,390,387,385,387,394,454,445,461,451,462,464,475,454,466,430,470,456,472,455,452,473,453,485,449,432,454,444,449,412,359,432,344,369,372,349,360,360,356,363,353,357,368,368,359,347,378,362,362,358,363,366,354,358,370,357,370,360,359,366,365,368,359,347,363,360,360,359,366,341,360,338,348,347,349,357,355,373,378,381,375,355,379,371,379,391,371,359,368,357,366,367,386,378,466,488,476,468,471,481,469,468,462,475,465,469,460,492,474,465,488,491,468,466,471,460,468,480,472,469,470,471,493,419,406,388,392,406,415,385,391,391,384,396,388,418,392,390,391,388,366,359,374,362,348,355,342,356,346,344,372,364,383,328,324,330,337,344,340,347,308,309,294,284,299,296,294,311,304,295,323,291,326,325,329,326,329,316,330,304,329,333,295,338,337,352,323,335,363,359,408,473,445,471,497,511,477,510,522,507,511,412,362,406,410,415,454,362,379,390,410,438,438,455,446,431,451,440,451,449,446,431,438,444,432,416,438,453,434,433,447,443,433,432,430,436,442,444,448,441,470,462,488,486,507,512,512,495,513,498,494,492,491,480,451,439,424,428,429,437,429,407,395,392,401,388,397,389,396,392,398,395,402,401,391,404,387,392,388,388,404,395,395,397,390,401,405,369,406,477,497,494,494,444,449,450,420,456,435,447,449,449,457,430,427,446,437,453,452,453,432,449,446,429,448,402,389,368,381,375,363,347,377,391,372,379,384,389,384,373,379,368,392,380,359,376,364,377,350,370,344,374,374,573,580,586,587,549,558,570,572,560,584,580,550,582,590,576,590,576,561,563,568,569,578,584,576,549,560,579,585,586,591,596,576,574,569,573,579,578,580,584,550,578,587,589,579,586,589,599,579,564,595,584,587,580,586,566,578,553,570,541,584,580,582,568,560,558,581,564,568,564,576,566,572,546,601
,575,566,569,564,577,586,589,570,557,569,586,591,564,541,473,494,490,505,473,518,488,483,465,481,485,491,483,478,484,493,512,480,515,496,488,497,503,477,510,503,496,500,505,491,498,498,496,502,498,481,486,488,500,497,465,492,506,482,489,494,484,488,484,465,481,484,506,486,457,481,476,374,367,350,366,365,381,377,360,356,370,371,373,363,352,356,366,350,368,364,364,355,376,366,379,374,359,369,367,388,365,362,349,363,357,366,357,360,362,361,355,369,342,361,332,333,348,349,359,267,250,261,259,262,256,239,237,250,251,260,434,447,461,458,440,458,452,444,442,426,445,457,452,447,449,455,450,444,533,528,523,536,516,529,523,527,532,514,532,522,523,526,526,520,537,534,543,545,546,526,493,478,467,449,468,473,429,431,480,487,474,487,471,486,464,447,463,468,458,486,463,465,460,463,450,476,464,467,467,470,469,476,468,376,379,390,398,376,372,389,375,392,390,382,390,382,373,387,384,383,384,399,387,397,393,396,372,366,405,384,396,393,357,373,377,344,365,367,377,382,418,386,395,393,396,387,405,392,390,400,439,504,504,492,492,495,495,504,509,485,492,491,500,412,380,392,408,406,380,388,385,395,380,388,389,411,386,386,388,384,381,397,374,379,348,378,322,320,315,324,315,314,338,312,348,372,359,351,341,350,363,361,417,419,332,356,341,348,365,348,369,400,386,401,395,397,396,399,413,398,401,381,415,389,391,386,397,395,395,392,395,404,397,404,397,399,402,407,427,446,468,442,433,448,439,434,445,440,425,439,441,449,461,450,425,453,429,435,409,424,408,427,420,437,423,432,441,424,407,422,426,428,417,408,410,427,429,435,429,424,445,469,458,470,498,504,502,491,485,510,499,489,509,513,496,487,484,477,510,464,387,353,362,352,353,368,353,350,343,358,371,346,359,346,349,363,355,368,353,360,341,348,354,377,341,346,342,358,336,335,358,339,341,346,359,349,338,352,333,345,341,356,341,333,328,360,352,395,381,352,326,335,484,504,520,505,492,506,485,483,500,493,520,506,518,515,491,505,492,504,507,488,500,509,484,481,473,454,470,459,471,469,465,490,472,478,475,479,481,483,477,470,473,489,481,475,484,346,338,335
,338,334,344,332,356,334,355,352,350,325,319,341,357,336,334,343,363,315,351,354,343,362,359,366,330,349,337,333,318,328,327,345,330,330,344,363,364,360,372,364,348,362,343,374,343,358,375,359,370,366,360,359,359,356,359,361,367,363,380,369,379,373,356,376,366,369,373,399,489,476,484,499,492,500,494,493,503,488,497,490,500,494,491,488,495,494,495,495,496,483,473,487,482,498,472,503,482,488,495,503,483,497,482,478,459,496,491,480,471,494,481,410,361,363,450,493,469,496,482,472,484,502,479,479,504,426,425,415,413,457,518,514,479,479,499,462,434,413,427,431,422,421,429,427,419,423,424,436,447,432,444,430,426,444,435,434,430,455,438,416,442,441,446,429,428,438,448,457,437,439,435,450,428,454,444,435,430,418,455,477,477,484,475,497,494,485,478,468,454,486,490,477,478,492,495,485,489,463,492,479,478,484,491,493,496,471,470,466,469,480,465,490,485,489,499,470,393,369,351,346,353,349,358,359,344,354,337,360,352,362,351,365,337,334,353,341,348,344,351,353,352,354,363,350,353,372,364,356,351,349,373,342,358,353,354,351,344,351,354,354,344,359,295,258,279,271,250,257,240,244,280,263,271,284,266,265,249,262,276,263,270,265,245,271,258,249,254,255,259,254,247,249,247,249,366,431,425,416,435,420,439,437,427,430,430,443,432,441,431,448,441,439,442,454,446,442,448,414,439,436,451,446,427,429,426,418,430,446,407,419,425,426,419,436,447,406,418,422,441,416,440,415,423,437,420,410,405,412,394,413,421,406,427,429,414,417,412,414,429,423,416,414,414,416,449,436,448,442,433,451,436,440,433,453,444,450,435,447,428,436,444,443,428,440,437,425,464,436,425,436,436,428,442,461,505,545,515,511,503,511,521,539,503,501,512,509,535,520,525,526,466,461,471,478,472,476,464,455,466,461,455,465,464,404,378,386,407,406,401,389,390,372,389,398,367,378,370,377,363,372,348,320,333,337,332,340,327,317,339,352,335,361,333,335,326,343,336,343,326,343,364,361,336,332,343,337,345,329,329,342,330,326,345,345,323,325,350,351,347,331,324,317,317,292,315,316,311,283,286,302,308,286,297,290,303,310,293,290,283,314
,292,295,307,306,287,287,280,294,304,367,578,568,536,568,565,565,580,565,578,559,559,411,397,373,377,394,383,398,389,395,378,390,400,399,388,408,390,416,396,396,395,397,429,444,431,452,435,451,440,455,452,472,464,435,441,441,447,440,433,437,429,448,447,448,454,433,445,442,430,444,446,449,455,441,430,427,419,443,440,483,484,465,484,479,473,498,489,478,461,478,483,487,478,487,484,489,495,476,490,480,487,493,495,469,487,481,476,481,484,489,489,487,495,465,461,481,489,483,493,486,459,482,471,462,459,451,410,384,394,425,410,405,418,410,406,439,415,421,395,404,401,388,402,414,411,413,401,415,420,410,412,395,388,409,413,405,417,426,421,407,416,400,417,405,425,395,403,404,392,400,405,418,402,401,409,494,527,537,545,547,526,512,527,540,530,544,546,513,539,548,560,527,523,529,538,524,530,534,521,534,534,532,540,398,394,405,393,415,450,393,413,406,406,405,403,416,395,414,417,403,413,401,400,501,490,488,484,491,491,490,477,478,494,465,482,478,498,490,495,482,508,507,483,495,495,496,495,481,502,496,508,505,507,510,507,490,487,493,471,479,495,485,484,511,484,493,479,477,477,522,495,479,491,461,491,469,485,482,500,499,490,495,495,463,477,473,464,446,468,479,461,477,465,479,480,467,472,453,464,470,468,476,465,483,476,472,479,475,474,491,483,477,474,471,467,469,466,486,489,484,475,472,482,468,441,469,449,460,495,482,475,464,473,468,478,471,468,473,466,328,328,340,356,353,337,333,346,332,333,324,333,334,330,313,306,328,345,315,336,316,324,329,306,312,305,318,311,312,303,315,304,314,315,304,317,318,333,326,317,320,313,313,305,302,317,314,326,343,319,339,331,314,317,327,315,309,308,314,306,308,321,322,315,310,316,304,293,295,311,322,317,333,324,422,506,512,540,522,506,524,533,532,523,519,533,519,508,519,488,523,521,556,520,538,527,521,519,510,515,553,532,530,509,523,522,496,526,518,528,514,502,508,497,504,528,513,523,514,524,518,516,510,517,501,508,518,527,509,538,512,414,375,371,364,387,383,389,385,363,380,367,360,375,375,344,406,560,544,546,543,520,510,532,518,539,528,519,532,534,502
,526,521,533,506,513,455,372,373,351,389,364,385,389,386,378,387,371,372,442,452,426,441,433,439,437,429,416,422,445,439,421,445,434,441,440,412,433,408,428,448,416,429,447,453,446,442,458,433,452,464,428,468,448,454,451,483,507,475,481,489,493,483,474,481,375,317,326,334,324,334,329,322,328,318,326,301,326,340,323,333,329,333,332,328,315,313,325,343,325,318,326,324,331,323,317,329,341,332,337,326,329,423,461,480,467,486,467,483,489,481,467,467,479,471,474,490,478,475,478,492,484,469,470,483,460,473,469,454,487,422,390,389,396,384,343,309,315,319,335,334,332,343,336,354,332,328,331,343,316,250,254,254,256,259,254,246,258,246,263,253,258,241,251,422,463,461,470,432,450,472,466,455,448,462,476,459,448,453,478,471,468,454,469,472,458,471,461,466,456,467,459,450,463,454,450,442,462,478,469,465,456,470,455,461,460,470,443,449,462,469,458,448,456,458,478,459,451,456,456,446,454,433,447,469,456,462,458,465,470,435,518,532,492,502,515,516,532,527,517,498,526,509,517,502,520,500,512,514,518,515,512,498,512,513,497,457,373,380,383,376,380,376,390,379,397,382,392,370,406,405,376,377,403,393,374,365,388,375,379,372,370,372,387,382,357,379,366,373,367,436,453,468,455,462,460,446,459,471,436,473,466,467,458,429,474,515,491,506,489,506,500,483,498,484,500,486,487,489,496,491,507,498,500,496,511,501,501,413,412,411,429,433,434,420,489,550,543,546,548,548,572,573,539,545,555,557,571,573,560,552,548,562,559,551,544,525,536,548,537,467,457,454,441,429,439,443,340,340,321,332,325,330,345,322,352,334,348,344,333,332,324,340,331,338,358,346,368,341,342,329,331,341,343,358,359,326,340,339,338,340,344,339,346,316,317,372,459,461,474,491,470,463,473,483,454,483,482,467,495,456,466,464,478,479,460,454,450,467,448,453,459,459,469,476,477,462,481,477,490,514,500,510,497,498,488,493,501,487,484,496,492,513,499,498,481,381,392,397,395,406,393,357,378,313,291,286,289,279,289,293,306,288,264,268,284,299,289,287,269,279,275,284,272,282,271,266,274,438,461,473,487,471,488,476,506,453,488,480,489,493
,481,477,473,467,484,460,490,528,529,504,523,522,514,527,521,526,529,541,511,530,487,480,475,473,485,471,470,460,439,495,534,528,525,502,513,493,455,458,473,467,474,479,458,447,471,464,473,460,451,476,456,453,382,339,323,331,358,323,325,321,317,320,336,331,320,318,327,303,305,330,315,310,319,307,315,321,330,322,311,314,324,322,331,317,327,318,305,317,317,315,324,328,325,303,319,320,319,335,316,338,301,328,322,331,411,537,537,544,517,528,516,534,535,530,523,531,546,538,508,522,526,528,533,540,551,529,512,513,543,536,528,542,536,524,522,534,529,537,547,535,560,546,520,528,544,537,531,491,511,546,554,527,544,518,504,503,483,503,506,492,409,373,377,355,374,364,378,380,380,378,372,359,372,370,381,377,376,392,384,468,545,547,558,544,549,556,548,542,554,554,555,555,567,546,535,513,546,374,391,380,400,398,402,375,398,384,371,383,388,382,369,370,374,351,403,461,456,448,454,464,464,443,462,451,444,453,453,450,456,457,444,464,445,451,463,436,501,507,503,508,490,505,483,497,478,484,498,502,509,521,492,503,498,511,509,495,523,490,469,461,470,473,470,480,480,456,466,473,458,473,481,482,479,470,472,471,459,473,464,471,450,478,475,459,471,465,485,484,475,481,468,482,477,476,477,468,490,464,506,483,487,467,467,485,465,481,462,470,497,477,478,475,474,474,481,478,485,484,482,481,479,484,450,471,477,476,481,489,486,492,478,469,465,469,455,480,479,465,480,464,462,477,470,469,452,482,463,470,467,481,475,479,471,469,470,465,484,460,473,475,473,475,473,474,468,473,453,458,481,467,481,482,475,470,483,470,475,470,477,476,464,436,399,376,376,386,382,386,328,310,285,319,338,299,312,318,310,294,289,293,280,284,270,275,269,266,275,274,292,268,292,273,278,271,297,277,281,277,280,278,288,272,271,267,271,271,269,253,266,278,280,267,277,315,337,343,340,330,459,553,542,574,537,541,521,565,552,544,543,552,534,529,544,526,510,546,543,524,536,540,526,524,534,541,531,538,540,543,558,525,554,542,543,554,545,545,538,527,550,527,529,540,517,498,526,531,519,511,419,371,350,377,369,363,337,370,336,357,360,349
,352,336,358,359,343,365,345,359,368,340,349,341,329,351,342,328,314,435,442,443,449,429,447,431,442,434,424,421,433,438,426,428,432,443,434,442,419,429,424,438,419,414,440,446,442,445,427,408,451,436,453,431,462,444,437,438,420,446,442,458,452,464,454,460,461,430,438,441,435,428,414,432,442,428,441,421,457,436,453,434,446,457,470,439,470,469,453,452,456,459,461,448,455,412 8318394 5383 1 219.133423 5 10 diff --git a/io/slow5/data/header_tests/test_header_empty.slow5 b/io/slow5/data/header_tests/test_header_empty.slow5 new file mode 100644 index 00000000..e69de29b diff --git a/io/slow5/data/header_tests/test_header_not_enough_attributes.slow5 b/io/slow5/data/header_tests/test_header_not_enough_attributes.slow5 new file mode 100644 index 00000000..1d6f57b7 --- /dev/null +++ b/io/slow5/data/header_tests/test_header_not_enough_attributes.slow5 @@ -0,0 +1,3 @@ +#slow5_version 0.2.0 +#num_read_groups 2 +@asic_id 4175987214 diff --git a/io/slow5/data/header_tests/test_header_numReadGroups_bad.slow5 b/io/slow5/data/header_tests/test_header_numReadGroups_bad.slow5 new file mode 100644 index 00000000..6652124d --- /dev/null +++ b/io/slow5/data/header_tests/test_header_numReadGroups_bad.slow5 @@ -0,0 +1,3 @@ +#slow5_version 0.2.0 +#num_read_groups bad! 
+@asic_id 4175987214 diff --git a/io/slow5/data/header_tests/test_header_without_tabs.slow5 b/io/slow5/data/header_tests/test_header_without_tabs.slow5 new file mode 100644 index 00000000..df3dcf7e --- /dev/null +++ b/io/slow5/data/header_tests/test_header_without_tabs.slow5 @@ -0,0 +1,4 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@bad +@asic_id 4175987214 diff --git a/io/slow5/data/read_tests/continue.slow5 b/io/slow5/data/read_tests/continue.slow5 new file mode 100644 index 00000000..509b5162 --- /dev/null +++ b/io/slow5/data/read_tests/continue.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 . 
bad 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/digitisation.slow5 b/io/slow5/data/read_tests/digitisation.slow5 new file mode 100644 index 00000000..4d4c5c8c --- /dev/null +++ b/io/slow5/data/read_tests/digitisation.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 bad 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/endReason.slow5 b/io/slow5/data/read_tests/endReason.slow5 new file mode 100644 index 00000000..01cf1d93 --- /dev/null +++ b/io/slow5/data/read_tests/endReason.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{bad,unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/end_reason.slow5 b/io/slow5/data/read_tests/end_reason.slow5 new file mode 100644 index 00000000..58367748 --- /dev/null +++ b/io/slow5/data/read_tests/end_reason.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t 
uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 bad 10 + diff --git a/io/slow5/data/read_tests/end_reason_unknown.slow5 b/io/slow5/data/read_tests/end_reason_unknown.slow5 new file mode 100644 index 00000000..8e58ccc3 --- /dev/null +++ b/io/slow5/data/read_tests/end_reason_unknown.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 1000 10 + diff --git a/io/slow5/data/read_tests/len_raw_signal.slow5 b/io/slow5/data/read_tests/len_raw_signal.slow5 new file mode 100644 index 00000000..15b3c471 --- /dev/null +++ b/io/slow5/data/read_tests/len_raw_signal.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 bad 430,472,463 8318394 5383 1 219.133423 5 
10 + diff --git a/io/slow5/data/read_tests/median_before.slow5 b/io/slow5/data/read_tests/median_before.slow5 new file mode 100644 index 00000000..4fddd7e1 --- /dev/null +++ b/io/slow5/data/read_tests/median_before.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 bad 5 10 + diff --git a/io/slow5/data/read_tests/offset.slow5 b/io/slow5/data/read_tests/offset.slow5 new file mode 100644 index 00000000..d1773be1 --- /dev/null +++ b/io/slow5/data/read_tests/offset.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 bad 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/range.slow5 b/io/slow5/data/read_tests/range.slow5 new file mode 100644 index 00000000..4696a090 --- /dev/null +++ b/io/slow5/data/read_tests/range.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double 
enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 bad 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/raw_signal.slow5 b/io/slow5/data/read_tests/raw_signal.slow5 new file mode 100644 index 00000000..8342a424 --- /dev/null +++ b/io/slow5/data/read_tests/raw_signal.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,bad,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/read_group.slow5 b/io/slow5/data/read_tests/read_group.slow5 new file mode 100644 index 00000000..cc392b64 --- /dev/null +++ b/io/slow5/data/read_tests/read_group.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b bad 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git 
a/io/slow5/data/read_tests/read_number.slow5 b/io/slow5/data/read_tests/read_number.slow5 new file mode 100644 index 00000000..2a30457e --- /dev/null +++ b/io/slow5/data/read_tests/read_number.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 bad 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/sampling_rate.slow5 b/io/slow5/data/read_tests/sampling_rate.slow5 new file mode 100644 index 00000000..e27d59d5 --- /dev/null +++ b/io/slow5/data/read_tests/sampling_rate.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 raw 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/start_mux.slow5 b/io/slow5/data/read_tests/start_mux.slow5 new file mode 100644 index 00000000..a7f72984 --- /dev/null +++ b/io/slow5/data/read_tests/start_mux.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double 
enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 bad 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/start_time.slow5 b/io/slow5/data/read_tests/start_time.slow5 new file mode 100644 index 00000000..57b8d26e --- /dev/null +++ b/io/slow5/data/read_tests/start_time.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 bad 5383 1 219.133423 5 10 + diff --git a/io/slow5/data/read_tests/unknown.slow5 b/io/slow5/data/read_tests/unknown.slow5 new file mode 100644 index 00000000..8794f66a --- /dev/null +++ b/io/slow5/data/read_tests/unknown.slow5 @@ -0,0 +1,6 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number bad +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 1 diff --git a/io/slow5/data/test_example.slow5 
b/io/slow5/data/test_example.slow5 new file mode 100644 index 00000000..729e5373 --- /dev/null +++ b/io/slow5/data/test_example.slow5 @@ -0,0 +1,7 @@ +#slow5_version 0.2.0 +#num_read_groups 1 +@asic_id 4175987214 +#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char* +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number +0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 + diff --git a/io/slow5/example_test.go b/io/slow5/example_test.go new file mode 100644 index 00000000..f51d44b2 --- /dev/null +++ b/io/slow5/example_test.go @@ -0,0 +1,32 @@ +package slow5_test + +import ( + "fmt" + "os" + + "github.com/TimothyStiles/poly/io/slow5" +) + +func ExampleNewParser() { + // example.slow5 is a file I generated using slow5tools from nanopore fast5 + // run where I was testing using nanopore for doing COVID testing. It + // contains real nanopore data. + file, _ := os.Open("data/example.slow5") + // Set maxLineSize to 64kb. If you expect longer reads, + // make maxLineSize longer! + const maxLineSize = 2 * 32 * 1024 + parser, _, _ := slow5.NewParser(file, maxLineSize) + + var outputReads []slow5.Read + for { + read, err := parser.ParseNext() + if err != nil { + // Break at EOF + break + } + outputReads = append(outputReads, read) + } + + fmt.Println(outputReads[0].RawSignal[0:10]) + // Output: [430 472 463 467 454 465 463 450 450 449] +} diff --git a/io/slow5/slow5.go b/io/slow5/slow5.go new file mode 100644 index 00000000..fc9e5c96 --- /dev/null +++ b/io/slow5/slow5.go @@ -0,0 +1,439 @@ +/* +Package slow5 contains slow5 parsers and writers. + +Right now, only parsing slow5 files is supported. Support for writing and blow5 +coming soon. 
+ +slow5 is a file format alternative to fast5, which is the file format outputted +by Oxford Nanopore sequencing devices. fast5 uses hdf5, which is a complex file +format that can only be read and written with a single software library built +in 1998. On the other hand, slow5 uses a .tsv file format, which is easy to +both parse and write. + +slow5 files contain both general metadata about the sequencing run and raw +signal reads from the sequencing run. This raw signal can be used directly or +basecalled and used for alignment. + +More information on slow5 can be found here: https://github.com/hasindu2008/slow5tools +*/ +package slow5 + +import ( + "bufio" + "fmt" + "io" + "sort" + "strconv" + "strings" +) + +/****************************************************************************** +Oct 10, 2021 + +slow5 parser begins here. Specification below: +https://hasindu2008.github.io/slow5specs/slow5-v1.0.0.pdf + +slow5 is able to combine multiple sequencing runs into a single file format, +but we read each sequencing run separately. Each sequencing run contains a +header with metadata and a list of reads. However, unlike many other file +formats, a slow5 file should almost never be read into a common struct, since +most runs are large, and will take a ton of memory. Instead, the default way +to parse slow5 files is to produce a list of headers and a channel of raw +reads. In order to connect the two (if needed), use ReadGroupID. + +Nanopore changes the attributes found in the header quite often, so we store +most of these attributes in a map for future proofing. Even the binary file +format, blow5, does not have types for these attributes, and just stores them +as a long string. + +Reads have 8 required columns, and a few auxillary. These are typed, since they +are what will probably be used in real software. 
Cheers mate,

Keoni

******************************************************************************/

// Header contains metadata about the sequencing run in general.
//
// NewParser returns one Header per read group declared by the
// "#num_read_groups" line of the file.
type Header struct {
	// ReadGroupID is the zero-based position of this read group in the file;
	// it matches Read.ReadGroupID for reads belonging to the group.
	ReadGroupID uint32
	// Slow5Version is the value of the "#slow5_version" header line.
	Slow5Version string
	// Attributes maps the first column of each attribute header line, exactly
	// as it appears in the file (for example "@asic_id"), to this read
	// group's value on that line.
	Attributes map[string]string
	// EndReasonHeaderMap maps an end reason label to its index inside the
	// enum{...} column type declaration. NewParser assigns the same map to
	// every Header it returns, and Write uses the map of the first header.
	EndReasonHeaderMap map[string]int
}

// Read contains metadata and raw signal strengths for a single nanopore read.
//
// Each field is filled from the column of the same name ("read_id",
// "read_group", ...). A column whose value is "." is skipped and leaves the
// field at its zero value.
type Read struct {
	ReadID       string
	ReadGroupID  uint32
	Digitisation float64
	Offset       float64
	Range        float64
	SamplingRate float64
	LenRawSignal uint64
	RawSignal    []int16 // comma-separated values of the raw_signal column

	// Auxiliary fields
	ChannelNumber string
	MedianBefore  float64
	ReadNumber    int32
	StartMux      uint8
	StartTime     uint64
	EndReason     string // enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative}

	// Error is set by ParseNext when a column cannot be converted or is not
	// recognised. ParseNext still returns a nil error in that case, so
	// callers have to check this field for every read.
	Error error // in case there is an error while parsing!
}

// knownEndReasons lists the end reason labels NewParser accepts inside the
// enum{...} column type declaration; any other label makes NewParser return
// an error.
var knownEndReasons = map[string]bool{"unknown": true,
	"partial":                         true,
	"mux_change":                      true,
	"unblock_mux_change":              true,
	"data_service_unblock_mux_change": true,
	"signal_positive":                 true,
	"signal_negative":                 true,
}

// Parser is a flexible parser that provides ample
// control over reading slow5 sequences.
// It is initialized with NewParser.
type Parser struct {
	// reader keeps state of current reader.
	reader bufio.Reader
	// line is the parser's running line counter; it is reported in
	// Read.Error messages.
	line uint
	// headerMap maps a column position to the column name taken from the
	// "#read_id" header line.
	headerMap map[int]string
	// endReasonMap maps an end_reason enum index to its label.
	endReasonMap map[int]string
}

// NewParser parses a slow5 file.
+func NewParser(r io.Reader, maxLineSize int) (*Parser, []Header, error) { + parser := &Parser{ + reader: *bufio.NewReaderSize(r, maxLineSize), + line: 0, + } + var headers []Header + var slow5Version string + var numReadGroups uint32 + headerMap := make(map[int]string) + endReasonMap := make(map[int]string) + endReasonHeaderMap := make(map[string]int) + + for { + lineBytes, err := parser.reader.ReadSlice('\n') + if err != nil { + return parser, []Header{}, err + } + line := strings.TrimSpace(string(lineBytes)) + parser.line++ + values := strings.Split(line, "\t") + if len(values) < 2 { + return parser, []Header{}, fmt.Errorf("Got following line without tabs: %s", line) + } + + // First, we need to identify the number of read groups. This number will be the length of our + // ReadGroups output, and we will need it for iteration through the rest of the header. + if numReadGroups == 0 { + switch values[0] { + case "#slow5_version": + slow5Version = values[1] + case "#num_read_groups": + numReadGroupsUint, err := strconv.ParseUint(values[1], 10, 32) + if err != nil { + return parser, []Header{}, err + } + numReadGroups = uint32(numReadGroupsUint) + for id := uint32(0); id < numReadGroups; id++ { + headers = append(headers, Header{Slow5Version: slow5Version, ReadGroupID: id, Attributes: make(map[string]string)}) + } + } + continue + } + // Terminate if we hit the beginning of the raw read headers + // Get endReasonEnums. This is simply a string between enum{} that is used for the reasons that a read could have ended. 
+ if values[0] == "#char*" { + for _, typeInfo := range values { + if strings.Contains(typeInfo, "enum") { + endReasonEnumsMinusPrefix := strings.TrimPrefix(typeInfo, "enum{") + endReasonEnumsMinusSuffix := strings.TrimSuffix(endReasonEnumsMinusPrefix, "}") + endReasons := strings.Split(endReasonEnumsMinusSuffix, ",") + + for endReasonIndex, endReason := range endReasons { + if _, ok := knownEndReasons[endReason]; !ok { + return parser, headers, fmt.Errorf("unknown end reason '%s' found in end_reason enum. Please report", endReason) + } + endReasonMap[endReasonIndex] = endReason + endReasonHeaderMap[endReason] = endReasonIndex + } + } + } + // Add endReasonMap to each output header. This helps the writer + for headerIndex := range headers { + headers[headerIndex].EndReasonHeaderMap = endReasonHeaderMap + } + continue + } + + // Get the read headers and their identifiers. Though the primary read headers are in a defined order, the auxiliary headers are not. + if values[0] == "#read_id" { + headerMap[0] = "read_id" + for headerNum := 1; headerNum < len(values); headerNum++ { + headerMap[headerNum] = values[headerNum] + } + break + } + + // Check to make sure we have the right amount of information for the num_read_groups + if len(values) != int(numReadGroups+1) { + return parser, []Header{}, fmt.Errorf("Improper amount of information for read groups. Needed %d, got %d, in line: %s", numReadGroups+1, len(values), line) + } + for id := 0; id < int(numReadGroups); id++ { + headers[id].Attributes[values[0]] = values[id+1] + } + continue + } + parser.headerMap = headerMap + parser.endReasonMap = endReasonMap + return parser, headers, nil +} + +// ParseNext parses the next read from a parser. +func (parser *Parser) ParseNext() (Read, error) { + lineBytes, err := parser.reader.ReadSlice('\n') + if err != nil { + return Read{}, err + } + line := strings.TrimSpace(string(lineBytes)) + + values := strings.Split(string(line), "\t") + // Reads have started. 
+ // Once we have the read headers, start to parse the actual reads + var newRead Read + for valueIndex := 0; valueIndex < len(values); valueIndex++ { + fieldValue := parser.headerMap[valueIndex] + if values[valueIndex] == "." { + continue + } + switch fieldValue { + case "read_id": + newRead.ReadID = values[valueIndex] + case "read_group": + readGroupID, err := strconv.ParseUint(values[valueIndex], 10, 32) + if err != nil { + newRead.Error = fmt.Errorf("Failed convert read_group '%s' to uint on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.ReadGroupID = uint32(readGroupID) + case "digitisation": + digitisation, err := strconv.ParseFloat(values[valueIndex], 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert digitisation '%s' to float on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.Digitisation = digitisation + case "offset": + offset, err := strconv.ParseFloat(values[valueIndex], 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert offset '%s' to float on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.Offset = offset + case "range": + nanoporeRange, err := strconv.ParseFloat(values[valueIndex], 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert range '%s' to float on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.Range = nanoporeRange + case "sampling_rate": + samplingRate, err := strconv.ParseFloat(values[valueIndex], 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert sampling_rate '%s' to float on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.SamplingRate = samplingRate + case "len_raw_signal": + lenRawSignal, err := strconv.ParseUint(values[valueIndex], 10, 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert len_raw_signal '%s' to float on line %d. 
Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.LenRawSignal = lenRawSignal + case "raw_signal": + var rawSignals []int16 + for rawSignalIndex, rawSignalString := range strings.Split(values[valueIndex], ",") { + rawSignal, err := strconv.ParseInt(rawSignalString, 10, 16) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert raw signal '%s' to int on line %d, signal index %d. Got Error: %w", rawSignalString, parser.line, rawSignalIndex, err) + } + rawSignals = append(rawSignals, int16(rawSignal)) + } + newRead.RawSignal = rawSignals + case "start_time": + startTime, err := strconv.ParseUint(values[valueIndex], 10, 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert start_time '%s' to uint on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.StartTime = startTime + case "read_number": + readNumber, err := strconv.ParseInt(values[valueIndex], 10, 32) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert read_number '%s' to int on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.ReadNumber = int32(readNumber) + case "start_mux": + startMux, err := strconv.ParseUint(values[valueIndex], 10, 8) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert start_mux '%s' to uint on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.StartMux = uint8(startMux) + case "median_before": + medianBefore, err := strconv.ParseFloat(values[valueIndex], 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert median_before '%s' to float on line %d. Got Error: %w", values[valueIndex], parser.line, err) + } + newRead.MedianBefore = medianBefore + case "end_reason": + endReasonIndex, err := strconv.ParseInt(values[valueIndex], 10, 64) + if err != nil { + newRead.Error = fmt.Errorf("Failed to convert end_reason '%s' to int on line %d. 
Got Error: %w", values[valueIndex], parser.line, err) + } + if _, ok := parser.endReasonMap[int(endReasonIndex)]; !ok { + newRead.Error = fmt.Errorf("End reason out of range. Got '%d' on line %d. Cannot find valid enum reason", int(endReasonIndex), parser.line) + } + newRead.EndReason = parser.endReasonMap[int(endReasonIndex)] + case "channel_number": + // For whatever reason, this is a string. + newRead.ChannelNumber = values[valueIndex] + default: + newRead.Error = fmt.Errorf("Unknown field to parser '%s' found on line %d. Please report to github.com/TimothyStiles/poly", fieldValue, parser.line) + } + } + return newRead, nil +} + +/****************************************************************************** +March 26, 2023 + +Start of Write functions + +Slow5 write takes in a header, a channel of reads, and an io.Writer output. +The intended use case of slow5 write is reading from a location, such as a +database, and then directly writing the output to somewhere (stdout, a file, +etc). + +A channel is used here so that reading and writing of slow5 can be done +concurrently. In almost all cases, you do not want to have slow5 files entirely +in memory, because they're freakin' huge. + +Cheers, + +Keoni + +******************************************************************************/ + +// Write writes a list of headers and a channel of reads to an output. 
+func Write(headers []Header, reads <-chan Read, output io.Writer) error { + // First, write the slow5 version number + slow5Version := headers[0].Slow5Version + endReasonHeaderMap := headers[0].EndReasonHeaderMap + _, err := fmt.Fprintf(output, "#slow5_version\t%s\n", slow5Version) + if err != nil { + return err + } + // Then, write the number of read groups (ie, the number of headers) + _, err = fmt.Fprintf(output, "#num_read_groups\t%d\n", len(headers)) + if err != nil { + return err + } + // Next, we need a map of what attribute values are available + possibleAttributeKeys := make(map[string]bool) + for _, header := range headers { + for key := range header.Attributes { + possibleAttributeKeys[key] = true + } + } + // Now that we know what attribute values are possible, lets build a map + // with those values with "." placeholders (as defined in slow5 spec) + headerAttributes := make(map[string][]string) + for key := range possibleAttributeKeys { + newBlankAttributes := make([]string, len(headers)) + for blankAttributeIndex := range newBlankAttributes { + newBlankAttributes[blankAttributeIndex] = "." 
+ } + headerAttributes[key] = newBlankAttributes + } + // Build a list with all header values + var headerAttributeStrings []string + for headerIndex, header := range headers { + for key, value := range header.Attributes { + headerAttributes[key][headerIndex] = value + } + } + for key, value := range headerAttributes { + var attributeRow strings.Builder + attributeRow.Write([]byte(key)) + for _, attributeValue := range value { + attributeRow.Write([]byte("\t")) + attributeRow.Write([]byte(attributeValue)) + } + headerAttributeStrings = append(headerAttributeStrings, attributeRow.String()) + } + // Sort the header strings + sort.Strings(headerAttributeStrings) + + // Write the header attribute strings to the output + for _, headerAttributeString := range headerAttributeStrings { + _, err = fmt.Fprintf(output, "%s\n", headerAttributeString) + if err != nil { + return err + } + } + + // Now we handle endReasons / failureReasons. In the slow5 spec, these are + // enums depending on what is present in the FAST5 files. This means the + // labels may not be consistent and there is no exhaustive list of enum + // labels. So, we have to create this from the endReasonMap each time we + // write the slow5 file (not a const!) 
+ // Invert the endReasonMap + endReasonStringList := make([]string, len(endReasonHeaderMap)) + for endReasonString, endReasonIndex := range endReasonHeaderMap { + endReasonStringList[endReasonIndex] = endReasonString + } + // Build endReasonString + var endReasonStringBuilder strings.Builder + for _, endReason := range endReasonStringList { + fmt.Fprintf(&endReasonStringBuilder, "%s,", endReason) + } + endReasonString := endReasonStringBuilder.String() + endReasonString = endReasonString[:len(endReasonString)-1] // Remove trailing comma + + // Write the read headers + // These are according to the slow5 specifications + _, err = fmt.Fprintf(output, "#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{%s} char*\n", endReasonString) + if err != nil { + return err + } + _, err = fmt.Fprintln(output, "#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number") + if err != nil { + return err + } + + // Iterate over reads. This is reading from a channel, and will end + // when the channel is closed. + for read := range reads { + // converts []int16 to string + var rawSignalStringBuilder strings.Builder + for signalIndex, signal := range read.RawSignal { + _, err = fmt.Fprint(&rawSignalStringBuilder, signal) + if err != nil { + return err + } + if signalIndex != len(read.RawSignal)-1 { // Don't add a comma to last number + _, err = fmt.Fprint(&rawSignalStringBuilder, ",") + if err != nil { + return err + } + } + } + // Look at above output.Write("#read_id ... for the values here. 
+ _, err = fmt.Fprintf(output, "%s\t%d\t%g\t%g\t%g\t%g\t%d\t%s\t%d\t%d\t%d\t%g\t%d\t%s\n", read.ReadID, read.ReadGroupID, read.Digitisation, read.Offset, read.Range, read.SamplingRate, read.LenRawSignal, rawSignalStringBuilder.String(), read.StartTime, read.ReadNumber, read.StartMux, read.MedianBefore, endReasonHeaderMap[read.EndReason], read.ChannelNumber) + if err != nil { + return err + } + } + return nil +} diff --git a/io/slow5/slow5_test.go b/io/slow5/slow5_test.go new file mode 100644 index 00000000..65027bfc --- /dev/null +++ b/io/slow5/slow5_test.go @@ -0,0 +1,206 @@ +package slow5 + +import ( + "errors" + "io" + "io/ioutil" + "os" + "testing" +) + +const maxLineSize = 2 * 32 * 1024 + +func TestParse(t *testing.T) { + file, err := os.Open("data/example.slow5") + if err != nil { + t.Errorf("Failed to open example.slow5: %s", err) + } + parser, headers, err := NewParser(file, maxLineSize) + if err != nil { + t.Errorf("Failed to parse headers of file: %s", err) + } + // Test headers + if len(headers) != 1 { + t.Errorf("There should only be 1 read group. Got: %d", len(headers)) + } + if headers[0].Attributes["@asic_id"] != "4175987214" { + t.Errorf("Expected AsicId 4175987214. Got: %s", headers[0].Attributes["asic_id"]) + } + + // Test reads + var outputReads []Read + for { + read, err := parser.ParseNext() + if err != nil { + if !errors.Is(err, io.EOF) { + t.Errorf("Got unknown error: %s", err) + } + break + } + outputReads = append(outputReads, read) + } + if outputReads[0].RawSignal[0] != 430 { + t.Errorf("Expected first outputRead to have a raw_signal of 430. 
Got: %d", outputReads[0].RawSignal[0]) + } +} + +func TestParseImproperHeaders(t *testing.T) { + // Improper files + file, err := os.Open("data/header_tests/test_header_without_tabs.slow5") + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + _, _, err = NewParser(file, maxLineSize) + if err == nil { + t.Errorf("Test should have failed if header line doesn't have any tabs") + } + + file, err = os.Open("data/header_tests/test_header_numReadGroups_bad.slow5") + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + _, _, err = NewParser(file, maxLineSize) + if err == nil { + t.Errorf("Test should have failed if numReadGroup can't be converted to an int") + } + + file, err = os.Open("data/header_tests/test_header_not_enough_attributes.slow5") + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + _, _, err = NewParser(file, maxLineSize) + if err == nil { + t.Errorf("Test should have failed if the header doesn't have enough attributes for numReadGroup") + } + + file, err = os.Open("data/header_tests/test_header_empty.slow5") + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + _, _, err = NewParser(file, maxLineSize) + if err == nil { + t.Errorf("Test should have failed if the file is empty") + } +} + +func testParseReadsHelper(t *testing.T, fileTarget string, errorMessage string) { + file, err := os.Open(fileTarget) + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + parser, _, _ := NewParser(file, maxLineSize) + var targetErr []error + for { + read, err := parser.ParseNext() + if err != nil { + if !errors.Is(err, io.EOF) { + t.Errorf("Got unknown error: %s", err) + } + break + } + err = read.Error + if err != nil { + targetErr = append(targetErr, err) + } + } + if len(targetErr) == 0 { + t.Errorf(errorMessage) + } +} + +func TestParseReads(t *testing.T) { + file, err := os.Open("data/example.slow5") + if err != nil { + t.Errorf("Failed to open file 
with error: %s", err) + } + parser, _, _ := NewParser(file, maxLineSize) + + var outputReads []Read + for { + read, err := parser.ParseNext() + if err != nil { + if !errors.Is(err, io.EOF) { + t.Errorf("Got unknown error: %s", err) + } + break + } + err = read.Error + if err != nil { + t.Errorf("Failed ParseReads with error: %s", err) + } + outputReads = append(outputReads, read) + } + if outputReads[0].ReadID != "0026631e-33a3-49ab-aa22-3ab157d71f8b" { + t.Errorf("First read id should be 0026631e-33a3-49ab-aa22-3ab157d71f8b. Got: %s", outputReads[0].ReadID) + } + + // Test improper files + testParseReadsHelper(t, "data/read_tests/endReason.slow5", "Test should have failed if there are unknown end reasons") + testParseReadsHelper(t, "data/read_tests/continue.slow5", "Test should have failed at terminate, but should have gone through a continue") + testParseReadsHelper(t, "data/read_tests/read_group.slow5", "Test should have failed with bad read_group") + testParseReadsHelper(t, "data/read_tests/digitisation.slow5", "Test should have failed with bad digitisation") + testParseReadsHelper(t, "data/read_tests/offset.slow5", "Test should have failed with bad offset") + testParseReadsHelper(t, "data/read_tests/range.slow5", "Test should have failed with bad range") + testParseReadsHelper(t, "data/read_tests/sampling_rate.slow5", "Test should have failed with bad samping_rate") + testParseReadsHelper(t, "data/read_tests/len_raw_signal.slow5", "Test should have failed with bad len_raw_signal") + testParseReadsHelper(t, "data/read_tests/raw_signal.slow5", "Test should have failed with bad raw_signal") + testParseReadsHelper(t, "data/read_tests/start_time.slow5", "Test should have failed with bad start_time") + testParseReadsHelper(t, "data/read_tests/read_number.slow5", "Test should have failed with bad read_number") + testParseReadsHelper(t, "data/read_tests/start_mux.slow5", "Test should have failed with bad start_mux") + testParseReadsHelper(t, 
"data/read_tests/median_before.slow5", "Test should have failed with bad median_before") + testParseReadsHelper(t, "data/read_tests/end_reason.slow5", "Test should have failed with if end reason can't be converted to int") + testParseReadsHelper(t, "data/read_tests/end_reason_unknown.slow5", "Test should have failed with end reason out of range") + testParseReadsHelper(t, "data/read_tests/unknown.slow5", "Test should have failed with unknown header") +} + +func TestWrite(t *testing.T) { + file, err := os.Open("data/example.slow5") + if err != nil { + t.Errorf("Failed to open file with error: %s", err) + } + // Parse headers + parser, headers, err := NewParser(file, maxLineSize) + if err != nil { + t.Errorf("Failed to parse headers with error: %s", err) + } + // Create empty file + testFile, err := os.Create("data/test_write.slow5") + if err != nil { + t.Errorf("Failed to create temporary file") + } + + // Send parsed reads into this channel + reads := make(chan Read) + go func() { + for { + read, err := parser.ParseNext() + if err != nil { + // Break at EOF + break + } + reads <- read + } + close(reads) + }() + + // Write + err = Write(headers, reads, testFile) + if err != nil { + t.Errorf("Failed to write slow5 file. Got error: %s", err) + } + + // Compare both files + example, err := ioutil.ReadFile("data/example.slow5") + if err != nil { + t.Errorf("Failed to read example file: %s", err) + } + testWrite, err := ioutil.ReadFile("data/test_write.slow5") + if err != nil { + t.Errorf("Failed to read test file: %s", err) + } + os.Remove("data/test_write.slow5") + + if string(example) != string(testWrite) { + t.Errorf("Example and test write are different") + } +}