From 79aeceeab4979b0e6ed07722cc218fd2b7b4acf6 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Fri, 1 Mar 2024 14:48:23 -0800 Subject: [PATCH] Update to revised JSON parsing. Use martian for lazy JSON reading. --- .github/workflows/test.yaml | 2 +- Cargo.lock | 277 ++++++++++++++++---- Cargo.toml | 3 +- enclone_exec/Cargo.toml | 1 + enclone_exec/tests/enclone_test1.rs | 156 +++++------ enclone_main/Cargo.toml | 2 + enclone_main/src/determine_ref.rs | 36 ++- enclone_main/src/subset.rs | 119 ++++----- enclone_tools/Cargo.toml | 2 + enclone_tools/src/bin/filtered_from_json.rs | 111 ++++---- 10 files changed, 412 insertions(+), 297 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 56b7888247..e97c3dbcbe 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -7,7 +7,7 @@ on: - master env: - RUST_VERSION: "1.71.0" + RUST_VERSION: "1.74.0" CARGO_INCREMENTAL: 0 jobs: diff --git a/Cargo.lock b/Cargo.lock index 220d321a23..1389e52855 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,7 +35,7 @@ dependencies = [ [[package]] name = "align_tools" version = "0.1.12" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "bio_edit", "debruijn", @@ -47,7 +47,7 @@ dependencies = [ [[package]] name = "amino" version = "0.1.7" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "string_utils", @@ -56,7 +56,7 @@ dependencies = [ [[package]] name = "ansi_escape" version = "0.1.3" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "string_utils", "vector_utils", @@ -67,6 +67,9 @@ name = "anyhow" version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" +dependencies = [ + "backtrace", +] [[package]] name = "approx" @@ -224,7 +227,7 @@ dependencies = [ [[package]] name = "bio_edit" version = "0.1.1" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "bio-types", "bit-set", @@ -557,6 +560,15 @@ dependencies = [ "adler32", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive-new" version = "0.5.9" @@ -565,7 +577,7 @@ checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -601,7 +613,7 @@ dependencies = [ [[package]] name = "dna" version = "0.1.3" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" [[package]] name = "edit-distance" @@ -618,7 +630,7 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "enclone" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "amino", "ansi_escape", @@ -645,7 +657,7 @@ dependencies = [ [[package]] name = "enclone_args" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "enclone_core", @@ -655,12 +667,14 @@ dependencies = [ "hdf5", "io_utils", "itertools", + "martian-filetypes", "rand", "rayon", "regex", "serde_json", "string_utils", "vdj_ann", + "vdj_types", "vector_utils", ] @@ -678,7 +692,7 @@ dependencies = [ [[package]] name = "enclone_core" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "amino", "ansi_escape", @@ -748,6 +762,7 @@ dependencies = [ "perf_stats", "pretty_trace", "rayon", + "serde", "serde_json", "sha2", "stats_utils", @@ -790,9 +805,11 @@ dependencies = [ "hdf5", "io_utils", "itertools", + "martian-filetypes", "perf_stats", "pretty_trace", "rayon", + "serde", "serde_json", "stats_utils", "string_utils", @@ -829,7 +846,7 @@ dependencies = [ [[package]] name = "enclone_print" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "amino", "ansi_escape", @@ -861,7 +878,7 @@ dependencies = [ [[package]] name = "enclone_proto" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "bio_edit", "byteorder", @@ -874,7 +891,7 @@ dependencies = [ [[package]] name = "enclone_ranger" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "enclone", "enclone_args", @@ -890,7 +907,7 @@ dependencies = [ [[package]] name = "enclone_stuff" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "amino", "debruijn", @@ -983,6 +1000,7 @@ dependencies = [ "pager", "pretty_trace", "rayon", + "serde", "serde_json", "statrs", "stats_utils", @@ -990,13 +1008,14 @@ dependencies = [ "tables", "vdj_ann", "vdj_ann_ref", + "vdj_types", "vector_utils", ] [[package]] name = "enclone_vars" version = "0.5.219" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "io_utils", "itertools", @@ -1030,13 +1049,13 @@ checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] name = "equiv" version = "0.1.3" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" [[package]] name = "errno" @@ -1068,7 +1087,7 @@ checksum = "1d4fd7bd9e32c1205549decf6f36772d7b606a579b26afaffa335ae148151a5d" [[package]] name = "exons" version = "0.1.5" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "io_utils", "string_utils", @@ -1078,7 +1097,7 @@ dependencies = [ [[package]] name = "expr_tools" version = "0.1.3" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "evalexpr", "statrs", @@ -1089,7 +1108,7 @@ dependencies = [ [[package]] name = "fasta_tools" version = "0.1.8" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "flate2", @@ -1112,6 +1131,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" +[[package]] +name = "fern" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f0c14694cbd524c8720dd69b0e3179344f04ebb5f90f2e4a440c6ea3b2f1ee" +dependencies = [ + "log", +] + [[package]] name = "filetime" version = "0.2.16" @@ -1247,7 +1275,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -1259,7 +1287,7 @@ checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "graph_simple" version = "0.1.5" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "petgraph", "vector_utils", @@ -1296,7 +1324,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -1304,7 +1332,7 @@ name = "hdf5-sys" version = "0.8.1" source = "git+https://github.com/10XGenomics/hdf5-rust.git?branch=conda_nov2021#2d4a40b7ef75de530bc53fd2eb0fe75047e083ad" dependencies = [ - "attohttpc 0.18.0", + "attohttpc 0.19.1", "bzip2", "libc", "libloading", @@ -1366,7 +1394,7 @@ dependencies = [ [[package]] name = "hyperbase" version = "0.1.8" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "equiv", @@ -1409,7 +1437,7 @@ dependencies = [ [[package]] name = "io_utils" version = "0.3.2" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "bincode", "flate2", @@ -1475,7 +1503,7 @@ dependencies = [ [[package]] name = "kmer_lookup" version = "0.1.5" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "rayon", @@ -1564,6 +1592,52 @@ dependencies = [ "libc", ] +[[package]] +name = "martian" +version = "0.26.0" +source = "git+https://github.com/martian-lang/martian-rust?branch=master#345490b52d2722fe30b042d78dcd601225aaee21" +dependencies = [ + "anyhow", + "backtrace", + "fern", + "heck 0.4.0", + "log", + "rustc_version 0.4.0", + "serde", + "serde_json", + "tempfile", + "time", +] + +[[package]] +name = "martian-derive" +version = "0.26.0" +source = "git+https://github.com/martian-lang/martian-rust?branch=master#345490b52d2722fe30b042d78dcd601225aaee21" +dependencies = [ + "martian", + "proc-macro2", + "quote", + "serde", + "syn 2.0.52", +] + +[[package]] +name = "martian-filetypes" +version = "0.27.0" +source = "git+https://github.com/martian-lang/martian-rust?branch=master#345490b52d2722fe30b042d78dcd601225aaee21" +dependencies = [ + "anyhow", + "bincode", + "csv", + "flate2", + "lz4", + "martian", + "martian-derive", + "serde", + "serde_json", + "zstd", +] + [[package]] name = "matches" version = "0.1.9" @@ -1656,7 +1730,7 @@ checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -1678,7 +1752,7 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" dependencies = [ - "rustc_version", + "rustc_version 0.1.7", ] [[package]] @@ -1690,6 +1764,7 @@ dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", + "memoffset", ] [[package]] @@ -1712,6 +1787,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.45" @@ -1753,6 +1834,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "object" version = "0.32.1" @@ -1808,7 +1898,7 @@ dependencies = [ "find-crate", "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -1849,7 +1939,7 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "perf_stats" version = "0.1.8" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "io_utils", "libc", @@ -1904,7 +1994,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -1974,6 +2064,12 @@ dependencies = [ "miniz_oxide 0.5.1", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pprof" version = "0.9.1" @@ -2006,13 +2102,13 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "pretty_trace" version = "0.5.24" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "backtrace", "io_utils", "lazy_static", "libc", - "nix 0.27.1", + "nix 0.24.2", "pprof", "stats_utils", "string_utils", @@ -2029,7 +2125,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.92", "version_check", ] @@ -2052,11 +2148,11 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.37" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] @@ -2101,7 +2197,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -2121,9 +2217,9 @@ source = "git+https://github.com/Barandis/qd#0fb276d70346f11f4b2a5b30568d8a26d0d [[package]] name = "quote" -version = "1.0.18" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -2316,7 +2412,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" dependencies = [ - "semver", + "semver 0.1.20", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver 1.0.22", ] [[package]] @@ -2390,24 +2495,30 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" +[[package]] +name = "semver" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" + [[package]] name = "serde" -version = "1.0.137" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.137" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.52", ] [[package]] @@ -2493,12 +2604,12 @@ dependencies = [ [[package]] name = "stats_utils" version = "0.1.3" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" [[package]] name = "string_utils" version = "0.1.4" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "vector_utils", ] @@ -2519,7 +2630,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.92", ] [[package]] @@ -2571,10 +2682,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "syn" +version = "2.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "tables" version = "0.1.5" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "io_utils", "itertools", @@ -2623,7 +2745,7 @@ checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -2635,6 +2757,39 @@ dependencies = [ "libc", ] +[[package]] +name = "time" +version = "0.3.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +dependencies = [ + "deranged", + "itoa 1.0.1", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-skia" version = "0.6.3" @@ -2685,7 +2840,7 @@ checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", ] [[package]] @@ -2739,6 +2894,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07547e3ee45e28326cc23faac56d44f58f16ab23e413db526debce3b0bfd2742" +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "unicode-normalization" version = "0.1.19" @@ -2826,7 +2987,7 @@ checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" [[package]] name = "vdj_ann" version = "0.4.4" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "align_tools", "amino", @@ -2848,7 +3009,7 @@ dependencies = [ [[package]] name = "vdj_ann_ref" version = "0.2.1" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "debruijn", "exons", @@ -2866,7 +3027,7 @@ dependencies = [ [[package]] name = "vdj_types" version = "0.2.0" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "serde", ] @@ -2883,7 +3044,7 @@ dependencies = [ [[package]] name = "vector_utils" version = "0.1.5" -source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#fb77ad2f8c19f42dac327e7bbc0c072d71a54adc" +source = "git+https://github.com/10XGenomics/enclone_ranger?branch=main#51f755f643f32551eb2921d07b2c17e61f3c1e94" dependencies = [ "permutation", "superslice", @@ -2922,7 +3083,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 1.0.92", "wasm-bindgen-shared", ] @@ -2944,7 +3105,7 @@ checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.92", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 11b1be3003..bac54e4a15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,6 +125,7 @@ libc = "0.2" log = "0.4" lz4 = "1" mach = "0.3" +martian-filetypes = { git = "https://github.com/martian-lang/martian-rust", branch = "master" } ndarray = "0.15" nix = { version = "0.27", features = ["signal"] } num-traits = "0.2" @@ -169,7 +170,7 @@ tonic-build = { version = "0.6", default-features = false, features = ["transpor triple_accel = "0.4" users = "0.11" usvg = { version = "0.19", features = ["text"] } -# vdj_ann = "0.4" +vdj_types = { git = "https://github.com/10XGenomics/enclone_ranger", branch = "main" } vdj_ann = { git = "https://github.com/10XGenomics/enclone_ranger", branch = "main" } vdj_ann_ref = { git = "https://github.com/10XGenomics/enclone_ranger", branch = "main" } vector_utils = { git = "https://github.com/10XGenomics/enclone_ranger", branch = "main" } diff --git a/enclone_exec/Cargo.toml b/enclone_exec/Cargo.toml index e53d11da44..6409ffe609 100644 --- a/enclone_exec/Cargo.toml +++ b/enclone_exec/Cargo.toml @@ -49,6 +49,7 @@ fs_extra.workspace = true itertools.workspace = true perf_stats.workspace = true rayon.workspace = true +serde.workspace = true serde_json.workspace = true sha2.workspace = true stats_utils.workspace = true diff --git a/enclone_exec/tests/enclone_test1.rs b/enclone_exec/tests/enclone_test1.rs index 0461a783f9..fc07b7ea39 100644 --- a/enclone_exec/tests/enclone_test1.rs +++ b/enclone_exec/tests/enclone_test1.rs @@ -606,6 +606,17 @@ fn test_cpu() { #[cfg(not(feature = "cpu"))] #[test] fn test_licenses() { + use serde::Deserialize; + + #[derive(Clone, Debug, Deserialize)] + pub struct DependencyDetails { + pub name: String, + pub repository: Option, + pub license: Option, + pub license_file: Option, + pub description: Option, + } + const ACCEPTABLE_LICENSE_TYPES: [&str; 9] = [ "MIT", "ISC", "Zlib", "WTFPL", "MPL-2.0", "CC0-1.0", "BSL-1.0", "0BSD", "OFL-1.1", ]; @@ -647,101 +658,72 @@ fn test_licenses() { ); panic!("failed"); } - let lic = &new.unwrap().stdout; - let mut f = &lic[..]; - let mut fails = Vec::::new(); - loop { - match read_vector_entry_from_json(&mut f).unwrap() { - None => break, - Some(x) => { - let v: Value = serde_json::from_str(strme(&x)).unwrap(); - let package = v["name"].to_string().between("\"", "\"").to_string(); - let version = v["version"].to_string().between("\"", "\"").to_string(); - let mut license = String::new(); - if v.get("license").is_some() { - license = v["license"].to_string(); - if license.contains('"') { - license = license.between("\"", "\"").to_string(); - } - } - let mut license_file = String::new(); - if v.get("license_file").is_some() { - license_file = v["license_file"].to_string(); - if license_file.contains('"') { - license_file = license_file.between("\"", "\"").to_string(); - } - } - if license == "null" && license_file == "null" { - continue; + let dep_details: Vec = serde_json::from_slice(&new.unwrap().stdout).unwrap(); + + let fails: Vec<_> = dep_details + .into_iter() + .filter_map(|dep| { + let Some(license) = dep.license else { + return None; + }; + + let package = &dep.name; + + if package.starts_with("enclone") { + return None; + } + for y in ACCEPTABLE_10X_PACKAGES.iter() { + if package == *y { + return None; } - let mut repo = String::new(); - if v.get("repository").is_some() { - repo = v["repository"].to_string(); - if repo.contains('"') { - repo = repo.between("\"", "\"").to_string(); - } + } + for y in ACCEPTABLE_OTHER_PACKAGES.iter() { + if package == *y { + return None; } - let mut ok = false; - if package.starts_with("enclone") { - ok = true; + } + for y in ACCEPTABLE_LICENSE_TYPES.iter() { + if license == *y { + return None; } - for y in ACCEPTABLE_10X_PACKAGES.iter() { - if package == *y { - ok = true; - } + if license.ends_with(&format!(" OR {}", y)) { + return None; } - for y in ACCEPTABLE_OTHER_PACKAGES.iter() { - if package == *y { - ok = true; - } + if license.starts_with(&format!("{} OR ", y)) { + return None; } - for y in ACCEPTABLE_LICENSE_TYPES.iter() { - if license == *y { - ok = true; - } - if license.ends_with(&format!(" OR {}", y)) { - ok = true; - } - if license.starts_with(&format!("{} OR ", y)) { - ok = true; - } + } + + let (mut x1, mut x2) = (false, false); + let repo = dep.repository.unwrap_or_default(); + if repo.starts_with("https://github.com") { + let f1 = format!("{}/blob/master/Cargo.toml", repo); + if valid_link(&f1) { + x1 = true; } - if !ok { - let (mut x1, mut x2) = (false, false); - if repo.starts_with("https://github.com") { - let f1 = format!("{}/blob/master/Cargo.toml", repo); - if valid_link(&f1) { - x1 = true; - } - let f2 = format!("{}/blob/master/NOTICE", repo); - if valid_link(&f2) { - x2 = true; - } - } - let a2 = license == A2 - || license.ends_with(&format!(" OR {}", A2)) - || license.starts_with(&format!("{} OR ", A2)); - if a2 && x1 && !x2 { - continue; - } - fails.push(format!("{}, {}, {}, {}", package, version, license, repo)); + let f2 = format!("{}/blob/master/NOTICE", repo); + if valid_link(&f2) { + x2 = true; } } - } - } - if fails.len() > 0 { - fails.sort(); - let mut msg = format!("\nLicense check failed. The following packages had problems:\n"); - for i in 0..fails.len() { - msg += &format!("{}. {}\n", i + 1, fails[i]); - } - eprintln!( - "{}\nYou may want to retry the test, since the license checks fails sporadically \ - at a low rate.\n", - msg - ); - panic!("failed"); - } + let a2 = license == A2 + || license.ends_with(&format!(" OR {}", A2)) + || license.starts_with(&format!("{} OR ", A2)); + if a2 && x1 && !x2 { + return None; + } + Some(dep.name) + }) + .sorted() + .collect(); + + assert!( + fails.is_empty(), + "\nLicense check failed. The following packages had problems:\n{}\n\ + You may want to retry the test, since the license checks fails sporadically \ + at a low rate.\n", + fails.join(", "), + ); } // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ diff --git a/enclone_main/Cargo.toml b/enclone_main/Cargo.toml index 01db8247f5..f8a3b6e8b3 100644 --- a/enclone_main/Cargo.toml +++ b/enclone_main/Cargo.toml @@ -33,9 +33,11 @@ enclone_testlist = { path = "../enclone_testlist" } expr_tools.workspace = true io_utils.workspace = true itertools.workspace = true +martian-filetypes.workspace = true perf_stats.workspace = true pretty_trace.workspace = true rayon.workspace = true +serde.workspace = true serde_json.workspace = true stats_utils.workspace = true string_utils.workspace = true diff --git a/enclone_main/src/determine_ref.rs b/enclone_main/src/determine_ref.rs index 71a68ba663..e1f836104b 100644 --- a/enclone_main/src/determine_ref.rs +++ b/enclone_main/src/determine_ref.rs @@ -3,12 +3,14 @@ // Start of code to determine the reference sequence that is to be used. use enclone_core::defs::EncloneControl; -use io_utils::{open_for_read, open_maybe_compressed, path_exists, read_vector_entry_from_json}; -use serde_json::Value; +use io_utils::{open_for_read, open_maybe_compressed, path_exists}; +use martian_filetypes::json_file::{Json, LazyJsonReader}; +use martian_filetypes::LazyRead; use std::collections::HashMap; use std::fs::File; use std::io::{BufRead, BufReader}; -use string_utils::{strme, TextUtils}; +use string_utils::TextUtils; +use vdj_ann::annotate::ContigAnnotation; use vdj_ann_ref::{ human_ref, human_ref_2_0, human_ref_3_1, human_ref_4_0, human_ref_old, mouse_ref, mouse_ref_3_1, mouse_ref_4_0, mouse_ref_old, @@ -292,28 +294,24 @@ pub fn determine_ref(ctl: &mut EncloneControl, refx: &mut String) -> Result<(), } } erase_if(&mut refhash, &to_delete); - let mut f = BufReader::new(open_maybe_compressed(&jsonx)); - 'json_entry: loop { - let x = read_vector_entry_from_json(&mut f)?; - if x.is_none() { - break; - } - let v: Value = serde_json::from_str(strme(&x.unwrap())).unwrap(); - let ann = v["annotations"].as_array(); - if ann.is_none() { + + let reader: LazyJsonReader = + LazyJsonReader::with_reader(BufReader::new(open_maybe_compressed(&jsonx))) + .map_err(|err| format!("{err:#?}"))?; + + 'json_entry: for ann in reader.into_iter() { + let ann = ann.unwrap(); + if ann.annotations.is_empty() { return Err(format!( "\nThe file\n{jsonx}\ndoes not contain annotations. To use enclone with it, \ please specify the argument BUILT_IN\nto force use of the internal \ reference and recompute annotations.\n" )); } - let ann = ann.unwrap(); - for i in 0..ann.len() { - let a = &ann[i]; - let id = a["feature"]["feature_id"].as_u64().unwrap() as usize; - let gene = a["feature"]["gene_name"].to_string(); - let gene = gene.between("\"", "\"").to_string(); - let len = a["annotation_length"].as_u64().unwrap() as usize; + for a in ann.annotations { + let id = a.feature.feature_id; + let gene = a.feature.gene_name; + let len = a.annotation_length; let mut matches = Vec::::new(); for j in 0..refhash.len() { if refhash[j].0.contains_key(&id) && refhash[j].0[&id] == (len, gene.clone()) { diff --git a/enclone_main/src/subset.rs b/enclone_main/src/subset.rs index d6fa38fbc2..25ec08b11f 100644 --- a/enclone_main/src/subset.rs +++ b/enclone_main/src/subset.rs @@ -3,80 +3,67 @@ // Process the SUBSET_JSON option. use enclone_core::defs::{EncloneControl, ExactClonotype}; -use io_utils::{ - fwrite, fwriteln, open_for_write_new, open_maybe_compressed, path_exists, - read_vector_entry_from_json, -}; -use serde_json::Value; - -use std::io::{BufReader, Write}; -use string_utils::{strme, TextUtils}; +use io_utils::{open_for_write_new, open_maybe_compressed, path_exists}; +use serde::{Deserialize, Serialize}; +use vdj_ann::annotate::ContigAnnotation; use vector_utils::{bin_member, unique_sort}; +#[derive(Serialize, Deserialize)] +struct AnnotationWithDataset<'a> { + dataset: Option<&'a str>, + #[serde(flatten)] + data: ContigAnnotation, +} + pub fn subset_json( ctl: &EncloneControl, - exact_clonotypes: &Vec, + exact_clonotypes: &[ExactClonotype], exacts: &Vec>, ann: &str, ) -> Result<(), String> { - if !ctl.gen_opt.subset_json.is_empty() { - let mut barcode_li = Vec::<(String, usize)>::new(); - for l in 0..exacts.len() { - for u in 0..exacts[l].len() { - let ex = &exact_clonotypes[exacts[l][u]]; - for j in 0..ex.clones.len() { - barcode_li.push(( - ex.clones[j][0].barcode.clone(), - ex.clones[j][0].dataset_index, - )); - } - } - } - unique_sort(&mut barcode_li); - let mut g = open_for_write_new![&ctl.gen_opt.subset_json]; - fwriteln!(g, "["); - let mut written = false; - for li in 0..ctl.origin_info.dataset_path.len() { - let json = format!("{}/{}", ctl.origin_info.dataset_path[li], ann); - let mut jsonx = json.clone(); - if !path_exists(&json) { - jsonx = format!("{}.lz4", json); - } - let mut xs = Vec::>::new(); - let mut f = BufReader::new(open_maybe_compressed(&jsonx)); - loop { - match read_vector_entry_from_json(&mut f)? { - None => break, - Some(x) => { - let v: Value = serde_json::from_str(strme(&x)).unwrap(); - let barcode = &v["barcode"].to_string().between("\"", "\"").to_string(); - if bin_member(&barcode_li, &(barcode.clone(), li)) { - let y = format!( - " {{\n \"dataset\": \"{}\",\n{}", - ctl.origin_info.dataset_id[li], - strme(&x).after(" {\n"), - ); - xs.push(y.as_bytes().to_vec()); - } - } - } - } - for j in 0..xs.len() { - if j == 0 && written { - fwriteln!(g, ","); - } - written = true; - fwrite!(g, "{}", strme(&xs[j])); - if j < xs.len() - 1 { - fwrite!(g, ","); - fwriteln!(g, ""); - } + if ctl.gen_opt.subset_json.is_empty() { + return Ok(()); + } + + let mut barcode_li = Vec::<(&str, usize)>::new(); + for l in 0..exacts.len() { + for u in 0..exacts[l].len() { + let ex = &exact_clonotypes[exacts[l][u]]; + for j in 0..ex.clones.len() { + barcode_li.push(( + ex.clones[j][0].barcode.as_str(), + ex.clones[j][0].dataset_index, + )); } } - if written { - fwriteln!(g, ""); - } - fwriteln!(g, "]"); } - Ok(()) + unique_sort(&mut barcode_li); + + let annotations: Vec<_> = + std::iter::zip(&ctl.origin_info.dataset_path, &ctl.origin_info.dataset_id) + .enumerate() + .flat_map(|(li, (ds_path, ds_id))| { + let mut json_path = format!("{}/{}", ds_path, ann); + if !path_exists(&json_path) { + json_path = format!("{}.lz4", json_path); + } + let mut contents = String::new(); + open_maybe_compressed(&json_path) + .read_to_string(&mut contents) + .unwrap(); + + serde_json::Deserializer::from_str(&contents) + .into_iter::() + .map(Result::unwrap) + .filter(|ann| bin_member(&barcode_li, &(&ann.data.barcode, li))) + .map(|ann| AnnotationWithDataset { + dataset: Some(ds_id), + data: ann.data, + }) + .collect::>() + }) + .collect(); + + serde_json::to_writer_pretty(open_for_write_new![&ctl.gen_opt.subset_json], &annotations) + .map_err(|e| e.to_string()) } diff --git a/enclone_tools/Cargo.toml b/enclone_tools/Cargo.toml index dc8908de90..3ca74a8469 100644 --- a/enclone_tools/Cargo.toml +++ b/enclone_tools/Cargo.toml @@ -39,6 +39,7 @@ itertools.workspace = true lz4.workspace = true pretty_trace.workspace = true rayon.workspace = true +serde.workspace = true serde_json.workspace = true statrs.workspace = true tables.workspace = true @@ -46,6 +47,7 @@ stats_utils.workspace = true string_utils.workspace = true vdj_ann.workspace = true vdj_ann_ref.workspace = true +vdj_types.workspace = true vector_utils.workspace = true [target.'cfg(not(windows))'.dependencies] diff --git a/enclone_tools/src/bin/filtered_from_json.rs b/enclone_tools/src/bin/filtered_from_json.rs index 3c1d40c126..2fa5ae9e0d 100644 --- a/enclone_tools/src/bin/filtered_from_json.rs +++ b/enclone_tools/src/bin/filtered_from_json.rs @@ -1,36 +1,28 @@ // Copyright (c) 2022 10X Genomics, Inc. All rights reserved. // // Make filtered_contig.fasta and filtered_contig_annotations.csv from all_contig_annotations.json. - -use io_utils::*; -use pretty_trace::PrettyTrace; -use serde_json::Value; -use std::io::{BufReader, Write}; +use io_utils::{fwriteln, open_for_write_new, open_maybe_compressed}; +use serde::{Deserialize, Serialize}; +use std::io::Write; use string_utils::*; +use vdj_ann::annotate::ContigAnnotation; +use vdj_types::VdjRegion; -fn strip(x: &str) -> String { - if x == "null" { - String::new() - } else { - x.between("\"", "\"").to_string() - } +// FIXME duplicated between here and enclone_main/src/subset.rs +#[derive(Serialize, Deserialize)] +struct AnnotationWithDataset { + dataset: Option, + #[serde(flatten)] + data: ContigAnnotation, } fn main() { - PrettyTrace::new().on(); - // Read in the json file and break it into entries. - let mut f = BufReader::new(open_maybe_compressed("all_contig_annotations.json")); - let mut xs = Vec::>::new(); - loop { - match read_vector_entry_from_json(&mut f).unwrap() { - None => break, - Some(x) => { - xs.push(x); - } - } - } + let mut contents = String::new(); + open_maybe_compressed("all_contig_annotations.json") + .read_to_string(&mut contents) + .unwrap(); // Go through the json entries. @@ -41,59 +33,48 @@ fn main() { "barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,\ c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id" ); - for i in 0..xs.len() { - let v: Result = serde_json::from_str(strme(&xs[i])); - if v.is_err() { - eprintln!( - "\nInternal error, failed to parse a value from a string. The string is:\n{}\n", - strme(&xs[i]) - ); - std::process::exit(1); - } - let v = v.unwrap(); - let dataset = strip(&v["dataset"].to_string()); - let barcode = strip(&v["barcode"].to_string()); - let bc = barcode.before("-"); - let barcode = format!("{}-{}", bc, dataset); - let is_cell = v["is_cell"].as_bool().unwrap_or(false); - let mut contig_id = strip(&v["contig_name"].to_string()); + for ann in serde_json::Deserializer::from_str(&contents).into_iter::() { + let ann = ann.unwrap(); + let dataset = ann.dataset.unwrap_or("null".to_string()); + let ann = ann.data; + + let bc = ann.barcode.before("-"); + let barcode = format!("{bc}-{dataset}"); + let is_cell = ann.is_cell; + let mut contig_id = ann.contig_name; let contig = contig_id.rev_after("_"); - contig_id = format!("{}-{}_contig_{}", bc, dataset, contig); - let high_confidence = v["high_confidence"].as_bool().unwrap_or(false); - let seq = strip(&v["sequence"].to_string()); - let length = seq.len(); - let ann = v["annotations"].as_array().unwrap(); - let chain = strip(&ann[0]["feature"]["chain"].to_string()); + contig_id = format!("{bc}-{dataset}_contig_{contig}"); + + let length = ann.sequence.len(); + + let chain = ann.annotations[0].feature.chain; let mut v_gene = String::new(); let mut d_gene = String::new(); let mut j_gene = String::new(); let mut c_gene = String::new(); - for j in 0..ann.len() { - if ann[j]["feature"]["region_type"] == "L-REGION+V-REGION" { - v_gene = strip(&ann[j]["feature"]["gene_name"].to_string()); - } - if ann[j]["feature"]["region_type"] == "D-REGION" { - d_gene = strip(&ann[j]["feature"]["gene_name"].to_string()); - } - if ann[j]["feature"]["region_type"] == "J-REGION" { - j_gene = strip(&ann[j]["feature"]["gene_name"].to_string()); - } - if ann[j]["feature"]["region_type"] == "C-REGION" { - c_gene = strip(&ann[j]["feature"]["gene_name"].to_string()); + for region in ann.annotations { + let gene_name = region.feature.gene_name; + match region.feature.region_type { + VdjRegion::V => v_gene = gene_name, + VdjRegion::D => d_gene = gene_name, + VdjRegion::J => j_gene = gene_name, + VdjRegion::C => c_gene = gene_name, + VdjRegion::UTR => (), } } - let full_length = v["full_length"].as_bool().unwrap_or(false); - let productive = v["productive"].as_bool().unwrap_or(false); - let cdr3 = strip(&v["cdr3"].to_string()); - let cdr3_nt = strip(&v["cdr3_seq"].to_string()); - let reads = v["read_count"].as_i64().unwrap() as usize; - let umis = v["umi_count"].as_i64().unwrap() as usize; - let (raw_clonotype_id, raw_consensus_id) = (String::new(), String::new()); + let high_confidence = ann.high_confidence; + let full_length = ann.full_length.unwrap_or_default(); + let productive = ann.productive.unwrap_or_default(); + let cdr3 = ann.cdr3.unwrap_or_default(); + let cdr3_nt = ann.cdr3_seq.unwrap_or_default(); + let reads = ann.read_count; + let umis = ann.umi_count; + let seq = ann.sequence; fwriteln!( csv, "{barcode},{is_cell},{contig_id},{high_confidence},{length},{chain},\ {v_gene},{d_gene},{j_gene},{c_gene},{full_length},{productive},{cdr3},{cdr3_nt},\ - {reads},{umis},{raw_clonotype_id},{raw_consensus_id}" + {reads},{umis},," ); fwriteln!(fasta, ">{contig_id}\n{seq}"); }