From fb6e77a2f87f52aeb0b035c0579768b72200788f Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Wed, 11 Sep 2024 08:50:04 +0200
Subject: [PATCH 01/13] Represent chars in protein text with 5 bits, tests
 don't work yet

---
 Cargo.lock                              |  10 +
 bitarray/src/binary.rs                  |   8 +-
 bitarray/src/lib.rs                     |  18 +-
 sa-index/Cargo.toml                     |   1 +
 sa-index/src/lib.rs                     |  10 +-
 sa-index/src/sa_searcher.rs             |  95 +++---
 sa-index/src/suffix_to_protein_index.rs |  14 +-
 sa-mappings/Cargo.toml                  |   2 +
 sa-mappings/src/proteins.rs             |  35 +--
 text-compression/Cargo.toml             |   9 +
 text-compression/src/lib.rs             | 391 ++++++++++++++++++++++++
 11 files changed, 506 insertions(+), 87 deletions(-)
 create mode 100644 text-compression/Cargo.toml
 create mode 100644 text-compression/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index c29abc3..9d81263 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1127,15 +1127,18 @@ dependencies = [
  "serde",
  "serde_json",
  "tempdir",
+ "text-compression",
 ]
 
 [[package]]
 name = "sa-mappings"
 version = "0.1.0"
 dependencies = [
+ "bitarray",
  "bytelines",
  "fa-compression",
  "tempdir",
+ "text-compression",
 ]
 
 [[package]]
@@ -1275,6 +1278,13 @@ dependencies = [
  "remove_dir_all",
 ]
 
+[[package]]
+name = "text-compression"
+version = "0.1.0"
+dependencies = [
+ "bitarray",
+]
+
 [[package]]
 name = "tinytemplate"
 version = "1.2.1"
diff --git a/bitarray/src/binary.rs b/bitarray/src/binary.rs
index e7265cd..a8084d1 100644
--- a/bitarray/src/binary.rs
+++ b/bitarray/src/binary.rs
@@ -159,10 +159,10 @@ mod tests {
     #[test]
     fn test_write_binary() {
         let mut bitarray = BitArray::with_capacity(4, 40);
-        bitarray.set(0, 0x1234567890);
-        bitarray.set(1, 0xabcdef0123);
-        bitarray.set(2, 0x4567890abc);
-        bitarray.set(3, 0xdef0123456);
+        bitarray.set(0, 0x1234567890_u64);
+        bitarray.set(1, 0xabcdef0123_u64);
+        bitarray.set(2, 0x4567890abc_u64);
+        bitarray.set(3, 0xdef0123456_u64);
 
         let mut buffer = Vec::new();
         bitarray.write_binary(&mut buffer).unwrap();
diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index 655d17e..fe7b532 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -19,7 +19,7 @@ pub struct BitArray {
     /// The length of the bit array.
     len: usize,
     /// The number of bits in a single element of the data vector.
-    bits_per_value: usize
+    bits_per_value: usize,
 }
 
 impl BitArray {
@@ -39,7 +39,7 @@ impl BitArray {
             data: vec![0; capacity * bits_per_value / 64 + extra],
             mask: (1 << bits_per_value) - 1,
             len: capacity,
-            bits_per_value
+            bits_per_value,
         }
     }
 
@@ -85,6 +85,7 @@ impl BitArray {
     /// * `index` - The index of the value to set.
     /// * `value` - The value to set at the specified index.
     pub fn set(&mut self, index: usize, value: u64) {
+        let value: u64 = value.into();
         let start_block = index * self.bits_per_value / 64;
         let start_block_offset = index * self.bits_per_value % 64;
 
@@ -142,6 +143,11 @@ impl BitArray {
     pub fn clear(&mut self) {
         self.data.iter_mut().for_each(|x| *x = 0);
     }
+
+    pub fn get_data_slice(&self, start_slice: usize, end_slice: usize) -> &[u64] {
+        &self.data[start_slice..end_slice]
+    }
+
 }
 
 /// Writes the data to a writer in a binary format using a bit array. This function is helpfull
@@ -257,10 +263,10 @@ mod tests {
     fn test_bitarray_set() {
         let mut bitarray = BitArray::with_capacity(4, 40);
 
-        bitarray.set(0, 0b0001110011111010110001000111111100110010);
-        bitarray.set(1, 0b1100001001010010011000010100110111001001);
-        bitarray.set(2, 0b1111001101001101101101101011101001010001);
-        bitarray.set(3, 0b0000100010010001010001001110101110011100);
+        bitarray.set(0, 0b0001110011111010110001000111111100110010_u64);
+        bitarray.set(1, 0b1100001001010010011000010100110111001001_u64);
+        bitarray.set(2, 0b1111001101001101101101101011101001010001_u64);
+        bitarray.set(3, 0b0000100010010001010001001110101110011100_u64);
 
         assert_eq!(bitarray.data, vec![0x1cfac47f32c25261, 0x4dc9f34db6ba5108, 0x9144EB9C00000000]);
     }
diff --git a/sa-index/Cargo.toml b/sa-index/Cargo.toml
index de57fc9..25dda76 100644
--- a/sa-index/Cargo.toml
+++ b/sa-index/Cargo.toml
@@ -14,5 +14,6 @@ clap = { version = "4.4.8", features = ["derive"] }
 rayon = "1.8.1"
 serde = { version = "1.0.197", features = ["derive"] }
 sa-mappings = { path = "../sa-mappings" }
+text-compression = { path = "../text-compression" }
 bitarray = { path = "../bitarray" }
 serde_json = "1.0.116"
diff --git a/sa-index/src/lib.rs b/sa-index/src/lib.rs
index f276906..53f5348 100644
--- a/sa-index/src/lib.rs
+++ b/sa-index/src/lib.rs
@@ -115,11 +115,11 @@ mod tests {
     #[test]
     fn test_suffix_array_compressed() {
         let mut bitarray = BitArray::with_capacity(5, 40);
-        bitarray.set(0, 1);
-        bitarray.set(1, 2);
-        bitarray.set(2, 3);
-        bitarray.set(3, 4);
-        bitarray.set(4, 5);
+        bitarray.set(0, 1 as u64);
+        bitarray.set(1, 2 as u64);
+        bitarray.set(2, 3 as u64);
+        bitarray.set(3, 4 as u64);
+        bitarray.set(4, 5 as u64);
 
         let sa = SuffixArray::Compressed(bitarray, 1);
         assert_eq!(sa.len(), 5);
diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index d09c704..7f60cbb 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -1,6 +1,7 @@
 use std::{cmp::min, ops::Deref};
 
 use sa_mappings::proteins::{Protein, Proteins};
+use text_compression::ProteinTextSlice;
 
 use crate::{
     sa_searcher::BoundSearch::{Maximum, Minimum},
@@ -75,7 +76,7 @@ pub struct SparseSearcher(Searcher);
 
 impl SparseSearcher {
     pub fn new(sa: SuffixArray, proteins: Proteins) -> Self {
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
         Self(searcher)
     }
@@ -93,7 +94,7 @@ pub struct DenseSearcher(Searcher);
 
 impl DenseSearcher {
     pub fn new(sa: SuffixArray, proteins: Proteins) -> Self {
-        let suffix_index_to_protein = DenseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = DenseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
         Self(searcher)
     }
@@ -176,12 +177,12 @@ impl Searcher {
 
         // match as long as possible
         while index_in_search_string < search_string.len()
-            && index_in_suffix < self.proteins.input_string.len()
-            && (search_string[index_in_search_string] == self.proteins.input_string[index_in_suffix]
+            && index_in_suffix < self.proteins.text.len()
+            && (search_string[index_in_search_string] == self.proteins.text.get(index_in_suffix) as u8
                 || (search_string[index_in_search_string] == b'L'
-                    && self.proteins.input_string[index_in_suffix] == b'I')
+                    && self.proteins.text.get(index_in_suffix) as u8 == b'I')
                 || (search_string[index_in_search_string] == b'I'
-                    && self.proteins.input_string[index_in_suffix] == b'L'))
+                    && self.proteins.text.get(index_in_suffix) as u8 == b'L'))
         {
             index_in_suffix += 1;
             index_in_search_string += 1;
@@ -191,7 +192,7 @@ impl Searcher {
         if !search_string.is_empty() {
             if index_in_search_string == search_string.len() {
                 is_cond_or_equal = true
-            } else if index_in_suffix < self.proteins.input_string.len() {
+            } else if index_in_suffix < self.proteins.text.len() {
                 // in our index every L was replaced by a I, so we need to replace them if we want
                 // to search in the right direction
                 let peptide_char = if search_string[index_in_search_string] == b'L' {
@@ -200,10 +201,10 @@ impl Searcher {
                     search_string[index_in_search_string]
                 };
 
-                let protein_char = if self.proteins.input_string[index_in_suffix] == b'L' {
+                let protein_char = if self.proteins.text.get(index_in_suffix) as u8 == b'L' {
                     b'I'
                 } else {
-                    self.proteins.input_string[index_in_suffix]
+                    self.proteins.text.get(index_in_suffix) as u8
                 };
 
                 is_cond_or_equal = condition_check(peptide_char, protein_char);
@@ -340,16 +341,14 @@ impl Searcher {
                     // check at all
                     if suffix >= skip
                         && ((skip == 0
-                            || Self::check_prefix(
-                                current_search_string_prefix,
-                                &self.proteins.input_string[suffix - skip..suffix],
-                                equate_il
-                            ))
-                            && Self::check_suffix(
+                            || ProteinTextSlice::new(&self.proteins.text, suffix - skip, suffix)
+                                .equals_slice(current_search_string_prefix, equate_il))
+                            && 
+                            Self::check_suffix(
                                 skip,
                                 il_locations_current_suffix,
                                 current_search_string_suffix,
-                                &self.proteins.input_string[suffix..suffix + search_string.len() - skip],
+                                ProteinTextSlice::new(&self.proteins.text, suffix, suffix + search_string.len() - skip),
                                 equate_il
                             ))
                     {
@@ -419,19 +418,13 @@ impl Searcher {
         skip: usize,
         il_locations: &[usize],
         search_string: &[u8],
-        index_string: &[u8],
+        text_slice: ProteinTextSlice,
         equate_il: bool
     ) -> bool {
         if equate_il {
             true
         } else {
-            for &il_location in il_locations {
-                let index = il_location - skip;
-                if search_string[index] != index_string[index] {
-                    return false;
-                }
-            }
-            true
+            text_slice.check_il_locations(skip, il_locations, search_string)
         }
     }
 
@@ -459,6 +452,7 @@ impl Searcher {
 #[cfg(test)]
 mod tests {
     use sa_mappings::proteins::{Protein, Proteins};
+    use text_compression::ProteinText;
 
     use crate::{
         sa_searcher::{BoundSearchResult, SearchAllSuffixesResult, Searcher},
@@ -487,9 +481,11 @@ mod tests {
     }
 
     fn get_example_proteins() -> Proteins {
-        let text = "AI-BLACVAA-AC-KCRLZ$".to_string().into_bytes();
+        let input_string = "AI-BLACVAA-AC-KCRLZ$";
+        let text = ProteinText::from_string(input_string);
+
         Proteins {
-            input_string: text,
+            text,
             proteins: vec![
                 Protein {
                     uniprot_id: String::new(),
@@ -520,7 +516,7 @@ mod tests {
         let proteins = get_example_proteins();
         let sa = SuffixArray::Original(vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], 1);
 
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
 
         // search bounds 'A'
@@ -541,7 +537,7 @@ mod tests {
         let proteins = get_example_proteins();
         let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3);
 
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
 
         // search suffix 'VAA'
@@ -558,7 +554,7 @@ mod tests {
         let proteins = get_example_proteins();
         let sa = SuffixArray::Original(vec![19, 10, 2, 13, 9, 8, 11, 5, 0, 3, 12, 15, 6, 1, 4, 17, 14, 16, 7, 18], 1);
 
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
 
         let bounds_res = searcher.search_bounds(&[b'I']);
@@ -574,7 +570,7 @@ mod tests {
         let proteins = get_example_proteins();
         let sa = SuffixArray::Original(vec![9, 0, 3, 12, 15, 6, 18], 3);
 
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
 
         // search bounds 'RIZ' with equal I and L
@@ -589,10 +585,11 @@ mod tests {
     // test edge case where an I or L is the first index in the sparse SA.
     #[test]
     fn test_l_first_index_in_sa() {
-        let text = "LMOXZ$".to_string().into_bytes();
+        let input_string = "LMOXZ$";
+        let text = ProteinText::from_string(input_string);
 
         let proteins = Proteins {
-            input_string: text,
+            text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
@@ -601,7 +598,7 @@ mod tests {
         };
 
         let sparse_sa = SuffixArray::Original(vec![0, 2, 4], 2);
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
 
         // search bounds 'IM' with equal I and L
@@ -611,10 +608,11 @@ mod tests {
 
     #[test]
     fn test_il_missing_matches() {
-        let text = "AAILLL$".to_string().into_bytes();
+        let input_string = "AAILLL$";
+        let text = ProteinText::from_string(input_string);
 
         let proteins = Proteins {
-            input_string: text,
+            text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
@@ -623,7 +621,7 @@ mod tests {
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 0, 1, 5, 4, 3, 2], 1);
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
 
         let found_suffixes = searcher.search_matching_suffixes(&[b'I'], usize::MAX, true);
@@ -632,19 +630,20 @@ mod tests {
 
     #[test]
     fn test_il_duplication() {
-        let text = "IIIILL$".to_string().into_bytes();
+        let input_string = "IIIILL$";
+        let text = ProteinText::from_string(input_string);
 
         let proteins = Proteins {
-            input_string: text,
+            text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
                 functional_annotations: vec![]
             }]
         };
-
+        
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
 
         let found_suffixes = searcher.search_matching_suffixes(&[b'I', b'I'], usize::MAX, true);
@@ -653,10 +652,11 @@ mod tests {
 
     #[test]
     fn test_il_suffix_check() {
-        let text = "IIIILL$".to_string().into_bytes();
-
+        let input_string = "IIIILL$";
+        let text = ProteinText::from_string(input_string);
+        
         let proteins = Proteins {
-            input_string: text,
+            text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
@@ -665,7 +665,7 @@ mod tests {
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 4, 2, 0], 2);
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
 
         // search all places where II is in the string IIIILL, but with a sparse SA
@@ -676,10 +676,11 @@ mod tests {
 
     #[test]
     fn test_il_duplication2() {
-        let text = "IILLLL$".to_string().into_bytes();
+        let input_string = "IILLLL$";
+        let text = ProteinText::from_string(input_string);
 
         let proteins = Proteins {
-            input_string: text,
+            text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
@@ -688,7 +689,7 @@ mod tests {
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
-        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.input_string);
+        let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
 
         // search bounds 'IM' with equal I and L
diff --git a/sa-index/src/suffix_to_protein_index.rs b/sa-index/src/suffix_to_protein_index.rs
index 121b569..6aed362 100644
--- a/sa-index/src/suffix_to_protein_index.rs
+++ b/sa-index/src/suffix_to_protein_index.rs
@@ -2,6 +2,7 @@ use clap::ValueEnum;
 use sa_mappings::proteins::{SEPARATION_CHARACTER, TERMINATION_CHARACTER};
 
 use crate::Nullable;
+use text_compression::ProteinText;
 
 /// Enum used to define the commandline arguments and choose which index style is used
 #[derive(ValueEnum, Clone, Debug, PartialEq)]
@@ -66,10 +67,10 @@ impl DenseSuffixToProtein {
     /// # Returns
     ///
     /// Returns a new DenseSuffixToProtein build over the provided text
-    pub fn new(text: &[u8]) -> Self {
+    pub fn new(text: &ProteinText) -> Self {
         let mut current_protein_index: u32 = 0;
         let mut suffix_index_to_protein: Vec<u32> = vec![];
-        for &char in text.iter() {
+        for char in text.iter() {
             if char == SEPARATION_CHARACTER || char == TERMINATION_CHARACTER {
                 current_protein_index += 1;
                 suffix_index_to_protein.push(u32::NULL);
@@ -92,9 +93,9 @@ impl SparseSuffixToProtein {
     /// # Returns
     ///
     /// Returns a new SparseSuffixToProtein build over the provided text
-    pub fn new(text: &[u8]) -> Self {
+    pub fn new(text: &ProteinText) -> Self {
         let mut suffix_index_to_protein: Vec<i64> = vec![0];
-        for (index, &char) in text.iter().enumerate() {
+        for (index, char) in text.iter().enumerate() {
             if char == SEPARATION_CHARACTER || char == TERMINATION_CHARACTER {
                 suffix_index_to_protein.push(index as i64 + 1);
             }
@@ -108,6 +109,7 @@ impl SparseSuffixToProtein {
 mod tests {
     use clap::ValueEnum;
     use sa_mappings::proteins::{SEPARATION_CHARACTER, TERMINATION_CHARACTER};
+    use text_compression::ProteinText;
 
     use crate::{
         suffix_to_protein_index::{
@@ -116,10 +118,10 @@ mod tests {
         Nullable
     };
 
-    fn build_text() -> Vec<u8> {
+    fn build_text() -> ProteinText {
         let mut text = ["ACG", "CG", "AAA"].join(&format!("{}", SEPARATION_CHARACTER as char));
         text.push(TERMINATION_CHARACTER as char);
-        text.into_bytes()
+        ProteinText::from_string(&text)
     }
 
     #[test]
diff --git a/sa-mappings/Cargo.toml b/sa-mappings/Cargo.toml
index b20a2bf..d255f7c 100644
--- a/sa-mappings/Cargo.toml
+++ b/sa-mappings/Cargo.toml
@@ -11,3 +11,5 @@ tempdir = "0.3.7"
 [dependencies]
 fa-compression = { path = "../fa-compression" }
 bytelines = "2.5.0"
+bitarray = { path = "../bitarray" }
+text-compression = { path = "../text-compression" }
diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs
index f2b24cc..ca3bdd7 100644
--- a/sa-mappings/src/proteins.rs
+++ b/sa-mappings/src/proteins.rs
@@ -5,6 +5,7 @@ use std::{error::Error, fs::File, io::BufReader, ops::Index, str::from_utf8};
 
 use bytelines::ByteLines;
 use fa_compression::algorithm1::{decode, encode};
+use text_compression::ProteinText;
 
 /// The separation character used in the input string
 pub static SEPARATION_CHARACTER: u8 = b'-';
@@ -28,7 +29,7 @@ pub struct Protein {
 /// A struct that represents a collection of proteins
 pub struct Proteins {
     /// The input string containing all proteins
-    pub input_string: Vec<u8>,
+    pub text: ProteinText,
 
     /// The proteins in the input string
     pub proteins: Vec<Protein>
@@ -86,12 +87,13 @@ impl Proteins {
 
         input_string.pop();
         input_string.push(TERMINATION_CHARACTER.into());
-        input_string.shrink_to_fit();
         proteins.shrink_to_fit();
-        Ok(Self { input_string: input_string.into_bytes(), proteins })
+
+        let text = ProteinText::from_string(&input_string);
+        Ok(Self { text, proteins })
     }
 
-    /// Creates a `vec<u8>` which represents all the proteins concatenated from the database file
+    /// Creates a `ProteinText` which represents all the proteins concatenated from the database file
     ///
     /// # Arguments
     /// * `file` - The path to the database file
@@ -99,12 +101,12 @@ impl Proteins {
     ///
     /// # Returns
     ///
-    /// Returns a `Result` containing the `Vec<u8>`
+    /// Returns a `Result` containing the `ProteinText`
     ///
     /// # Errors
     ///
     /// Returns a `Box<dyn Error>` if an error occurred while reading the database file
-    pub fn try_from_database_file_without_annotations(database_file: &str) -> Result<Vec<u8>, Box<dyn Error>> {
+    pub fn try_from_database_file_without_annotations(database_file: &str) -> Result<ProteinText, Box<dyn Error>> {
         let mut input_string: String = String::new();
 
         let file = File::open(database_file)?;
@@ -123,11 +125,10 @@ impl Proteins {
             input_string.push(SEPARATION_CHARACTER.into());
         }
 
-        input_string.pop();
-        input_string.push(TERMINATION_CHARACTER.into());
+        let text = ProteinText::from_string(&input_string);
+
+        Ok(text)
 
-        input_string.shrink_to_fit();
-        Ok(input_string.into_bytes())
     }
 }
 
@@ -181,8 +182,10 @@ mod tests {
 
     #[test]
     fn test_new_proteins() {
+        let input_string = "MLPGLALLLLAAWTARALEV-PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG";
+        let text = ProteinText::from_string(&input_string);
         let proteins = Proteins {
-            input_string: "MLPGLALLLLAAWTARALEV-PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG".as_bytes().to_vec(),
+            text,
             proteins: vec![
                 Protein {
                     uniprot_id: "P12345".to_string(),
@@ -197,7 +200,6 @@ mod tests {
             ]
         };
 
-        assert_eq!(proteins.input_string, "MLPGLALLLLAAWTARALEV-PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG".as_bytes());
         assert_eq!(proteins.proteins.len(), 2);
         assert_eq!(proteins[0].uniprot_id, "P12345");
         assert_eq!(proteins[0].taxon_id, 1);
@@ -245,12 +247,7 @@ mod tests {
 
         let proteins = Proteins::try_from_database_file_without_annotations(database_file.to_str().unwrap()).unwrap();
 
-        let sep_char = SEPARATION_CHARACTER as char;
-        let end_char = TERMINATION_CHARACTER as char;
-        let expected = format!(
-            "MLPGLALLLLAAWTARALEV{}PTDGNAGLLAEPQIAMFCGRLNMHMNVQNG{}KWDSDPSGTKTCIDT{}KEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH{}",
-            sep_char, sep_char, sep_char, end_char
-        );
-        assert_eq!(proteins, expected.as_bytes());
+        let expected = 'L' as u8;
+        assert_eq!(proteins.get(4), expected);
     }
 }
diff --git a/text-compression/Cargo.toml b/text-compression/Cargo.toml
new file mode 100644
index 0000000..c312a3c
--- /dev/null
+++ b/text-compression/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "text-compression"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bitarray = { path = "../bitarray" }
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
new file mode 100644
index 0000000..60b2463
--- /dev/null
+++ b/text-compression/src/lib.rs
@@ -0,0 +1,391 @@
+use std::{
+    error::Error,
+    io::{BufRead, Write}
+};
+use std::collections::HashMap;
+
+use bitarray::{data_to_writer, Binary, BitArray};
+
+pub struct ProteinText {
+    bit_array: BitArray,
+    char_to_5bit: HashMap<u8, u8>,
+    bit5_to_char: Vec<u8>,
+}
+
+impl ProteinText {
+
+    fn create_char_to_5bit_hashmap() -> HashMap<u8, u8> {
+        let mut hashmap = HashMap::<u8, u8>::new();
+        for (i, c) in "ACDEFGHIKLMNPQRSTVWY-".chars().enumerate() {
+            hashmap.insert(c as u8, i as u8);
+        }
+
+        hashmap
+    }
+
+    fn create_bit5_to_char() -> Vec<u8> {
+        let mut vec = Vec::<u8>::new();
+        for c in "ACDEFGHIKLMNPQRSTVWY-".chars() {
+            vec.push(c as u8);
+        }
+        vec
+    }
+    
+    pub fn from_string(input_string: &str) -> ProteinText {
+        let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
+        let bit5_to_char = ProteinText::create_bit5_to_char();
+
+        let mut bit_array = BitArray::with_capacity(input_string.len(), 5);
+        for (i, c) in input_string.chars().enumerate() {
+            let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect("Input character not in alphabet");
+            bit_array.set(i, char_5bit as u64);
+        }
+
+        Self { bit_array, char_to_5bit, bit5_to_char }
+    }
+
+    pub fn from_vec(input_vec: &Vec<u8>) -> ProteinText {
+        let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
+        let bit5_to_char = ProteinText::create_bit5_to_char();
+
+        let mut bit_array = BitArray::with_capacity(input_vec.len(), 5);
+        for (i, e) in input_vec.iter().enumerate() {
+            let char_5bit: u8 = *char_to_5bit.get(e).expect("Input character not in alphabet");
+            bit_array.set(i, char_5bit as u64);
+        }
+
+        Self { bit_array, char_to_5bit, bit5_to_char }
+    }
+
+    pub fn new(bit_array: BitArray) -> ProteinText {
+        let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
+        let bit5_to_char = ProteinText::create_bit5_to_char();
+        Self { bit_array, char_to_5bit, bit5_to_char }
+    }
+
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self::new(BitArray::with_capacity(capacity, 5))
+    }
+
+    pub fn get(&self, index: usize) -> u8 {
+        let char_5bit = self.bit_array.get(index) as usize;
+        self.bit5_to_char[char_5bit]
+    }
+
+    pub fn set(&mut self, index: usize, value: u8) {
+        let char_5bit: u8 = *self.char_to_5bit.get(&value).expect("Input character not in alphabet");
+        self.bit_array.set(index, char_5bit as u64);
+    }
+
+    pub fn len(&self) -> usize {
+        self.bit_array.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.bit_array.len() == 0
+    }
+
+    /// Clears the `BitArray`, setting all bits to 0.
+    pub fn clear(&mut self) {
+        self.bit_array.clear()
+    }
+
+    pub fn iter(&self) -> ProteinTextIterator {
+        ProteinTextIterator {protein_text: self, index: 0, }
+    }
+
+}
+
+pub struct ProteinTextSlice<'a> {
+    text: &'a ProteinText,
+    start: usize, // included
+    end: usize,   // excluded
+}
+
+impl<'a> ProteinTextSlice<'a> {
+
+    pub fn new(text: &'a ProteinText, start: usize, end: usize) -> ProteinTextSlice {
+        Self {text, start, end }
+    }
+
+    pub fn get(&self, index: usize) -> u8 {
+        self.text.get(self.start + index)
+    }
+
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+
+    #[inline]
+    pub fn equals_slice(&self, other: &[u8], equate_il: bool) -> bool {
+        if equate_il {
+            other.iter().zip(self.iter()).all(|(&search_character, text_character)| {
+                search_character == text_character
+                    || (search_character == b'I' && text_character == b'L')
+                    || (search_character == b'L' && text_character == b'I')
+            })
+        } else {
+            other.iter().zip(self.iter()).all(|(&search_character, text_character)| search_character == text_character)
+        }
+    }
+
+    pub fn check_il_locations(
+        &self,
+        skip: usize,
+        il_locations: &[usize],
+        search_string: &[u8],
+    ) -> bool {
+        for &il_location in il_locations {
+            let index = il_location - skip;
+            if search_string[index] != self.get(index) {
+                return false;
+            }
+        }
+        true
+    }
+
+    pub fn iter(&self) -> ProteinTextSliceIterator {
+        ProteinTextSliceIterator {text_slice: self, index: 0, }
+    }
+}
+
+pub struct ProteinTextIterator<'a> {
+    protein_text: &'a ProteinText,
+    index: usize,
+}
+
+pub struct ProteinTextSliceIterator<'a> {
+    text_slice: &'a ProteinTextSlice<'a>,
+    index: usize,
+}
+
+impl<'a> Iterator for ProteinTextSliceIterator<'a> {
+
+    type Item = u8;
+    
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index >= self.text_slice.len() {
+            return None;
+        }
+
+        self.index += 1;
+        Some(self.text_slice.get(self.index - 1))
+    }
+}
+
+impl<'a> Iterator for ProteinTextIterator<'a> {
+
+    type Item = u8;
+    
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index >= self.protein_text.len() {
+            return None;
+        }
+
+        self.index += 1;
+        Some(self.protein_text.get(self.index - 1))
+    }
+}
+
+/// Writes the compressed text to a writer.
+///
+/// # Arguments
+///
+/// * `text` - The text to be compressed.
+/// * `writer` - The writer to which the compressed text will be written.
+///
+/// # Errors
+///
+/// Returns an error if writing to the writer fails.
+pub fn dump_compressed_text(
+    text: Vec<u8>,
+    writer: &mut impl Write
+) -> Result<(), Box<dyn Error>> {
+    let bits_per_value = 5;
+
+    // Write the number of bits used per value (5) to the writer
+    // as a single leading byte
+    writer
+        .write(&[bits_per_value as u8])
+        .map_err(|_| "Could not write the required bits to the writer")?;
+
+    // Write the size of the text to the writer
+    writer
+        .write(&(text.len() as u64).to_le_bytes())
+        .map_err(|_| "Could not write the size of the text to the writer")?;
+
+    // Compress the text and write it to the writer
+    let text_writer: Vec<i64> = text.iter().map(|item| <i64>::from(*item)).collect();
+    data_to_writer(text_writer, bits_per_value, 8 * 1024, writer)
+        .map_err(|_| "Could not write the compressed text to the writer")?;
+
+    Ok(())
+}
+
+/// Load the compressed text from a reader.
+///
+/// # Arguments
+///
+/// * `reader` - The reader from which the compressed text will be read.
+///
+/// # Errors
+///
+/// Returns an error if reading from the reader fails.
+pub fn load_compressed_text(
+    reader: &mut impl BufRead
+) -> Result<ProteinText, Box<dyn Error>> {
+    let bits_per_value: usize = 5;
+    // Read the size of the text from the binary file (8 bytes)
+    let mut size_buffer = [0_u8; 8];
+    reader
+        .read_exact(&mut size_buffer)
+        .map_err(|_| "Could not read the size of the text from the binary file")?;
+    let size = u64::from_le_bytes(size_buffer) as usize;
+
+    // Read the compressed text from the binary file
+    let mut compressed_text = BitArray::with_capacity(size, bits_per_value);
+    compressed_text
+        .read_binary(reader)
+        .map_err(|_| "Could not read the compressed text from the binary file")?;
+
+    Ok(ProteinText::new(compressed_text))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Read;
+
+    use super::*;
+
+    pub struct FailingWriter {
+        /// The number of times the write function can be called before it fails.
+        pub valid_write_count: usize
+    }
+
+    impl Write for FailingWriter {
+        fn write(&mut self, _: &[u8]) -> Result<usize, std::io::Error> {
+            if self.valid_write_count == 0 {
+                return Err(std::io::Error::new(std::io::ErrorKind::Other, "Write failed"));
+            }
+
+            self.valid_write_count -= 1;
+            Ok(1)
+        }
+
+        fn flush(&mut self) -> Result<(), std::io::Error> {
+            Ok(())
+        }
+    }
+
+    pub struct FailingReader {
+        /// The number of times the read function can be called before it fails.
+        pub valid_read_count: usize
+    }
+
+    impl Read for FailingReader {
+        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+            if self.valid_read_count == 0 {
+                return Err(std::io::Error::new(std::io::ErrorKind::Other, "Read failed"));
+            }
+
+            self.valid_read_count -= 1;
+            Ok(buf.len())
+        }
+    }
+
+    impl BufRead for FailingReader {
+        fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
+            Ok(&[])
+        }
+
+        fn consume(&mut self, _: usize) {}
+    }
+
+    #[test]
+    fn test_dump_compressed_text() {
+        let text: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+        let mut writer = vec![];
+        dump_compressed_text(text, &mut writer).unwrap();
+
+        assert_eq!(writer, vec![
+            // bits per value
+            5, // size of the text
+            10, 0, 0, 0, 0, 0, 0, 0, // compressed text
+            0, 128, 74, 232, 152, 66, 134, 8
+        ]);
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not write the required bits to the writer")]
+    fn test_dump_compressed_text_fail_required_bits() {
+        let mut writer = FailingWriter { valid_write_count: 0 };
+
+        dump_compressed_text(vec![], &mut writer).unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not write the size of the text to the writer")]
+    fn test_dump_compressed_text_fail_size() {
+        let mut writer = FailingWriter { valid_write_count: 1 };
+
+        dump_compressed_text(vec![], &mut writer).unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not write the compressed text to the writer")]
+    fn test_dump_compressed_text_fail_compressed_text() {
+        let mut writer = FailingWriter { valid_write_count: 3 };
+
+        dump_compressed_text(vec![1], &mut writer).unwrap();
+    }
+
+    #[test]
+    fn test_load_compressed_text() {
+        let data = vec![
+             // size of the text
+            10, 0, 0, 0, 0, 0, 0, 0, // compressed text
+            0, 128, 74, 232, 152, 66, 134, 8
+        ];
+
+        let mut reader = std::io::BufReader::new(&data[..]);
+        let compressed_text = load_compressed_text(&mut reader).unwrap();
+
+        for i in 0..10 {
+            assert_eq!(compressed_text.get(i), i as u8 + 1);
+        }
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not read the size of the text from the binary file")]
+    fn test_load_compressed_text_fail_size() {
+        let mut reader = FailingReader { valid_read_count: 0 };
+
+        load_compressed_text(&mut reader).unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "Could not read the compressed text from the binary file")]
+    fn test_load_compressed_text_fail_compressed_text() {
+        let mut reader = FailingReader { valid_read_count: 2 };
+
+        load_compressed_text(&mut reader).unwrap();
+    }
+
+    #[test]
+    fn test_failing_writer() {
+        let mut writer = FailingWriter { valid_write_count: 0 };
+        assert!(writer.flush().is_ok());
+        assert!(writer.write(&[0]).is_err());
+    }
+
+    #[test]
+    fn test_failing_reader() {
+        let mut reader = FailingReader { valid_read_count: 0 };
+        let right_buffer: [u8; 0] = [];
+        assert_eq!(reader.fill_buf().unwrap(), &right_buffer);
+        assert_eq!(reader.consume(0), ());
+        let mut buffer = [0_u8; 1];
+        assert!(reader.read(&mut buffer).is_err());
+    }
+}

From b0a804d1eb09feb19c6039c7610192f7ef5a5613 Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Wed, 11 Sep 2024 09:54:33 +0200
Subject: [PATCH 02/13] fix tests to only use characters of peptide alphabet

---
 sa-index/src/sa_searcher.rs | 10 +++++-----
 text-compression/src/lib.rs |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index 7f60cbb..2324046 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -481,7 +481,7 @@ mod tests {
     }
 
     fn get_example_proteins() -> Proteins {
-        let input_string = "AI-BLACVAA-AC-KCRLZ$";
+        let input_string = "AI-CLACVAA-AC-KCRLY$";
         let text = ProteinText::from_string(input_string);
 
         Proteins {
@@ -561,7 +561,7 @@ mod tests {
         assert_eq!(bounds_res, BoundSearchResult::SearchResult((13, 16)));
 
         // search bounds 'RIZ' with equal I and L
-        let bounds_res = searcher.search_bounds(&[b'R', b'I', b'Z']);
+        let bounds_res = searcher.search_bounds(&[b'R', b'I', b'Y']);
         assert_eq!(bounds_res, BoundSearchResult::SearchResult((17, 18)));
     }
 
@@ -574,18 +574,18 @@ mod tests {
         let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));
 
         // search bounds 'RIZ' with equal I and L
-        let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, true);
+        let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Y'], usize::MAX, true);
         assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![16]));
 
         // search bounds 'RIZ' without equal I and L
-        let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Z'], usize::MAX, false);
+        let found_suffixes = searcher.search_matching_suffixes(&[b'R', b'I', b'Y'], usize::MAX, false);
         assert_eq!(found_suffixes, SearchAllSuffixesResult::NoMatches);
     }
 
     // test edge case where an I or L is the first index in the sparse SA.
     #[test]
     fn test_l_first_index_in_sa() {
-        let input_string = "LMOXZ$";
+        let input_string = "LMPYY$";
         let text = ProteinText::from_string(input_string);
 
         let proteins = Proteins {
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index 60b2463..b090826 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -16,7 +16,7 @@ impl ProteinText {
 
     fn create_char_to_5bit_hashmap() -> HashMap<u8, u8> {
         let mut hashmap = HashMap::<u8, u8>::new();
-        for (i, c) in "ACDEFGHIKLMNPQRSTVWY-".chars().enumerate() {
+        for (i, c) in "ACDEFGHIKLMNPQRSTVWY-$".chars().enumerate() {
             hashmap.insert(c as u8, i as u8);
         }
 
@@ -25,7 +25,7 @@ impl ProteinText {
 
     fn create_bit5_to_char() -> Vec<u8> {
         let mut vec = Vec::<u8>::new();
-        for c in "ACDEFGHIKLMNPQRSTVWY-".chars() {
+        for c in "ACDEFGHIKLMNPQRSTVWY-$".chars() {
             vec.push(c as u8);
         }
         vec

From e47646106fb329f514e39d3127d1c613bbf9c80a Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Wed, 11 Sep 2024 10:44:26 +0200
Subject: [PATCH 03/13] use uncompressed text for SA construction

---
 sa-builder/src/main.rs      |  2 +-
 sa-index/src/sa_searcher.rs | 27 +-----------------------
 sa-mappings/src/proteins.rs | 41 +++++++++++++++++++++++++++++++++++--
 text-compression/src/lib.rs |  4 ++--
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/sa-builder/src/main.rs b/sa-builder/src/main.rs
index 98a1414..01cc3c4 100644
--- a/sa-builder/src/main.rs
+++ b/sa-builder/src/main.rs
@@ -21,7 +21,7 @@ fn main() {
     eprintln!();
     eprintln!("📋 Started loading the proteins...");
     let start_proteins_time = get_time_ms().unwrap();
-    let mut data = Proteins::try_from_database_file_without_annotations(&database_file)
+    let mut data = Proteins::try_from_database_file_uncompressed(&database_file)
         .unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));
     eprintln!(
         "✅ Successfully loaded the proteins in {} seconds!",
diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index 2324046..4f4522e 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -342,7 +342,7 @@ impl Searcher {
                     if suffix >= skip
                         && ((skip == 0
                             || ProteinTextSlice::new(&self.proteins.text, suffix - skip, suffix)
-                                .equals_slice(current_search_string_prefix, equate_il))
+                                .equals_slice(current_search_string_prefix, equate_il)) // Check the prefix
                             && 
                             Self::check_suffix(
                                 skip,
@@ -372,31 +372,6 @@ impl Searcher {
         }
     }
 
-    /// Returns true of the prefixes are the same
-    /// if `equate_il` is set to true, L and I are considered the same
-    ///
-    /// # Arguments
-    /// * `search_string_prefix` - The unchecked prefix of the string/peptide that is searched
-    /// * `index_prefix` - The unchecked prefix from the protein from the suffix array
-    /// * `equate_il` - True if we want to equate I and L during search, otherwise false
-    ///
-    /// # Returns
-    ///
-    /// Returns true if `search_string_prefix` and `index_prefix` are considered the same, otherwise
-    /// false
-    #[inline]
-    fn check_prefix(search_string_prefix: &[u8], index_prefix: &[u8], equate_il: bool) -> bool {
-        if equate_il {
-            search_string_prefix.iter().zip(index_prefix).all(|(&search_character, &index_character)| {
-                search_character == index_character
-                    || (search_character == b'I' && index_character == b'L')
-                    || (search_character == b'L' && index_character == b'I')
-            })
-        } else {
-            search_string_prefix == index_prefix
-        }
-    }
-
     /// Returns true of the search_string and index_string are equal
     /// This is automatically true if `equate_il` is set to true, since there matched during
     /// search where I = L If `equate_il` is set to false, we need to check if the I and
diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs
index ca3bdd7..626ead3 100644
--- a/sa-mappings/src/proteins.rs
+++ b/sa-mappings/src/proteins.rs
@@ -47,7 +47,6 @@ impl Proteins {
     ///
     /// # Arguments
     /// * `file` - The path to the database file
-    /// * `taxon_aggregator` - The `TaxonAggregator` to use
     ///
     /// # Returns
     ///
@@ -97,7 +96,6 @@ impl Proteins {
     ///
     /// # Arguments
     /// * `file` - The path to the database file
-    /// * `taxon_aggregator` - The `TaxonAggregator` to use
     ///
     /// # Returns
     ///
@@ -130,6 +128,45 @@ impl Proteins {
         Ok(text)
 
     }
+
+    /// Creates a `Vec<u8>` which represents all the proteins concatenated from the database file
+    ///
+    /// # Arguments
+    /// * `file` - The path to the database file
+    ///
+    /// # Returns
+    ///
+    /// Returns a `Result` containing the `Vec<u8>`
+    ///
+    /// # Errors
+    ///
+    /// Returns a `Box<dyn Error>` if an error occurred while reading the database file
+    pub fn try_from_database_file_uncompressed(database_file: &str) -> Result<Vec<u8>, Box<dyn Error>> {
+        let mut input_string: String = String::new();
+
+        let file = File::open(database_file)?;
+
+        // Read the lines as bytes, since the input string is not guaranteed to be utf8
+        // because of the encoded functional annotations
+        let mut lines = ByteLines::new(BufReader::new(file));
+
+        while let Some(Ok(line)) = lines.next() {
+            let mut fields = line.split(|b| *b == b'\t');
+
+            // only get the sequence (third tab-separated field) from each line, we don't need the other parts
+            let sequence = from_utf8(fields.nth(2).unwrap())?;
+
+            input_string.push_str(&sequence.to_uppercase());
+            input_string.push(SEPARATION_CHARACTER.into());
+        }
+
+        input_string.pop();
+        input_string.push(TERMINATION_CHARACTER.into());
+
+        input_string.shrink_to_fit();
+        Ok(input_string.into_bytes())
+
+    }
 }
 
 impl Index<usize> for Proteins {
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index b090826..871de5b 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -351,8 +351,8 @@ mod tests {
         let mut reader = std::io::BufReader::new(&data[..]);
         let compressed_text = load_compressed_text(&mut reader).unwrap();
 
-        for i in 0..10 {
-            assert_eq!(compressed_text.get(i), i as u8 + 1);
+        for (i, c) in "CDEFGHIKLM".chars().enumerate() {
+            assert_eq!(compressed_text.get(i), c as u8);
         }
     }
 

From 2c14c52bfc7e6fc5476bdc6d7d698cd1630734c5 Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Wed, 11 Sep 2024 11:46:59 +0200
Subject: [PATCH 04/13] add tests for text-compression

---
 text-compression/src/lib.rs | 108 +++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index 871de5b..2e25f7b 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -94,6 +94,10 @@ impl ProteinText {
         ProteinTextIterator {protein_text: self, index: 0, }
     }
 
+    pub fn slice(&self, start: usize, end:usize) -> ProteinTextSlice {
+        ProteinTextSlice::new(self, start, end)
+    }
+
 }
 
 pub struct ProteinTextSlice<'a> {
@@ -253,7 +257,7 @@ pub fn load_compressed_text(
 
 #[cfg(test)]
 mod tests {
-    use std::io::Read;
+    use std::{char, io::Read};
 
     use super::*;
 
@@ -301,6 +305,108 @@ mod tests {
         fn consume(&mut self, _: usize) {}
     }
 
+    #[test]
+    fn test_u8_5bit_conversion() {
+        let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
+        let bit5_to_char = ProteinText::create_bit5_to_char();
+
+        for c in "ACDEFGHIKLMNPQRSTVWY-$".chars() {
+            let char_5bit = char_to_5bit.get(&(c as u8)).unwrap();
+            assert_eq!(c as u8, bit5_to_char[*char_5bit as usize]);
+        }
+    }
+
+    #[test]
+    fn test_build_from_string() {
+        let text = ProteinText::from_string("ACACA-CAC$");
+
+        for (i, c) in "ACACA-CAC$".chars().enumerate() {
+            assert_eq!(c as u8, text.get(i));
+        }
+    }
+
+    #[test]
+    fn test_build_from_vec() {
+        let vec = vec![b'A', b'C', b'A', b'C', b'A', b'-', b'C', b'A', b'C', b'$'];
+        let text = ProteinText::from_vec(&vec);
+
+        for (i, c) in "ACACA-CAC$".chars().enumerate() {
+            assert_eq!(c as u8, text.get(i));
+        }
+    }
+
+    #[test]
+    fn test_build_from_bitarray() {
+        let input_string = "ACACA-CAC$";
+        let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
+
+        let mut bit_array = BitArray::with_capacity(input_string.len(), 5);
+        for (i, c) in input_string.chars().enumerate() {
+            let char_5bit: u8 = *char_to_5bit.get(&(c as u8)).expect("Input character not in alphabet");
+            bit_array.set(i, char_5bit as u64);
+        }
+
+        let text = ProteinText::new(bit_array);
+
+        for (i, c) in "ACACA-CAC$".chars().enumerate() {
+            assert_eq!(c as u8, text.get(i));
+        }
+    }
+
+    #[test]
+    fn test_build_with_capacity() {
+        let input_string = "ACACA-CAC$";
+
+        let mut text = ProteinText::with_capacity(input_string.len());
+        for (i, c) in "ACACA-CAC$".chars().enumerate() {
+            text.set(i, c as u8);
+        }
+
+        for (i, c) in "ACACA-CAC$".chars().enumerate() {
+            assert_eq!(c as u8, text.get(i));
+        }
+    }
+
+    #[test]
+    fn test_text_slice() {
+        let input_string = "ACACA-CAC$";
+        let start = 1;
+        let end  = 5;
+        let text = ProteinText::from_string(&input_string);
+        let text_slice = text.slice(start, end);
+
+        for (i, c) in input_string[start..end].chars().enumerate() {
+            assert_eq!(c as u8, text_slice.get(i));
+        }
+    }
+
+    #[test]
+    fn test_equals_slice() {
+        let input_string = "ACICA-CAC$";
+        let text = ProteinText::from_string(&input_string);
+        let text_slice = text.slice(1, 5);
+        let eq_slice_true = [b'C', b'I', b'C', b'A'];
+        let eq_slice_false = [b'C', b'C', b'C', b'A'];
+        let eq_slice_il_true = [b'C', b'L', b'C', b'A'];
+
+        assert!(text_slice.equals_slice(&eq_slice_true, false));
+        assert!(! text_slice.equals_slice(&eq_slice_false, false));
+        assert!(text_slice.equals_slice(&eq_slice_il_true, true));
+    }
+
+    #[test]
+    fn test_check_il_locations() {
+        let input_string = "ACILA-CAC$";
+        let text = ProteinText::from_string(&input_string);
+        let text_slice = text.slice(1, 5);
+        let il_locations = [1, 2];
+        let il_true = [b'C', b'I', b'L', b'A'];
+        let il_false = [b'C', b'I', b'C', b'A'];
+
+        assert!(text_slice.check_il_locations(0, &il_locations, &il_true));
+        assert!(! text_slice.check_il_locations(0, &il_locations, &il_false));
+    }
+
     #[test]
     fn test_dump_compressed_text() {
         let text: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

From 224fa5185adc06694d102ebd34592e85d3f9c26a Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Wed, 11 Sep 2024 13:59:51 +0200
Subject: [PATCH 05/13] Add documentation to text compression

---
 text-compression/src/lib.rs | 145 +++++++++++++++++++++++++++++++++++-
 1 file changed, 144 insertions(+), 1 deletion(-)

diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index 2e25f7b..6bfaf1a 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -6,14 +6,23 @@ use std::collections::HashMap;
 
 use bitarray::{data_to_writer, Binary, BitArray};
 
+/// Structure representing the proteins, stored in a bit array using 5 bits per amino acid.
 pub struct ProteinText {
+    /// Bit array holding the sequence of amino acids
     bit_array: BitArray,
+    /// Hashmap storing the mapping between the character as `u8` and a 5 bit number.
     char_to_5bit: HashMap<u8, u8>,
+    /// Vector storing the mapping between the 5 bit number and the character as `u8`.
     bit5_to_char: Vec<u8>,
 }
 
 impl ProteinText {
 
+    /// Creates the hashmap storing the mappings between the characters as `u8` and 5 bit numbers.
+    ///
+    /// # Returns
+    ///
+    /// Returns the hashmap
     fn create_char_to_5bit_hashmap() -> HashMap<u8, u8> {
         let mut hashmap = HashMap::<u8, u8>::new();
         for (i, c) in "ACDEFGHIKLMNPQRSTVWY-$".chars().enumerate() {
@@ -23,6 +32,11 @@ impl ProteinText {
         hashmap
     }
 
+    /// Creates the vector storing the mappings between the 5 bit numbers and the characters as `u8`.
+    ///
+    /// # Returns
+    ///
+    /// Returns the vector
     fn create_bit5_to_char() -> Vec<u8> {
         let mut vec = Vec::<u8>::new();
         for c in "ACDEFGHIKLMNPQRSTVWY-$".chars() {
@@ -31,6 +45,14 @@ impl ProteinText {
         vec
     }
     
+    /// Creates the compressed text from a string.
+    /// 
+    /// # Arguments
+    /// * `input_string` - The text (proteins) in string format
+    ///
+    /// # Returns
+    ///
+    /// An instance of `ProteinText`
     pub fn from_string(input_string: &str) -> ProteinText {
         let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
         let bit5_to_char = ProteinText::create_bit5_to_char();
@@ -44,6 +66,14 @@ impl ProteinText {
         Self { bit_array, char_to_5bit, bit5_to_char }
     }
 
+    /// Creates the compressed text from a vector.
+    /// 
+    /// # Arguments
+    /// * `input_vec` - The text (proteins) in a vector with elements of type `u8` representing the amino acids.
+    ///
+    /// # Returns
+    ///
+    /// An instance of `ProteinText`
     pub fn from_vec(input_vec: &Vec<u8>) -> ProteinText {
         let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
         let bit5_to_char = ProteinText::create_bit5_to_char();
@@ -57,30 +87,69 @@ impl ProteinText {
         Self { bit_array, char_to_5bit, bit5_to_char }
     }
 
+    /// Creates the compressed text from a bit array.
+    /// 
+    /// # Arguments
+    /// * `bit_array` - The text (proteins) in a bit array using 5 bits for each amino acid.
+    ///
+    /// # Returns
+    ///
+    /// An instance of `ProteinText`
     pub fn new(bit_array: BitArray) -> ProteinText {
         let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
         let bit5_to_char = ProteinText::create_bit5_to_char();
         Self { bit_array, char_to_5bit, bit5_to_char }
     }
 
+    /// Creates an instance of `ProteinText` with a given capacity.
+    /// 
+    /// # Arguments
+    /// * `capacity` - The amount of characters in the text.
+    ///
+    /// # Returns
+    ///
+    /// An instance of `ProteinText`
     pub fn with_capacity(capacity: usize) -> Self {
         Self::new(BitArray::with_capacity(capacity, 5))
     }
 
+    /// Search the character at a given position in the compressed text.
+    /// 
+    /// # Arguments
+    /// * `index` - The index of the character to search.
+    ///
+    /// # Returns
+    ///
+    /// the character at position `index` as `u8`.
     pub fn get(&self, index: usize) -> u8 {
         let char_5bit = self.bit_array.get(index) as usize;
         self.bit5_to_char[char_5bit]
     }
 
+    /// Set the character at a given index.
+    /// 
+    /// # Arguments
+    /// * `index` - The index of the character to change.
+    /// * `value` - The character to fill in as `u8`.
     pub fn set(&mut self, index: usize, value: u8) {
         let char_5bit: u8 = *self.char_to_5bit.get(&value).expect("Input character not in alphabet");
         self.bit_array.set(index, char_5bit as u64);
     }
 
+    /// Queries the length of the text.
+    ///
+    /// # Returns
+    /// 
+    /// the length of the text
     pub fn len(&self) -> usize {
         self.bit_array.len()
     }
 
+    /// Check if the text is empty (length 0).
+    ///
+    /// # Returns
+    /// 
+    /// true if the text has length 0, false otherwise.
     pub fn is_empty(&self) -> bool {
         self.bit_array.len() == 0
     }
@@ -90,36 +159,83 @@ impl ProteinText {
         self.bit_array.clear()
     }
 
+    /// Get an iterator over the characters of the text.
+    ///
+    /// # Returns
+    /// 
+    /// A `ProteinTextIterator`, which can iterate over the characters of the text.
     pub fn iter(&self) -> ProteinTextIterator {
         ProteinTextIterator {protein_text: self, index: 0, }
     }
 
+    /// Get a slice of the text
+    ///
+    /// # Returns
+    /// 
+    /// A `ProteinTextSlice` representing a slice of the text.
     pub fn slice(&self, start: usize, end:usize) -> ProteinTextSlice {
         ProteinTextSlice::new(self, start, end)
     }
 
 }
 
+/// Structure representing a slice of a `ProteinText`.
 pub struct ProteinTextSlice<'a> {
+    /// The `ProteinText` of which to take a slice.
     text: &'a ProteinText,
+    /// The start of the slice.
     start: usize, // included
+    /// The end of the slice.
     end: usize,   // excluded
 }
 
 impl<'a> ProteinTextSlice<'a> {
 
+    /// Creates an instance of `ProteinTextSlice`, given the text and boundaries.
+    /// 
+    /// # Arguments
+    /// * `text` - The `ProteinText` representing the text of proteins with 5 bits per amino acid.
+    /// * `start` - The start of the slice.
+    /// * `end` - The end of the slice.
+    ///
+    /// # Returns
+    ///
+    /// An instance of `ProteinTextSlice`
     pub fn new(text: &'a ProteinText, start: usize, end: usize) -> ProteinTextSlice {
         Self {text, start, end }
     }
 
+    /// Get a character (amino acid) in the slice.
+    /// 
+    /// # Arguments
+    /// * `index` - The index in the slice of the character to get.
+    ///
+    /// # Returns
+    ///
+    /// The character as `u8`.
     pub fn get(&self, index: usize) -> u8 {
         self.text.get(self.start + index)
     }
 
+    /// Get the length of the slice.
+    ///
+    /// # Returns
+    ///
+    /// The length of the slice.
     pub fn len(&self) -> usize {
         self.end - self.start
     }
 
+    /// Checks if the slice and a given array of `u8` are equal.
+    /// I and L can be equated.
+    /// 
+    /// # Arguments
+    /// * `other` - the array of `u8` to compare the slice with.
+    /// * `equate_il` - true if I and L need to be equated, false otherwise.
+    ///
+    /// # Returns
+    ///
+    /// True if the slice is equal to the given array, false otherwise.
     #[inline]
     pub fn equals_slice(&self, other: &[u8], equate_il: bool) -> bool {
         if equate_il {
@@ -133,6 +249,16 @@ impl<'a> ProteinTextSlice<'a> {
         }
     }
 
+    /// Check if the slice and a given array of `u8` are equal on the I and L positions.
+    /// 
+    /// # Arguments
+    /// * `skip` - The amount of positions this slice skipped, this has an influence on the I and L positions.
+    /// * `il_locations` - The positions where I and L occur.
+    /// * `search_string` -  An array of `u8` to compare the slice with.
+    ///
+    /// # Returns
+    ///
+    /// True if the slice and `search_string` have the same contents on the I and L positions, false otherwise.
     pub fn check_il_locations(
         &self,
         skip: usize,
@@ -148,16 +274,23 @@ impl<'a> ProteinTextSlice<'a> {
         true
     }
 
+    /// Get an iterator over the slice.
+    ///
+    /// # Returns
+    ///
+    /// An iterator over the slice.
     pub fn iter(&self) -> ProteinTextSliceIterator {
         ProteinTextSliceIterator {text_slice: self, index: 0, }
     }
 }
 
+/// Structure representing an iterator over a `ProteinText` instance, iterating the characters of the text.
 pub struct ProteinTextIterator<'a> {
     protein_text: &'a ProteinText,
     index: usize,
 }
 
+/// Structure representing an iterator over a `ProteinTextSlice` instance, iterating the characters of the slice.
 pub struct ProteinTextSliceIterator<'a> {
     text_slice: &'a ProteinTextSlice<'a>,
     index: usize,
@@ -167,6 +300,11 @@ impl<'a> Iterator for ProteinTextSliceIterator<'a> {
 
     type Item = u8;
     
+    /// Get the next character in the `ProteinTextSlice`.
+    /// 
+    /// # Returns
+    /// 
+    /// The next character in the slice.
     fn next(&mut self) -> Option<Self::Item> {
         if self.index >= self.text_slice.len() {
             return None;
@@ -181,6 +319,11 @@ impl<'a> Iterator for ProteinTextIterator<'a> {
 
     type Item = u8;
     
+    /// Get the next character in the `ProteinText`.
+    /// 
+    /// # Returns
+    /// 
+    /// The next character in the text.
     fn next(&mut self) -> Option<Self::Item> {
         if self.index >= self.protein_text.len() {
             return None;
@@ -257,7 +400,7 @@ pub fn load_compressed_text(
 
 #[cfg(test)]
 mod tests {
-    use std::{char, io::Read};
+    use std::io::Read;
 
     use super::*;
 

From 7e17bd66c39005075f1e8bf1ef2fc48afeb26256 Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 15:54:47 +0200
Subject: [PATCH 06/13] remove trailing space

---
 sa-index/src/sa_searcher.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index 4f4522e..d2250ed 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -343,7 +343,7 @@ impl Searcher {
                         && ((skip == 0
                             || ProteinTextSlice::new(&self.proteins.text, suffix - skip, suffix)
                                 .equals_slice(current_search_string_prefix, equate_il)) // Check the prefix
-                            && 
+                            &&
                             Self::check_suffix(
                                 skip,
                                 il_locations_current_suffix,

From 4be9148a5e60d1bf44e93d8c864caaa20873ade3 Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 15:56:55 +0200
Subject: [PATCH 07/13] cargo fmt to format code

---
 bitarray/src/binary.rs                      |  11 +-
 bitarray/src/lib.rs                         |  78 ++++++++------
 fa-compression/benches/algorithm1/decode.rs |   2 +-
 fa-compression/benches/algorithm1/encode.rs |   2 +-
 fa-compression/benches/algorithm2/decode.rs |   2 +-
 fa-compression/benches/algorithm2/encode.rs |   2 +-
 fa-compression/benches/util.rs              |   2 +-
 fa-compression/src/algorithm1/encode.rs     |  21 ++--
 fa-compression/src/algorithm1/mod.rs        |   8 +-
 fa-compression/src/algorithm2/encode.rs     |   7 +-
 fa-compression/src/algorithm2/mod.rs        |   4 +-
 libsais64-rs/builder.rs                     |   8 +-
 libsais64-rs/src/lib.rs                     |   6 +-
 sa-builder/src/lib.rs                       |  10 +-
 sa-builder/src/main.rs                      |   4 +-
 sa-compression/src/lib.rs                   |  27 ++---
 sa-index/src/binary.rs                      |  36 ++++---
 sa-index/src/lib.rs                         |  10 +-
 sa-index/src/peptide_search.rs              |  18 ++--
 sa-index/src/sa_searcher.rs                 |  56 +++++-----
 sa-index/src/suffix_to_protein_index.rs     |  12 +--
 sa-mappings/src/proteins.rs                 |  18 ++--
 sa-server/src/main.rs                       |  12 +--
 text-compression/src/lib.rs                 | 111 +++++++++-----------
 24 files changed, 243 insertions(+), 224 deletions(-)

diff --git a/bitarray/src/binary.rs b/bitarray/src/binary.rs
index a8084d1..4ab535f 100644
--- a/bitarray/src/binary.rs
+++ b/bitarray/src/binary.rs
@@ -167,10 +167,13 @@ mod tests {
         let mut buffer = Vec::new();
         bitarray.write_binary(&mut buffer).unwrap();
 
-        assert_eq!(buffer, vec![
-            0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00, 0x00,
-            0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
-        ]);
+        assert_eq!(
+            buffer,
+            vec![
+                0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00,
+                0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
+            ]
+        );
     }
 
     #[test]
diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index fe7b532..d58a60c 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -4,7 +4,7 @@ mod binary;
 
 use std::{
     cmp::max,
-    io::{Result, Write}
+    io::{Result, Write},
 };
 
 /// Re-export the `Binary` trait.
@@ -147,7 +147,6 @@ impl BitArray {
     pub fn get_data_slice(&self, start_slice: usize, end_slice: usize) -> &[u64] {
         &self.data[start_slice..end_slice]
     }
-
 }
 
 /// Writes the data to a writer in a binary format using a bit array. This function is helpfull
@@ -168,7 +167,7 @@ pub fn data_to_writer(
     data: Vec<i64>,
     bits_per_value: usize,
     max_capacity: usize,
-    writer: &mut impl Write
+    writer: &mut impl Write,
 ) -> Result<()> {
     // Update the max capacity to be a multiple of the greatest common divisor of the bits per value
     // and 64. This is done to ensure that the bit array can store the data entirely
@@ -312,10 +311,13 @@ mod tests {
 
         data_to_writer(data, 40, 2, &mut writer).unwrap();
 
-        assert_eq!(writer, vec![
-            0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00, 0x00,
-            0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
-        ]);
+        assert_eq!(
+            writer,
+            vec![
+                0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00,
+                0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
+            ]
+        );
     }
 
     #[test]
@@ -334,23 +336,27 @@ mod tests {
 
         data_to_writer(data, 32, 8, &mut writer).unwrap();
 
-        assert_eq!(writer, vec![
-            0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66,
-            0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa,
-            0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd,
-            0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
-            0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88,
-            0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc,
-            0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
-            0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33,
-            0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
-            0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee,
-            0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11,
-            0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55,
-            0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc,
-            0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00,
-            0xff, 0xff, 0xff, 0xff
-        ]);
+        assert_eq!(
+            writer,
+            vec![
+                0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66,
+                0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
+                0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee,
+                0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22,
+                0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55,
+                0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99,
+                0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd,
+                0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
+                0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88,
+                0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc,
+                0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00,
+                0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44,
+                0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77,
+                0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb,
+                0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff,
+                0xff
+            ]
+        );
     }
 
     #[test]
@@ -365,16 +371,20 @@ mod tests {
 
         data_to_writer(data, 32, 8, &mut writer).unwrap();
 
-        assert_eq!(writer, vec![
-            0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66,
-            0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa,
-            0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd,
-            0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
-            0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88,
-            0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc,
-            0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
-            0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x33, 0x33
-        ]);
+        assert_eq!(
+            writer,
+            vec![
+                0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66,
+                0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
+                0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee,
+                0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22,
+                0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55,
+                0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99,
+                0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd,
+                0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
+                0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x33, 0x33
+            ]
+        );
     }
 
     #[test]
diff --git a/fa-compression/benches/algorithm1/decode.rs b/fa-compression/benches/algorithm1/decode.rs
index 24991dc..bd1c94d 100644
--- a/fa-compression/benches/algorithm1/decode.rs
+++ b/fa-compression/benches/algorithm1/decode.rs
@@ -22,7 +22,7 @@ pub fn decode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_encoded_annotations(100),
             |annotations| black_box(decode(annotations.as_slice())),
-            criterion::BatchSize::SmallInput
+            criterion::BatchSize::SmallInput,
         )
     });
 }
diff --git a/fa-compression/benches/algorithm1/encode.rs b/fa-compression/benches/algorithm1/encode.rs
index d3a9c86..1d23a6e 100644
--- a/fa-compression/benches/algorithm1/encode.rs
+++ b/fa-compression/benches/algorithm1/encode.rs
@@ -22,7 +22,7 @@ pub fn encode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_decoded_annotations(100),
             |annotations| black_box(encode(annotations.as_str())),
-            criterion::BatchSize::SmallInput
+            criterion::BatchSize::SmallInput,
         )
     });
 }
diff --git a/fa-compression/benches/algorithm2/decode.rs b/fa-compression/benches/algorithm2/decode.rs
index 4d562fc..62f8b9e 100644
--- a/fa-compression/benches/algorithm2/decode.rs
+++ b/fa-compression/benches/algorithm2/decode.rs
@@ -28,7 +28,7 @@ pub fn decode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_encoded_annotations_and_table(100),
             |(annotations, ct)| black_box(decode(annotations.as_slice(), ct)),
-            criterion::BatchSize::SmallInput
+            criterion::BatchSize::SmallInput,
         )
     });
 }
diff --git a/fa-compression/benches/algorithm2/encode.rs b/fa-compression/benches/algorithm2/encode.rs
index 827dd50..a69ed0e 100644
--- a/fa-compression/benches/algorithm2/encode.rs
+++ b/fa-compression/benches/algorithm2/encode.rs
@@ -26,7 +26,7 @@ pub fn encode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_decoded_annotations_and_table(100),
             |(annotations, ct)| black_box(encode(annotations.as_str(), ct)),
-            criterion::BatchSize::SmallInput
+            criterion::BatchSize::SmallInput,
         )
     });
 }
diff --git a/fa-compression/benches/util.rs b/fa-compression/benches/util.rs
index b6ddd9a..0e80765 100644
--- a/fa-compression/benches/util.rs
+++ b/fa-compression/benches/util.rs
@@ -27,6 +27,6 @@ pub fn generate_annotation(random: &mut ThreadRng) -> String {
         0 => generate_ipr(random),
         1 => generate_go(random),
         2 => generate_ec(random),
-        _ => unreachable!()
+        _ => unreachable!(),
     }
 }
diff --git a/fa-compression/src/algorithm1/encode.rs b/fa-compression/src/algorithm1/encode.rs
index ef79372..9138be1 100644
--- a/fa-compression/src/algorithm1/encode.rs
+++ b/fa-compression/src/algorithm1/encode.rs
@@ -106,16 +106,18 @@ mod tests {
 
     #[test]
     fn test_encode_no_ec() {
-        assert_eq!(encode("IPR:IPR016364;GO:0009279;IPR:IPR008816"), vec![
-            225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39
-        ])
+        assert_eq!(
+            encode("IPR:IPR016364;GO:0009279;IPR:IPR008816"),
+            vec![225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39]
+        )
     }
 
     #[test]
     fn test_encode_no_go() {
-        assert_eq!(encode("IPR:IPR016364;EC:1.1.1.-;EC:1.2.1.7"), vec![
-            44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80
-        ])
+        assert_eq!(
+            encode("IPR:IPR016364;EC:1.1.1.-;EC:1.2.1.7"),
+            vec![44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80]
+        )
     }
 
     #[test]
@@ -125,8 +127,9 @@ mod tests {
 
     #[test]
     fn test_encode_all() {
-        assert_eq!(encode("IPR:IPR016364;EC:1.1.1.-;IPR:IPR032635;GO:0009279;IPR:IPR008816"), vec![
-            44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39
-        ])
+        assert_eq!(
+            encode("IPR:IPR016364;EC:1.1.1.-;IPR:IPR032635;GO:0009279;IPR:IPR008816"),
+            vec![44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39]
+        )
     }
 }
diff --git a/fa-compression/src/algorithm1/mod.rs b/fa-compression/src/algorithm1/mod.rs
index cdf7283..8ea45c5 100644
--- a/fa-compression/src/algorithm1/mod.rs
+++ b/fa-compression/src/algorithm1/mod.rs
@@ -79,7 +79,7 @@ enum CharacterSet {
     Comma,
 
     /// Annotation separator
-    Semicolon
+    Semicolon,
 }
 
 impl Encode for CharacterSet {
@@ -110,7 +110,7 @@ impl Encode for CharacterSet {
             b'n' => CharacterSet::Preliminary,
             b',' => CharacterSet::Comma,
             b';' => CharacterSet::Semicolon,
-            _ => panic!("Invalid character")
+            _ => panic!("Invalid character"),
         }
     }
 }
@@ -143,7 +143,7 @@ impl Decode for CharacterSet {
             13 => 'n',
             14 => ',',
             15 => ';',
-            _ => panic!("Invalid character")
+            _ => panic!("Invalid character"),
         }
     }
 }
@@ -189,7 +189,7 @@ mod tests {
         CharacterSet::Point,
         CharacterSet::Preliminary,
         CharacterSet::Comma,
-        CharacterSet::Semicolon
+        CharacterSet::Semicolon,
     ];
 
     #[test]
diff --git a/fa-compression/src/algorithm2/encode.rs b/fa-compression/src/algorithm2/encode.rs
index f55eb11..d60fe61 100644
--- a/fa-compression/src/algorithm2/encode.rs
+++ b/fa-compression/src/algorithm2/encode.rs
@@ -89,8 +89,9 @@ mod tests {
     #[test]
     fn test_encode_all() {
         let table = create_compresion_table();
-        assert_eq!(encode("IPR:IPR000001;EC:1.1.1.-;IPR:IPR000003;GO:0000002", table), vec![
-            0, 0, 0, 7, 0, 0, 2, 0, 0, 5, 0, 0
-        ])
+        assert_eq!(
+            encode("IPR:IPR000001;EC:1.1.1.-;IPR:IPR000003;GO:0000002", table),
+            vec![0, 0, 0, 7, 0, 0, 2, 0, 0, 5, 0, 0]
+        )
     }
 }
diff --git a/fa-compression/src/algorithm2/mod.rs b/fa-compression/src/algorithm2/mod.rs
index 8fc505a..117b87c 100644
--- a/fa-compression/src/algorithm2/mod.rs
+++ b/fa-compression/src/algorithm2/mod.rs
@@ -12,13 +12,13 @@ pub use encode::encode;
 /// Represents an entry in the compression table.
 #[doc(hidden)]
 pub struct CompressionTableEntry {
-    annotation: String
+    annotation: String,
 }
 
 /// Represents a compression table.
 pub struct CompressionTable {
     /// List of annotations in the compression table.
-    entries: Vec<CompressionTableEntry>
+    entries: Vec<CompressionTableEntry>,
 }
 
 impl CompressionTable {
diff --git a/libsais64-rs/builder.rs b/libsais64-rs/builder.rs
index 5b3feb2..c6fc2d6 100644
--- a/libsais64-rs/builder.rs
+++ b/libsais64-rs/builder.rs
@@ -3,14 +3,14 @@ use std::{
     error::Error,
     fmt::{Display, Formatter},
     path::{Path, PathBuf},
-    process::{Command, ExitStatus}
+    process::{Command, ExitStatus},
 };
 
 /// Custom error for compilation of the C library
 #[derive(Debug)]
 struct CompileError<'a> {
     command: &'a str,
-    exit_code: Option<i32>
+    exit_code: Option<i32>,
 }
 
 impl<'a> Display for CompileError<'a> {
@@ -43,7 +43,7 @@ impl<'a> Error for CompileError<'a> {}
 fn exit_status_to_result(name: &str, exit_status: ExitStatus) -> Result<(), CompileError> {
     match exit_status.success() {
         true => Ok(()),
-        false => Err(CompileError { command: name, exit_code: exit_status.code() })
+        false => Err(CompileError { command: name, exit_code: exit_status.code() }),
     }
 }
 
@@ -61,7 +61,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     Command::new("rm").args(["libsais/CMakeCache.txt"]).status().unwrap_or_default(); // if removing fails, it is since the cmake cache did not exist, we just can ignore it
     exit_status_to_result(
         "cmake",
-        Command::new("cmake").args(["-DCMAKE_BUILD_TYPE=\"Release\"", "libsais", "-Blibsais"]).status()?
+        Command::new("cmake").args(["-DCMAKE_BUILD_TYPE=\"Release\"", "libsais", "-Blibsais"]).status()?,
     )?;
     exit_status_to_result("make", Command::new("make").args(["-C", "libsais"]).status()?)?;
 
diff --git a/libsais64-rs/src/lib.rs b/libsais64-rs/src/lib.rs
index e2a87f6..b2a1d3a 100644
--- a/libsais64-rs/src/lib.rs
+++ b/libsais64-rs/src/lib.rs
@@ -16,7 +16,11 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
 pub fn sais64(text: &[u8]) -> Option<Vec<i64>> {
     let mut sa = vec![0; text.len()];
     let exit_code = unsafe { libsais64(text.as_ptr(), sa.as_mut_ptr(), text.len() as i64, 0, std::ptr::null_mut()) };
-    if exit_code == 0 { Some(sa) } else { None }
+    if exit_code == 0 {
+        Some(sa)
+    } else {
+        None
+    }
 }
 
 #[cfg(test)]
diff --git a/sa-builder/src/lib.rs b/sa-builder/src/lib.rs
index c0e13cd..f20ec27 100644
--- a/sa-builder/src/lib.rs
+++ b/sa-builder/src/lib.rs
@@ -21,14 +21,14 @@ pub struct Arguments {
     pub construction_algorithm: SAConstructionAlgorithm,
     /// If the suffix array should be compressed (default value true)
     #[arg(short, long, default_value_t = false)]
-    pub compress_sa: bool
+    pub compress_sa: bool,
 }
 
 /// Enum representing the two possible algorithms to construct the suffix array
 #[derive(ValueEnum, Clone, Debug, PartialEq)]
 pub enum SAConstructionAlgorithm {
     LibDivSufSort,
-    LibSais
+    LibSais,
 }
 
 /// Build a sparse suffix array from the given text
@@ -48,7 +48,7 @@ pub enum SAConstructionAlgorithm {
 pub fn build_ssa(
     text: &mut Vec<u8>,
     construction_algorithm: &SAConstructionAlgorithm,
-    sparseness_factor: u8
+    sparseness_factor: u8,
 ) -> Result<Vec<i64>, Box<dyn Error>> {
     // translate all L's to a I
     translate_l_to_i(text);
@@ -56,7 +56,7 @@ pub fn build_ssa(
     // Build the suffix array using the selected algorithm
     let mut sa = match construction_algorithm {
         SAConstructionAlgorithm::LibSais => libsais64_rs::sais64(text),
-        SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text)
+        SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text),
     }
     .ok_or("Building suffix array failed")?;
 
@@ -125,7 +125,7 @@ mod tests {
             "2",
             "--construction-algorithm",
             "lib-div-suf-sort",
-            "--compress-sa"
+            "--compress-sa",
         ]);
 
         assert_eq!(args.database_file, "database.fa");
diff --git a/sa-builder/src/main.rs b/sa-builder/src/main.rs
index 01cc3c4..20f2e8a 100644
--- a/sa-builder/src/main.rs
+++ b/sa-builder/src/main.rs
@@ -1,7 +1,7 @@
 use std::{
     fs::{File, OpenOptions},
     io::BufWriter,
-    time::{SystemTime, SystemTimeError, UNIX_EPOCH}
+    time::{SystemTime, SystemTimeError, UNIX_EPOCH},
 };
 
 use clap::Parser;
@@ -16,7 +16,7 @@ fn main() {
         output,
         sparseness_factor,
         construction_algorithm,
-        compress_sa
+        compress_sa,
     } = Arguments::parse();
     eprintln!();
     eprintln!("📋 Started loading the proteins...");
diff --git a/sa-compression/src/lib.rs b/sa-compression/src/lib.rs
index e9952a2..9814e20 100644
--- a/sa-compression/src/lib.rs
+++ b/sa-compression/src/lib.rs
@@ -1,6 +1,6 @@
 use std::{
     error::Error,
-    io::{BufRead, Write}
+    io::{BufRead, Write},
 };
 
 use bitarray::{data_to_writer, Binary, BitArray};
@@ -22,7 +22,7 @@ pub fn dump_compressed_suffix_array(
     sa: Vec<i64>,
     sparseness_factor: u8,
     bits_per_value: usize,
-    writer: &mut impl Write
+    writer: &mut impl Write,
 ) -> Result<(), Box<dyn Error>> {
     // Write the flags to the writer
     // 00000001 indicates that the suffix array is compressed
@@ -59,7 +59,7 @@ pub fn dump_compressed_suffix_array(
 /// Returns an error if reading from the reader fails.
 pub fn load_compressed_suffix_array(
     reader: &mut impl BufRead,
-    bits_per_value: usize
+    bits_per_value: usize,
 ) -> Result<SuffixArray, Box<dyn Error>> {
     // Read the sample rate from the binary file (1 byte)
     let mut sample_rate_buffer = [0_u8; 1];
@@ -92,7 +92,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize
+        pub valid_write_count: usize,
     }
 
     impl Write for FailingWriter {
@@ -112,7 +112,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize
+        pub valid_read_count: usize,
     }
 
     impl Read for FailingReader {
@@ -141,13 +141,16 @@ mod tests {
         let mut writer = vec![];
         dump_compressed_suffix_array(sa, 1, 8, &mut writer).unwrap();
 
-        assert_eq!(writer, vec![
-            // bits per value
-            8, // sparseness factor
-            1, // size of the suffix array
-            10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array
-            8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9
-        ]);
+        assert_eq!(
+            writer,
+            vec![
+                // bits per value
+                8, // sparseness factor
+                1, // size of the suffix array
+                10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array
+                8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9
+            ]
+        );
     }
 
     #[test]
diff --git a/sa-index/src/binary.rs b/sa-index/src/binary.rs
index 55c082a..fc41f24 100644
--- a/sa-index/src/binary.rs
+++ b/sa-index/src/binary.rs
@@ -1,6 +1,6 @@
 use std::{
     error::Error,
-    io::{BufRead, Read, Write}
+    io::{BufRead, Read, Write},
 };
 
 use crate::SuffixArray;
@@ -190,7 +190,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize
+        pub valid_write_count: usize,
     }
 
     impl Write for FailingWriter {
@@ -210,7 +210,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize
+        pub valid_read_count: usize,
     }
 
     impl Read for FailingReader {
@@ -266,10 +266,13 @@ mod tests {
 
         values.write_binary(&mut buffer).unwrap();
 
-        assert_eq!(buffer, vec![
-            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
-            0, 0, 0, 0
-        ]);
+        assert_eq!(
+            buffer,
+            vec![
+                1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0,
+                0, 0, 0, 0, 0, 0
+            ]
+        );
     }
 
     #[test]
@@ -292,14 +295,17 @@ mod tests {
 
         dump_suffix_array(&sa, 1, &mut buffer).unwrap();
 
-        assert_eq!(buffer, vec![
-            // required bits
-            64, // Sparseness factor
-            1,  // Size of the suffix array
-            5, 0, 0, 0, 0, 0, 0, 0, // Suffix array
-            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
-            0, 0, 0, 0
-        ]);
+        assert_eq!(
+            buffer,
+            vec![
+                // required bits
+                64, // Sparseness factor
+                1,  // Size of the suffix array
+                5, 0, 0, 0, 0, 0, 0, 0, // Suffix array
+                1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0,
+                0, 0, 0, 0, 0, 0
+            ]
+        );
     }
 
     #[test]
diff --git a/sa-index/src/lib.rs b/sa-index/src/lib.rs
index 53f5348..a43168d 100644
--- a/sa-index/src/lib.rs
+++ b/sa-index/src/lib.rs
@@ -10,7 +10,7 @@ pub enum SuffixArray {
     /// The original suffix array.
     Original(Vec<i64>, u8),
     /// The compressed suffix array.
-    Compressed(BitArray, u8)
+    Compressed(BitArray, u8),
 }
 
 impl SuffixArray {
@@ -22,7 +22,7 @@ impl SuffixArray {
     pub fn len(&self) -> usize {
         match self {
             SuffixArray::Original(sa, _) => sa.len(),
-            SuffixArray::Compressed(sa, _) => sa.len()
+            SuffixArray::Compressed(sa, _) => sa.len(),
         }
     }
 
@@ -34,7 +34,7 @@ impl SuffixArray {
     pub fn bits_per_value(&self) -> usize {
         match self {
             SuffixArray::Original(_, _) => 64,
-            SuffixArray::Compressed(sa, _) => sa.bits_per_value()
+            SuffixArray::Compressed(sa, _) => sa.bits_per_value(),
         }
     }
 
@@ -46,7 +46,7 @@ impl SuffixArray {
     pub fn sample_rate(&self) -> u8 {
         match self {
             SuffixArray::Original(_, sample_rate) => *sample_rate,
-            SuffixArray::Compressed(_, sample_rate) => *sample_rate
+            SuffixArray::Compressed(_, sample_rate) => *sample_rate,
         }
     }
 
@@ -62,7 +62,7 @@ impl SuffixArray {
     pub fn get(&self, index: usize) -> i64 {
         match self {
             SuffixArray::Original(sa, _) => sa[index],
-            SuffixArray::Compressed(sa, _) => sa.get(index) as i64
+            SuffixArray::Compressed(sa, _) => sa.get(index) as i64,
         }
     }
 
diff --git a/sa-index/src/peptide_search.rs b/sa-index/src/peptide_search.rs
index 55d629f..02e4975 100644
--- a/sa-index/src/peptide_search.rs
+++ b/sa-index/src/peptide_search.rs
@@ -8,7 +8,7 @@ use crate::sa_searcher::{SearchAllSuffixesResult, Searcher};
 pub struct SearchResult {
     pub sequence: String,
     pub proteins: Vec<ProteinInfo>,
-    pub cutoff_used: bool
+    pub cutoff_used: bool,
 }
 
 /// Struct that represents all information known about a certain protein in our database
@@ -16,7 +16,7 @@ pub struct SearchResult {
 pub struct ProteinInfo {
     pub taxon: u32,
     pub uniprot_accession: String,
-    pub functional_annotations: String
+    pub functional_annotations: String,
 }
 
 impl From<&Protein> for ProteinInfo {
@@ -24,7 +24,7 @@ impl From<&Protein> for ProteinInfo {
         ProteinInfo {
             taxon: protein.taxon_id,
             uniprot_accession: protein.uniprot_id.clone(),
-            functional_annotations: protein.get_functional_annotations()
+            functional_annotations: protein.get_functional_annotations(),
         }
     }
 }
@@ -50,7 +50,7 @@ pub fn search_proteins_for_peptide<'a>(
     searcher: &'a Searcher,
     peptide: &str,
     cutoff: usize,
-    equate_il: bool
+    equate_il: bool,
 ) -> Option<(bool, Vec<&'a Protein>)> {
     let peptide = peptide.trim_end().to_uppercase();
 
@@ -63,7 +63,7 @@ pub fn search_proteins_for_peptide<'a>(
     let (suffixes, cutoff_used) = match suffix_search {
         SearchAllSuffixesResult::MaxMatches(matched_suffixes) => Some((matched_suffixes, true)),
         SearchAllSuffixesResult::SearchResult(matched_suffixes) => Some((matched_suffixes, false)),
-        SearchAllSuffixesResult::NoMatches => None
+        SearchAllSuffixesResult::NoMatches => None,
     }?;
 
     let proteins = searcher.retrieve_proteins(&suffixes);
@@ -77,7 +77,7 @@ pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_
     Some(SearchResult {
         sequence: peptide.to_string(),
         proteins: proteins.iter().map(|&protein| protein.into()).collect(),
-        cutoff_used
+        cutoff_used,
     })
 }
 
@@ -99,7 +99,7 @@ pub fn search_all_peptides(
     searcher: &Searcher,
     peptides: &Vec<String>,
     cutoff: usize,
-    equate_il: bool
+    equate_il: bool,
 ) -> Vec<SearchResult> {
     peptides
         .par_iter()
@@ -123,7 +123,7 @@ mod tests {
         let protein_info = ProteinInfo {
             taxon: 1,
             uniprot_accession: "P12345".to_string(),
-            functional_annotations: "GO:0001234;GO:0005678".to_string()
+            functional_annotations: "GO:0001234;GO:0005678".to_string(),
         };
 
         let generated_json = serde_json::to_string(&protein_info).unwrap();
@@ -138,7 +138,7 @@ mod tests {
         let search_result = SearchResult {
             sequence: "MSKIAALLPSV".to_string(),
             proteins: vec![],
-            cutoff_used: true
+            cutoff_used: true,
         };
 
         let generated_json = serde_json::to_string(&search_result).unwrap();
diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index d2250ed..119af6c 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -6,21 +6,21 @@ use text_compression::ProteinTextSlice;
 use crate::{
     sa_searcher::BoundSearch::{Maximum, Minimum},
     suffix_to_protein_index::{DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex},
-    Nullable, SuffixArray
+    Nullable, SuffixArray,
 };
 
 /// Enum indicating if we are searching for the minimum, or maximum bound in the suffix array
 #[derive(Clone, Copy, PartialEq)]
 enum BoundSearch {
     Minimum,
-    Maximum
+    Maximum,
 }
 
 /// Enum representing the minimum and maximum bound of the found matches in the suffix array
 #[derive(PartialEq, Debug)]
 pub enum BoundSearchResult {
     NoMatches,
-    SearchResult((usize, usize))
+    SearchResult((usize, usize)),
 }
 
 /// Enum representing the matching suffixes after searching a peptide in the suffix array
@@ -30,7 +30,7 @@ pub enum BoundSearchResult {
 pub enum SearchAllSuffixesResult {
     NoMatches,
     MaxMatches(Vec<i64>),
-    SearchResult(Vec<i64>)
+    SearchResult(Vec<i64>),
 }
 
 /// Custom implementation of partialEq for SearchAllSuffixesResult
@@ -67,7 +67,7 @@ impl PartialEq for SearchAllSuffixesResult {
                 array_eq_unordered(arr1, arr2)
             }
             (SearchAllSuffixesResult::NoMatches, SearchAllSuffixesResult::NoMatches) => true,
-            _ => false
+            _ => false,
         }
     }
 }
@@ -123,7 +123,7 @@ impl Deref for DenseSearcher {
 pub struct Searcher {
     pub sa: SuffixArray,
     pub proteins: Proteins,
-    pub suffix_index_to_protein: Box<dyn SuffixToProteinIndex>
+    pub suffix_index_to_protein: Box<dyn SuffixToProteinIndex>,
 }
 
 impl Searcher {
@@ -172,7 +172,7 @@ impl Searcher {
         // Depending on if we are searching for the min of max bound our condition is different
         let condition_check = match bound {
             Minimum => |a: u8, b: u8| a < b,
-            Maximum => |a: u8, b: u8| a > b
+            Maximum => |a: u8, b: u8| a > b,
         };
 
         // match as long as possible
@@ -265,7 +265,7 @@ impl Searcher {
 
         match bound {
             Minimum => (found, right),
-            Maximum => (found, left)
+            Maximum => (found, left),
         }
     }
 
@@ -307,7 +307,7 @@ impl Searcher {
         &self,
         search_string: &[u8],
         max_matches: usize,
-        equate_il: bool
+        equate_il: bool,
     ) -> SearchAllSuffixesResult {
         let mut matching_suffixes: Vec<i64> = vec![];
         let mut il_locations = vec![];
@@ -394,7 +394,7 @@ impl Searcher {
         il_locations: &[usize],
         search_string: &[u8],
         text_slice: ProteinTextSlice,
-        equate_il: bool
+        equate_il: bool,
     ) -> bool {
         if equate_il {
             true
@@ -432,7 +432,7 @@ mod tests {
     use crate::{
         sa_searcher::{BoundSearchResult, SearchAllSuffixesResult, Searcher},
         suffix_to_protein_index::SparseSuffixToProtein,
-        SuffixArray
+        SuffixArray,
     };
 
     #[test]
@@ -465,24 +465,24 @@ mod tests {
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![]
+                    functional_annotations: vec![],
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![]
+                    functional_annotations: vec![],
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![]
+                    functional_annotations: vec![],
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![]
+                    functional_annotations: vec![],
                 },
-            ]
+            ],
         }
     }
 
@@ -568,8 +568,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![]
-            }]
+                functional_annotations: vec![],
+            }],
         };
 
         let sparse_sa = SuffixArray::Original(vec![0, 2, 4], 2);
@@ -591,8 +591,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![]
-            }]
+                functional_annotations: vec![],
+            }],
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 0, 1, 5, 4, 3, 2], 1);
@@ -613,10 +613,10 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![]
-            }]
+                functional_annotations: vec![],
+            }],
         };
-        
+
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
         let suffix_index_to_protein = SparseSuffixToProtein::new(&proteins.text);
         let searcher = Searcher::new(sparse_sa, proteins, Box::new(suffix_index_to_protein));
@@ -629,14 +629,14 @@ mod tests {
     fn test_il_suffix_check() {
         let input_string = "IIIILL$";
         let text = ProteinText::from_string(input_string);
-        
+
         let proteins = Proteins {
             text,
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![]
-            }]
+                functional_annotations: vec![],
+            }],
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 4, 2, 0], 2);
@@ -659,8 +659,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![]
-            }]
+                functional_annotations: vec![],
+            }],
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
diff --git a/sa-index/src/suffix_to_protein_index.rs b/sa-index/src/suffix_to_protein_index.rs
index 6aed362..1a224d2 100644
--- a/sa-index/src/suffix_to_protein_index.rs
+++ b/sa-index/src/suffix_to_protein_index.rs
@@ -8,7 +8,7 @@ use text_compression::ProteinText;
 #[derive(ValueEnum, Clone, Debug, PartialEq)]
 pub enum SuffixToProteinMappingStyle {
     Dense,
-    Sparse
+    Sparse,
 }
 
 /// Trait implemented by the SuffixToProtein mappings
@@ -29,14 +29,14 @@ pub trait SuffixToProteinIndex: Send + Sync {
 #[derive(Debug, PartialEq)]
 pub struct DenseSuffixToProtein {
     // UniProtKB does not have more that u32::MAX proteins, so a larger type is not needed
-    mapping: Vec<u32>
+    mapping: Vec<u32>,
 }
 
 /// Mapping that uses O(m) memory with m the number of proteins, but retrieval of the protein is
 /// O(log m)
 #[derive(Debug, PartialEq)]
 pub struct SparseSuffixToProtein {
-    mapping: Vec<i64>
+    mapping: Vec<i64>,
 }
 
 impl SuffixToProteinIndex for DenseSuffixToProtein {
@@ -113,9 +113,9 @@ mod tests {
 
     use crate::{
         suffix_to_protein_index::{
-            DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex, SuffixToProteinMappingStyle
+            DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex, SuffixToProteinMappingStyle,
         },
-        Nullable
+        Nullable,
     };
 
     fn build_text() -> ProteinText {
@@ -138,7 +138,7 @@ mod tests {
         let u8_text = &build_text();
         let index = DenseSuffixToProtein::new(u8_text);
         let expected = DenseSuffixToProtein {
-            mapping: vec![0, 0, 0, u32::NULL, 1, 1, u32::NULL, 2, 2, 2, u32::NULL]
+            mapping: vec![0, 0, 0, u32::NULL, 1, 1, u32::NULL, 2, 2, 2, u32::NULL],
         };
         assert_eq!(index, expected);
     }
diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs
index 626ead3..9285980 100644
--- a/sa-mappings/src/proteins.rs
+++ b/sa-mappings/src/proteins.rs
@@ -23,7 +23,7 @@ pub struct Protein {
     pub taxon_id: u32,
 
     /// The encoded functional annotations of the protein
-    pub functional_annotations: Vec<u8>
+    pub functional_annotations: Vec<u8>,
 }
 
 /// A struct that represents a collection of proteins
@@ -32,7 +32,7 @@ pub struct Proteins {
     pub text: ProteinText,
 
     /// The proteins in the input string
-    pub proteins: Vec<Protein>
+    pub proteins: Vec<Protein>,
 }
 
 impl Protein {
@@ -80,7 +80,7 @@ impl Proteins {
             proteins.push(Protein {
                 uniprot_id: uniprot_id.to_string(),
                 taxon_id,
-                functional_annotations
+                functional_annotations,
             });
         }
 
@@ -126,7 +126,6 @@ impl Proteins {
         let text = ProteinText::from_string(&input_string);
 
         Ok(text)
-
     }
 
     /// Creates a `vec<u8>` which represents all the proteins concatenated from the database file
@@ -165,7 +164,6 @@ impl Proteins {
 
         input_string.shrink_to_fit();
         Ok(input_string.into_bytes())
-
     }
 }
 
@@ -197,7 +195,7 @@ mod tests {
             .unwrap();
         file.write(
             "P13579\t17\tKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n"
-                .as_bytes()
+                .as_bytes(),
         )
         .unwrap();
 
@@ -209,7 +207,7 @@ mod tests {
         let protein = Protein {
             uniprot_id: "P12345".to_string(),
             taxon_id: 1,
-            functional_annotations: vec![0xD1, 0x11]
+            functional_annotations: vec![0xD1, 0x11],
         };
 
         assert_eq!(protein.uniprot_id, "P12345");
@@ -227,14 +225,14 @@ mod tests {
                 Protein {
                     uniprot_id: "P12345".to_string(),
                     taxon_id: 1,
-                    functional_annotations: vec![0xD1, 0x11]
+                    functional_annotations: vec![0xD1, 0x11],
                 },
                 Protein {
                     uniprot_id: "P54321".to_string(),
                     taxon_id: 2,
-                    functional_annotations: vec![0xD1, 0x11]
+                    functional_annotations: vec![0xD1, 0x11],
                 },
-            ]
+            ],
         };
 
         assert_eq!(proteins.proteins.len(), 2);
diff --git a/sa-server/src/main.rs b/sa-server/src/main.rs
index 5284546..c65ba7c 100644
--- a/sa-server/src/main.rs
+++ b/sa-server/src/main.rs
@@ -2,14 +2,14 @@ use std::{
     error::Error,
     fs::File,
     io::{BufReader, Read},
-    sync::Arc
+    sync::Arc,
 };
 
 use axum::{
     extract::{DefaultBodyLimit, State},
     http::StatusCode,
     routing::post,
-    Json, Router
+    Json, Router,
 };
 use clap::Parser;
 use sa_compression::load_compressed_suffix_array;
@@ -17,7 +17,7 @@ use sa_index::{
     binary::load_suffix_array,
     peptide_search::{search_all_peptides, SearchResult},
     sa_searcher::SparseSearcher,
-    SuffixArray
+    SuffixArray,
 };
 use sa_mappings::proteins::Proteins;
 use serde::Deserialize;
@@ -30,7 +30,7 @@ pub struct Arguments {
     #[arg(short, long)]
     database_file: String,
     #[arg(short, long)]
-    index_file: String
+    index_file: String,
 }
 
 /// Function used by serde to place a default value in the cutoff field of the input
@@ -58,7 +58,7 @@ struct InputData {
     cutoff: usize,
     #[serde(default = "bool::default")]
     // default value is false // TODO: maybe default should be true?
-    equate_il: bool
+    equate_il: bool,
 }
 
 #[tokio::main]
@@ -81,7 +81,7 @@ async fn main() {
 /// Returns the search results from the index as a JSON
 async fn search(
     State(searcher): State<Arc<SparseSearcher>>,
-    data: Json<InputData>
+    data: Json<InputData>,
 ) -> Result<Json<Vec<SearchResult>>, StatusCode> {
     let search_result = search_all_peptides(&searcher, &data.peptides, data.cutoff, data.equate_il);
 
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index 6bfaf1a..dc7f71e 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -1,8 +1,8 @@
+use std::collections::HashMap;
 use std::{
     error::Error,
-    io::{BufRead, Write}
+    io::{BufRead, Write},
 };
-use std::collections::HashMap;
 
 use bitarray::{data_to_writer, Binary, BitArray};
 
@@ -17,7 +17,6 @@ pub struct ProteinText {
 }
 
 impl ProteinText {
-
     /// Creates the hashmap storing the mappings between the characters as `u8` and 5 bit numbers.
     ///
     /// # Returns
@@ -44,9 +43,9 @@ impl ProteinText {
         }
         vec
     }
-    
+
     /// Creates the compressed text from a string.
-    /// 
+    ///
     /// # Arguments
     /// * `input_string` - The text (proteins) in string format
     ///
@@ -67,7 +66,7 @@ impl ProteinText {
     }
 
     /// Creates the compressed text from a vector.
-    /// 
+    ///
     /// # Arguments
     /// * `input_vec` - The text (proteins) in a vector with elements of type `u8` representing the amino acids.
     ///
@@ -88,7 +87,7 @@ impl ProteinText {
     }
 
     /// Creates the compressed text from a bit array.
-    /// 
+    ///
     /// # Arguments
     /// * `bit_array` - The text (proteins) in a bit array using 5 bits for each amino acid.
     ///
@@ -102,7 +101,7 @@ impl ProteinText {
     }
 
     /// Creates an instance of `ProteinText` with a given capacity.
-    /// 
+    ///
     /// # Arguments
     /// * `capacity` - The amount of characters in the text.
     ///
@@ -114,7 +113,7 @@ impl ProteinText {
     }
 
     /// Search the character at a given position in the compressed text.
-    /// 
+    ///
     /// # Arguments
     /// * `index` - The index of the character to search.
     ///
@@ -127,7 +126,7 @@ impl ProteinText {
     }
 
     /// Set the character at a given index.
-    /// 
+    ///
     /// # Arguments
     /// * `index` - The index of the character to change.
     /// * `value` - The character to fill in as `u8`.
@@ -139,7 +138,7 @@ impl ProteinText {
     /// Queries the length of the text.
     ///
     /// # Returns
-    /// 
+    ///
     /// the length of the text
     pub fn len(&self) -> usize {
         self.bit_array.len()
@@ -148,7 +147,7 @@ impl ProteinText {
     /// Check if the text is empty (length 0).
     ///
     /// # Returns
-    /// 
+    ///
     /// true if the the text has length 0, false otherwise.
     pub fn is_empty(&self) -> bool {
         self.bit_array.len() == 0
@@ -162,21 +161,20 @@ impl ProteinText {
     /// Get an iterator over the characters of the text.
     ///
     /// # Returns
-    /// 
+    ///
     /// A `ProteinTextIterator`, which can iterate over the characters of the text.
     pub fn iter(&self) -> ProteinTextIterator {
-        ProteinTextIterator {protein_text: self, index: 0, }
+        ProteinTextIterator { protein_text: self, index: 0 }
     }
 
     /// Get a slice of the text
     ///
     /// # Returns
-    /// 
+    ///
     /// An `ProteinTextSlice` representing a slice of the text.
-    pub fn slice(&self, start: usize, end:usize) -> ProteinTextSlice {
+    pub fn slice(&self, start: usize, end: usize) -> ProteinTextSlice {
         ProteinTextSlice::new(self, start, end)
     }
-
 }
 
 /// Structure representing a slice of a `ProteinText`.
@@ -186,13 +184,12 @@ pub struct ProteinTextSlice<'a> {
     /// The start of the slice.
     start: usize, // included
     /// The end of the slice.
-    end: usize,   // excluded
+    end: usize, // excluded
 }
 
 impl<'a> ProteinTextSlice<'a> {
-
     /// Creates an instance of `ProteintextSlice`, given the text and boundaries.
-    /// 
+    ///
     /// # Arguments
     /// * `text` - The `Proteintext` representing the text of proteins with 5 bits per amino acid.
     /// * `start` - The start of the slice.
@@ -202,11 +199,11 @@ impl<'a> ProteinTextSlice<'a> {
     ///
     /// An instance of `ProteinTextSlice`
     pub fn new(text: &'a ProteinText, start: usize, end: usize) -> ProteinTextSlice {
-        Self {text, start, end }
+        Self { text, start, end }
     }
 
     /// Get a character (amino acid) in the slice.
-    /// 
+    ///
     /// # Arguments
     /// * `index` - The index in the slice of the character to get.
     ///
@@ -228,7 +225,7 @@ impl<'a> ProteinTextSlice<'a> {
 
     /// Checks if the slice and a given array of `u8` are equal.
     /// I and L can be equated.
-    /// 
+    ///
     /// # Arguments
     /// * `other` - the array of `u8` to compare the slice with.
     /// * `equate_il` - true if I and L need to be equated, false otherwise.
@@ -245,12 +242,15 @@ impl<'a> ProteinTextSlice<'a> {
                     || (search_character == b'L' && text_character == b'I')
             })
         } else {
-            other.iter().zip(self.iter()).all(|(&search_character, text_character)| search_character == text_character)
+            other
+                .iter()
+                .zip(self.iter())
+                .all(|(&search_character, text_character)| search_character == text_character)
         }
     }
 
     /// Check if the slice and a given array of `u8` are equal on the I and L positions.
-    /// 
+    ///
     /// # Arguments
     /// * `skip` - The amount of positions this slice skipped, this has an influence on the I and L positions.
     /// * `il_locations` - The positions where I and L occur.
@@ -259,12 +259,7 @@ impl<'a> ProteinTextSlice<'a> {
     /// # Returns
     ///
     /// True if the slice and `search_string` have the same contents on the I and L positions, false otherwise.
-    pub fn check_il_locations(
-        &self,
-        skip: usize,
-        il_locations: &[usize],
-        search_string: &[u8],
-    ) -> bool {
+    pub fn check_il_locations(&self, skip: usize, il_locations: &[usize], search_string: &[u8]) -> bool {
         for &il_location in il_locations {
             let index = il_location - skip;
             if search_string[index] != self.get(index) {
@@ -280,7 +275,7 @@ impl<'a> ProteinTextSlice<'a> {
     ///
     /// An iterator over the slice.
     pub fn iter(&self) -> ProteinTextSliceIterator {
-        ProteinTextSliceIterator {text_slice: self, index: 0, }
+        ProteinTextSliceIterator { text_slice: self, index: 0 }
     }
 }
 
@@ -297,13 +292,12 @@ pub struct ProteinTextSliceIterator<'a> {
 }
 
 impl<'a> Iterator for ProteinTextSliceIterator<'a> {
-
     type Item = u8;
-    
+
     /// Get the next character in the `ProteinTextSlice`.
-    /// 
+    ///
     /// # Returns
-    /// 
+    ///
     /// The next character in the slice.
     fn next(&mut self) -> Option<Self::Item> {
         if self.index >= self.text_slice.len() {
@@ -316,13 +310,12 @@ impl<'a> Iterator for ProteinTextSliceIterator<'a> {
 }
 
 impl<'a> Iterator for ProteinTextIterator<'a> {
-
     type Item = u8;
-    
+
     /// Get the next character in the `ProteinText`.
-    /// 
+    ///
     /// # Returns
-    /// 
+    ///
     /// The next character in the text.
     fn next(&mut self) -> Option<Self::Item> {
         if self.index >= self.protein_text.len() {
@@ -344,10 +337,7 @@ impl<'a> Iterator for ProteinTextIterator<'a> {
 /// # Errors
 ///
 /// Returns an error if writing to the writer fails.
-pub fn dump_compressed_text(
-    text: Vec<u8>,
-    writer: &mut impl Write
-) -> Result<(), Box<dyn Error>> {
+pub fn dump_compressed_text(text: Vec<u8>, writer: &mut impl Write) -> Result<(), Box<dyn Error>> {
     let bits_per_value = 5;
 
     // Write the flags to the writer
@@ -378,9 +368,7 @@ pub fn dump_compressed_text(
 /// # Errors
 ///
 /// Returns an error if reading from the reader fails.
-pub fn load_compressed_text(
-    reader: &mut impl BufRead
-) -> Result<ProteinText, Box<dyn Error>> {
+pub fn load_compressed_text(reader: &mut impl BufRead) -> Result<ProteinText, Box<dyn Error>> {
     let bits_per_value: usize = 5;
     // Read the size of the text from the binary file (8 bytes)
     let mut size_buffer = [0_u8; 8];
@@ -406,7 +394,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize
+        pub valid_write_count: usize,
     }
 
     impl Write for FailingWriter {
@@ -426,7 +414,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize
+        pub valid_read_count: usize,
     }
 
     impl Read for FailingReader {
@@ -514,7 +502,7 @@ mod tests {
     fn test_text_slice() {
         let input_string = "ACACA-CAC$";
         let start = 1;
-        let end  = 5;
+        let end = 5;
         let text = ProteinText::from_string(&input_string);
         let text_slice = text.slice(start, end);
 
@@ -533,7 +521,7 @@ mod tests {
         let eq_slice_il_true = [b'C', b'L', b'C', b'A'];
 
         assert!(text_slice.equals_slice(&eq_slice_true, false));
-        assert!(! text_slice.equals_slice(&eq_slice_false, false));
+        assert!(!text_slice.equals_slice(&eq_slice_false, false));
         assert!(text_slice.equals_slice(&eq_slice_il_true, true));
     }
 
@@ -547,7 +535,7 @@ mod tests {
         let il_false = [b'C', b'I', b'C', b'A'];
 
         assert!(text_slice.check_il_locations(0, &il_locations, &il_true));
-        assert!(! text_slice.check_il_locations(0, &il_locations, &il_false));
+        assert!(!text_slice.check_il_locations(0, &il_locations, &il_false));
     }
 
     #[test]
@@ -557,12 +545,15 @@ mod tests {
         let mut writer = vec![];
         dump_compressed_text(text, &mut writer).unwrap();
 
-        assert_eq!(writer, vec![
-            // bits per value
-            5, // size of the text
-            10, 0, 0, 0, 0, 0, 0, 0, // compressed text
-            0, 128, 74, 232, 152, 66, 134, 8
-        ]);
+        assert_eq!(
+            writer,
+            vec![
+                // bits per value
+                5, // size of the text
+                10, 0, 0, 0, 0, 0, 0, 0, // compressed text
+                0, 128, 74, 232, 152, 66, 134, 8
+            ]
+        );
     }
 
     #[test]
@@ -592,9 +583,9 @@ mod tests {
     #[test]
     fn test_load_compressed_text() {
         let data = vec![
-             // size of the text
+            // size of the text
             10, 0, 0, 0, 0, 0, 0, 0, // compressed text
-            0, 128, 74, 232, 152, 66, 134, 8
+            0, 128, 74, 232, 152, 66, 134, 8,
         ];
 
         let mut reader = std::io::BufReader::new(&data[..]);

From d1d8f88fd6e799a216a69acd91e9a7edfec6c54f Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:22:17 +0200
Subject: [PATCH 08/13] cargo fmt to format code

---
 bitarray/src/binary.rs                      | 11 +--
 bitarray/src/lib.rs                         | 81 +++++++++------------
 fa-compression/benches/algorithm1/decode.rs |  2 +-
 fa-compression/benches/algorithm1/encode.rs |  2 +-
 fa-compression/benches/algorithm2/decode.rs |  2 +-
 fa-compression/benches/algorithm2/encode.rs |  2 +-
 fa-compression/benches/util.rs              |  2 +-
 fa-compression/src/algorithm1/encode.rs     | 21 +++---
 fa-compression/src/algorithm1/mod.rs        |  8 +-
 fa-compression/src/algorithm2/encode.rs     |  7 +-
 fa-compression/src/algorithm2/mod.rs        |  4 +-
 libsais64-rs/builder.rs                     |  8 +-
 libsais64-rs/src/lib.rs                     |  6 +-
 sa-builder/src/lib.rs                       | 10 +--
 sa-builder/src/main.rs                      |  4 +-
 sa-compression/src/lib.rs                   | 27 +++----
 sa-index/src/binary.rs                      | 36 ++++-----
 sa-index/src/lib.rs                         | 10 +--
 sa-index/src/peptide_search.rs              | 18 ++---
 sa-index/src/sa_searcher.rs                 | 58 +++++++--------
 sa-index/src/suffix_to_protein_index.rs     | 14 ++--
 sa-mappings/src/proteins.rs                 | 16 ++--
 sa-server/src/main.rs                       | 12 +--
 text-compression/src/lib.rs                 | 31 ++++----
 24 files changed, 177 insertions(+), 215 deletions(-)

diff --git a/bitarray/src/binary.rs b/bitarray/src/binary.rs
index 4ab535f..a8084d1 100644
--- a/bitarray/src/binary.rs
+++ b/bitarray/src/binary.rs
@@ -167,13 +167,10 @@ mod tests {
         let mut buffer = Vec::new();
         bitarray.write_binary(&mut buffer).unwrap();
 
-        assert_eq!(
-            buffer,
-            vec![
-                0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00,
-                0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
-            ]
-        );
+        assert_eq!(buffer, vec![
+            0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00, 0x00,
+            0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
+        ]);
     }
 
     #[test]
diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index d58a60c..e4bd8a2 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -4,7 +4,7 @@ mod binary;
 
 use std::{
     cmp::max,
-    io::{Result, Write},
+    io::{Result, Write}
 };
 
 /// Re-export the `Binary` trait.
@@ -19,7 +19,7 @@ pub struct BitArray {
     /// The length of the bit array.
     len: usize,
     /// The number of bits in a single element of the data vector.
-    bits_per_value: usize,
+    bits_per_value: usize
 }
 
 impl BitArray {
@@ -39,7 +39,7 @@ impl BitArray {
             data: vec![0; capacity * bits_per_value / 64 + extra],
             mask: (1 << bits_per_value) - 1,
             len: capacity,
-            bits_per_value,
+            bits_per_value
         }
     }
 
@@ -167,7 +167,7 @@ pub fn data_to_writer(
     data: Vec<i64>,
     bits_per_value: usize,
     max_capacity: usize,
-    writer: &mut impl Write,
+    writer: &mut impl Write
 ) -> Result<()> {
     // Update the max capacity to be a multiple of the greatest common divisor of the bits per value
     // and 64. This is done to ensure that the bit array can store the data entirely
@@ -311,13 +311,10 @@ mod tests {
 
         data_to_writer(data, 40, 2, &mut writer).unwrap();
 
-        assert_eq!(
-            writer,
-            vec![
-                0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00,
-                0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
-            ]
-        );
+        assert_eq!(writer, vec![
+            0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01, 0x00, 0x00,
+            0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
+        ]);
     }
 
     #[test]
@@ -336,27 +333,23 @@ mod tests {
 
         data_to_writer(data, 32, 8, &mut writer).unwrap();
 
-        assert_eq!(
-            writer,
-            vec![
-                0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66,
-                0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
-                0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee,
-                0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22,
-                0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55,
-                0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99,
-                0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd,
-                0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
-                0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88,
-                0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc,
-                0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00,
-                0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44,
-                0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77,
-                0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb,
-                0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff,
-                0xff
-            ]
-        );
+        assert_eq!(writer, vec![
+            0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66,
+            0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa,
+            0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd,
+            0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
+            0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88,
+            0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc,
+            0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
+            0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33,
+            0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
+            0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee,
+            0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11,
+            0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55,
+            0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc,
+            0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00,
+            0xff, 0xff, 0xff, 0xff
+        ]);
     }
 
     #[test]
@@ -371,20 +364,16 @@ mod tests {
 
         data_to_writer(data, 32, 8, &mut writer).unwrap();
 
-        assert_eq!(
-            writer,
-            vec![
-                0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66,
-                0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa,
-                0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee,
-                0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22,
-                0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55,
-                0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99,
-                0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd,
-                0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
-                0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x33, 0x33
-            ]
-        );
+        assert_eq!(writer, vec![
+            0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66,
+            0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88, 0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa,
+            0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc, 0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd,
+            0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11,
+            0x44, 0x44, 0x44, 0x44, 0x33, 0x33, 0x33, 0x33, 0x66, 0x66, 0x66, 0x66, 0x55, 0x55, 0x55, 0x55, 0x88, 0x88,
+            0x88, 0x88, 0x77, 0x77, 0x77, 0x77, 0xaa, 0xaa, 0xaa, 0xaa, 0x99, 0x99, 0x99, 0x99, 0xcc, 0xcc, 0xcc, 0xcc,
+            0xbb, 0xbb, 0xbb, 0xbb, 0xee, 0xee, 0xee, 0xee, 0xdd, 0xdd, 0xdd, 0xdd, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
+            0xff, 0xff, 0x22, 0x22, 0x22, 0x22, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x33, 0x33
+        ]);
     }
 
     #[test]
diff --git a/fa-compression/benches/algorithm1/decode.rs b/fa-compression/benches/algorithm1/decode.rs
index bd1c94d..24991dc 100644
--- a/fa-compression/benches/algorithm1/decode.rs
+++ b/fa-compression/benches/algorithm1/decode.rs
@@ -22,7 +22,7 @@ pub fn decode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_encoded_annotations(100),
             |annotations| black_box(decode(annotations.as_slice())),
-            criterion::BatchSize::SmallInput,
+            criterion::BatchSize::SmallInput
         )
     });
 }
diff --git a/fa-compression/benches/algorithm1/encode.rs b/fa-compression/benches/algorithm1/encode.rs
index 1d23a6e..d3a9c86 100644
--- a/fa-compression/benches/algorithm1/encode.rs
+++ b/fa-compression/benches/algorithm1/encode.rs
@@ -22,7 +22,7 @@ pub fn encode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_decoded_annotations(100),
             |annotations| black_box(encode(annotations.as_str())),
-            criterion::BatchSize::SmallInput,
+            criterion::BatchSize::SmallInput
         )
     });
 }
diff --git a/fa-compression/benches/algorithm2/decode.rs b/fa-compression/benches/algorithm2/decode.rs
index 62f8b9e..4d562fc 100644
--- a/fa-compression/benches/algorithm2/decode.rs
+++ b/fa-compression/benches/algorithm2/decode.rs
@@ -28,7 +28,7 @@ pub fn decode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_encoded_annotations_and_table(100),
             |(annotations, ct)| black_box(decode(annotations.as_slice(), ct)),
-            criterion::BatchSize::SmallInput,
+            criterion::BatchSize::SmallInput
         )
     });
 }
diff --git a/fa-compression/benches/algorithm2/encode.rs b/fa-compression/benches/algorithm2/encode.rs
index a69ed0e..827dd50 100644
--- a/fa-compression/benches/algorithm2/encode.rs
+++ b/fa-compression/benches/algorithm2/encode.rs
@@ -26,7 +26,7 @@ pub fn encode_benchmark(c: &mut criterion::Criterion) {
         b.iter_batched(
             || generate_decoded_annotations_and_table(100),
             |(annotations, ct)| black_box(encode(annotations.as_str(), ct)),
-            criterion::BatchSize::SmallInput,
+            criterion::BatchSize::SmallInput
         )
     });
 }
diff --git a/fa-compression/benches/util.rs b/fa-compression/benches/util.rs
index 0e80765..b6ddd9a 100644
--- a/fa-compression/benches/util.rs
+++ b/fa-compression/benches/util.rs
@@ -27,6 +27,6 @@ pub fn generate_annotation(random: &mut ThreadRng) -> String {
         0 => generate_ipr(random),
         1 => generate_go(random),
         2 => generate_ec(random),
-        _ => unreachable!(),
+        _ => unreachable!()
     }
 }
diff --git a/fa-compression/src/algorithm1/encode.rs b/fa-compression/src/algorithm1/encode.rs
index 9138be1..ef79372 100644
--- a/fa-compression/src/algorithm1/encode.rs
+++ b/fa-compression/src/algorithm1/encode.rs
@@ -106,18 +106,16 @@ mod tests {
 
     #[test]
     fn test_encode_no_ec() {
-        assert_eq!(
-            encode("IPR:IPR016364;GO:0009279;IPR:IPR008816"),
-            vec![225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39]
-        )
+        assert_eq!(encode("IPR:IPR016364;GO:0009279;IPR:IPR008816"), vec![
+            225, 17, 163, 138, 225, 39, 71, 95, 17, 153, 39
+        ])
     }
 
     #[test]
     fn test_encode_no_go() {
-        assert_eq!(
-            encode("IPR:IPR016364;EC:1.1.1.-;EC:1.2.1.7"),
-            vec![44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80]
-        )
+        assert_eq!(encode("IPR:IPR016364;EC:1.1.1.-;EC:1.2.1.7"), vec![
+            44, 44, 44, 191, 44, 60, 44, 142, 225, 39, 71, 80
+        ])
     }
 
     #[test]
@@ -127,9 +125,8 @@ mod tests {
 
     #[test]
     fn test_encode_all() {
-        assert_eq!(
-            encode("IPR:IPR016364;EC:1.1.1.-;IPR:IPR032635;GO:0009279;IPR:IPR008816"),
-            vec![44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39]
-        )
+        assert_eq!(encode("IPR:IPR016364;EC:1.1.1.-;IPR:IPR032635;GO:0009279;IPR:IPR008816"), vec![
+            44, 44, 44, 190, 17, 26, 56, 174, 18, 116, 117, 241, 67, 116, 111, 17, 153, 39
+        ])
     }
 }
diff --git a/fa-compression/src/algorithm1/mod.rs b/fa-compression/src/algorithm1/mod.rs
index 8ea45c5..cdf7283 100644
--- a/fa-compression/src/algorithm1/mod.rs
+++ b/fa-compression/src/algorithm1/mod.rs
@@ -79,7 +79,7 @@ enum CharacterSet {
     Comma,
 
     /// Annotation separator
-    Semicolon,
+    Semicolon
 }
 
 impl Encode for CharacterSet {
@@ -110,7 +110,7 @@ impl Encode for CharacterSet {
             b'n' => CharacterSet::Preliminary,
             b',' => CharacterSet::Comma,
             b';' => CharacterSet::Semicolon,
-            _ => panic!("Invalid character"),
+            _ => panic!("Invalid character")
         }
     }
 }
@@ -143,7 +143,7 @@ impl Decode for CharacterSet {
             13 => 'n',
             14 => ',',
             15 => ';',
-            _ => panic!("Invalid character"),
+            _ => panic!("Invalid character")
         }
     }
 }
@@ -189,7 +189,7 @@ mod tests {
         CharacterSet::Point,
         CharacterSet::Preliminary,
         CharacterSet::Comma,
-        CharacterSet::Semicolon,
+        CharacterSet::Semicolon
     ];
 
     #[test]
diff --git a/fa-compression/src/algorithm2/encode.rs b/fa-compression/src/algorithm2/encode.rs
index d60fe61..f55eb11 100644
--- a/fa-compression/src/algorithm2/encode.rs
+++ b/fa-compression/src/algorithm2/encode.rs
@@ -89,9 +89,8 @@ mod tests {
     #[test]
     fn test_encode_all() {
         let table = create_compresion_table();
-        assert_eq!(
-            encode("IPR:IPR000001;EC:1.1.1.-;IPR:IPR000003;GO:0000002", table),
-            vec![0, 0, 0, 7, 0, 0, 2, 0, 0, 5, 0, 0]
-        )
+        assert_eq!(encode("IPR:IPR000001;EC:1.1.1.-;IPR:IPR000003;GO:0000002", table), vec![
+            0, 0, 0, 7, 0, 0, 2, 0, 0, 5, 0, 0
+        ])
     }
 }
diff --git a/fa-compression/src/algorithm2/mod.rs b/fa-compression/src/algorithm2/mod.rs
index 117b87c..8fc505a 100644
--- a/fa-compression/src/algorithm2/mod.rs
+++ b/fa-compression/src/algorithm2/mod.rs
@@ -12,13 +12,13 @@ pub use encode::encode;
 /// Represents an entry in the compression table.
 #[doc(hidden)]
 pub struct CompressionTableEntry {
-    annotation: String,
+    annotation: String
 }
 
 /// Represents a compression table.
 pub struct CompressionTable {
     /// List of annotations in the compression table.
-    entries: Vec<CompressionTableEntry>,
+    entries: Vec<CompressionTableEntry>
 }
 
 impl CompressionTable {
diff --git a/libsais64-rs/builder.rs b/libsais64-rs/builder.rs
index c6fc2d6..5b3feb2 100644
--- a/libsais64-rs/builder.rs
+++ b/libsais64-rs/builder.rs
@@ -3,14 +3,14 @@ use std::{
     error::Error,
     fmt::{Display, Formatter},
     path::{Path, PathBuf},
-    process::{Command, ExitStatus},
+    process::{Command, ExitStatus}
 };
 
 /// Custom error for compilation of the C library
 #[derive(Debug)]
 struct CompileError<'a> {
     command: &'a str,
-    exit_code: Option<i32>,
+    exit_code: Option<i32>
 }
 
 impl<'a> Display for CompileError<'a> {
@@ -43,7 +43,7 @@ impl<'a> Error for CompileError<'a> {}
 fn exit_status_to_result(name: &str, exit_status: ExitStatus) -> Result<(), CompileError> {
     match exit_status.success() {
         true => Ok(()),
-        false => Err(CompileError { command: name, exit_code: exit_status.code() }),
+        false => Err(CompileError { command: name, exit_code: exit_status.code() })
     }
 }
 
@@ -61,7 +61,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     Command::new("rm").args(["libsais/CMakeCache.txt"]).status().unwrap_or_default(); // if removing fails, it is since the cmake cache did not exist, we just can ignore it
     exit_status_to_result(
         "cmake",
-        Command::new("cmake").args(["-DCMAKE_BUILD_TYPE=\"Release\"", "libsais", "-Blibsais"]).status()?,
+        Command::new("cmake").args(["-DCMAKE_BUILD_TYPE=\"Release\"", "libsais", "-Blibsais"]).status()?
     )?;
     exit_status_to_result("make", Command::new("make").args(["-C", "libsais"]).status()?)?;
 
diff --git a/libsais64-rs/src/lib.rs b/libsais64-rs/src/lib.rs
index b2a1d3a..e2a87f6 100644
--- a/libsais64-rs/src/lib.rs
+++ b/libsais64-rs/src/lib.rs
@@ -16,11 +16,7 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
 pub fn sais64(text: &[u8]) -> Option<Vec<i64>> {
     let mut sa = vec![0; text.len()];
     let exit_code = unsafe { libsais64(text.as_ptr(), sa.as_mut_ptr(), text.len() as i64, 0, std::ptr::null_mut()) };
-    if exit_code == 0 {
-        Some(sa)
-    } else {
-        None
-    }
+    if exit_code == 0 { Some(sa) } else { None }
 }
 
 #[cfg(test)]
diff --git a/sa-builder/src/lib.rs b/sa-builder/src/lib.rs
index f20ec27..c0e13cd 100644
--- a/sa-builder/src/lib.rs
+++ b/sa-builder/src/lib.rs
@@ -21,14 +21,14 @@ pub struct Arguments {
     pub construction_algorithm: SAConstructionAlgorithm,
     /// If the suffix array should be compressed (default value true)
     #[arg(short, long, default_value_t = false)]
-    pub compress_sa: bool,
+    pub compress_sa: bool
 }
 
 /// Enum representing the two possible algorithms to construct the suffix array
 #[derive(ValueEnum, Clone, Debug, PartialEq)]
 pub enum SAConstructionAlgorithm {
     LibDivSufSort,
-    LibSais,
+    LibSais
 }
 
 /// Build a sparse suffix array from the given text
@@ -48,7 +48,7 @@ pub enum SAConstructionAlgorithm {
 pub fn build_ssa(
     text: &mut Vec<u8>,
     construction_algorithm: &SAConstructionAlgorithm,
-    sparseness_factor: u8,
+    sparseness_factor: u8
 ) -> Result<Vec<i64>, Box<dyn Error>> {
     // translate all L's to a I
     translate_l_to_i(text);
@@ -56,7 +56,7 @@ pub fn build_ssa(
     // Build the suffix array using the selected algorithm
     let mut sa = match construction_algorithm {
         SAConstructionAlgorithm::LibSais => libsais64_rs::sais64(text),
-        SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text),
+        SAConstructionAlgorithm::LibDivSufSort => libdivsufsort_rs::divsufsort64(text)
     }
     .ok_or("Building suffix array failed")?;
 
@@ -125,7 +125,7 @@ mod tests {
             "2",
             "--construction-algorithm",
             "lib-div-suf-sort",
-            "--compress-sa",
+            "--compress-sa"
         ]);
 
         assert_eq!(args.database_file, "database.fa");
diff --git a/sa-builder/src/main.rs b/sa-builder/src/main.rs
index 20f2e8a..01cc3c4 100644
--- a/sa-builder/src/main.rs
+++ b/sa-builder/src/main.rs
@@ -1,7 +1,7 @@
 use std::{
     fs::{File, OpenOptions},
     io::BufWriter,
-    time::{SystemTime, SystemTimeError, UNIX_EPOCH},
+    time::{SystemTime, SystemTimeError, UNIX_EPOCH}
 };
 
 use clap::Parser;
@@ -16,7 +16,7 @@ fn main() {
         output,
         sparseness_factor,
         construction_algorithm,
-        compress_sa,
+        compress_sa
     } = Arguments::parse();
     eprintln!();
     eprintln!("📋 Started loading the proteins...");
diff --git a/sa-compression/src/lib.rs b/sa-compression/src/lib.rs
index 9814e20..e9952a2 100644
--- a/sa-compression/src/lib.rs
+++ b/sa-compression/src/lib.rs
@@ -1,6 +1,6 @@
 use std::{
     error::Error,
-    io::{BufRead, Write},
+    io::{BufRead, Write}
 };
 
 use bitarray::{data_to_writer, Binary, BitArray};
@@ -22,7 +22,7 @@ pub fn dump_compressed_suffix_array(
     sa: Vec<i64>,
     sparseness_factor: u8,
     bits_per_value: usize,
-    writer: &mut impl Write,
+    writer: &mut impl Write
 ) -> Result<(), Box<dyn Error>> {
     // Write the flags to the writer
     // 00000001 indicates that the suffix array is compressed
@@ -59,7 +59,7 @@ pub fn dump_compressed_suffix_array(
 /// Returns an error if reading from the reader fails.
 pub fn load_compressed_suffix_array(
     reader: &mut impl BufRead,
-    bits_per_value: usize,
+    bits_per_value: usize
 ) -> Result<SuffixArray, Box<dyn Error>> {
     // Read the sample rate from the binary file (1 byte)
     let mut sample_rate_buffer = [0_u8; 1];
@@ -92,7 +92,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize,
+        pub valid_write_count: usize
     }
 
     impl Write for FailingWriter {
@@ -112,7 +112,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize,
+        pub valid_read_count: usize
     }
 
     impl Read for FailingReader {
@@ -141,16 +141,13 @@ mod tests {
         let mut writer = vec![];
         dump_compressed_suffix_array(sa, 1, 8, &mut writer).unwrap();
 
-        assert_eq!(
-            writer,
-            vec![
-                // bits per value
-                8, // sparseness factor
-                1, // size of the suffix array
-                10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array
-                8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9
-            ]
-        );
+        assert_eq!(writer, vec![
+            // bits per value
+            8, // sparseness factor
+            1, // size of the suffix array
+            10, 0, 0, 0, 0, 0, 0, 0, // compressed suffix array
+            8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 10, 9
+        ]);
     }
 
     #[test]
diff --git a/sa-index/src/binary.rs b/sa-index/src/binary.rs
index fc41f24..55c082a 100644
--- a/sa-index/src/binary.rs
+++ b/sa-index/src/binary.rs
@@ -1,6 +1,6 @@
 use std::{
     error::Error,
-    io::{BufRead, Read, Write},
+    io::{BufRead, Read, Write}
 };
 
 use crate::SuffixArray;
@@ -190,7 +190,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize,
+        pub valid_write_count: usize
     }
 
     impl Write for FailingWriter {
@@ -210,7 +210,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize,
+        pub valid_read_count: usize
     }
 
     impl Read for FailingReader {
@@ -266,13 +266,10 @@ mod tests {
 
         values.write_binary(&mut buffer).unwrap();
 
-        assert_eq!(
-            buffer,
-            vec![
-                1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0,
-                0, 0, 0, 0, 0, 0
-            ]
-        );
+        assert_eq!(buffer, vec![
+            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
+            0, 0, 0, 0
+        ]);
     }
 
     #[test]
@@ -295,17 +292,14 @@ mod tests {
 
         dump_suffix_array(&sa, 1, &mut buffer).unwrap();
 
-        assert_eq!(
-            buffer,
-            vec![
-                // required bits
-                64, // Sparseness factor
-                1,  // Size of the suffix array
-                5, 0, 0, 0, 0, 0, 0, 0, // Suffix array
-                1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0,
-                0, 0, 0, 0, 0, 0
-            ]
-        );
+        assert_eq!(buffer, vec![
+            // required bits
+            64, // Sparseness factor
+            1,  // Size of the suffix array
+            5, 0, 0, 0, 0, 0, 0, 0, // Suffix array
+            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
+            0, 0, 0, 0
+        ]);
     }
 
     #[test]
diff --git a/sa-index/src/lib.rs b/sa-index/src/lib.rs
index a43168d..53f5348 100644
--- a/sa-index/src/lib.rs
+++ b/sa-index/src/lib.rs
@@ -10,7 +10,7 @@ pub enum SuffixArray {
     /// The original suffix array.
     Original(Vec<i64>, u8),
     /// The compressed suffix array.
-    Compressed(BitArray, u8),
+    Compressed(BitArray, u8)
 }
 
 impl SuffixArray {
@@ -22,7 +22,7 @@ impl SuffixArray {
     pub fn len(&self) -> usize {
         match self {
             SuffixArray::Original(sa, _) => sa.len(),
-            SuffixArray::Compressed(sa, _) => sa.len(),
+            SuffixArray::Compressed(sa, _) => sa.len()
         }
     }
 
@@ -34,7 +34,7 @@ impl SuffixArray {
     pub fn bits_per_value(&self) -> usize {
         match self {
             SuffixArray::Original(_, _) => 64,
-            SuffixArray::Compressed(sa, _) => sa.bits_per_value(),
+            SuffixArray::Compressed(sa, _) => sa.bits_per_value()
         }
     }
 
@@ -46,7 +46,7 @@ impl SuffixArray {
     pub fn sample_rate(&self) -> u8 {
         match self {
             SuffixArray::Original(_, sample_rate) => *sample_rate,
-            SuffixArray::Compressed(_, sample_rate) => *sample_rate,
+            SuffixArray::Compressed(_, sample_rate) => *sample_rate
         }
     }
 
@@ -62,7 +62,7 @@ impl SuffixArray {
     pub fn get(&self, index: usize) -> i64 {
         match self {
             SuffixArray::Original(sa, _) => sa[index],
-            SuffixArray::Compressed(sa, _) => sa.get(index) as i64,
+            SuffixArray::Compressed(sa, _) => sa.get(index) as i64
         }
     }
 
diff --git a/sa-index/src/peptide_search.rs b/sa-index/src/peptide_search.rs
index 02e4975..55d629f 100644
--- a/sa-index/src/peptide_search.rs
+++ b/sa-index/src/peptide_search.rs
@@ -8,7 +8,7 @@ use crate::sa_searcher::{SearchAllSuffixesResult, Searcher};
 pub struct SearchResult {
     pub sequence: String,
     pub proteins: Vec<ProteinInfo>,
-    pub cutoff_used: bool,
+    pub cutoff_used: bool
 }
 
 /// Struct that represents all information known about a certain protein in our database
@@ -16,7 +16,7 @@ pub struct SearchResult {
 pub struct ProteinInfo {
     pub taxon: u32,
     pub uniprot_accession: String,
-    pub functional_annotations: String,
+    pub functional_annotations: String
 }
 
 impl From<&Protein> for ProteinInfo {
@@ -24,7 +24,7 @@ impl From<&Protein> for ProteinInfo {
         ProteinInfo {
             taxon: protein.taxon_id,
             uniprot_accession: protein.uniprot_id.clone(),
-            functional_annotations: protein.get_functional_annotations(),
+            functional_annotations: protein.get_functional_annotations()
         }
     }
 }
@@ -50,7 +50,7 @@ pub fn search_proteins_for_peptide<'a>(
     searcher: &'a Searcher,
     peptide: &str,
     cutoff: usize,
-    equate_il: bool,
+    equate_il: bool
 ) -> Option<(bool, Vec<&'a Protein>)> {
     let peptide = peptide.trim_end().to_uppercase();
 
@@ -63,7 +63,7 @@ pub fn search_proteins_for_peptide<'a>(
     let (suffixes, cutoff_used) = match suffix_search {
         SearchAllSuffixesResult::MaxMatches(matched_suffixes) => Some((matched_suffixes, true)),
         SearchAllSuffixesResult::SearchResult(matched_suffixes) => Some((matched_suffixes, false)),
-        SearchAllSuffixesResult::NoMatches => None,
+        SearchAllSuffixesResult::NoMatches => None
     }?;
 
     let proteins = searcher.retrieve_proteins(&suffixes);
@@ -77,7 +77,7 @@ pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_
     Some(SearchResult {
         sequence: peptide.to_string(),
         proteins: proteins.iter().map(|&protein| protein.into()).collect(),
-        cutoff_used,
+        cutoff_used
     })
 }
 
@@ -99,7 +99,7 @@ pub fn search_all_peptides(
     searcher: &Searcher,
     peptides: &Vec<String>,
     cutoff: usize,
-    equate_il: bool,
+    equate_il: bool
 ) -> Vec<SearchResult> {
     peptides
         .par_iter()
@@ -123,7 +123,7 @@ mod tests {
         let protein_info = ProteinInfo {
             taxon: 1,
             uniprot_accession: "P12345".to_string(),
-            functional_annotations: "GO:0001234;GO:0005678".to_string(),
+            functional_annotations: "GO:0001234;GO:0005678".to_string()
         };
 
         let generated_json = serde_json::to_string(&protein_info).unwrap();
@@ -138,7 +138,7 @@ mod tests {
         let search_result = SearchResult {
             sequence: "MSKIAALLPSV".to_string(),
             proteins: vec![],
-            cutoff_used: true,
+            cutoff_used: true
         };
 
         let generated_json = serde_json::to_string(&search_result).unwrap();
diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index 119af6c..03abf07 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -6,21 +6,21 @@ use text_compression::ProteinTextSlice;
 use crate::{
     sa_searcher::BoundSearch::{Maximum, Minimum},
     suffix_to_protein_index::{DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex},
-    Nullable, SuffixArray,
+    Nullable, SuffixArray
 };
 
 /// Enum indicating if we are searching for the minimum, or maximum bound in the suffix array
 #[derive(Clone, Copy, PartialEq)]
 enum BoundSearch {
     Minimum,
-    Maximum,
+    Maximum
 }
 
 /// Enum representing the minimum and maximum bound of the found matches in the suffix array
 #[derive(PartialEq, Debug)]
 pub enum BoundSearchResult {
     NoMatches,
-    SearchResult((usize, usize)),
+    SearchResult((usize, usize))
 }
 
 /// Enum representing the matching suffixes after searching a peptide in the suffix array
@@ -30,7 +30,7 @@ pub enum BoundSearchResult {
 pub enum SearchAllSuffixesResult {
     NoMatches,
     MaxMatches(Vec<i64>),
-    SearchResult(Vec<i64>),
+    SearchResult(Vec<i64>)
 }
 
 /// Custom implementation of partialEq for SearchAllSuffixesResult
@@ -67,7 +67,7 @@ impl PartialEq for SearchAllSuffixesResult {
                 array_eq_unordered(arr1, arr2)
             }
             (SearchAllSuffixesResult::NoMatches, SearchAllSuffixesResult::NoMatches) => true,
-            _ => false,
+            _ => false
         }
     }
 }
@@ -123,7 +123,7 @@ impl Deref for DenseSearcher {
 pub struct Searcher {
     pub sa: SuffixArray,
     pub proteins: Proteins,
-    pub suffix_index_to_protein: Box<dyn SuffixToProteinIndex>,
+    pub suffix_index_to_protein: Box<dyn SuffixToProteinIndex>
 }
 
 impl Searcher {
@@ -172,7 +172,7 @@ impl Searcher {
         // Depending on if we are searching for the min of max bound our condition is different
         let condition_check = match bound {
             Minimum => |a: u8, b: u8| a < b,
-            Maximum => |a: u8, b: u8| a > b,
+            Maximum => |a: u8, b: u8| a > b
         };
 
         // match as long as possible
@@ -265,7 +265,7 @@ impl Searcher {
 
         match bound {
             Minimum => (found, right),
-            Maximum => (found, left),
+            Maximum => (found, left)
         }
     }
 
@@ -307,7 +307,7 @@ impl Searcher {
         &self,
         search_string: &[u8],
         max_matches: usize,
-        equate_il: bool,
+        equate_il: bool
     ) -> SearchAllSuffixesResult {
         let mut matching_suffixes: Vec<i64> = vec![];
         let mut il_locations = vec![];
@@ -394,13 +394,9 @@ impl Searcher {
         il_locations: &[usize],
         search_string: &[u8],
         text_slice: ProteinTextSlice,
-        equate_il: bool,
+        equate_il: bool
     ) -> bool {
-        if equate_il {
-            true
-        } else {
-            text_slice.check_il_locations(skip, il_locations, search_string)
-        }
+        if equate_il { true } else { text_slice.check_il_locations(skip, il_locations, search_string) }
     }
 
     /// Returns all the proteins that correspond with the provided suffixes
@@ -432,7 +428,7 @@ mod tests {
     use crate::{
         sa_searcher::{BoundSearchResult, SearchAllSuffixesResult, Searcher},
         suffix_to_protein_index::SparseSuffixToProtein,
-        SuffixArray,
+        SuffixArray
     };
 
     #[test]
@@ -465,24 +461,24 @@ mod tests {
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![],
+                    functional_annotations: vec![]
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![],
+                    functional_annotations: vec![]
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![],
+                    functional_annotations: vec![]
                 },
                 Protein {
                     uniprot_id: String::new(),
                     taxon_id: 0,
-                    functional_annotations: vec![],
+                    functional_annotations: vec![]
                 },
-            ],
+            ]
         }
     }
 
@@ -568,8 +564,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![],
-            }],
+                functional_annotations: vec![]
+            }]
         };
 
         let sparse_sa = SuffixArray::Original(vec![0, 2, 4], 2);
@@ -591,8 +587,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![],
-            }],
+                functional_annotations: vec![]
+            }]
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 0, 1, 5, 4, 3, 2], 1);
@@ -613,8 +609,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![],
-            }],
+                functional_annotations: vec![]
+            }]
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
@@ -635,8 +631,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![],
-            }],
+                functional_annotations: vec![]
+            }]
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 4, 2, 0], 2);
@@ -659,8 +655,8 @@ mod tests {
             proteins: vec![Protein {
                 uniprot_id: String::new(),
                 taxon_id: 0,
-                functional_annotations: vec![],
-            }],
+                functional_annotations: vec![]
+            }]
         };
 
         let sparse_sa = SuffixArray::Original(vec![6, 5, 4, 3, 2, 1, 0], 1);
diff --git a/sa-index/src/suffix_to_protein_index.rs b/sa-index/src/suffix_to_protein_index.rs
index 1a224d2..a6a4e93 100644
--- a/sa-index/src/suffix_to_protein_index.rs
+++ b/sa-index/src/suffix_to_protein_index.rs
@@ -1,14 +1,14 @@
 use clap::ValueEnum;
 use sa_mappings::proteins::{SEPARATION_CHARACTER, TERMINATION_CHARACTER};
+use text_compression::ProteinText;
 
 use crate::Nullable;
-use text_compression::ProteinText;
 
 /// Enum used to define the commandline arguments and choose which index style is used
 #[derive(ValueEnum, Clone, Debug, PartialEq)]
 pub enum SuffixToProteinMappingStyle {
     Dense,
-    Sparse,
+    Sparse
 }
 
 /// Trait implemented by the SuffixToProtein mappings
@@ -29,14 +29,14 @@ pub trait SuffixToProteinIndex: Send + Sync {
 #[derive(Debug, PartialEq)]
 pub struct DenseSuffixToProtein {
     // UniProtKB does not have more that u32::MAX proteins, so a larger type is not needed
-    mapping: Vec<u32>,
+    mapping: Vec<u32>
 }
 
 /// Mapping that uses O(m) memory with m the number of proteins, but retrieval of the protein is
 /// O(log m)
 #[derive(Debug, PartialEq)]
 pub struct SparseSuffixToProtein {
-    mapping: Vec<i64>,
+    mapping: Vec<i64>
 }
 
 impl SuffixToProteinIndex for DenseSuffixToProtein {
@@ -113,9 +113,9 @@ mod tests {
 
     use crate::{
         suffix_to_protein_index::{
-            DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex, SuffixToProteinMappingStyle,
+            DenseSuffixToProtein, SparseSuffixToProtein, SuffixToProteinIndex, SuffixToProteinMappingStyle
         },
-        Nullable,
+        Nullable
     };
 
     fn build_text() -> ProteinText {
@@ -138,7 +138,7 @@ mod tests {
         let u8_text = &build_text();
         let index = DenseSuffixToProtein::new(u8_text);
         let expected = DenseSuffixToProtein {
-            mapping: vec![0, 0, 0, u32::NULL, 1, 1, u32::NULL, 2, 2, 2, u32::NULL],
+            mapping: vec![0, 0, 0, u32::NULL, 1, 1, u32::NULL, 2, 2, 2, u32::NULL]
         };
         assert_eq!(index, expected);
     }
diff --git a/sa-mappings/src/proteins.rs b/sa-mappings/src/proteins.rs
index 9285980..53e52b8 100644
--- a/sa-mappings/src/proteins.rs
+++ b/sa-mappings/src/proteins.rs
@@ -23,7 +23,7 @@ pub struct Protein {
     pub taxon_id: u32,
 
     /// The encoded functional annotations of the protein
-    pub functional_annotations: Vec<u8>,
+    pub functional_annotations: Vec<u8>
 }
 
 /// A struct that represents a collection of proteins
@@ -32,7 +32,7 @@ pub struct Proteins {
     pub text: ProteinText,
 
     /// The proteins in the input string
-    pub proteins: Vec<Protein>,
+    pub proteins: Vec<Protein>
 }
 
 impl Protein {
@@ -80,7 +80,7 @@ impl Proteins {
             proteins.push(Protein {
                 uniprot_id: uniprot_id.to_string(),
                 taxon_id,
-                functional_annotations,
+                functional_annotations
             });
         }
 
@@ -195,7 +195,7 @@ mod tests {
             .unwrap();
         file.write(
             "P13579\t17\tKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPH\tGO:0009279;IPR:IPR016364;IPR:IPR008816\n"
-                .as_bytes(),
+                .as_bytes()
         )
         .unwrap();
 
@@ -207,7 +207,7 @@ mod tests {
         let protein = Protein {
             uniprot_id: "P12345".to_string(),
             taxon_id: 1,
-            functional_annotations: vec![0xD1, 0x11],
+            functional_annotations: vec![0xD1, 0x11]
         };
 
         assert_eq!(protein.uniprot_id, "P12345");
@@ -225,14 +225,14 @@ mod tests {
                 Protein {
                     uniprot_id: "P12345".to_string(),
                     taxon_id: 1,
-                    functional_annotations: vec![0xD1, 0x11],
+                    functional_annotations: vec![0xD1, 0x11]
                 },
                 Protein {
                     uniprot_id: "P54321".to_string(),
                     taxon_id: 2,
-                    functional_annotations: vec![0xD1, 0x11],
+                    functional_annotations: vec![0xD1, 0x11]
                 },
-            ],
+            ]
         };
 
         assert_eq!(proteins.proteins.len(), 2);
diff --git a/sa-server/src/main.rs b/sa-server/src/main.rs
index c65ba7c..5284546 100644
--- a/sa-server/src/main.rs
+++ b/sa-server/src/main.rs
@@ -2,14 +2,14 @@ use std::{
     error::Error,
     fs::File,
     io::{BufReader, Read},
-    sync::Arc,
+    sync::Arc
 };
 
 use axum::{
     extract::{DefaultBodyLimit, State},
     http::StatusCode,
     routing::post,
-    Json, Router,
+    Json, Router
 };
 use clap::Parser;
 use sa_compression::load_compressed_suffix_array;
@@ -17,7 +17,7 @@ use sa_index::{
     binary::load_suffix_array,
     peptide_search::{search_all_peptides, SearchResult},
     sa_searcher::SparseSearcher,
-    SuffixArray,
+    SuffixArray
 };
 use sa_mappings::proteins::Proteins;
 use serde::Deserialize;
@@ -30,7 +30,7 @@ pub struct Arguments {
     #[arg(short, long)]
     database_file: String,
     #[arg(short, long)]
-    index_file: String,
+    index_file: String
 }
 
 /// Function used by serde to place a default value in the cutoff field of the input
@@ -58,7 +58,7 @@ struct InputData {
     cutoff: usize,
     #[serde(default = "bool::default")]
     // default value is false // TODO: maybe default should be true?
-    equate_il: bool,
+    equate_il: bool
 }
 
 #[tokio::main]
@@ -81,7 +81,7 @@ async fn main() {
 /// Returns the search results from the index as a JSON
 async fn search(
     State(searcher): State<Arc<SparseSearcher>>,
-    data: Json<InputData>,
+    data: Json<InputData>
 ) -> Result<Json<Vec<SearchResult>>, StatusCode> {
     let search_result = search_all_peptides(&searcher, &data.peptides, data.cutoff, data.equate_il);
 
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index dc7f71e..85e93b3 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -1,7 +1,7 @@
-use std::collections::HashMap;
 use std::{
+    collections::HashMap,
     error::Error,
-    io::{BufRead, Write},
+    io::{BufRead, Write}
 };
 
 use bitarray::{data_to_writer, Binary, BitArray};
@@ -13,7 +13,7 @@ pub struct ProteinText {
     /// Hashmap storing the mapping between the character as `u8` and a 5 bit number.
     char_to_5bit: HashMap<u8, u8>,
     /// Vector storing the mapping between the 5 bit number and the character as `u8`.
-    bit5_to_char: Vec<u8>,
+    bit5_to_char: Vec<u8>
 }
 
 impl ProteinText {
@@ -184,7 +184,7 @@ pub struct ProteinTextSlice<'a> {
     /// The start of the slice.
     start: usize, // included
     /// The end of the slice.
-    end: usize, // excluded
+    end: usize // excluded
 }
 
 impl<'a> ProteinTextSlice<'a> {
@@ -282,13 +282,13 @@ impl<'a> ProteinTextSlice<'a> {
 /// Structure representing an iterator over a `ProteinText` instance, iterating the characters of the text.
 pub struct ProteinTextIterator<'a> {
     protein_text: &'a ProteinText,
-    index: usize,
+    index: usize
 }
 
 /// Structure representing an iterator over a `ProteintextSlice` instance, iterating the characters of the slice.
 pub struct ProteinTextSliceIterator<'a> {
     text_slice: &'a ProteinTextSlice<'a>,
-    index: usize,
+    index: usize
 }
 
 impl<'a> Iterator for ProteinTextSliceIterator<'a> {
@@ -394,7 +394,7 @@ mod tests {
 
     pub struct FailingWriter {
         /// The number of times the write function can be called before it fails.
-        pub valid_write_count: usize,
+        pub valid_write_count: usize
     }
 
     impl Write for FailingWriter {
@@ -414,7 +414,7 @@ mod tests {
 
     pub struct FailingReader {
         /// The number of times the read function can be called before it fails.
-        pub valid_read_count: usize,
+        pub valid_read_count: usize
     }
 
     impl Read for FailingReader {
@@ -545,15 +545,12 @@ mod tests {
         let mut writer = vec![];
         dump_compressed_text(text, &mut writer).unwrap();
 
-        assert_eq!(
-            writer,
-            vec![
-                // bits per value
-                5, // size of the text
-                10, 0, 0, 0, 0, 0, 0, 0, // compressed text
-                0, 128, 74, 232, 152, 66, 134, 8
-            ]
-        );
+        assert_eq!(writer, vec![
+            // bits per value
+            5, // size of the text
+            10, 0, 0, 0, 0, 0, 0, 0, // compressed text
+            0, 128, 74, 232, 152, 66, 134, 8
+        ]);
     }
 
     #[test]

From b145e776e53d81e146d9bcb606834299f1b44d1e Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:29:17 +0200
Subject: [PATCH 09/13] fix cargo clippy errors

---
 bitarray/src/lib.rs         |  2 +-
 sa-index/src/sa_searcher.rs | 10 +++++-----
 text-compression/src/lib.rs |  6 +++++-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index e4bd8a2..78b6c60 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -85,7 +85,7 @@ impl BitArray {
     /// * `index` - The index of the value to set.
     /// * `value` - The value to set at the specified index.
     pub fn set(&mut self, index: usize, value: u64) {
-        let value: u64 = value.into();
+        let value: u64 = value;
         let start_block = index * self.bits_per_value / 64;
         let start_block_offset = index * self.bits_per_value % 64;
 
diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index 03abf07..f709fd6 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -178,11 +178,11 @@ impl Searcher {
         // match as long as possible
         while index_in_search_string < search_string.len()
             && index_in_suffix < self.proteins.text.len()
-            && (search_string[index_in_search_string] == self.proteins.text.get(index_in_suffix) as u8
+            && (search_string[index_in_search_string] == self.proteins.text.get(index_in_suffix)
                 || (search_string[index_in_search_string] == b'L'
-                    && self.proteins.text.get(index_in_suffix) as u8 == b'I')
+                    && self.proteins.text.get(index_in_suffix) == b'I')
                 || (search_string[index_in_search_string] == b'I'
-                    && self.proteins.text.get(index_in_suffix) as u8 == b'L'))
+                    && self.proteins.text.get(index_in_suffix) == b'L'))
         {
             index_in_suffix += 1;
             index_in_search_string += 1;
@@ -201,10 +201,10 @@ impl Searcher {
                     search_string[index_in_search_string]
                 };
 
-                let protein_char = if self.proteins.text.get(index_in_suffix) as u8 == b'L' {
+                let protein_char = if self.proteins.text.get(index_in_suffix) == b'L' {
                     b'I'
                 } else {
-                    self.proteins.text.get(index_in_suffix) as u8
+                    self.proteins.text.get(index_in_suffix)
                 };
 
                 is_cond_or_equal = condition_check(peptide_char, protein_char);
diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index 85e93b3..cac302f 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -73,7 +73,7 @@ impl ProteinText {
     /// # Returns
     ///
     /// An instance of `ProteinText`
-    pub fn from_vec(input_vec: &Vec<u8>) -> ProteinText {
+    pub fn from_vec(input_vec: &[u8]) -> ProteinText {
         let char_to_5bit = ProteinText::create_char_to_5bit_hashmap();
         let bit5_to_char = ProteinText::create_bit5_to_char();
 
@@ -223,6 +223,10 @@ impl<'a> ProteinTextSlice<'a> {
         self.end - self.start
     }
 
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
     /// Checks if the slice and a given array of `u8` are equal.
     /// I and L can be equated.
     ///

From 3623691e8f7fc0a359fc416c0c86532f0846478a Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:30:41 +0200
Subject: [PATCH 10/13] reformat with cargo

---
 sa-index/src/sa_searcher.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sa-index/src/sa_searcher.rs b/sa-index/src/sa_searcher.rs
index f709fd6..e9590c8 100644
--- a/sa-index/src/sa_searcher.rs
+++ b/sa-index/src/sa_searcher.rs
@@ -179,10 +179,8 @@ impl Searcher {
         while index_in_search_string < search_string.len()
             && index_in_suffix < self.proteins.text.len()
             && (search_string[index_in_search_string] == self.proteins.text.get(index_in_suffix)
-                || (search_string[index_in_search_string] == b'L'
-                    && self.proteins.text.get(index_in_suffix) == b'I')
-                || (search_string[index_in_search_string] == b'I'
-                    && self.proteins.text.get(index_in_suffix) == b'L'))
+                || (search_string[index_in_search_string] == b'L' && self.proteins.text.get(index_in_suffix) == b'I')
+                || (search_string[index_in_search_string] == b'I' && self.proteins.text.get(index_in_suffix) == b'L'))
         {
             index_in_suffix += 1;
             index_in_search_string += 1;

From 76ca5da9fc2e813f06be530b4889c0353d1646fa Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:33:48 +0200
Subject: [PATCH 11/13] fix too long doc comment

---
 bitarray/src/lib.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index 78b6c60..06235d1 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -149,9 +149,8 @@ impl BitArray {
     }
 }
 
-/// Writes the data to a writer in a binary format using a bit array. This function is helpfull
-/// when writing large amounts of data to a writer in chunks. The data is written in chunks of the
-/// specified capacity, so memory usage is minimized.
+/// Writes the data to a writer in a binary format using a bit array. The data is written 
+/// in chunks of the specified capacity, so memory usage is minimized.
 ///
 /// # Arguments
 ///

From 1c440127b88d19400a009afb2d87ff90dc55a5eb Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:35:12 +0200
Subject: [PATCH 12/13] reformat with cargo

---
 bitarray/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitarray/src/lib.rs b/bitarray/src/lib.rs
index 06235d1..901b395 100644
--- a/bitarray/src/lib.rs
+++ b/bitarray/src/lib.rs
@@ -149,7 +149,7 @@ impl BitArray {
     }
 }
 
-/// Writes the data to a writer in a binary format using a bit array. The data is written 
+/// Writes the data to a writer in a binary format using a bit array. The data is written
 /// in chunks of the specified capacity, so memory usage is minimized.
 ///
 /// # Arguments

From 79bee50349dc0cffc5213546153713132401c226 Mon Sep 17 00:00:00 2001
From: SimonVandeVyver <simon.vandevyver@ugent.be>
Date: Thu, 12 Sep 2024 16:39:42 +0200
Subject: [PATCH 13/13] add lifetime parameter to ProteinTextSlice

---
 text-compression/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text-compression/src/lib.rs b/text-compression/src/lib.rs
index cac302f..4866a6c 100644
--- a/text-compression/src/lib.rs
+++ b/text-compression/src/lib.rs
@@ -198,7 +198,7 @@ impl<'a> ProteinTextSlice<'a> {
     /// # Returns
     ///
     /// An instance of `ProteinTextSlice`
-    pub fn new(text: &'a ProteinText, start: usize, end: usize) -> ProteinTextSlice {
+    pub fn new(text: &'a ProteinText, start: usize, end: usize) -> ProteinTextSlice<'a> {
         Self { text, start, end }
     }