Skip to content

Commit

Permalink
add comments + do not bitpack text when sparseness is 1
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonVandeVyver committed Oct 24, 2024
1 parent 64e5589 commit aa6ef9f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
8 changes: 8 additions & 0 deletions libsais64-rs/src/bitpacking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ fn get_rank(c: u8) -> u8 {
}
}

// Number of bits needed to represent one character of the protein text.
// The bitpack_text_* functions multiply this by the sparseness factor to check
// that the packed characters fit in the width of the output element type.
pub const BITS_PER_CHAR: usize = 5;

// Bitpack text into a vector of u8 elements. Requires BITS_PER_CHAR * sparseness_factor <= 8.
pub fn bitpack_text_8(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u8> {
assert!(BITS_PER_CHAR * sparseness_factor <= 8);

let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor;
let mut text_packed = vec![0; num_ints];
Expand Down Expand Up @@ -42,7 +46,9 @@ pub fn bitpack_text_8(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u8> {

}

// Bitpack text into a vector of u16 elements. Requires BITS_PER_CHAR * sparseness_factor <= 16.
pub fn bitpack_text_16(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u16> {
assert!(BITS_PER_CHAR * sparseness_factor <= 16);

let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor;
let mut text_packed = vec![0; num_ints];
Expand Down Expand Up @@ -74,7 +80,9 @@ pub fn bitpack_text_16(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u16> {

}

// Bitpack text into a vector of u32 elements. Requires BITS_PER_CHAR * sparseness_factor <= 32.
pub fn bitpack_text_32(text: &Vec<u8>, sparseness_factor: usize) -> Vec<u32> {
assert!(BITS_PER_CHAR * sparseness_factor <= 32);

let num_ints = (text.len() + (sparseness_factor-1)) / sparseness_factor;
let mut text_packed = vec![0; num_ints];
Expand Down
14 changes: 5 additions & 9 deletions libsais64-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

pub mod bitpacking;

/// Builds the suffix array over the `text` using the libsais64 algorithm
/// Builds the suffix array over the `text` using the libsais algorithm
///
/// # Arguments
/// * `text` - The text used for suffix array construction
Expand All @@ -24,7 +24,7 @@ pub fn sais64(text: &Vec<u8>, libsais_sparseness: usize) -> Result<Vec<i64>, &st
let required_bits = libsais_sparseness * BITS_PER_CHAR;
if required_bits <= 8 {
// bitpacked values fit in uint8_t
let packed_text = bitpack_text_8(text, libsais_sparseness);
let packed_text = if libsais_sparseness == 1 { text } else { &bitpack_text_8(text, libsais_sparseness) };
sa = vec![0; packed_text.len()];
exit_code = unsafe { libsais64(packed_text.as_ptr(), sa.as_mut_ptr(), packed_text.len() as i64, 0, null_mut()) };
} else if required_bits <= 16 {
Expand Down Expand Up @@ -54,14 +54,10 @@ mod tests {

#[test]
fn check_build_sa_with_libsais64() {
let bits_per_char = 5;
let sparseness_factor = 4;
let mut text = [100834, // BANA
493603, // NA-B
80975, // ANAN
65536 // A$
].to_vec();
let mut text = "BANANA-BANANA$".as_bytes().to_vec();
let sa = sais64(&mut text, sparseness_factor);
assert_eq!(sa, Some(vec![12, 8, 0, 4]));
let correct_sa: Vec<i64> = vec![12, 8, 0, 4];
assert_eq!(sa, Ok(correct_sa));
}
}

0 comments on commit aa6ef9f

Please sign in to comment.