Commit 9f3d6f4

fmt + clippy
tibvdm committed May 17, 2024
1 parent 58772f1 commit 9f3d6f4
Showing 9 changed files with 209 additions and 152 deletions.
39 changes: 23 additions & 16 deletions bitarray/src/binary.rs
@@ -1,6 +1,11 @@
//! This module provides utilities for reading and writing the bitarray as binary.

use std::io::{BufRead, Read, Result, Write};
use std::io::{
BufRead,
Read,
Result,
Write
};

use crate::BitArray;

@@ -61,11 +66,12 @@ impl Binary for BitArray {
self.data.clear();

let mut buffer = vec![0; 8 * 1024];

loop {
let (finished, bytes_read) = fill_buffer(&mut reader, &mut buffer);
for buffer_slice in buffer[..bytes_read].chunks_exact(8) {
self.data.push(u64::from_le_bytes(buffer_slice.try_into().unwrap()));
for buffer_slice in buffer[.. bytes_read].chunks_exact(8) {
self.data
.push(u64::from_le_bytes(buffer_slice.try_into().unwrap()));
}

if finished {
@@ -86,8 +92,8 @@ impl Binary for BitArray {
///
/// # Returns
///
/// Returns a tuple `(finished, bytes_read)` where `finished` indicates whether the end of the input is reached,
/// and `bytes_read` is the number of bytes read into the buffer.
/// Returns a tuple `(finished, bytes_read)` where `finished` indicates whether the end of the input
/// is reached, and `bytes_read` is the number of bytes read into the buffer.
fn fill_buffer<T: Read>(input: &mut T, buffer: &mut Vec<u8>) -> (bool, usize) {
// Store the buffer size in advance, because rust will complain
// about the buffer being borrowed mutably while it's borrowed
@@ -109,7 +115,7 @@ fn fill_buffer<T: Read>(input: &mut T, buffer: &mut Vec<u8>) -> (bool, usize) {
// We've read {bytes_read} bytes
Ok(bytes_read) => {
// Shrink the writable buffer slice
writable_buffer_space = writable_buffer_space[bytes_read..].as_mut();
writable_buffer_space = writable_buffer_space[bytes_read ..].as_mut();
}

Err(err) => {
@@ -137,7 +143,7 @@ mod tests {
let mut input = input_str.as_bytes();

let mut buffer = vec![0; 800];

loop {
let (finished, bytes_read) = fill_buffer(&mut input, &mut buffer);

@@ -170,19 +176,20 @@ mod tests {
let mut buffer = Vec::new();
bitarray.write_binary(&mut buffer).unwrap();

assert_eq!(buffer, vec![
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12,
0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01,
0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
]);
assert_eq!(
buffer,
vec![
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45,
0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
]
);
}

#[test]
fn test_read_binary() {
let buffer = vec![
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12,
0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01,
0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45,
0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0,
];

let mut bitarray = BitArray::with_capacity(4, 40);
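
A note on the read loop above: read_binary repeatedly fills a fixed-size byte buffer, decodes the filled prefix as little-endian u64 words, and stops once fill_buffer reports the end of the input. The sketch below is a minimal, self-contained approximation of that fill-and-chunk pattern (it simplifies error handling and takes a plain byte slice instead of a Vec), not the crate's exact implementation:

    use std::io::Read;

    // Simplified stand-in for fill_buffer: read until the buffer is full or
    // the input is exhausted, returning (finished, bytes_read).
    fn fill_buffer<T: Read>(input: &mut T, buffer: &mut [u8]) -> (bool, usize) {
        let mut bytes_read = 0;
        while bytes_read < buffer.len() {
            match input.read(&mut buffer[bytes_read..]) {
                Ok(0) => return (true, bytes_read), // end of input reached
                Ok(n) => bytes_read += n,
                Err(err) => panic!("read failed: {}", err),
            }
        }
        (false, bytes_read)
    }

    fn main() {
        let bytes: Vec<u8> = (0u8..32).collect();
        let mut input = bytes.as_slice();
        let mut buffer = vec![0u8; 16];
        let mut words: Vec<u64> = Vec::new();

        loop {
            let (finished, bytes_read) = fill_buffer(&mut input, &mut buffer);
            // Only complete 8-byte chunks of the filled prefix are decoded.
            for chunk in buffer[..bytes_read].chunks_exact(8) {
                words.push(u64::from_le_bytes(chunk.try_into().unwrap()));
            }
            if finished {
                break;
            }
        }

        assert_eq!(words.len(), 4); // 32 bytes -> four u64 words
    }
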
38 changes: 23 additions & 15 deletions bitarray/src/lib.rs
@@ -2,19 +2,22 @@

mod binary;

use std::io::{Result, Write};
use std::io::{
Result,
Write
};

/// Re-export the `Binary` trait.
pub use binary::Binary;

/// A fixed-size bit array implementation.
pub struct BitArray {
/// The underlying data storage for the bit array.
data: Vec<u64>,
data: Vec<u64>,
/// The mask used to extract the relevant bits from each element in the data vector.
mask: u64,
mask: u64,
/// The length of the bit array.
len: usize,
len: usize,
/// The number of bits in a single element of the data vector.
bits_per_value: usize
}
@@ -34,7 +37,7 @@ impl BitArray {
Self {
data: vec![0; capacity * bits_per_value / 64 + 1],
mask: (1 << bits_per_value) - 1,
len: capacity,
len: capacity,
bits_per_value
}
}
@@ -56,7 +59,8 @@ impl BitArray {
if start_block_offset + self.bits_per_value <= 64 {
// Shift the value to the right so that the relevant bits are in the least significant
// position Then mask out the irrelevant bits
return self.data[start_block] >> (64 - start_block_offset - self.bits_per_value) & self.mask;
return self.data[start_block] >> (64 - start_block_offset - self.bits_per_value)
& self.mask;
}

let end_block = (index + 1) * self.bits_per_value / 64;
@@ -87,7 +91,8 @@ impl BitArray {
// If the value is contained within a single block
if start_block_offset + self.bits_per_value <= 64 {
// Clear the relevant bits in the start block
self.data[start_block] &= !(self.mask << (64 - start_block_offset - self.bits_per_value));
self.data[start_block] &=
!(self.mask << (64 - start_block_offset - self.bits_per_value));
// Set the relevant bits in the start block
self.data[start_block] |= value << (64 - start_block_offset - self.bits_per_value);
return;
@@ -146,13 +151,14 @@ impl BitArray {
///
/// A `Result` indicating whether the write operation was successful or not.
pub fn data_to_writer(
data: Vec<i64>,
data: Vec<i64>,
bits_per_value: usize,
max_capacity: usize,
writer: &mut impl Write,
writer: &mut impl Write
) -> Result<()> {
// Calculate the capacity of the bit array so the data buffer can be stored entirely
// This makes the process of writing partial data to the writer easier as bounds checking is not needed
// This makes the process of writing partial data to the writer easier as bounds checking is not
// needed
let capacity = max_capacity / (bits_per_value * 64) * bits_per_value * 64;

// If the capacity is 0, we can write the data directly to the writer
@@ -255,11 +261,13 @@ mod tests {

data_to_writer(data, 40, 2, &mut writer).unwrap();

assert_eq!(writer, vec![
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12,
0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45, 0x23, 0x01,
0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
]);
assert_eq!(
writer,
vec![
0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12, 0xde, 0xbc, 0x0a, 0x89, 0x67, 0x45,
0x23, 0x01, 0x00, 0x00, 0x00, 0x00, 0x56, 0x34, 0x12, 0xf0
]
);
}

// #[test]
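
A note on the shift/mask arithmetic preserved by the reformatted get and set above: each value is bits_per_value bits wide and is packed starting at the most significant end of a 64-bit block. The toy walk-through below uses a single raw u64 and made-up values (it is not the crate's API) to show the clear/write/read steps for the single-block case:

    // Pack and unpack one 8-bit value inside a single u64 block, mirroring the
    // single-block branch of BitArray::set and BitArray::get shown above.
    fn main() {
        let bits_per_value: usize = 8;
        let mask: u64 = (1 << bits_per_value) - 1;
        let mut block: u64 = 0;

        // set(index = 2, value = 0xAB): the slot starts 16 bits from the top.
        let index = 2;
        let offset = index * bits_per_value;      // bit offset inside the block
        let shift = 64 - offset - bits_per_value; // distance from the low end
        block &= !(mask << shift);                // clear the slot
        block |= 0xABu64 << shift;                // write the value

        // get(index = 2): shift the slot down to the low end, then mask.
        let read_back = (block >> shift) & mask;
        assert_eq!(read_back, 0xAB);
    }

The capacity expression in data_to_writer, max_capacity / (bits_per_value * 64) * bits_per_value * 64, rounds max_capacity down to a whole multiple of bits_per_value * 64 (for example, 3000 becomes 2560 when bits_per_value is 40), which is what lets the writer skip bounds checking for partial groups.
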
16 changes: 8 additions & 8 deletions sa-builder/src/lib.rs
@@ -73,14 +73,14 @@ pub fn build_ssa(
}

/// Translate all L's to I's in the given text
///
///
/// # Arguments
/// * `text` - The text in which we want to translate the L's to I's
///
///
/// # Returns
///
///
/// The text with all L's translated to I's
fn translate_l_to_i(text: &mut Vec<u8>) {
fn translate_l_to_i(text: &mut [u8]) {
for character in text.iter_mut() {
if *character == b'L' {
*character = b'I'
@@ -89,13 +89,13 @@ fn translate_l_to_i(text: &mut Vec<u8>) {
}

/// Sample the suffix array with the given sparseness factor
///
///
/// # Arguments
/// * `sa` - The suffix array that we want to sample
/// * `sparseness_factor` - The sparseness factor used for sampling
///
///
/// # Returns
///
///
/// The sampled suffix array
fn sample_sa(sa: &mut Vec<i64>, sparseness_factor: u8) {
if sparseness_factor <= 1 {
@@ -110,7 +110,7 @@ fn sample_sa(sa: &mut Vec<i64>, sparseness_factor: u8) {
current_sampled_index += 1;
}
}

// make shorter
sa.resize(current_sampled_index, 0);
}
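
For reference, the two helpers touched above can be exercised on their own. In the sketch below, translate_l_to_i mirrors the body shown in the diff, while the retention test in sample_sa (keep suffix positions that are a multiple of the sparseness factor) is an assumption, since that part of the function is collapsed out of view:

    // Standalone sketch of the sa-builder helpers above; the sample_sa
    // retention criterion is assumed, not taken from the diff.
    fn translate_l_to_i(text: &mut [u8]) {
        for character in text.iter_mut() {
            if *character == b'L' {
                *character = b'I'
            }
        }
    }

    fn sample_sa(sa: &mut Vec<i64>, sparseness_factor: u8) {
        if sparseness_factor <= 1 {
            return;
        }
        let mut current_sampled_index = 0;
        for i in 0..sa.len() {
            // Assumed rule: keep suffixes starting at multiples of the factor.
            if sa[i] % sparseness_factor as i64 == 0 {
                sa[current_sampled_index] = sa[i];
                current_sampled_index += 1;
            }
        }
        // make shorter
        sa.resize(current_sampled_index, 0);
    }

    fn main() {
        let mut text = b"LEUCINE".to_vec();
        translate_l_to_i(&mut text);
        assert_eq!(text, b"IEUCINE".to_vec());

        let mut sa: Vec<i64> = vec![4, 0, 2, 6, 1, 3, 5];
        sample_sa(&mut sa, 2);
        assert_eq!(sa, vec![4, 0, 2, 6]);
    }
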
42 changes: 23 additions & 19 deletions sa-builder/src/main.rs
@@ -1,12 +1,18 @@
use std::{fs::{File, OpenOptions}, io::Result};
use std::{
fs::{
File,
OpenOptions
},
io::Result
};

use clap::Parser;
use sa_builder::{
build_ssa,
Arguments
};
use sa_index::binary::dump_suffix_array;
use sa_compression::dump_compressed_suffix_array;
use sa_index::binary::dump_suffix_array;
use sa_mappings::{
proteins::Proteins,
taxonomy::{
@@ -25,34 +31,32 @@ fn main() {
compress_sa
} = Arguments::parse();

let taxon_id_calculator = TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar).unwrap_or_else(
|err| eprint_and_exit(err.to_string().as_str())
);
let taxon_id_calculator =
TaxonAggregator::try_from_taxonomy_file(&taxonomy, AggregationMethod::LcaStar)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));

// read input
let mut data = Proteins::try_from_database_file_without_annotations(&database_file, &taxon_id_calculator).unwrap_or_else(
|err| eprint_and_exit(err.to_string().as_str())
);
let mut data =
Proteins::try_from_database_file_without_annotations(&database_file, &taxon_id_calculator)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));

// calculate sparse suffix array
let sa = build_ssa(&mut data, &construction_algorithm, sparseness_factor).unwrap_or_else(
|err| eprint_and_exit(err.to_string().as_str())
);
let sa = build_ssa(&mut data, &construction_algorithm, sparseness_factor)
.unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));

// open the output file
let mut file = open_file(&output).unwrap_or_else(
|err| eprint_and_exit(err.to_string().as_str())
);
let mut file =
open_file(&output).unwrap_or_else(|err| eprint_and_exit(err.to_string().as_str()));

if compress_sa {
let bits_per_value = (data.len() as f64).log2().ceil() as usize;
if let Err(err) = dump_compressed_suffix_array(sa, sparseness_factor, bits_per_value, &mut file) {
eprint_and_exit(err.to_string().as_str());
};
} else {
if let Err(err) = dump_suffix_array(&sa, sparseness_factor, &mut file) {
if let Err(err) =
dump_compressed_suffix_array(sa, sparseness_factor, bits_per_value, &mut file)
{
eprint_and_exit(err.to_string().as_str());
};
} else if let Err(err) = dump_suffix_array(&sa, sparseness_factor, &mut file) {
eprint_and_exit(err.to_string().as_str());
}
}

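
The final hunk above is the clippy::collapsible_else_if rewrite: the nested else { if let ... } becomes else if let .... A minimal before/after sketch with a made-up fallible helper (not the sa-builder API):

    // Illustrates the clippy::collapsible_else_if pattern fixed in main().
    fn fallible(flag: bool) -> Result<(), String> {
        if flag {
            Ok(())
        } else {
            Err("something went wrong".to_string())
        }
    }

    fn main() {
        let compress_sa = false;

        // Before (what clippy::collapsible_else_if warns about):
        //
        // if compress_sa {
        //     if let Err(err) = fallible(true) {
        //         eprintln!("{}", err);
        //     }
        // } else {
        //     if let Err(err) = fallible(false) {
        //         eprintln!("{}", err);
        //     }
        // }

        // After: the nested block in the else branch is collapsed.
        if compress_sa {
            if let Err(err) = fallible(true) {
                eprintln!("{}", err);
            }
        } else if let Err(err) = fallible(false) {
            eprintln!("{}", err);
        }
    }

The bits_per_value expression in the same hunk, (data.len() as f64).log2().ceil() as usize, is just enough bits to store any position in the protein text; for roughly 3 billion characters, log2 is about 31.5, so 32 bits per value.
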

