diff --git a/.gitignore b/.gitignore index c74ddbd..a0aa774 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ target/ .vscode/ -*.dat \ No newline at end of file +*.dat +*.txt +*.zip +.idea/ \ No newline at end of file diff --git a/src/compression.rs b/src/compression.rs index b81f0ea..1cd2f0c 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -1,6 +1,6 @@ use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode}; use std::{collections::{BinaryHeap, HashMap}, fs::File, io::{self, Write}}; - +use std::collections::BTreeMap; pub struct CompressionTool { input: String, // todo: should also support streams } @@ -12,8 +12,8 @@ impl CompressionTool { } } - fn generate_frequency_map(&self) -> HashMap { - let mut map: HashMap = HashMap::new(); + fn generate_frequency_map(&self) -> BTreeMap { + let mut map: BTreeMap = BTreeMap::new(); for ch in self.input.chars() { let counter: &mut i32 = map.entry(ch).or_insert(0); @@ -23,7 +23,7 @@ impl CompressionTool { map } - fn write_header(&self, file: &mut File, frequency_map: HashMap) -> io::Result<()> { + fn write_header(&self, file: &mut File, frequency_map: BTreeMap) -> io::Result<()> { let num_chars: u32 = frequency_map.len() as u32; file.write_all(&num_chars.to_le_bytes())?; @@ -40,7 +40,7 @@ impl CompressionTool { pub fn compress(&mut self, output_file: &str) -> Result, String> { let mut file: File = File::create(output_file).map_err(|e| e.to_string())?; - let frequency_map: HashMap = self.generate_frequency_map(); + let frequency_map: BTreeMap = self.generate_frequency_map(); self.write_header(&mut file, frequency_map.clone()) .map_err(|e| format!("Error writing header: {}", e))?; @@ -89,14 +89,11 @@ impl CompressionTool { // Pad the compressed data to be a multiple of 8 bits if necessary let padding_bits: usize = 8 - compressed_bits.len() % 8; - for _ in 0..padding_bits { - compressed_bits.push('0'); // Add padding zeros to make it byte-aligned - } // Convert the binary string to a byte vector let mut result: Vec = Vec::new(); for chunk in compressed_bits.as_bytes().chunks(8) { - let byte: u8 = chunk.iter().fold(0, |acc, &bit| (acc << 1) | (bit - b'0') as u8); + let byte: u8 = chunk.iter().fold(0, |acc, &bit| (acc << 1) | (bit - b'0')); result.push(byte); } diff --git a/src/decompression.rs b/src/decompression.rs index fe43117..916f933 100644 --- a/src/decompression.rs +++ b/src/decompression.rs @@ -1,5 +1,5 @@ use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode}; -use std::{collections::{BinaryHeap, HashMap}, fs::File, io::{self, Read}}; +use std::{collections::{BinaryHeap, BTreeMap}, fs::File, io::{self, Read}}; pub struct DecompressionTool { file_path: String, @@ -12,8 +12,8 @@ impl DecompressionTool { } } - pub fn read_header(file: &mut File) -> io::Result> { - let mut frequency_map: HashMap = HashMap::new(); + pub fn read_header(file: &mut File) -> io::Result> { + let mut frequency_map: BTreeMap = BTreeMap::new(); let mut num_chars_bytes: [u8; 4] = [0u8; 4]; file.read_exact(&mut num_chars_bytes)?; @@ -38,7 +38,7 @@ impl DecompressionTool { } - pub fn rebuild_tree(&self, frequency_map: &HashMap) -> Option { + pub fn rebuild_tree(&self, frequency_map: &BTreeMap) -> Option { let mut heap: BinaryHeap = BinaryHeap::new(); // Push each character into the heap as a leaf node @@ -69,33 +69,64 @@ impl DecompressionTool { let mut current_node: &HuffmanNode = tree; let mut decoded_string: String = String::new(); - // Extract padding byte from the compressed data - let padding_byte = compressed_data[0]; // First byte is padding - - // Convert compressed data to bits (a vector of booleans) - let bits: Vec = compressed_data[1..] - .into_iter() - .flat_map(|byte| (0..8).map(move |i| (byte >> (7 - i)) & 1 == 1)) + // Extract the padding bits from the first byte (which indicates how many bits were padded) + let padding_bits = compressed_data[0] as usize; // First byte indicates padding + let mut bits: Vec = compressed_data[1..] + .iter() // Use `iter()` to iterate over the bytes + .flat_map(|byte| (0..8).map(move |i| (byte >> (7u8 - i)) & 1u8 == 1u8)) // Convert bytes to bits .collect(); - + + // Handle the last byte padding + if padding_bits > 0 { + // We want to remove the padding bits from the start of the last byte. + let last_byte = &compressed_data[compressed_data.len() - 1]; + let mut last_byte_bits = (0..8) + .map(|i| (last_byte >> (7 - i)) & 1 == 1) + .collect::>(); + + // Truncate the padding bits from the start of the last byte + last_byte_bits = last_byte_bits[padding_bits..].to_vec(); + + // Remove the last byte's bits from the original bitstream and append the truncated bits + bits = bits[..bits.len() - 8].to_vec(); + bits.extend(last_byte_bits); + } + // Traverse the Huffman tree to decode the bits - for (i, bit) in bits.iter().enumerate() { + let mut current_bit = 0; + while current_bit < bits.len() { + let bit = bits[current_bit]; + current_bit += 1; + + // Move down the tree based on the bit current_node = match current_node { HuffmanNode::Leaf(leaf) => { decoded_string.push(leaf.value()); // Append the decoded character tree // Reset to the root of the tree for the next character }, HuffmanNode::Internal(internal) => { + // Traverse the internal node based on the bit (0 = left, 1 = right) if bit { - &internal.right() // Move to the right child if the bit is 1 + let next_node = &internal.right(); // Move to the right child if the bit is 1 + if let HuffmanNode::Leaf(leaf) = next_node { + decoded_string.push(leaf.value()); + tree // Reset to the root of the tree for the next character + } else { + next_node // Continue moving down the internal node tree + } } else { - &internal.left() // Move to the left child if the bit is 0 + let next_node = &internal.left(); // Move to the left child if the bit is 0 + if let HuffmanNode::Leaf(leaf) = next_node { + decoded_string.push(leaf.value()); + tree // Reset to the root of the tree for the next character + } else { + next_node // Continue moving down the internal node tree + } } }, }; - } - + decoded_string } diff --git a/src/huffman.rs b/src/huffman.rs index 2eff03b..32007b0 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -220,47 +220,6 @@ mod tests { assert_eq!(internal_node.base.weight, 5); } - #[test] - fn test_print_tree_top_to_bottom() { - // Create a simple Huffman tree: - // Internal - // / \ - // Internal Leaf(a) - // / \ - // Leaf(b) Leaf(c) - - let node_a = HuffmanNode::Leaf(HuffmanLeafNode::new(5, 'a')); - let node_b = HuffmanNode::Leaf(HuffmanLeafNode::new(3, 'b')); - let node_c = HuffmanNode::Leaf(HuffmanLeafNode::new(2, 'c')); - - // Create the internal nodes with leaf nodes swapped to the right - let internal1 = HuffmanNode::Internal(HuffmanInternalNode::new( - node_c.weight() + node_b.weight(), - node_c, - node_b, - )); - - // Swap to make the internal node have leaf 'a' on the right side - let root = HuffmanNode::Internal(HuffmanInternalNode::new( - internal1.weight() + node_a.weight(), - internal1, - node_a - )); - - // Test the print_tree method - let tree_str = root.print_tree(); - - let expected_str = "\ - Internal Node (Weight: 10) - Internal Node (Weight: 5) - Leaf: c - Leaf: b - Leaf: a - "; - assert_eq!(tree_str, expected_str); - } - - // Test Prefix Code Generation #[test] fn test_generate_prefix_codes() { diff --git a/src/main.rs b/src/main.rs index 914f29d..128c031 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,8 +13,7 @@ fn main() -> io::Result<()> { let input_file = &args[1]; // Path to the input file let compressed_file = &args[2]; // Path to save the compressed file let decompressed_file = &args[3]; // Path to save the decompressed file - - // Step 1: Read the input file + let mut input_content = String::new(); let mut input_file = File::open(input_file)?; input_file.read_to_string(&mut input_content)?;