Skip to content

Commit

Permalink
fix: compression/decompression works for small files
Browse files Browse the repository at this point in the history
  • Loading branch information
Murat Yildirim committed Dec 8, 2024
1 parent a8b94ab commit 300cd87
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 70 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
target/
.vscode/
*.dat
*.dat
*.txt
*.zip
.idea/
15 changes: 6 additions & 9 deletions src/compression.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode};
use std::{collections::{BinaryHeap, HashMap}, fs::File, io::{self, Write}};

use std::collections::BTreeMap;
/// Holds the text to be compressed. The methods visible in this file count
/// the per-character frequencies of `input` and write them as a header to
/// the output file created by `compress`.
pub struct CompressionTool {
// The full input text; `generate_frequency_map` walks it with `.chars()`.
input: String, // todo: should also support streams
}
Expand All @@ -12,8 +12,8 @@ impl CompressionTool {
}
}

fn generate_frequency_map(&self) -> HashMap<char, i32> {
let mut map: HashMap<char, i32> = HashMap::new();
fn generate_frequency_map(&self) -> BTreeMap<char, i32> {
let mut map: BTreeMap<char, i32> = BTreeMap::new();

for ch in self.input.chars() {
let counter: &mut i32 = map.entry(ch).or_insert(0);
Expand All @@ -23,7 +23,7 @@ impl CompressionTool {
map
}

fn write_header(&self, file: &mut File, frequency_map: HashMap<char, i32>) -> io::Result<()> {
fn write_header(&self, file: &mut File, frequency_map: BTreeMap<char, i32>) -> io::Result<()> {
let num_chars: u32 = frequency_map.len() as u32;
file.write_all(&num_chars.to_le_bytes())?;

Expand All @@ -40,7 +40,7 @@ impl CompressionTool {
pub fn compress(&mut self, output_file: &str) -> Result<Vec<u8>, String> {
let mut file: File = File::create(output_file).map_err(|e| e.to_string())?;

let frequency_map: HashMap<char, i32> = self.generate_frequency_map();
let frequency_map: BTreeMap<char, i32> = self.generate_frequency_map();

self.write_header(&mut file, frequency_map.clone())
.map_err(|e| format!("Error writing header: {}", e))?;
Expand Down Expand Up @@ -89,14 +89,11 @@ impl CompressionTool {

// Pad the compressed data to be a multiple of 8 bits if necessary
let padding_bits: usize = 8 - compressed_bits.len() % 8;
for _ in 0..padding_bits {
compressed_bits.push('0'); // Add padding zeros to make it byte-aligned
}

// Convert the binary string to a byte vector
let mut result: Vec<u8> = Vec::new();
for chunk in compressed_bits.as_bytes().chunks(8) {
let byte: u8 = chunk.iter().fold(0, |acc, &bit| (acc << 1) | (bit - b'0') as u8);
let byte: u8 = chunk.iter().fold(0, |acc, &bit| (acc << 1) | (bit - b'0'));
result.push(byte);
}

Expand Down
65 changes: 48 additions & 17 deletions src/decompression.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode};
use std::{collections::{BinaryHeap, HashMap}, fs::File, io::{self, Read}};
use std::{collections::{BinaryHeap, BTreeMap}, fs::File, io::{self, Read}};

pub struct DecompressionTool {
file_path: String,
Expand All @@ -12,8 +12,8 @@ impl DecompressionTool {
}
}

pub fn read_header(file: &mut File) -> io::Result<HashMap<char, i32>> {
let mut frequency_map: HashMap<char, i32> = HashMap::new();
pub fn read_header(file: &mut File) -> io::Result<BTreeMap<char, i32>> {
let mut frequency_map: BTreeMap<char, i32> = BTreeMap::new();

let mut num_chars_bytes: [u8; 4] = [0u8; 4];
file.read_exact(&mut num_chars_bytes)?;
Expand All @@ -38,7 +38,7 @@ impl DecompressionTool {
}


pub fn rebuild_tree(&self, frequency_map: &HashMap<char, i32>) -> Option<HuffmanNode> {
pub fn rebuild_tree(&self, frequency_map: &BTreeMap<char, i32>) -> Option<HuffmanNode> {
let mut heap: BinaryHeap<HuffmanNode> = BinaryHeap::new();

// Push each character into the heap as a leaf node
Expand Down Expand Up @@ -69,33 +69,64 @@ impl DecompressionTool {
let mut current_node: &HuffmanNode = tree;
let mut decoded_string: String = String::new();

// Extract padding byte from the compressed data
let padding_byte = compressed_data[0]; // First byte is padding

// Convert compressed data to bits (a vector of booleans)
let bits: Vec<bool> = compressed_data[1..]
.into_iter()
.flat_map(|byte| (0..8).map(move |i| (byte >> (7 - i)) & 1 == 1))
// Extract the padding bits from the first byte (which indicates how many bits were padded)
let padding_bits = compressed_data[0] as usize; // First byte indicates padding
let mut bits: Vec<bool> = compressed_data[1..]
.iter() // Use `iter()` to iterate over the bytes
.flat_map(|byte| (0..8).map(move |i| (byte >> (7u8 - i)) & 1u8 == 1u8)) // Convert bytes to bits
.collect();


// Handle the last byte padding
if padding_bits > 0 {
// We want to remove the padding bits from the start of the last byte.
let last_byte = &compressed_data[compressed_data.len() - 1];
let mut last_byte_bits = (0..8)
.map(|i| (last_byte >> (7 - i)) & 1 == 1)
.collect::<Vec<bool>>();

// Truncate the padding bits from the start of the last byte
last_byte_bits = last_byte_bits[padding_bits..].to_vec();

// Remove the last byte's bits from the original bitstream and append the truncated bits
bits = bits[..bits.len() - 8].to_vec();
bits.extend(last_byte_bits);
}

// Traverse the Huffman tree to decode the bits
for (i, bit) in bits.iter().enumerate() {
let mut current_bit = 0;
while current_bit < bits.len() {
let bit = bits[current_bit];
current_bit += 1;

// Move down the tree based on the bit
current_node = match current_node {
HuffmanNode::Leaf(leaf) => {
decoded_string.push(leaf.value()); // Append the decoded character
tree // Reset to the root of the tree for the next character
},
HuffmanNode::Internal(internal) => {
// Traverse the internal node based on the bit (0 = left, 1 = right)
if bit {
&internal.right() // Move to the right child if the bit is 1
let next_node = &internal.right(); // Move to the right child if the bit is 1
if let HuffmanNode::Leaf(leaf) = next_node {
decoded_string.push(leaf.value());
tree // Reset to the root of the tree for the next character
} else {
next_node // Continue moving down the internal node tree
}
} else {
&internal.left() // Move to the left child if the bit is 0
let next_node = &internal.left(); // Move to the left child if the bit is 0
if let HuffmanNode::Leaf(leaf) = next_node {
decoded_string.push(leaf.value());
tree // Reset to the root of the tree for the next character
} else {
next_node // Continue moving down the internal node tree
}
}
},
};

}

decoded_string
}

Expand Down
41 changes: 0 additions & 41 deletions src/huffman.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,47 +220,6 @@ mod tests {
assert_eq!(internal_node.base.weight, 5);
}

// Checks that `print_tree` on a three-leaf tree produces one line per node:
// the root first, then the nested internal node followed by its two leaves,
// and finally the remaining leaf.
#[test]
fn test_print_tree_top_to_bottom() {
// Build a small Huffman tree (weights in parentheses). Children are drawn in
// the order they are passed to `HuffmanInternalNode::new`; presumably that
// is (left, right) -- confirm against the constructor.
//
//             Internal(10)
//             /          \
//       Internal(5)     Leaf(a)
//        /       \
//    Leaf(c)   Leaf(b)

let node_a = HuffmanNode::Leaf(HuffmanLeafNode::new(5, 'a'));
let node_b = HuffmanNode::Leaf(HuffmanLeafNode::new(3, 'b'));
let node_c = HuffmanNode::Leaf(HuffmanLeafNode::new(2, 'c'));

// Inner node over the two lightest leaves (2 + 3 = 5); `c` is passed before
// `b`, which is the order the expected output below lists them in.
let internal1 = HuffmanNode::Internal(HuffmanInternalNode::new(
node_c.weight() + node_b.weight(),
node_c,
node_b,
));

// Root (5 + 5 = 10): the inner node is passed first, leaf `a` second.
let root = HuffmanNode::Internal(HuffmanInternalNode::new(
internal1.weight() + node_a.weight(),
internal1,
node_a
));

// Render the whole tree to a string.
let tree_str = root.print_tree();

// The trailing `\` after the opening quote skips the first newline, so the
// expected text starts at "Internal Node (Weight: 10)".
// NOTE(review): leading whitespace on the following lines is part of the
// literal -- confirm it matches the indentation `print_tree` emits.
let expected_str = "\
Internal Node (Weight: 10)
Internal Node (Weight: 5)
Leaf: c
Leaf: b
Leaf: a
";
assert_eq!(tree_str, expected_str);
}


// Test Prefix Code Generation
#[test]
fn test_generate_prefix_codes() {
Expand Down
3 changes: 1 addition & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ fn main() -> io::Result<()> {
let input_file = &args[1]; // Path to the input file
let compressed_file = &args[2]; // Path to save the compressed file
let decompressed_file = &args[3]; // Path to save the decompressed file

// Step 1: Read the input file

let mut input_content = String::new();
let mut input_file = File::open(input_file)?;
input_file.read_to_string(&mut input_content)?;
Expand Down

0 comments on commit 300cd87

Please sign in to comment.