diff --git a/src/compression.rs b/src/compression.rs index ed5a38d..a6e6d47 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -1,5 +1,5 @@ use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode}; -use std::collections::HashMap; +use std::collections::{BinaryHeap, HashMap}; pub struct CompressionTool { input: String, // todo: should also support streams @@ -12,41 +12,28 @@ impl CompressionTool { } } - pub fn compress(&mut self) -> Result<HashMap<char, i32>, String> { + pub fn compress(&mut self) -> Result<HuffmanNode, String> { let mut map: HashMap<char, i32> = HashMap::new(); for ch in self.input.chars() { let counter: &mut i32 = map.entry(ch).or_insert(0); *counter += 1; } - // Create leaf nodes - let leaf_a = HuffmanLeafNode::new(5, 'a'); - let leaf_b = HuffmanLeafNode::new(12, 'b'); - let leaf_c = HuffmanLeafNode::new(13, 'c'); - let leaf_d = HuffmanLeafNode::new(14, 'd'); - // Create internal nodes with leaf nodes as children - let internal_1 = HuffmanInternalNode::new(17, HuffmanNode::Leaf(leaf_a), HuffmanNode::Leaf(leaf_b)); - let internal_2 = HuffmanInternalNode::new(27, HuffmanNode::Leaf(leaf_c), HuffmanNode::Leaf(leaf_d)); - - // Create an internal node with other internal nodes as children - let root = HuffmanInternalNode::new(44, HuffmanNode::Internal(internal_1), HuffmanNode::Internal(internal_2)); - - // Wrap nodes in the enum - let huffman_tree = HuffmanNode::Internal(root); - - // Accessing the weight of the root - println!("Root weight: {}", huffman_tree.weight()); - - // Accessing the value of leaf nodes - if let Some(value) = huffman_tree.left().unwrap().left().unwrap().value() { - println!("Left leaf value: {}", value); - } + let mut heap: BinaryHeap<HuffmanNode> = BinaryHeap::new(); + for (ch, count) in map { + let leaf: HuffmanLeafNode = HuffmanLeafNode::new(count, ch); + heap.push(HuffmanNode::Leaf(leaf)); + } - if let Some(value) = huffman_tree.right().unwrap().left().unwrap().value() { - println!("Right leaf value: {}", value); - } + while heap.len() > 1 { + let left: 
HuffmanNode = heap.pop().unwrap(); + let right: HuffmanNode = heap.pop().unwrap(); + let combined_weight: i32 = left.weight() + right.weight(); + let internal_node: HuffmanInternalNode = HuffmanInternalNode::new(combined_weight, left, right); + heap.push(HuffmanNode::Internal(internal_node)); + } - Ok(map) + heap.pop().ok_or_else(|| String::from("cannot build a Huffman tree from empty input")) } } diff --git a/src/huffman.rs b/src/huffman.rs index ffdf7f1..b2abf13 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -1,3 +1,8 @@ +use std::cmp::Ordering; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::fmt::Result; + struct HuffmanBaseNode { is_leaf: bool, weight :i32, @@ -31,6 +36,10 @@ impl HuffmanLeafNode { pub fn value(&self) -> char { self.element } + + pub fn weight(&self) -> i32 { + self.base.weight + } } pub struct HuffmanInternalNode { @@ -57,6 +66,10 @@ impl HuffmanInternalNode { pub fn right(&self) -> &HuffmanNode { &self.right } + + pub fn weight(&self) -> i32 { + self.base.weight() + } } pub enum HuffmanNode { @@ -100,3 +113,59 @@ impl HuffmanNode { } } } + +// Implementing Ord and PartialOrd for the HuffmanNode so we can use BinaryHeap; the ordering is deliberately reversed by weight (a lighter node compares as greater) because BinaryHeap is a max-heap and Huffman construction must pop the two lightest nodes first +impl Ord for HuffmanNode { + fn cmp(&self, other: &Self) -> Ordering { + other.weight().cmp(&self.weight()) + } +} + +impl PartialOrd for HuffmanNode { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Eq for HuffmanNode { + +} + +impl PartialEq for HuffmanNode { + fn eq(&self, other: &Self) -> bool { + self.weight() == other.weight() + } +} + +// For printing the tree +impl Debug for HuffmanNode { + fn fmt(&self, f: &mut Formatter) -> Result { + match self { + HuffmanNode::Leaf(leaf) => write!(f, "Leaf({} : {})", leaf.value(), leaf.base.weight()), + HuffmanNode::Internal(internal) => write!(f, "Internal({}, left: {:?}, right: {:?})", internal.base.weight(), internal.left(), internal.right()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Test HuffmanLeafNode creation + #[test] + fn test_leaf_node_creation() { + let 
leaf = HuffmanLeafNode::new(3, 'a'); + assert_eq!(leaf.value(), 'a'); + assert_eq!(leaf.weight(), 3); + } + + // Test HuffmanInternalNode creation + #[test] + fn test_internal_node_creation() { + let left = HuffmanNode::Leaf(HuffmanLeafNode::new(2, 'b')); + let right = HuffmanNode::Leaf(HuffmanLeafNode::new(3, 'a')); + let internal_node = HuffmanInternalNode::new(5, left, right); + assert_eq!(internal_node.base.weight, 5); + } +} + diff --git a/src/main.rs b/src/main.rs index fbd5459..6949163 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,11 +14,7 @@ fn main() { let mut tool = CompressionTool::new(&content); match tool.compress() { - Ok(map) => { - for (ch, count) in map { - println!("character: '{}', count: {}", ch, count); - } - }, - Err(_) => println!("error compressing content") + Ok(root) => println!("{:?}", root), + Err(e) => println!("Error: {}", e), } } diff --git a/tests/compression_integration_test.rs b/tests/compression_integration_test.rs new file mode 100644 index 0000000..5bf0945 --- /dev/null +++ b/tests/compression_integration_test.rs @@ -0,0 +1,111 @@ +use compression_tool::huffman::HuffmanNode; +use compression_tool::compression::CompressionTool; + +#[cfg(test)] +mod tests { + use super::*; + + // Helper function to extract the leaf nodes from the Huffman tree for validation + fn extract_leaves(node: &HuffmanNode, leaves: &mut Vec<(char, i32)>) { + match node { + HuffmanNode::Leaf(leaf) => { + leaves.push((leaf.value(), leaf.weight())); + } + HuffmanNode::Internal(internal) => { + extract_leaves(&internal.left(), leaves); + extract_leaves(&internal.right(), leaves); + } + } + } + + // Test for a simple input string + #[test] + fn test_huffman_tree_structure() { + let input = "abacab"; + let mut tool = CompressionTool::new(input); + + // Compress the input to get the Huffman tree root + let root = tool.compress().expect("Compression failed"); + + // Extract all leaf nodes and their frequencies + let mut leaves = Vec::new(); + 
extract_leaves(&root, &mut leaves); + + // The expected frequencies for "a", "b" and "c" in the string "abacab" + let expected_frequencies = vec![('a', 3), ('b', 2), ('c', 1)]; + + // Sort both vectors so we can compare them + leaves.sort_by(|a, b| a.0.cmp(&b.0)); // Sort by character + let mut expected_frequencies = expected_frequencies; + expected_frequencies.sort_by(|a, b| a.0.cmp(&b.0)); // Sort by character + + // Check that the leaves match the expected frequencies + assert_eq!(leaves, expected_frequencies); + } + + // Test for a case with a more complex string + #[test] + fn test_complex_huffman_tree() { + let input = "this is an example of huffman compression"; + let mut tool = CompressionTool::new(input); + + // Compress the input to get the Huffman tree root + let root = tool.compress().expect("Compression failed"); + + // Extract all leaf nodes and their frequencies + let mut leaves = Vec::new(); + extract_leaves(&root, &mut leaves); + + // Expected per-character frequencies for the more complex string, counted by hand from the input + let expected_frequencies = vec![ + (' ', 6), ('a', 3), ('e', 3), ('s', 4), ('i', 3), + ('n', 3), ('t', 1), ('h', 2), ('m', 3), ('o', 3), + ('f', 3), ('l', 1), ('x', 1), ('p', 2), ('c', 1), + ('r', 1), ('u', 1), + ]; + + // Sort both vectors so we can compare them + leaves.sort_by(|a, b| a.0.cmp(&b.0)); // Sort by character + let mut expected_frequencies = expected_frequencies; + expected_frequencies.sort_by(|a, b| a.0.cmp(&b.0)); // Sort by character + + // Check that the leaves match the expected frequencies + assert_eq!(leaves, expected_frequencies); + } + + // Test to check if the tree is properly built (you can manually check if internal nodes are correct) + #[test] + fn test_tree_structure() { + let input = "aaabbbcc"; + let mut tool = CompressionTool::new(input); + + // Compress the input to get the Huffman tree root + let root = tool.compress().expect("Compression failed"); + + // We should have a tree with 
only two internal nodes (since we only have three distinct characters) + let internal_count = count_internal_nodes(&root); + assert_eq!(internal_count, 2, "The tree should have 2 internal nodes"); + + // Also check the total weight of the tree (should be equal to the sum of character frequencies) + let total_weight = sum_weights(&root); + assert_eq!(total_weight, input.len() as i32, "The total weight should be equal to the length of the input string"); + } + + // Helper function to count the number of internal nodes + fn count_internal_nodes(node: &HuffmanNode) -> i32 { + match node { + HuffmanNode::Leaf(_) => 0, + HuffmanNode::Internal(internal) => { + 1 + count_internal_nodes(&internal.left()) + count_internal_nodes(&internal.right()) + } + } + } + + // Helper function to sum the weights of all leaf nodes (an internal node's own weight is not added, only the sums of its children) + fn sum_weights(node: &HuffmanNode) -> i32 { + match node { + HuffmanNode::Leaf(leaf) => leaf.weight(), + HuffmanNode::Internal(internal) => sum_weights(&internal.left()) + sum_weights(&internal.right()), + } + } +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs deleted file mode 100644 index f9e51ed..0000000 --- a/tests/integration_test.rs +++ /dev/null @@ -1,18 +0,0 @@ -// tests/integration_test.rs -use std::process::Command; - -fn run_test(file_path: &str, expected_output: &str) { - let output = Command::new("cargo") - .arg("run") - .arg("--") - .arg(file_path) - .output() - .expect("Failed to execute command"); - - let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout.trim(), expected_output); -} -#[test] -fn test_valid() { - run_test("tests/test.txt", "valid"); -}