diff --git a/.gitignore b/.gitignore index 64ee209..c74ddbd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target/ -.vscode/ \ No newline at end of file +.vscode/ +*.dat \ No newline at end of file diff --git a/src/compression.rs b/src/compression.rs index bb76783..e413f6e 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -1,5 +1,5 @@ use crate::huffman::{HuffmanLeafNode, HuffmanInternalNode, HuffmanNode}; -use std::collections::{BinaryHeap, HashMap}; +use std::{collections::{BinaryHeap, HashMap}, fs::File, io::{self, Write}}; pub struct CompressionTool { input: String, // todo: should also support streams @@ -12,7 +12,7 @@ impl CompressionTool { } } - pub fn compress(&mut self) -> Result { + fn generate_frequency_map(&self) -> HashMap { let mut map: HashMap = HashMap::new(); for ch in self.input.chars() { @@ -20,12 +20,44 @@ impl CompressionTool { *counter += 1; } + map + } + + // Method to write header with frequency map to the output file + fn write_header(&self, file: &mut File, frequency_map: HashMap) -> io::Result<()> { + // Write the number of unique characters (for future decoding) + let num_chars: u32 = frequency_map.len() as u32; + file.write_all(&num_chars.to_le_bytes())?; + + // Write the frequency table to the file + for (ch, count) in frequency_map { + file.write_all(&ch.to_string().as_bytes())?; + file.write_all(&count.to_le_bytes())?; + } + + // Write a delimiter to indicate the end of the header + file.write_all(&[0x00])?; + + Ok(()) + } + + pub fn compress(&mut self, output_file: &str) -> Result<(), String> { + let mut file: File = File::create(output_file).map_err(|e| e.to_string())?; + + let frequency_map: HashMap = self.generate_frequency_map(); + + self.write_header(&mut file, frequency_map.clone()) + .map_err(|e| format!("Error writing header: {}", e))?; + + + let mut heap: BinaryHeap = BinaryHeap::new(); - for (ch, count) in map { + for (ch, count) in frequency_map { let leaf: HuffmanLeafNode = HuffmanLeafNode::new(count, ch); heap.push(HuffmanNode::Leaf(leaf)); } + // Build the Huffman tree while heap.len() > 1 { // Pop the two nodes with the smallest frequencies let left: HuffmanNode = heap.pop().unwrap(); @@ -39,12 +71,33 @@ impl CompressionTool { let root = heap.pop().unwrap(); + // Generate the prefix codes for each character let mut codes: HashMap = HashMap::new(); root.generate_prefix_codes(&mut codes); - for (ch, code) in &codes { - println!("Character: '{}' -> code: {}", ch, code); + + // Now write the compressed data after the header + let compressed_data = self.compressed_data(&codes); + file.write_all(&compressed_data).map_err(|e| e.to_string())?; + + + Ok(()) + } + + fn compressed_data(&self, codes: &HashMap) -> Vec { + let mut compressed_bits = String::new(); + for ch in self.input.chars() { + compressed_bits.push_str(&codes[&ch]); + } + + // Convert the binary string to byte vector + let mut result = Vec::new(); + for chunk in compressed_bits.as_bytes().chunks(8) { + let byte = chunk.iter().fold(0, |acc, &bit| (acc <<1) | (bit - b'0') as u8); + result.push(byte); } - Ok(root) + result } + + } diff --git a/src/main.rs b/src/main.rs index bddd605..6720eae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,19 +5,20 @@ use compression_tool::compression::CompressionTool; fn main() { let args: Vec = std::env::args().collect(); - let file_path: String = args[1].to_string(); + if args.len() != 3 { + println!("Usage: cargo run "); + return; + } + let input_file: &String = &args[1]; + let output_file: &String = &args[2]; - let mut file: File = File::open(file_path).unwrap(); - + let mut file: File = File::open(input_file).unwrap(); let mut content: String = String::new(); let _ = file.read_to_string(&mut content); - let mut tool = CompressionTool::new(&content); - match tool.compress() { - Ok(root) => { - root.print_tree(); - println!("{:?}", root) - }, + let mut tool: CompressionTool = CompressionTool::new(&content); + match tool.compress(&output_file) { + Ok(_) => println!("Compression successfull, file written to '{}'", output_file), Err(e) => println!("Error: {}", e), } }