# Source file: Huffman compressor/compressor.py
"""Compress a text file into a string of '0'/'1' characters using a Huffman code table."""

import math
from collections import Counter


def _load_codes(codes):
    """Read the code table at path *codes* and return ``{symbol: code}``.

    Every useful line has the form ``<symbol> <code>``. The line is split at
    its LAST space so that the space character can itself be a symbol (its
    line then starts with two spaces). Lines containing no space are ignored.
    """
    symbol_codes = {}
    with open(codes, 'r') as f:
        for line in f:
            symbol, sep, code = line.rpartition(' ')
            if sep:
                symbol_codes[symbol] = code.strip()
    return symbol_codes


def compress(ifile, codes, ofile):
    """Encode the text in *ifile* with the table in *codes* and write it to *ofile*.

    Args:
        ifile: path of the text file to compress.
        codes: path of the code table (``<symbol> <code>`` per line).
        ofile: path that receives the concatenated code strings.

    Leading/trailing whitespace of the input is stripped before encoding.
    Letters and whitespace are upper-cased before lookup, and any character
    that has no entry in the table is silently dropped, so the output is not
    guaranteed to round-trip the input exactly. After writing, the entropy
    difference between input and output is printed via :func:`entropy`.
    """
    symbol_codes = _load_codes(codes)

    with open(ifile, 'r') as f:
        itext = f.read().strip()

    pieces = []
    for c in itext:
        if c.isalpha() or c.isspace():
            c = c.upper()
        code = symbol_codes.get(c)
        if code is not None:
            pieces.append(code)
    # Join once instead of growing a string with += for every character.
    compressed = ''.join(pieces)

    with open(ofile, 'w') as f:
        f.write(compressed)
    entropy(itext, compressed)


def _shannon_entropy(text):
    """Return the per-character Shannon entropy of *text* in bits (0 if empty)."""
    total = len(text)
    return sum(
        (count / total) * math.log2(total / count)
        for count in Counter(text).values()
    )


def entropy(a, b):
    """Print and return the entropy of *a* minus the entropy of *b*.

    Both arguments are sequences of characters; the entropy of each is
    computed from its own character frequencies. The printed message is
    unchanged; the numeric difference is additionally returned so callers
    can use it programmatically.
    """
    gain = _shannon_entropy(a) - _shannon_entropy(b)
    print("The information gain from compression is: ", gain)
    return gain


def main():
    """Prompt for the three file names and run the compressor."""
    ifile = input("Enter your input filename: ")
    # The code table is read, not written, by this script, so ask for the
    # file where the codes were already saved.
    codes = input("Enter the filename where you have saved your codes: ")
    ofile = input("Enter the filename where you want your compressed document saved: ")
    compress(ifile, codes, ofile)
    print("Compressed code has been written to:", ofile)


# Guarded so that importing this module does not block on input().
if __name__ == "__main__":
    main()
# Source file: Huffman compressor/decompressor.py
"""Decode a string of '0'/'1' characters back into text using a Huffman code table."""


def _load_codes(codes):
    """Read the code table at path *codes* and return ``{code: symbol}``.

    Every useful line has the form ``<symbol> <code>``. The line is split at
    its LAST space so that the space character can itself be a symbol (its
    line then starts with two spaces). Lines containing no space are ignored.
    """
    code_symbols = {}
    with open(codes, 'r') as f:
        for line in f:
            symbol, sep, code = line.rpartition(' ')
            if sep:
                code_symbols[code.strip()] = symbol
    return code_symbols


def decompress(ifile, codes, ofile):
    """Decode the bit string in *ifile* with the table in *codes* into *ofile*.

    Args:
        ifile: path of the compressed file (text made of code characters).
        codes: path of the code table (``<symbol> <code>`` per line).
        ofile: path that receives the decoded text.

    The whole file is decoded, not just its first line, and whitespace
    between code characters (line breaks, a trailing newline) is skipped so
    it cannot corrupt the pending code. Characters left over at the end that
    do not form a complete code are dropped without error.
    """
    code_symbols = _load_codes(codes)

    with open(ifile, 'r') as f:
        compressed = f.read()

    decoded = []
    current = ''
    for bit in compressed:
        if bit.isspace():
            continue
        current += bit
        symbol = code_symbols.get(current)
        if symbol is not None:
            # A full code word was matched; emit its symbol and start over.
            decoded.append(symbol)
            current = ''

    with open(ofile, 'w') as f:
        f.write(''.join(decoded))


def main():
    """Prompt for the three file names and run the decompressor."""
    ifile = input("Enter your input (compressed) filename: ")
    codes = input("Enter the filename where you have saved your codes: ")
    ofile = input("Enter the filename where you want your decompressed document saved: ")
    decompress(ifile, codes, ofile)
    print("Decompressed code has been written to:", ofile)


# Guarded so that importing this module does not block on input().
if __name__ == "__main__":
    main()
queue> q; + q.push({root, ""}); + while (!q.empty()) { + Node *current = q.front().first; + string currentCode = q.front().second; + q.pop(); + if (current->left) { + q.push({current->left, currentCode + "0"}); + } + if (current->right) { + q.push({current->right, currentCode + "1"}); + } + if (!current->left && !current->right) { + huffmanCodes[current->symbol] = currentCode; + } + } + delete root; + return huffmanCodes; +} +int main(){ + string file; + cout<<"Enter file name: ";//Your input text file goes here + getline(cin, file); + ifstream input (file); + string ofile; + cout<<"Enter your output file name, where your codes will be saved: ";//Your output text file goes here + getline(cin, ofile); + ofstream outputFile(ofile); + unordered_map huffmanCodes = generateHuffmanCodes(input); + for (const auto &pair : huffmanCodes) { + cout << pair.first << " " << pair.second << "\n"; + outputFile << pair.first << " " << pair.second << "\n"; + } + return 0; +}