-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhuffman.cpp
More file actions
184 lines (162 loc) · 5.15 KB
/
huffman.cpp
File metadata and controls
184 lines (162 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#include <unordered_map>
#include <vector>
#include <queue>
#include "tree_printer.hpp"
using namespace std;
struct Node
{
char label;
int frequency;
const Node *right;
const Node *left;
Node(int frq, char lab = '_', const Node *right = nullptr, const Node *left = nullptr)
{
this->frequency = frq;
this->label = lab;
this->right = right;
this->left = left;
}
~Node()
{
delete this->right;
delete this->left;
}
int get_frequency() const
{
return this->frequency;
}
bool is_leaf() const
{
return ((this->left == nullptr) && (this->right == nullptr));
}
};
// big thanks to Matteo Italia from
// https://stackoverflow.com/questions/11836643/
// sorting-of-priority-queue-elements-that-are-pointer-type
// This struct and its call in the priority queues
// below is adapted from their solution
template <typename T>
struct CompNodePtrs
{
bool operator()(const T *left, const T *right)
{
return left->frequency > right->frequency;
}
};
// Takes two Node pointers and combines them into a single
// tree by making them the left and right nodes of a root.
// The root's frequency is the sum of the two nodes' frequencies
Node *huffman_combine(const Node *smaller, const Node *larger)
{
int sum = smaller->get_frequency() + larger->get_frequency();
return new Node(sum, '_', larger, smaller);
}
// Takes a reference to a priority queue and performs one
// iteration of the "huffman algorithm." This means it pops
// the top two elemens and combines them into a single tree
// by calling huffman_combine()
void huffman_iterate(std::priority_queue<Node *, std::vector<Node *>, CompNodePtrs<Node>> &huff_queue)
{
Node *top = huff_queue.top();
huff_queue.pop();
Node *next = huff_queue.top();
huff_queue.pop();
huff_queue.push(huffman_combine(top, next));
}
// Takes a reference to a priority queue and performs the
// "huffman algorithm" by calling huffman_iterate() until
// the queue contains only one tree
void build_huff_tree(std::priority_queue<Node *, std::vector<Node *>, CompNodePtrs<Node>> &huff_queue)
{
while (huff_queue.size() > 1)
{
huffman_iterate(huff_queue);
}
}
// Takes a pointer to a root Node, reference to a unordered map,
// and a string representing a character encoding. Performs a
// recursive preorder traversal of the root tree to calculate
// the huffman encodings of each letter in the unordered map.
void calc_order(const Node *root, std::unordered_map<char, string> &codes, string code = "")
{
if (root->is_leaf())
{
codes[root->label] = code;
}
if (root->left)
{
calc_order(root->left, codes, code + "0");
}
if (root->right)
{
calc_order(root->right, codes, code + "1");
}
}
int main()
{
std::unordered_map<char, int> frequencies;
char letters[26] = {'A', 'B', 'C', 'D', 'E',
'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z'};
int sum = 0;
char letter;
vector<Node *> nodes;
// a deeply-heartfelt thanks to the author of this for showing
// me how to terminate reading a file in stdin on EOF:
// https://cplusplus.com/reference/ios/ios/eof/
while (std::cin.get(letter))
{
if (isalpha(letter))
{
std::cout << (char)toupper(letter);
frequencies[(char)toupper(letter)]++;
}
}
// print frequencies for each letter and total
std::cout << "\n\nCounts:\n"
<< "-------\n";
for (char ltr : letters)
{
std::cout << ltr << ": " << frequencies[ltr] << "\n";
// add to sum
sum = sum + frequencies[ltr];
}
std::cout << "Total = " << sum << "\n";
std::cout << "---------------------------------\n\n\n";
// make new node and push to priority queue for later use
for (char letter : letters)
{
nodes.push_back(new Node(frequencies[letter], letter));
}
std::priority_queue huffman_queue(nodes.begin(), nodes.end(),
CompNodePtrs<Node>());
// huffman algorithm on priority queue
if (huffman_queue.size() > 1)
{
build_huff_tree(huffman_queue);
}
// print tree
print_tree(huffman_queue.top(), nullptr, false, sum);
std::cout << "---------------------------------\n";
// calculate codes for each letter
std::unordered_map<char, string> codes;
calc_order(huffman_queue.top(), codes);
std::cout << endl;
// print letter/huffman encoding pair
for (char letter : letters)
{
std::cout << letter << ": " << codes[letter] << endl;
}
// calculate bit requirement for huffman encoding
int huff_bits = 0;
for (char letter : letters)
{
huff_bits = huff_bits + (frequencies[letter] * codes[letter].length());
}
// print constant and huffman bit requirements
std::cout << "\n Fixed encoding: " << (sum * 5) << " bits\n";
std::cout << "Huffman encoding: " << huff_bits << " bits\n\n";
// free heap memory
delete huffman_queue.top();
}