這是我的第一篇博客,希望大神們批評指正。
首先介紹以下什麼是哈夫曼樹(來自百度百科)
哈夫曼樹─即最優二叉樹,帶權路徑長度最小的二叉樹,經常應用於數據壓縮。 在計算機信息處理中,“哈夫曼編碼”是一種一致性編碼法(又稱“熵編碼法”),用於數據的無損耗壓縮。這一術語是指使用一張特殊的編碼表將源字符(例如某文件中的一個符號)進行編碼。這張編碼表的特殊之處在於,它是根據每一個源字符出現的估算概率而建立起來的(出現概率高的字符使用較短的編碼,反之出現概率低的則使用較長的編碼,這便使編碼之後的字符串的平均期望長度降低,從而達到無損壓縮數據的目的)。
構造哈夫曼樹的主要思想:
構造哈夫曼樹非常簡單,將所有的節點放到一個隊列中,用一個節點替換兩個頻率最低的節點,新節點的頻率就是這兩個節點的頻率之和。這樣,新節點就是兩個被替換節點的父節點了。如此循環,直到隊列中只剩一個節點(樹根)。
這裡用到了最小優先隊列。
我這裡用STL來實現。(這裡有優先隊列的介紹)
template<typename T> struct cmp { bool operator()(TreeNode<T>* t1, TreeNode<T>* t2) { return !(*t1 < *t2); } };
優先隊列的定義:
priority_queue<TreeNode*,vector<TreeNode* >,cmp > pri_que;
哈夫曼樹節點結構
template<typename T> class TreeNode { public: TreeNode():pfather(NULL),plchild(NULL),prchild(NULL) { } T data; TreeNode *pfather; TreeNode *plchild; TreeNode *prchild; bool operator < (const TreeNode& rhs) { return data < rhs.data; } };
構造哈夫曼樹
每次從最小優先隊列取頭兩個節點,合並後放回最小優先隊列,如此重復。
template<typename T> TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //構造哈夫曼樹 { priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que; T *iter = begin; TreeNode<T>* pNode; TreeNode<T>* pf = NULL; while(iter != end) { pNode = new TreeNode<T>; pNode->data = *iter++; pNode->pfather = pf; pri_que.push(pNode); } TreeNode<T>* plchild; TreeNode<T>* prchild; while(pri_que.size() > 1)//取兩個小的合並 { plchild = pri_que.top(); pri_que.pop(); prchild = pri_que.top(); pri_que.pop(); pNode = new TreeNode<T>; pNode->plchild = plchild; pNode->prchild = prchild; pNode->data = plchild->data + prchild->data; pri_que.push(pNode); } pNode = pri_que.top(); pri_que.pop(); return pNode; }
構造哈夫曼樹這個函數的參數是一個結構體,保存著對應字符,其頻率,編碼值。
重載它的+運算符,為了合並時的+運算(只是頻率相加)。
到此為止,已經可以把哈夫曼樹生成出來了。
template<typename T> struct mydata { mydata(){} mydata(int i):freq(i) { } string coded; int freq; T data; bool operator<(const mydata& rhs) { return freq < rhs.freq; } mydata operator+(mydata& rhs) { return mydata(freq + rhs.freq); } };
我們可以通過DFS將每個葉子節點的路徑記錄下來(用一個全局變量數組path),然後得到它的編碼。
當發現當前節點是葉子節點,就把當前的路徑賦值至該葉子節點的編碼屬性(coded)。
const int MAXLEN = 20; char path[MAXLEN] = {0}; template<typename T> void DFS(T* root,int deep = -1, char a = '-') //DFS 得到葉子節點的編碼 { if(root == NULL) return; if(a != '-') path[deep] = a; if(root->plchild == NULL || root->prchild == NULL)//leaf (root->data).coded = string(path,path + deep + 1); if(root->plchild != NULL) DFS(root->plchild, deep + 1, '0'); if(root->prchild != NULL) DFS(root->prchild, deep + 1, '1'); }
這樣整個哈夫曼編碼工作已經完成,為了查看我們的編碼結果,我們可以用BFS跟DFS來看到我們的結果。在這裡我選擇了BFS。
當遍歷到葉子節點,就將其編碼屬性(coded)和其對應字符輸出。
template<typename T,typename U> void BFS(T* root, mydata<U>* data) //BFS 將葉子節點的編碼,提到data指向的數據 { queue<T*> que; que.push(root); T* pT = NULL; while(!que.empty()) { pT = que.front(); //cout<<pT->data.freq<<endl; que.pop(); if(pT->plchild != NULL) que.push(pT->plchild); if(pT->prchild != NULL) que.push(pT->prchild); if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取葉子節點的編碼 { //cout<<(pT->data).data<<":"<<(pT->data).coded<<endl; mydata<U>* pd = data; while((pT->data).data != pd->data) pd++; assert(pd->data == (pT->data).data); pd->coded = (pT->data).coded; } } }
測試驅動代碼
mydata<char> *pdata = new mydata<char>[4]; pdata[0].data = 'a'; pdata[0].freq = 7; pdata[1].data = 'b'; pdata[1].freq = 5; pdata[2].data = 'c'; pdata[2].freq = 2; pdata[3].data = 'd'; pdata[3].freq = 4; TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4); DFS(pihuffmanTree); BFS(pihuffmanTree);
為了更方便的使用我將這些封裝到一個類裡面。
template<typename T> class Huffman { public: void Coded(string& coded);//傳入待輸出的編碼 void DeCode(const string& codedstr,string& decodestr);//輸入待解碼字符串,輸出解碼字符串 void InputData(T* begin,T* end);//傳入數據 private: string FindVal(char c); void m_CalcFreq(T* begin, T* end);//計算輸入數據的頻率 TreeNode<mydata<T> > *root;//huffman根節點 mydata<T>* data; int data_size; T* m_begin;//保存原始數據的開始與結束的位置 T* m_end; //string codedstr; };
輸入數據並計算其頻率。
用map容器來統計輸入字符每個出現的個數。
template<typename T> void Huffman<T>::InputData(T* begin, T* end) { this->m_begin = begin; this->m_end = end; m_CalcFreq(begin, end); } template<typename T> void Huffman<T>::m_CalcFreq(T* begin, T* end) { int len = end - begin; //data_size = len; if(len == 0) return; map<T,int> countMap; map<T,int>::iterator mapIter = countMap.begin(); T *pT = begin; while(pT != end) { mapIter = countMap.find(*pT); if(mapIter != countMap.end())//在map裡有沒有字符*iter ++mapIter->second; else { countMap.insert(make_pair(*pT,1)); } pT++; } data_size = countMap.size(); data = new mydata<T>[data_size]; int i = 0; for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter) { data[i].data = mapIter->first; data[i].freq = mapIter->second; i++; } }
編碼
template<typename T> void Huffman<T>::Coded(string& coded) { root = MakeHuffmanTree(data,data + data_size); DFS(root); BFS(root,data); cout<<"code:"<<endl; for (int i = 0; i < data_size; ++i) { cout<<data[i].data<<":"<<data[i].coded<<endl; } T *begin = m_begin; while (begin != m_end) { coded += FindVal(*begin); begin++; } //string subcode = }
解碼
template<typename T> void Huffman<T>::DeCode(const string& codedstr,string& decodestr) { string::const_iterator iter = codedstr.begin(); TreeNode<mydata<T> >* curNode = root; while (iter != codedstr.end()) { if (curNode->plchild == NULL || curNode->prchild == NULL) { decodestr += (curNode->data).data; curNode = root; continue; } if (*iter == '0') curNode = curNode->plchild; if(*iter == '1') curNode = curNode->prchild; iter++; } }
測試驅動程序
char *pmystr = "cbcddddbbbbaaaaaaa"; Huffman<char> h; h.InputData(pmystr, pmystr + 18); cout<<"originstr: "<<pmystr<<endl; string coded; h.Coded(coded); cout<<"coded: "<<coded<<endl; string decode; h.DeCode(coded,decode); cout<<"decode: "<<decode<<endl;
完整程序(環境:VS2012)
#include <iostream> //#include <algorithm> #include <queue> #include <string> #include <vector> #include <cassert> #include <map> using namespace std; template<typename T> class TreeNode { public: TreeNode():pfather(NULL),plchild(NULL),prchild(NULL) { } T data; TreeNode *pfather; TreeNode *plchild; TreeNode *prchild; bool operator < (const TreeNode& rhs) { return data < rhs.data; } }; template<typename T> struct cmp { bool operator()(TreeNode<T>* t1, TreeNode<T>* t2) { return !(*t1 < *t2); } }; template<typename T> TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //構造哈夫曼樹 { priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que; T *iter = begin; TreeNode<T>* pNode; TreeNode<T>* pf = NULL; while(iter != end) { pNode = new TreeNode<T>; pNode->data = *iter++; pNode->pfather = pf; pri_que.push(pNode); } TreeNode<T>* plchild; TreeNode<T>* prchild; while(pri_que.size() > 1)//取兩個小的合並 { //cout<<static_cast<TreeNode<T>* >(pri_que.top())->data<<endl; //pri_que.pop(); plchild = pri_que.top(); pri_que.pop(); prchild = pri_que.top(); pri_que.pop(); pNode = new TreeNode<T>; pNode->plchild = plchild; pNode->prchild = prchild; pNode->data = plchild->data + prchild->data; pri_que.push(pNode); } pNode = pri_que.top(); pri_que.pop(); return pNode; } template<typename T> struct mydata { mydata(){} mydata(int i):freq(i) { } string coded; int freq; T data; bool operator<(const mydata& rhs) { return freq < rhs.freq; } mydata operator+(mydata& rhs) { return mydata(freq + rhs.freq); } }; template<typename T,typename U> void BFS(T* root, mydata<U>* data) //BFS 將葉子節點的編碼,提到data指向的數據 { queue<T*> que; que.push(root); T* pT = NULL; while(!que.empty()) { pT = que.front(); //cout<<pT->data.freq<<endl; que.pop(); if(pT->plchild != NULL) que.push(pT->plchild); if(pT->prchild != NULL) que.push(pT->prchild); if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取葉子節點的編碼 { //cout<<(pT->data).data<<":"<<(pT->data).coded<<endl; mydata<U>* pd = data; while((pT->data).data != pd->data) pd++; assert(pd->data == (pT->data).data); pd->coded = (pT->data).coded; } } } const int MAXLEN = 20; char path[MAXLEN] = {0}; template<typename T> void DFS(T* root,int deep = -1, char a = '-') //DFS 得到葉子節點的編碼 { if(root == NULL) return; if(a != '-') path[deep] = a; if(root->plchild == NULL || root->prchild == NULL)//leaf (root->data).coded = string(path,path + deep + 1); if(root->plchild != NULL) DFS(root->plchild, deep + 1, '0'); if(root->prchild != NULL) DFS(root->prchild, deep + 1, '1'); } template<typename T> class Huffman { public: void Coded(string& coded); void DeCode(const string& codedstr,string& decodestr); void InputData(T* begin,T* end); private: string FindVal(char c); void m_CalcFreq(T* begin, T* end); TreeNode<mydata<T> > *root; mydata<T>* data; int data_size; T* m_begin; T* m_end; //string codedstr; }; template<typename T> void Huffman<T>::InputData(T* begin, T* end) { this->m_begin = begin; this->m_end = end; m_CalcFreq(begin, end); } template<typename T> void Huffman<T>::m_CalcFreq(T* begin, T* end) { int len = end - begin; //data_size = len; if(len == 0) return; map<T,int> countMap; map<T,int>::iterator mapIter = countMap.begin(); T *pT = begin; while(pT != end) { mapIter = countMap.find(*pT); if(mapIter != countMap.end()) ++mapIter->second; else { countMap.insert(make_pair(*pT,1)); } pT++; } data_size = countMap.size(); data = new mydata<T>[data_size]; int i = 0; for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter) { data[i].data = mapIter->first; data[i].freq = mapIter->second; i++; } } template<typename T> void Huffman<T>::Coded(string& coded) { root = MakeHuffmanTree(data,data + data_size); DFS(root); BFS(root,data); cout<<"code:"<<endl; for (int i = 0; i < data_size; ++i) { cout<<data[i].data<<":"<<data[i].coded<<endl; } T *begin = m_begin; while (begin != m_end) { coded += FindVal(*begin); begin++; } //string subcode = } template<typename T> void Huffman<T>::DeCode(const string& codedstr,string& decodestr) { string::const_iterator iter = codedstr.begin(); TreeNode<mydata<T> >* curNode = root; while (iter != codedstr.end()) { if (curNode->plchild == NULL || curNode->prchild == NULL) { decodestr += (curNode->data).data; curNode = root; continue; } if (*iter == '0') curNode = curNode->plchild; if(*iter == '1') curNode = curNode->prchild; iter++; } } template<typename T> string Huffman<T>::FindVal(char c) { for (int i = 0; i < data_size ; ++i) { if (c != data[i].data) continue; return data[i].coded; } return string(); } int main() { //mydata<char> *pdata = new mydata<char>[4]; //pdata[0].data = 'a'; //pdata[0].freq = 7; //pdata[1].data = 'b'; //pdata[1].freq = 5; //pdata[2].data = 'c'; //pdata[2].freq = 2; //pdata[3].data = 'd'; //pdata[3].freq = 4; ////int a[12]={14,10,56,7,83,22,36,91,3,47,72,0}; //TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4); //DFS(pihuffmanTree); //BFS(pihuffmanTree); //string str = "cbcddddbbbbaaaaaaa"; char *pmystr = "cbcddddbbbbaaaaaaa"; Huffman<char> h; h.InputData(pmystr, pmystr + 18); cout<<"originstr: "<<pmystr<<endl; string coded; h.Coded(coded); cout<<"coded: "<<coded<<endl; string decode; h.DeCode(coded,decode); cout<<"decode: "<<decode<<endl; return 0; }