哈夫曼编码的原理?
6个回答
展开全部
霍夫曼编码的基本思想:输入一个待编码的串,首先统计串中各字符出现的次数,称之为频次。假设统计频次的数组为count[ ],则霍夫曼编码每次找出count数组中值最小的两个分别作为左右孩子,建立它们的父节点,循环这个操作2*n-1-n(即n-1,n是不同的字符数)次,这样就把霍夫曼树建好了。建树的过程需要注意,首先把count数组里面的n个值初始化为霍夫曼树的n个叶子节点,它们的孩子节点的标号初始化为-1,父节点初始化为它本身的标号。接下来是编码,每次从霍夫曼树的叶子节点出发,依次向上找,假设当前的节点标号是i,那么它的父节点必然是myHuffmantree[i].parent,如果i是myHuffmantree[i].parent的左节点,则该节点的路径为0,如果是右节点,则该节点的路径为1。当向上找到一个节点,它的父节点标号就是它本身,就停止(说明该节点已经是根节点)。还有一个需要注意的地方:在查找当前权值最小的两个节点时,那些父节点不是它本身的节点不能考虑进去,因为这些节点已经被处理过了。
推荐于2016-12-02
展开全部
这个是我同学的哈夫曼编码程序
另外还有解码的程序,要的话再商量
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Generic status/boolean codes. In the code visible here only ERROR and OK
// are used, as the return values of Huffman().
#define TRUE 1
#define ERROR 0
#define OK 1
#define FALSE 0
#define INFEASIBLE -1
#define OVERFLOW -2
// Return type of functions that report OK/ERROR.
#define Status int
// Not referenced in the visible code; the literal 128 is used for the
// character table size instead.
#define MAXLENGTH 128
// One node of the Huffman tree. Nodes live in a flat array and refer to
// each other by array index.
struct HTnode
{
    long weight;   // occurrence count of a leaf, or the sum of both children
    int parent;    // index of the parent node; 0 is used as "no parent"
    int lchild;    // index of the left child
    int rchild;    // index of the right child
};
typedef struct HTnode HTNode;
typedef struct HTnode *HuffmanTree;
// Per-character record: how often the character occurs and the code string
// assigned to it.
struct CTnode
{
    long weight;          // number of occurrences counted in the input
    char *coded_string;   // code as a string of '0'/'1' characters, or NULL
};
typedef struct CTnode CharacterTable;
// Array of NUL-terminated code strings, one entry per leaf.
typedef char * *HuffmanCode;
// Input file shared by the functions below: opened in fileopen1(), read by
// Analyse() and Huffman_file_convert(), closed in main().
FILE *fp=NULL;
/*
 * Count how often each 7-bit ASCII character occurs in the global input
 * stream fp and hand the results back to the caller.
 *
 * character_table: receives a newly allocated table of 128 entries indexed
 *                  by character code (weight = occurrence count,
 *                  coded_string = NULL).
 * w:               receives a newly allocated array with the non-zero counts
 *                  in ascending character-code order.
 * chara:           receives a newly allocated array with the matching
 *                  characters (same order, not NUL-terminated).
 * n:               receives the number of distinct characters found.
 *
 * Bytes outside 0..127 are ignored. The stream is read to its end and is not
 * rewound. The caller owns all three arrays. If memory cannot be allocated
 * the program is terminated with EXIT_FAILURE.
 */
void Analyse (CharacterTable * *character_table, long * *w, char * *chara, int &n)
{
    CharacterTable *table;
    long *counts;
    char *symbols;
    int ch;   // int, not char, so that EOF can be told apart from data bytes
    int i, k;

    table=(CharacterTable *)malloc(128*sizeof(CharacterTable));
    if(table == NULL)
    {
        fputs("Analyse: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
    for(i=0; i<128; i++)
    {
        table[i].weight=0;
        table[i].coded_string=NULL;
    }

    // Testing the value returned by fgetc() ends the loop on a read error as
    // well as at end of file; a feof() test alone never terminates on error.
    while((ch=fgetc(fp)) != EOF)
    {
        if(ch>=0 && ch<128)
            table[ch].weight++;
    }

    // Number of distinct characters.
    n=0;
    for(i=0; i<128; i++)
        if(table[i].weight)
            n++;

    counts=(long *)malloc(n*sizeof(long));
    symbols=(char *)malloc(n*sizeof(char));
    if(n>0 && (counts == NULL || symbols == NULL))
    {
        fputs("Analyse: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }

    // Pack the used characters and their counts into the two parallel arrays.
    for(i=0, k=0; i<128; i++)
        if(table[i].weight)
        {
            counts[k]=table[i].weight;
            symbols[k]=(char)i;
            k++;
        }

    (*character_table)=table;
    (*w)=counts;
    (*chara)=symbols;
}
/*
 * Find the two lightest nodes among (*HT)[0..i-1] that have no parent yet
 * (parent == 0).
 *
 * On return *Min1 is the index of the lightest such node and *Min2 the index
 * of the second lightest, so weight[*Min1] <= weight[*Min2].
 * If fewer than two candidates exist, the missing results keep their
 * sentinel values: *Min1 == -1 and/or *Min2 == -2.
 */
void Select (HuffmanTree *HT, int i, int *Min1, int *Min2)
{
    HTNode *nodes=*HT;
    int n, first=-1, second=-2;

    for(n=0; n<i; n++)
    {
        if(nodes[n].parent != 0)
            continue;   // already merged into a subtree

        if(first == -1)
        {
            first=n;
        }
        else if(nodes[n].weight < nodes[first].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            // (Dropping it here would pair the lightest node with a node that
            // is not the second lightest and yield a non-optimal tree.)
            second=first;
            first=n;
        }
        else if(second == -2 || nodes[n].weight < nodes[second].weight)
        {
            second=n;
        }
    }
    *Min1=first;
    *Min2=second;
}
/*
 * Build a Huffman tree for n weights and derive the code string of each leaf.
 *
 * HT: receives a newly allocated array of 2*n-1 nodes; entries 0..n-1 are the
 *     leaves (in the order of w), the last entry is the root. A parent index
 *     of 0 means "no parent".
 * HC: receives a newly allocated array of n NUL-terminated strings made of
 *     '0' (left branch) and '1' (right branch), read from the root down.
 * w:  the n leaf weights.
 * n:  number of leaves.
 *
 * Returns OK on success. Returns ERROR if n < 1 (outputs untouched) or if
 * memory runs out (everything allocated here is released and *HT and *HC are
 * set to NULL). With n == 1 the only leaf is the root and its code is the
 * empty string. The caller owns *HT, *HC and every string in *HC.
 */
Status Huffman(HuffmanTree *HT, HuffmanCode *HC,long *w, int n)
{
    int m, i, Min1, Min2, p1, p2, start;
    char *cd=NULL;              // scratch buffer, filled from the back
    HuffmanCode codes=NULL;
    HTNode *tree;

    if(n<1) return ERROR;
    m=2*n-1;
    tree=(HTNode *)malloc(m*sizeof(HTNode));
    (*HT)=tree;
    if(tree == NULL)
        goto fail;

    // Leaves carry the given weights, internal nodes start empty.
    for(i=0; i<m; i++)
    {
        tree[i].weight=(i<n) ? w[i] : 0;
        tree[i].parent=0;
        tree[i].lchild=0;
        tree[i].rchild=0;
    }

    // Repeatedly merge the two lightest parentless nodes into node i.
    for(i=n; i<m; i++)
    {
        Select(HT, i, &Min1, &Min2);
        tree[Min1].parent=i;
        tree[Min2].parent=i;
        tree[i].lchild=Min1;   // the lighter node goes to the left
        tree[i].rchild=Min2;
        tree[i].weight=tree[Min1].weight + tree[Min2].weight;
    }

    codes=(HuffmanCode)malloc(n*sizeof(char *));
    if(codes == NULL)
        goto fail;
    for(i=0; i<n; i++)
        codes[i]=NULL;          // lets the failure path free a partial table

    // A code has at most n-1 symbols, so n bytes suffice including the NUL.
    cd=(char *)malloc(n*sizeof(char));
    if(cd == NULL)
        goto fail;
    cd[n-1]='\0';

    for(i=0; i<n; i++)
    {
        // Walk from the leaf up to the root, writing the path backwards.
        start=n-1;
        for(p1=i, p2=tree[p1].parent; p2!=0; p1=p2, p2=tree[p1].parent)
            cd[--start]=(tree[p2].lchild == p1) ? '0' : '1';

        codes[i]=(char *)malloc((n-start)*sizeof(char));
        if(codes[i] == NULL)
            goto fail;
        strcpy(codes[i], &cd[start]);
    }

    free(cd);
    (*HC)=codes;
    return OK;

fail:
    if(codes != NULL)
    {
        for(i=0; i<n; i++)
            free(codes[i]);
        free(codes);
    }
    free(cd);
    free(tree);
    (*HT)=NULL;
    (*HC)=NULL;
    return ERROR;
}
/*
 * Convert each weight to its decimal text form.
 *
 * stringnumber: array of at least `leaves` slots; slot i receives a newly
 *               allocated NUL-terminated string for w[i]. The caller must
 *               free() each string. A slot is set to NULL if the conversion
 *               or the allocation fails.
 * w:            the weights to convert.
 * leaves:       number of weights.
 *
 * Zero is rendered as "0" and negative values with a leading '-'.
 */
void Weinumber_to_stringnumber(char * *stringnumber, long *w, int leaves)
{
    char tmp[30];   // comfortably holds a 64-bit long including sign and NUL
    int i, len;

    for(i=0; i<leaves; i++)
    {
        len=snprintf(tmp, sizeof tmp, "%ld", w[i]);
        if(len < 0 || len >= (int)sizeof tmp)
        {
            stringnumber[i]=NULL;
            continue;
        }
        stringnumber[i]=(char *)malloc((size_t)len+1);
        if(stringnumber[i] != NULL)
            memcpy(stringnumber[i], tmp, (size_t)len+1);
    }
}
/*
 * Write the code dictionary to "weight.txt", one line per character:
 *
 *     <space><character><TAB><weight in decimal><TAB>'<code>'<newline>
 *
 * w:         weight of each character.
 * character: the characters, parallel to w.
 * leaves:    number of entries.
 * HC:        pointer to the table of code strings, parallel to w.
 *
 * If the file cannot be created a diagnostic is printed and nothing is
 * written. All temporary memory is released before returning.
 */
void Save_huffman_weight_dictionary(long *w, char *character, int leaves, HuffmanCode *HC)
{
    char * *stringnumber;
    int i;
    FILE *fp1;

    fp1=fopen("weight.txt", "w");
    if(fp1 == NULL)
    {
        perror("weight.txt");
        return;
    }
    stringnumber=(char * *)malloc(leaves * sizeof(char *));
    if(stringnumber == NULL && leaves > 0)
    {
        fclose(fp1);
        return;
    }
    Weinumber_to_stringnumber(stringnumber, w, leaves);
    for(i=0; i<leaves; i++)
    {
        fputc(' ', fp1);
        fputc(character[i], fp1);
        fputc('\t', fp1);
        if(stringnumber[i] != NULL)
            fputs(stringnumber[i], fp1);
        fputc('\t', fp1);
        fputc('\'', fp1);
        fputs((*HC)[i], fp1);
        fputc('\'', fp1);
        fputc('\n', fp1);
        free(stringnumber[i]);   // the number string is only needed for this line
    }
    free(stringnumber);
    fclose(fp1);
}
/*
 * Encode the global input stream fp into "coded.txt".
 *
 * HC:              pointer to the table of code strings, ordered by ascending
 *                  character code of the characters that occur. The caller's
 *                  pointer is NOT modified.
 * character_table: 128-entry table indexed by character code; for every entry
 *                  with a non-zero weight, coded_string is set to the
 *                  matching string from *HC (the strings are shared, not
 *                  copied).
 *
 * fp must already be open and positioned at the start of the data. Each
 * input byte in 0..127 that has a code is replaced by its '0'/'1' string;
 * all other bytes are skipped. If the output file cannot be created a
 * diagnostic is printed and nothing is encoded.
 */
void Huffman_file_convert(HuffmanCode *HC, CharacterTable *character_table)
{
    int i, next;
    int ch;   // int, not char, so that EOF can be told apart from data bytes
    FILE *fp2=fopen("coded.txt","w");

    if(fp2 == NULL)
    {
        perror("coded.txt");
        return;
    }

    // Attach each code to its character. An index is used instead of
    // advancing *HC so the caller's table pointer stays valid.
    next=0;
    for(i=0; i<128; i++)
        if(character_table[i].weight)
            character_table[i].coded_string=(*HC)[next++];

    // Stopping on EOF from fgetc() also ends the loop on a read error.
    while((ch=fgetc(fp)) != EOF)
    {
        // The range check keeps bytes >= 128 from indexing past the table.
        if( (ch>=0 && ch<128) && (character_table[ch].weight) )
            fputs(character_table[ch].coded_string,fp2);
    }
    fclose(fp2);
}
/*
 * Ask on stdin for the name of the file to encode and open it for reading.
 * The prompt is repeated until a file can be opened; the stream is stored in
 * the global fp. If no name can be read at all (for example stdin is at end
 * of file) the program is terminated with EXIT_FAILURE instead of prompting
 * forever.
 */
void fileopen1()
{
    char filename[100];
    do{
        printf("\n\n\t请输入要编码的文件:");
        // The field width keeps the name within the buffer (99 chars + NUL).
        if(scanf("%99s", filename) != 1)
        {
            fputs("\n\t读取文件名失败!\n", stderr);
            exit(EXIT_FAILURE);
        }
        if ((fp=fopen(filename,"r"))==NULL)
            printf("\n\t不能打开此文件! 请重新输入!\n");
    }while(!fp);
}
void main()
{
HuffmanTree Ht, *ht;//three level pointer
HuffmanCode Hc, *hc;
CharacterTable *CT, * *character_table;
long *weight, * *w;
char * character, * *chara;
int leave; //the all leaves number
ht=&Ht;
hc=&Hc;
w=&weight;
chara=&character;
character_table=&CT;
fileopen1();
Analyse(character_table, w, chara, leave);
fseek(fp, 0, 0);//将文件指针还原
Huffman(ht, hc, weight, leave);//构建哈弗曼树!
Save_huffman_weight_dictionary(weight, character, leave, hc);
Huffman_file_convert(hc, CT);
fclose(fp);
}
另外还有解码的程序,要的话再商量
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Generic status/boolean codes. In the code visible here only ERROR and OK
// are used, as the return values of Huffman().
#define TRUE 1
#define ERROR 0
#define OK 1
#define FALSE 0
#define INFEASIBLE -1
#define OVERFLOW -2
// Return type of functions that report OK/ERROR.
#define Status int
// Not referenced in the visible code; the literal 128 is used for the
// character table size instead.
#define MAXLENGTH 128
// One node of the Huffman tree. Nodes live in a flat array and refer to
// each other by array index.
struct HTnode
{
    long weight;   // occurrence count of a leaf, or the sum of both children
    int parent;    // index of the parent node; 0 is used as "no parent"
    int lchild;    // index of the left child
    int rchild;    // index of the right child
};
typedef struct HTnode HTNode;
typedef struct HTnode *HuffmanTree;
// Per-character record: how often the character occurs and the code string
// assigned to it.
struct CTnode
{
    long weight;          // number of occurrences counted in the input
    char *coded_string;   // code as a string of '0'/'1' characters, or NULL
};
typedef struct CTnode CharacterTable;
// Array of NUL-terminated code strings, one entry per leaf.
typedef char * *HuffmanCode;
// Input file shared by the functions below: opened in fileopen1(), read by
// Analyse() and Huffman_file_convert(), closed in main().
FILE *fp=NULL;
/*
 * Count how often each 7-bit ASCII character occurs in the global input
 * stream fp and hand the results back to the caller.
 *
 * character_table: receives a newly allocated table of 128 entries indexed
 *                  by character code (weight = occurrence count,
 *                  coded_string = NULL).
 * w:               receives a newly allocated array with the non-zero counts
 *                  in ascending character-code order.
 * chara:           receives a newly allocated array with the matching
 *                  characters (same order, not NUL-terminated).
 * n:               receives the number of distinct characters found.
 *
 * Bytes outside 0..127 are ignored. The stream is read to its end and is not
 * rewound. The caller owns all three arrays. If memory cannot be allocated
 * the program is terminated with EXIT_FAILURE.
 */
void Analyse (CharacterTable * *character_table, long * *w, char * *chara, int &n)
{
    CharacterTable *table;
    long *counts;
    char *symbols;
    int ch;   // int, not char, so that EOF can be told apart from data bytes
    int i, k;

    table=(CharacterTable *)malloc(128*sizeof(CharacterTable));
    if(table == NULL)
    {
        fputs("Analyse: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
    for(i=0; i<128; i++)
    {
        table[i].weight=0;
        table[i].coded_string=NULL;
    }

    // Testing the value returned by fgetc() ends the loop on a read error as
    // well as at end of file; a feof() test alone never terminates on error.
    while((ch=fgetc(fp)) != EOF)
    {
        if(ch>=0 && ch<128)
            table[ch].weight++;
    }

    // Number of distinct characters.
    n=0;
    for(i=0; i<128; i++)
        if(table[i].weight)
            n++;

    counts=(long *)malloc(n*sizeof(long));
    symbols=(char *)malloc(n*sizeof(char));
    if(n>0 && (counts == NULL || symbols == NULL))
    {
        fputs("Analyse: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }

    // Pack the used characters and their counts into the two parallel arrays.
    for(i=0, k=0; i<128; i++)
        if(table[i].weight)
        {
            counts[k]=table[i].weight;
            symbols[k]=(char)i;
            k++;
        }

    (*character_table)=table;
    (*w)=counts;
    (*chara)=symbols;
}
/*
 * Find the two lightest nodes among (*HT)[0..i-1] that have no parent yet
 * (parent == 0).
 *
 * On return *Min1 is the index of the lightest such node and *Min2 the index
 * of the second lightest, so weight[*Min1] <= weight[*Min2].
 * If fewer than two candidates exist, the missing results keep their
 * sentinel values: *Min1 == -1 and/or *Min2 == -2.
 */
void Select (HuffmanTree *HT, int i, int *Min1, int *Min2)
{
    HTNode *nodes=*HT;
    int n, first=-1, second=-2;

    for(n=0; n<i; n++)
    {
        if(nodes[n].parent != 0)
            continue;   // already merged into a subtree

        if(first == -1)
        {
            first=n;
        }
        else if(nodes[n].weight < nodes[first].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            // (Dropping it here would pair the lightest node with a node that
            // is not the second lightest and yield a non-optimal tree.)
            second=first;
            first=n;
        }
        else if(second == -2 || nodes[n].weight < nodes[second].weight)
        {
            second=n;
        }
    }
    *Min1=first;
    *Min2=second;
}
/*
 * Build a Huffman tree for n weights and derive the code string of each leaf.
 *
 * HT: receives a newly allocated array of 2*n-1 nodes; entries 0..n-1 are the
 *     leaves (in the order of w), the last entry is the root. A parent index
 *     of 0 means "no parent".
 * HC: receives a newly allocated array of n NUL-terminated strings made of
 *     '0' (left branch) and '1' (right branch), read from the root down.
 * w:  the n leaf weights.
 * n:  number of leaves.
 *
 * Returns OK on success. Returns ERROR if n < 1 (outputs untouched) or if
 * memory runs out (everything allocated here is released and *HT and *HC are
 * set to NULL). With n == 1 the only leaf is the root and its code is the
 * empty string. The caller owns *HT, *HC and every string in *HC.
 */
Status Huffman(HuffmanTree *HT, HuffmanCode *HC,long *w, int n)
{
    int m, i, Min1, Min2, p1, p2, start;
    char *cd=NULL;              // scratch buffer, filled from the back
    HuffmanCode codes=NULL;
    HTNode *tree;

    if(n<1) return ERROR;
    m=2*n-1;
    tree=(HTNode *)malloc(m*sizeof(HTNode));
    (*HT)=tree;
    if(tree == NULL)
        goto fail;

    // Leaves carry the given weights, internal nodes start empty.
    for(i=0; i<m; i++)
    {
        tree[i].weight=(i<n) ? w[i] : 0;
        tree[i].parent=0;
        tree[i].lchild=0;
        tree[i].rchild=0;
    }

    // Repeatedly merge the two lightest parentless nodes into node i.
    for(i=n; i<m; i++)
    {
        Select(HT, i, &Min1, &Min2);
        tree[Min1].parent=i;
        tree[Min2].parent=i;
        tree[i].lchild=Min1;   // the lighter node goes to the left
        tree[i].rchild=Min2;
        tree[i].weight=tree[Min1].weight + tree[Min2].weight;
    }

    codes=(HuffmanCode)malloc(n*sizeof(char *));
    if(codes == NULL)
        goto fail;
    for(i=0; i<n; i++)
        codes[i]=NULL;          // lets the failure path free a partial table

    // A code has at most n-1 symbols, so n bytes suffice including the NUL.
    cd=(char *)malloc(n*sizeof(char));
    if(cd == NULL)
        goto fail;
    cd[n-1]='\0';

    for(i=0; i<n; i++)
    {
        // Walk from the leaf up to the root, writing the path backwards.
        start=n-1;
        for(p1=i, p2=tree[p1].parent; p2!=0; p1=p2, p2=tree[p1].parent)
            cd[--start]=(tree[p2].lchild == p1) ? '0' : '1';

        codes[i]=(char *)malloc((n-start)*sizeof(char));
        if(codes[i] == NULL)
            goto fail;
        strcpy(codes[i], &cd[start]);
    }

    free(cd);
    (*HC)=codes;
    return OK;

fail:
    if(codes != NULL)
    {
        for(i=0; i<n; i++)
            free(codes[i]);
        free(codes);
    }
    free(cd);
    free(tree);
    (*HT)=NULL;
    (*HC)=NULL;
    return ERROR;
}
/*
 * Convert each weight to its decimal text form.
 *
 * stringnumber: array of at least `leaves` slots; slot i receives a newly
 *               allocated NUL-terminated string for w[i]. The caller must
 *               free() each string. A slot is set to NULL if the conversion
 *               or the allocation fails.
 * w:            the weights to convert.
 * leaves:       number of weights.
 *
 * Zero is rendered as "0" and negative values with a leading '-'.
 */
void Weinumber_to_stringnumber(char * *stringnumber, long *w, int leaves)
{
    char tmp[30];   // comfortably holds a 64-bit long including sign and NUL
    int i, len;

    for(i=0; i<leaves; i++)
    {
        len=snprintf(tmp, sizeof tmp, "%ld", w[i]);
        if(len < 0 || len >= (int)sizeof tmp)
        {
            stringnumber[i]=NULL;
            continue;
        }
        stringnumber[i]=(char *)malloc((size_t)len+1);
        if(stringnumber[i] != NULL)
            memcpy(stringnumber[i], tmp, (size_t)len+1);
    }
}
/*
 * Write the code dictionary to "weight.txt", one line per character:
 *
 *     <space><character><TAB><weight in decimal><TAB>'<code>'<newline>
 *
 * w:         weight of each character.
 * character: the characters, parallel to w.
 * leaves:    number of entries.
 * HC:        pointer to the table of code strings, parallel to w.
 *
 * If the file cannot be created a diagnostic is printed and nothing is
 * written. All temporary memory is released before returning.
 */
void Save_huffman_weight_dictionary(long *w, char *character, int leaves, HuffmanCode *HC)
{
    char * *stringnumber;
    int i;
    FILE *fp1;

    fp1=fopen("weight.txt", "w");
    if(fp1 == NULL)
    {
        perror("weight.txt");
        return;
    }
    stringnumber=(char * *)malloc(leaves * sizeof(char *));
    if(stringnumber == NULL && leaves > 0)
    {
        fclose(fp1);
        return;
    }
    Weinumber_to_stringnumber(stringnumber, w, leaves);
    for(i=0; i<leaves; i++)
    {
        fputc(' ', fp1);
        fputc(character[i], fp1);
        fputc('\t', fp1);
        if(stringnumber[i] != NULL)
            fputs(stringnumber[i], fp1);
        fputc('\t', fp1);
        fputc('\'', fp1);
        fputs((*HC)[i], fp1);
        fputc('\'', fp1);
        fputc('\n', fp1);
        free(stringnumber[i]);   // the number string is only needed for this line
    }
    free(stringnumber);
    fclose(fp1);
}
/*
 * Encode the global input stream fp into "coded.txt".
 *
 * HC:              pointer to the table of code strings, ordered by ascending
 *                  character code of the characters that occur. The caller's
 *                  pointer is NOT modified.
 * character_table: 128-entry table indexed by character code; for every entry
 *                  with a non-zero weight, coded_string is set to the
 *                  matching string from *HC (the strings are shared, not
 *                  copied).
 *
 * fp must already be open and positioned at the start of the data. Each
 * input byte in 0..127 that has a code is replaced by its '0'/'1' string;
 * all other bytes are skipped. If the output file cannot be created a
 * diagnostic is printed and nothing is encoded.
 */
void Huffman_file_convert(HuffmanCode *HC, CharacterTable *character_table)
{
    int i, next;
    int ch;   // int, not char, so that EOF can be told apart from data bytes
    FILE *fp2=fopen("coded.txt","w");

    if(fp2 == NULL)
    {
        perror("coded.txt");
        return;
    }

    // Attach each code to its character. An index is used instead of
    // advancing *HC so the caller's table pointer stays valid.
    next=0;
    for(i=0; i<128; i++)
        if(character_table[i].weight)
            character_table[i].coded_string=(*HC)[next++];

    // Stopping on EOF from fgetc() also ends the loop on a read error.
    while((ch=fgetc(fp)) != EOF)
    {
        // The range check keeps bytes >= 128 from indexing past the table.
        if( (ch>=0 && ch<128) && (character_table[ch].weight) )
            fputs(character_table[ch].coded_string,fp2);
    }
    fclose(fp2);
}
/*
 * Ask on stdin for the name of the file to encode and open it for reading.
 * The prompt is repeated until a file can be opened; the stream is stored in
 * the global fp. If no name can be read at all (for example stdin is at end
 * of file) the program is terminated with EXIT_FAILURE instead of prompting
 * forever.
 */
void fileopen1()
{
    char filename[100];
    do{
        printf("\n\n\t请输入要编码的文件:");
        // The field width keeps the name within the buffer (99 chars + NUL).
        if(scanf("%99s", filename) != 1)
        {
            fputs("\n\t读取文件名失败!\n", stderr);
            exit(EXIT_FAILURE);
        }
        if ((fp=fopen(filename,"r"))==NULL)
            printf("\n\t不能打开此文件! 请重新输入!\n");
    }while(!fp);
}
void main()
{
HuffmanTree Ht, *ht;//three level pointer
HuffmanCode Hc, *hc;
CharacterTable *CT, * *character_table;
long *weight, * *w;
char * character, * *chara;
int leave; //the all leaves number
ht=&Ht;
hc=&Hc;
w=&weight;
chara=&character;
character_table=&CT;
fileopen1();
Analyse(character_table, w, chara, leave);
fseek(fp, 0, 0);//将文件指针还原
Huffman(ht, hc, weight, leave);//构建哈弗曼树!
Save_huffman_weight_dictionary(weight, character, leave, hc);
Huffman_file_convert(hc, CT);
fclose(fp);
}
本回答被提问者采纳
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
展开全部
最简单的理解方式是:把最常用的词用最短的代码表示,可以理解为缩写
比如NBA就代表National Basketball Association这么多字母了,这样如果把文章中的所有National Basketball Association都改成NBA是不是能省很多字,解说员都是这么做的,呵呵
比如NBA就代表National Basketball Association这么多字母了,这样如果把文章中的所有National Basketball Association都改成NBA是不是能省很多字,解说员都是这么做的,呵呵
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
展开全部
带权路径长度最小的二叉树(最优二叉树)
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
展开全部
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询