/* Written 1995/05/27 by Craig Bruce */ #include #include typedef unsigned char uchar; typedef unsigned long ulong; typedef int bool; #define TRUE 1 #define FALSE 0 #define N_LITERALS 256 #define LITERAL_HEAD 256 typedef struct struct_litrec { ulong count; ulong code; uchar codelen; long groupCount; int groupLink; int activeNext; int activePrev; } LITREC; LITREC litrec[ N_LITERALS+1 ]; void main( int argc, char *argv[] ); void CountFile( FILE *fin ); void HuffBuildCodes( void ); void HuffAddBranch( int node, int bitVal ); void PrintSummary( void ); /****************************************************************************/ void main(int argc, char *argv[]) { FILE *fin, *fout; char *outname = "out"; int ifile; if (argc < 2) { CountFile( stdin ); HuffBuildCodes(); PrintSummary(); } else { for( ifile=1; ifile 0) { litrec[i].groupCount = litrec[i].count; litrec[i].groupLink = i; litrec[i].activePrev = LITERAL_HEAD; t = litrec[LITERAL_HEAD].activeNext; litrec[i].activeNext = t; litrec[t].activePrev = i; litrec[LITERAL_HEAD].activeNext = i; activeNodes += 1; } } /* for each interior node of the virtual tree */ while( activeNodes > 1 ) { /* find the two subtrees with the smallest count */ lowCount1 = 0x7fffffff; lowCount2 = 0x7fffffff; for (i=litrec[LITERAL_HEAD].activeNext; i!=LITERAL_HEAD; i=litrec[i].activeNext) { fflush(stdout); if (litrec[i].groupCount < lowCount2) { lowCount1 = lowCount2; lowCount1Ind = lowCount2Ind; lowCount2 = litrec[i].groupCount; lowCount2Ind = i; } else if (litrec[i].groupCount < lowCount1) { lowCount1 = litrec[i].groupCount; lowCount1Ind = i; } } /*printf("the two least frequent groups are 0x%02x(%d) and 0x%02x(%d)\n", lowCount1Ind, litrec[lowCount1Ind].groupCount, lowCount2Ind, litrec[lowCount2Ind].groupCount);*/ /* remove the count2 node from the unprocessed list */ t = litrec[lowCount2Ind].activePrev; t2 = litrec[lowCount2Ind].activeNext; litrec[t].activeNext = t2; litrec[t2].activePrev = t; litrec[lowCount2Ind].activePrev = -1; litrec[lowCount2Ind].activeNext = -1; activeNodes -= 1; /* insert a branch bit to the start of the codes for the literals, */ /* and to all of their descendents */ HuffAddBranch( lowCount1Ind, 0 ); HuffAddBranch( lowCount2Ind, 1 ); /* link the two new nodes into one group, with lowCount1Ind being hd */ t = litrec[lowCount1Ind].groupLink; t2 = litrec[lowCount2Ind].groupLink; litrec[lowCount1Ind].groupLink = t2; litrec[lowCount2Ind].groupLink = t; litrec[lowCount1Ind].groupCount += litrec[lowCount2Ind].groupCount; } } /****************************************************************************/ void HuffAddBranch( int node, int bitVal ) { int i; i = node; do { if (litrec[i].codelen >= 32) { fprintf(stderr, "codelen overflow!, literal=0x%02x\n", i); exit( 1 ); } litrec[i].code |= bitVal << litrec[i].codelen; litrec[i].codelen += 1; i = litrec[i].groupLink; } while( i != node ); } /****************************************************************************/ void PrintSummary( void ) { int i, b; ulong rawBytes, huffBits, huffBytes; rawBytes = 0; huffBits = 0; for (i=0; i<256; i++) { if (litrec[i].count > 0) { if (i<0x20 || i>=0x7f) { printf("0x%02x : ", i); } else { printf("'%c' : ", i); } printf("count=%-6d codelen=%-2d code=", litrec[i].count, litrec[i].codelen); for (b=litrec[i].codelen-1; b>=0; b--) { putchar( (litrec[i].code & (1<