/*
 * This is a **very** simple text compression program which performs
 * a "Huffman" encoding of the most common characters in the input file.
 * It is designed to operate as a "unix" style filter, accepting input
 * from "stdin" and writing to "stdout".
 *
 * Syntax:	HFTEXT (Encode | Decode) <input_file >output_file
 *
 * First, the input file is scanned, and a table built up containing
 * the most common characters (highest frequency at the beginning of
 * the table). Then the file is re-read, and each character which is
 * in the table is replaced with a series of one bits equal in number
 * to its (1-based) position in the table, followed by a zero bit. Thus, the
 * highest frequency character is encoded into two bits, the second
 * highest is encoded into three bits etc...
 *
 * Characters not occurring in the table are written with a zero bit,
 * followed by the 7 bits of the ASCII character value.
 *
 * Note that this scheme works only on ASCII text files, and becomes *very*
 * confused if the original file contains characters with the high bit set.
 *
 * Compile command: cc hftext -fop
 */
#include <stdio.h>
#include <file.h>
#define	TSIZE	7		/* Size of common character table */

	/*
	 * ftable - occurrence count for each possible input byte value,
	 *          filled in by the first pass of the encoder.
	 * ocount - number of bits currently held in 'obyte'; counts up while
	 *          writing (write_bit) and down while reading (read_bit).
	 */
	unsigned ftable[256] = 0, ocount = 0;
	/*
	 * ctable - the TSIZE most frequent characters, most frequent first;
	 *          written at the start of the encoded output and read back
	 *          by the decoder.
	 * obyte  - the byte currently being assembled (encode) or consumed
	 *          (decode) one bit at a time.
	 */
	unsigned char ctable[TSIZE] = 0, obyte = 0;

main(argc, argv)
	int argc;
	char *argv[];
{
	int i, j, k;

	stdin  = setbuf(stdin,  1000);
	stdout = setbuf(stdout, 1000);
	
	/* Use MICRO-C's more powerful '&&' to force a zero if !enough args */
	switch((argc > 1) && toupper(*argv[1])) {
		case 'E' :		/* Encode the file */
			*(char*)stdout |= F_BINARY;		/* Convert stdout to BINARY */
			while((i = getc(stdin)) != EOF)
				++ftable[i];
			rewind(stdin);

			/* Build table of most frequent characters */
			for(i=0; i < TSIZE; ++i) {
				k = 0;
				for(j=1; j < 256; ++j)
					if(ftable[j] > ftable[k])
						k = j;
				ctable[i] = k;
				ftable[k] = 0; }

			/* Write the index table */
			fwrite(ctable, TSIZE, stdout);

			/* Process the file */
			while((i = getc(stdin)) != EOF) {
				for(j=0; j < TSIZE; ++j) {
					if(ctable[j] == i)
						break; }
				if(j < TSIZE) {		/* Write a token */
					do
						write_bit(1);
					while(j--);
					write_bit(0); }
				else {				/* Write the character */
					write_bit(0);
					for(k=0; k < 7; ++k) {
						write_bit(i & 0x01);
						i >>= 1; } } }

			/* Clean up output bits */
			while(obyte)
				write_bit(0);
			break;
		case 'D' :		/* Decode the file */
			*(char*)stdin  |= F_BINARY;	/* Convert stdin to BINARY */
			fread(ctable, TSIZE, stdin);
			while((i = read_bit()) != EOF) {
				j = 0;
				if(i) {	/* token */
					while((k = read_bit()) && (k != EOF))
						++j;
					j = ctable[j]; }
				else {		/* Normal character */
					for(k=0; k < 7; ++k)
						j = (j >> 1) | read_bit();
					j >>= 1; }
				putc(j, stdout); }
			break;
		default:
			abort("Use: HFTEXT E|D <input_file >output_file"); }

	fflush(stdout);
}

/*
 * Append one bit to the output stream.
 *
 * Bits are shifted into 'obyte' from the right, so the first bit written
 * ends up as the most significant bit of the byte. Once eight bits have
 * been collected the byte is sent to "stdout" and the accumulator and
 * bit counter are cleared.
 *
 * value - the bit to append (callers pass 0 or 1)
 */
write_bit(value)
	int value;
{
	obyte <<= 1;
	obyte |= value;
	++ocount;
	if(ocount >= 8) {
		putc(obyte, stdout);
		obyte = 0;
		ocount = 0; }
}

/*
 * Read a single bit from the input file
 *
 * Bits are delivered most significant first. When no bits are pending,
 * the next byte is fetched from "stdin" into 'obyte'.
 *
 * Returns: 0 for a zero bit, 0x80 for a one bit (NOT 1 - callers rely
 *          on the bit being in the top position), or EOF when the input
 *          is exhausted.
 */
read_bit()
{
	int i;

	if(!ocount) {
		/*
		 * Fetch into an int before storing in the byte buffer, so that
		 * the EOF test does not depend on how the compiler evaluates an
		 * assignment to an unsigned char.
		 */
		if((i = getc(stdin)) == EOF)
			return EOF;
		obyte = i;
		ocount = 8; }
	i = obyte;
	obyte <<= 1;
	--ocount;
	return i & 0x80;
}
