#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <fcntl.h>

#ifdef unix
#define __cdecl
#else
#include <io.h>
#endif

//  please address any bugs discovered in this code
//  to the author: karl malbrain, karl_m@acm.org

//  link bit-level I/O

void arc_put1 (unsigned bit);
unsigned arc_get1 ();

//  This code is adapted from Professor Vitter's
//  article, Design and Analysis of Dynamic Huffman Codes,
//  which appeared in JACM October 1987

//  A design trade-off has been made to simplify the
//  code:  a node's block is determined dynamically,
//  and the implicit tree structure is maintained,
//  e.g. explicit node numbers are also implicit.

//  Dynamic huffman table weight ranking
//  is maintained per Professor Vitter's
//  invariant (*) for algorithm FGK:

//  leaves preceed internal nodes of the
//  same weight in a non-decreasing ranking
//  of weights using implicit node numbers:

//  1) leaves slide over internal nodes, internal nodes
//  swap over groups of leaves, leaves are swapped
//  into group leader position, but two internal
//  nodes never change positions relative
//  to one another.

//  2) weights are incremented by 2:
//  leaves always have even weight values;
//  internal nodes always have odd values.

//  3) even node numbers are always right children;
//  odd numbers are left children in the tree.

//  node 2 * HuffSize - 1 is always the tree root;
//  node HuffEsc is the escape node;

//  the tree is initialized by creating an
//  escape node as the root.

//  each new leaf symbol is paired with a new escape
//  node into the previous escape node in the tree,
//  until the last symbol which takes over the
//  tree position of the escape node, and
//  HuffEsc is left at zero.

//  overall table size: 2 * HuffSize

//  huff_init(alphabet_size, potential symbols used)
//  huff_encode(next_symbol)
//  next_symbol = huff_decode()

//  huff_scale(by_bits) -- scale weights and rebalance tree

typedef struct {
    unsigned up,      // next node up the tree
        down,         // pair of down nodes
        symbol,       // node symbol value
        weight;       // node weight
} HTable;

typedef struct {
    unsigned esc,     // the current tree height
        root,         // the root of the tree
        size,         // the alphabet size
        *map;         // mapping for symbols to nodes
    HTable table[1];  // the coding table starts here
} HCoder;

//  initialize an adaptive coder
//  for alphabet size, and count
//  of nodes to be used

HCoder *huff_init (unsigned size, unsigned root)
{
HCoder *huff;

  //  default: all alphabet symbols are used

  if( !root || root > size )
      root = size;

  //  create the initial escape node
  //  at the tree root

  if( root <<= 1 )
    root--;

  huff = malloc (root * sizeof(HTable) + sizeof(HCoder));
  memset (huff->table + 1, 0, root * sizeof(HTable));
  memset (huff, 0, sizeof(HCoder));

  if( huff->size = size )
      huff->map = calloc (size, sizeof(unsigned));

  huff->esc = huff->root = root;
  return huff;
}

// split escape node to incorporate new symbol

unsigned huff_split (HCoder *huff, unsigned symbol)
{
unsigned pair, node;

    //  is the tree already full???

    if( pair = huff->esc )
        huff->esc--;
    else
        return 0;

    //  if this is the last symbol, it moves into
    //  the escape node's old position, and
    //  huff->esc is set to zero.

    //  otherwise, the escape node is promoted to
    //  parent a new escape node and the new symbol.

    if( node = huff->esc ) {
        huff->table[pair].down = node;
        huff->table[pair].weight = 1;
        huff->table[node].up = pair;
        huff->esc--;
    } else
        pair = 0, node = 1;

    //  initialize the new symbol node

    huff->table[node].symbol = symbol;
    huff->table[node].weight = 0;
    huff->table[node].down = 0;
    huff->map[symbol] = node;

    //  initialize a new escape node.

    huff->table[huff->esc].weight = 0;
    huff->table[huff->esc].down = 0;
    huff->table[huff->esc].up = pair;
    return node;
}

//  swap leaf to group leader position
//  return symbol's new node

unsigned huff_leader (HCoder *huff, unsigned node)
{
unsigned weight = huff->table[node].weight;
unsigned leader = node, prev, symbol;

    while( weight == huff->table[leader + 1].weight )
        leader++;

    if( leader == node )
        return node;

    // swap the leaf nodes

    symbol = huff->table[node].symbol;
    prev = huff->table[leader].symbol;

    huff->table[leader].symbol = symbol;
    huff->table[node].symbol = prev;
    huff->map[symbol] = leader;
    huff->map[prev] = node;
    return leader;
}

//  slide internal node up over all leaves of equal weight;
//  or exchange leaf with next smaller weight internal node

//  return node's new position

unsigned huff_slide (HCoder *huff, unsigned node)
{
unsigned next = node;
HTable swap[1];

    *swap = huff->table[next++];

    // if we're sliding an internal node, find the
    // highest possible leaf to exchange with

    if( swap->weight & 1 )
      while( swap->weight > huff->table[next + 1].weight )
          next++;

    //  swap the two nodes

    huff->table[node] = huff->table[next];
    huff->table[next] = *swap;

    huff->table[next].up = huff->table[node].up;
    huff->table[node].up = swap->up;

    //  repair the symbol map and tree structure

    if( swap->weight & 1 ) {
        huff->table[swap->down].up = next;
        huff->table[swap->down - 1].up = next;
        huff->map[huff->table[node].symbol] = node;
    } else {
        huff->table[huff->table[node].down - 1].up = node;
        huff->table[huff->table[node].down].up = node;
        huff->map[swap->symbol] = next;
    }

    return next;
}

//  increment symbol weight and re balance the tree.

void huff_increment (HCoder *huff, unsigned node)
{
unsigned up;

  //  obviate swapping a parent with its child:
  //    increment the leaf and proceed
  //    directly to its parent.

  //  otherwise, promote leaf to group leader position in the tree

  if( huff->table[node].up == node + 1 )
    huff->table[node].weight += 2, node++;
  else
    node = huff_leader (huff, node);

  //  increase the weight of each node and slide
  //  over any smaller weights ahead of it
  //  until reaching the root

  //  internal nodes work upwards from
  //  their initial positions; while
  //  symbol nodes slide over first,
  //  then work up from their final
  //  positions.

  while( huff->table[node].weight += 2, up = huff->table[node].up ) {
    while( huff->table[node].weight > huff->table[node + 1].weight )
        node = huff_slide (huff, node);

    if( huff->table[node].weight & 1 )
        node = up;
    else
        node = huff->table[node].up;
  }
}

//  scale all weights and rebalance the tree

//  zero weight nodes are removed from the tree
//  by sliding them out the left of the rank list

void huff_scale (HCoder *huff, unsigned bits)
{
unsigned node = huff->esc, weight, prev;

  //  work up the tree from the escape node
  //  scaling weights by the value of bits

  while( ++node <= huff->root ) {
    //  recompute the weight of internal nodes;
    //  slide down and out any unused ones

    if( huff->table[node].weight & 1 ) {
      if( weight = huff->table[huff->table[node].down].weight & ~1 )
        weight += huff->table[huff->table[node].down - 1].weight | 1;

    //  remove zero weight leaves by incrementing HuffEsc
    //  and removing them from the symbol map.  take care

    } else if( !(weight = huff->table[node].weight >> bits & ~1) )
      if( huff->map[huff->table[node].symbol] = 0, huff->esc++ )
        huff->esc++;

    // slide the scaled node back down over any
    // previous nodes with larger weights

    huff->table[node].weight = weight;
    prev = node;

    while( weight < huff->table[--prev].weight )
        huff_slide (huff, prev);
  }

  // prepare a new escape node

  huff->table[huff->esc].down = 0;
}

//  send the bits for an escaped symbol

void huff_sendid (HCoder *huff, unsigned symbol)
{
unsigned empty = 0, max;

    //  count the number of empty symbols
    //  before the symbol in the table

    while( symbol-- )
      if( !huff->map[symbol] )
        empty++;

    //  send LSB of this count first, using
    //  as many bits as are required for
    //  the maximum possible count

    if( max = huff->size - (huff->root - huff->esc) / 2 - 1 )
      do arc_put1 (empty & 1), empty >>= 1;
      while( max >>= 1 );
}

//  encode the next symbol

void huff_encode (HCoder *huff, unsigned symbol)
{
unsigned emit = 1, bit;
unsigned up, idx, node;

    if( symbol < huff->size )
        node = huff->map[symbol];
    else
        return;

    //  for a new symbol, direct the receiver to the escape node
    //  but refuse input if table is already full.

    if( !(idx = node) )
      if( !(idx = huff->esc) )
        return;

    //  accumulate the code bits by
    //  working up the tree from
    //  the node to the root

    while( up = huff->table[idx].up )
        emit <<= 1, emit |= idx & 1, idx = up;

    //  send the code, root selector bit first

    while( bit = emit & 1, emit >>= 1 )
        arc_put1 (bit);

    //  send identification and incorporate
    //  new symbols into the tree

    if( !node )
        huff_sendid(huff, symbol), node = huff_split(huff, symbol);

    //  adjust and re-balance the tree

    huff_increment (huff, node);
}

//  read the identification bits
//  for an escaped symbol

unsigned huff_readid (HCoder *huff)
{
unsigned empty = 0, bit = 1, max, symbol;

    //  receive the symbol, LSB first, reading
    //  only the number of bits necessary to
    //  transmit the maximum possible symbol value

    if( max = huff->size - (huff->root - huff->esc) / 2 - 1 )
      do empty |= arc_get1 () ? bit : 0, bit <<= 1;
      while( max >>= 1 );

    //  the count is of unmapped symbols
    //  in the table before the new one

    for( symbol = 0; symbol < huff->size; symbol++ )
      if( !huff->map[symbol] )
        if( !empty-- )
            return symbol;

    //  oops!  our count is too big, either due
    //  to a bit error, or a short node count
    //  given to huff_init.

    return 0;
}

//  decode the next symbol

unsigned huff_decode (HCoder *huff)
{
unsigned node = huff->root;
unsigned symbol, down;

    //  work down the tree from the root
    //  until reaching either a leaf
    //  or the escape node.  A one
    //  bit means go left, a zero
    //  means go right.

    while( down = huff->table[node].down )
      if( arc_get1 () )
        node = down - 1;  // the left child preceeds the right child
      else
        node = down;

    //  sent to the escape node???
    //  refuse to add to a full tree

    if( node == huff->esc )
      if( huff->esc )
        symbol = huff_readid (huff), node = huff_split (huff, symbol);
      else
        return 0;
    else
        symbol = huff->table[node].symbol;

    //  increment weights and rebalance
    //  the coding tree

    huff_increment (huff, node);
    return symbol;
}

#ifdef HUFFSTANDALONE

#include <stdio.h>

FILE *In = stdin, *Out = stdout;
unsigned char ArcBit = 0;
int ArcChar = 0;

int main (int argc, char **argv)
{
int mode, size, symbol;
unsigned mask = ~0;
HCoder *huff;

    if( argc > 1 )
        mode = argv[1][0], argv[1]++;
    else {
        printf ("Usage: %s [cdtls]nn infile outfile\nnn -- alphabet size\ninfile -- source file\noutfile -- output file", argv[0]);
        return 1;
    }

    if( argv[1][0] == 's' )
      argv[1]++, mask = 8191;

    if( argc > 3 )
      if( !(Out = fopen (argv[3], "w")) )
        return 1;

#ifndef unix
    _setmode (_fileno (Out), _O_BINARY);
#endif

    //  literal text

    if( mode == 'l' ) {
      if( !(size = atoi (argv[1])) )
        size = 256;

      huff = huff_init (256, size);
      putc (size >> 8, Out);
      putc (size, Out);

      size = strlen (argv[2]);
      putc (size >> 16, Out);
      putc (size >> 8, Out);
      putc (size, Out);

      while( symbol = *argv[2]++ )
        huff_encode(huff, symbol);

      while( ArcBit )  // flush last few bits
        arc_put1 (0);

      return 0;
    }

    //  alphabet fill

    if( mode == 't' ) {
      if( !(size = atoi (argv[1])) )
        size = 256;

      huff = huff_init (256, size);
      putc (size >> 8, Out);
      putc (size, Out);

      putc (size >> 16, Out);
      putc (size >> 8, Out);
      putc (size, Out);

      for( symbol = 0; symbol < size; symbol++ )
        huff_encode(huff, symbol);

      while( ArcBit )  // flush last few bits
        arc_put1 (0);

      return 0;
    }

    if( argc > 2 )
      if( !(In = fopen (argv[2], "r")) )
        return 1;

#ifndef unix
    _setmode (_fileno (In), _O_BINARY);
#endif

    //  decompression

    if( mode == 'd' ) {
      size = getc(In) << 8;
      size |= getc(In);

      huff = huff_init (256, size);

      size = getc(In) << 16;
      size |= getc(In) << 8;
      size |= getc(In);

      while( size )
        if( symbol = huff_decode(huff), putc (symbol, Out), size-- & mask )
            continue;
        else
            huff_scale(huff, 1);

      return 0;
    }

    // compression

    if( !(size = atoi (argv[1])) )
        size = 256;

    huff = huff_init (256, size);
    putc (size >> 8, Out);
    putc (size, Out);

    fseek(In, 0, 2);
    size = ftell(In);
    fseek (In, 0, 0);

    putc (size >> 16, Out);
    putc (size >> 8, Out);
    putc (size, Out);

    while( size )
      if( symbol = getc(In), huff_encode(huff, symbol), size-- & mask )
        continue;
      else
        huff_scale(huff, 1);

    while( ArcBit )  // flush last few bits
        arc_put1 (0);

    return 0;
}

void arc_put1 (unsigned bit)
{
    ArcChar <<= 1;

    if( bit )
        ArcChar |= 1;

    if( ++ArcBit < 8 )
        return;

    putc (ArcChar, Out);
    ArcChar = ArcBit = 0;
}

unsigned arc_get1 ()
{
    if( !ArcBit )
        ArcChar = getc (In), ArcBit = 8;

    return ArcChar >> --ArcBit & 1;
}
#endif