Back to... Zip-Ada

Source file : length_limited_huffman_code_lengths.adb


--  Copyright 2011 Google Inc. All Rights Reserved.

--  Licensed under the Apache License, Version 2.0 (the "License");
--  you may not use this file except in compliance with the License.
--  You may obtain a copy of the License at

--  http://www.apache.org/licenses/LICENSE-2.0

--  Unless required by applicable law or agreed to in writing, software
--  distributed under the License is distributed on an "AS IS" BASIS,
--  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--  See the License for the specific language governing permissions and
--  limitations under the License.

--  Author: lode.vandevenne [*] gmail [*] com (Lode Vandevenne)
--  Author: jyrki.alakuijala [*] gmail [*] com (Jyrki Alakuijala)

--  Bounded package merge algorithm, based on the paper
--  "A Fast and Space-Economical Algorithm for Length-Limited Coding
--  Jyrki Katajainen, Alistair Moffat, Andrew Turpin".

--  Translated by G. de Montmollin to Ada from katajainen.c (Zopfli project), 7-Feb-2016
--
--  Main technical differences to katajainen.c:
--    - pointers are not used, array indices instead
--    - all structures are allocated on stack
--    - sub-programs are nested, then unneeded parameters are removed

procedure Length_limited_Huffman_code_lengths(
  frequencies : in  Count_Array;
  bit_lengths : out Length_Array
)
is
  subtype Index_Type is Count_Type;

  null_index: constant Index_Type:= Index_Type'Last;

  --  Nodes forming chains.
  type Node is record
    weight : Count_Type;
    count  : Count_Type;               --  Number of leaves before this chain.
    tail   : Index_Type:= null_index;  --  Previous node(s) of this chain, or null_index if none.
    in_use : Boolean:= False;          --  Tracking for garbage collection.
  end record;

  type Leaf_Node is record
    weight : Count_Type;
    symbol : Alphabet;
  end record;

  --  Memory pool for nodes.
  pool: array(0 .. Index_Type(2 * max_bits * (max_bits + 1) - 1)) of Node;
  pool_next: Index_Type:= pool'First;

  type Index_pair is array(Index_Type'(0)..1) of Index_Type;
  lists: array(0..Index_Type(max_bits-1)) of Index_pair;

  type Leaf_array is array(Index_Type range <>) of Leaf_Node;
  leaves: Leaf_array(0..frequencies'Length-1);

  num_symbols: Count_Type := 0;  --  Amount of symbols with frequency > 0.
  num_Boundary_PM_runs: Count_Type;

  too_many_symbols_for_length_limit : exception;
  zero_length_but_nonzero_frequency : exception;
  nonzero_length_but_zero_frequency : exception;
  length_exceeds_length_limit       : exception;
  buggy_sorting                     : exception;

  procedure Init_Node(weight, count: Count_Type; tail, node: Index_Type) is
  begin
    pool(node).weight := weight;
    pool(node).count  := count;
    pool(node).tail   := tail;
    pool(node).in_use := True;
  end Init_Node;

  --  Finds a free location in the memory pool. Performs garbage collection if needed.
  --  If use_lists = True, used to mark in-use nodes during garbage collection.

  function Get_Free_Node(use_lists: Boolean) return Index_Type is
    node: Index_Type;
  begin
    loop
      if pool_next > pool'Last then
        --  Garbage collection.
        for i in pool'Range loop
          pool(i).in_use := False;
        end loop;
        if use_lists then
          for i in 0 .. Index_Type(max_bits * 2 - 1) loop
            node:= lists(i / 2)(i mod 2);
            while node /= null_index loop
              pool(node).in_use := True;
              node := pool(node).tail;
            end loop;
          end loop;
        end if;
        pool_next:= pool'First;
      end if;
      exit when not pool(pool_next).in_use;  -- Found one.
      pool_next:= pool_next + 1;
    end loop;
    pool_next:= pool_next + 1;
    return pool_next - 1;
  end Get_Free_Node;

  --  Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
  --  new chain is, depending on the weights, a leaf or a combination of two chains
  --  from the previous list.
  --  index: The index of the list in which a new chain or leaf is required.
  --  final: Whether this is the last time this function is called. If it is then it
  --  is no more needed to recursively call self.

  procedure Boundary_PM(index: Index_Type; final: Boolean) is
    newchain: Index_Type;
    oldchain: Index_Type;
    lastcount: constant Count_Type:= pool(lists(index)(1)).count;  --  Count of last chain of list.
    sum: Count_Type;
  begin
    if index = 0 and lastcount >= num_symbols then
      return;
    end if;
    newchain:= Get_Free_Node(use_lists => True);
    oldchain:= lists(index)(1);
    --  These are set up before the recursive calls below, so that there is a list
    --  pointing to the new node, to let the garbage collection know it's in use.
    lists(index) := (oldchain, newchain);

    if index = 0 then
      --  New leaf node in list 0.
      Init_Node(leaves(lastcount).weight, lastcount + 1, null_index, newchain);
    else
      sum:= pool(lists(index - 1)(0)).weight + pool(lists(index - 1)(1)).weight;
      if lastcount < num_symbols and then sum > leaves(lastcount).weight then
        --  New leaf inserted in list, so count is incremented.
        Init_Node(leaves(lastcount).weight, lastcount + 1, pool(oldchain).tail, newchain);
      else
        Init_Node(sum, lastcount, lists(index - 1)(1), newchain);
        if not final then
          --  Two lookahead chains of previous list used up, create new ones.
          Boundary_PM(index - 1, False);
          Boundary_PM(index - 1, False);
        end if;
      end if;
    end if;
  end Boundary_PM;

  --  Initializes each list with as lookahead chains the two leaves with lowest weights.

  procedure Init_Lists is
    node0: constant Index_Type:= Get_Free_Node(use_lists => False);
    node1: constant Index_Type:= Get_Free_Node(use_lists => False);
  begin
    Init_Node(leaves(0).weight, 1, null_index, node0);
    Init_Node(leaves(1).weight, 2, null_index, node1);
    lists:= (others => (node0, node1));
  end Init_Lists;

  --  Converts result of boundary package-merge to the bit_lengths. The result in the
  --  last chain of the last list contains the amount of active leaves in each list.
  --  chain: Chain to extract the bit length from (last chain from last list).

  procedure Extract_Bit_Lengths(chain: Index_Type) is
    node: Index_Type:= chain;
  begin
    while node /= null_index loop
      for i in 0 .. pool(node).count - 1 loop
        bit_lengths(leaves(i).symbol):= bit_lengths(leaves(i).symbol) + 1;
      end loop;
      node := pool(node).tail;
    end loop;
  end Extract_Bit_Lengths;

  function "<"(a, b: Leaf_Node) return Boolean is
  begin
    return a.weight < b.weight;
  end "<";

  procedure Quick_sort (a: in out Leaf_array) is
    n: constant Index_Type:= a'Length;
    i, j: Index_Type;
    p, t: Leaf_Node;
  begin
    if n < 2 then
      return;
    end if;
    p := a(n / 2 + a'First);
    i:= 0;
    j:= n - 1;
    loop
      while a(i + a'First) < p loop
        i:= i + 1;
      end loop;
      while p < a(j + a'First) loop
        j:= j - 1;
      end loop;
      exit when i >= j;
      t := a(i + a'First);
      a(i + a'First) := a(j + a'First);
      a(j + a'First) := t;
      i:= i + 1;
      j:= j - 1;
    end loop;
    Quick_sort(a(a'First .. a'First + i - 1));
    Quick_sort(a(a'First + i .. a'Last));
  end Quick_sort;

  paranoid: constant Boolean:= True;

begin
  bit_lengths:= (others => 0);
  --  Count used symbols and place them in the leaves.
  for a in Alphabet loop
    if frequencies(a) > 0 then
      leaves(num_symbols):= (frequencies(a), a);
      num_symbols:= num_symbols + 1;
    end if;
  end loop;
  --  Check special cases and error conditions.
  if num_symbols > 2 ** max_bits then
    raise too_many_symbols_for_length_limit;  --  Error, too few max_bits to represent symbols.
  end if;
  if num_symbols = 0 then
    return;  --  No symbols at all. OK.
  end if;
  if num_symbols = 1 then
    bit_lengths(leaves(0).symbol) := 1;
    return;  --  Only one symbol, give it bit length 1, not 0. OK.
  end if;
  --  Sort the leaves from lightest to heaviest.
  Quick_sort(leaves(0..num_symbols-1));
  if paranoid then
    for i in 1..num_symbols-1 loop
      if leaves(i) < leaves(i-1) then
        raise buggy_sorting;
      end if;
    end loop;
  end if;
  Init_Lists;
  --  In the last list, 2 * num_symbols - 2 active chains need to be created. Two
  --  are already created in the initialization. Each Boundary_PM run creates one.
  num_Boundary_PM_runs := 2 * num_symbols - 4;
  for i in 1 .. num_Boundary_PM_runs loop
    Boundary_PM(Index_Type(max_bits - 1), i = num_Boundary_PM_runs);
  end loop;
  Extract_Bit_Lengths(lists(Index_Type(max_bits - 1))(1));
  if paranoid then
    --  Done; some checks before leaving. Not checked: completeness of Huffman codes.
    for a in Alphabet loop
      if frequencies(a) = 0 then
        if bit_lengths(a) > 0 then
          raise nonzero_length_but_zero_frequency;  --  Never happened so far
        end if;
      else
        if bit_lengths(a) = 0 then
          raise zero_length_but_nonzero_frequency;  --  Happened before null_index fix
        elsif bit_lengths(a) > max_bits then
          raise length_exceeds_length_limit;        --  Never happened so far
        end if;
      end if;
    end loop;
  end if;
end Length_limited_Huffman_code_lengths;

Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other related Ada projects on Gautier's blog.