Logo Search packages:      
Sourcecode: malaga version File versions  Download package

tries.c

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This module implements a static trie structure */

/* Includes. ================================================================*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <setjmp.h>
#include "basic.h"
#include "pools.h"
#include "tries.h"

/* Macros. ==================================================================*/

#define ALIGN(address) (((ptr_t) (address) + (ptr_t) 3) & ~ (ptr_t) 3) 
/* Align ADDRESS to next 32-bit int. */

#define INTS(bytes) (((bytes) + (ptr_t ) 3) >> 2) 
/* Number of ints needed to store BYTES. */ 

/* Types. ===================================================================*/

typedef struct /* A node in a list of keys. */
{ 
  list_node_t *next;
  char_t key; /* Key for this node. */
  int_t subnode; /* Index of node for KEY in trie. */
  int_t content; /* Content associated with KEY. */
} key_node_t;

typedef struct /* A dynamic trie node. */
{ 
  string_t prefix; /* The prefix of this node. */
  int_t prefix_len; /* The length of PREFIX. */
  list_t key_list; /* List of keys. */
} trie_node_t;

/* The trie is a vector of int_t that contains compact trie nodes.
 * A compact trie node is int_t-aligned and looks as follows:
 *   u_byte_t prefix_len;
 *   char_t prefix[ prefix_len ];
 *   u_byte_t subnode_count;
 *   u_byte_t content_count;
 *   char_t subnode_keys[ subnode_count ];
 *   char_t content_keys[ content_count ];
 *   0..3 pad bytes of value 0;
 *   int_t subnodes[ subnode_count ];
 *   int_t contents[ content_count ]; */

typedef struct /* Pointers to the items of a compact trie node. */
{ 
  u_byte_t *prefix_len; /* Number of chars that precede the keys. */
  char_t *prefix; /* The chars that precede the keys. */ 
  u_byte_t *subnode_count; /* The number of subnodes in this node. */
  u_byte_t *content_count; /* The number of contents in this node. */
  char_t *subnode_keys; /* The keys for the subnodes in this node. */
  char_t *content_keys; /* The keys for the contents in this node. */
  /* 0..3 pad bytes of value 0; */
  int_t *subnodes; /* Indexes of the subnodes. */
  int_t *contents; /* Contents. */
} compact_node_t;

/*---------------------------------------------------------------------------*/

static int_t 
store_trie_node( pool_t trie_pool, trie_node_t *node )
/* Store NODE in TRIE_POOL and return its index. */
{
  int_t node_size; /* Size of the trie node in TRIE_POOL. */
  int_t node_index; /* Index of the trie node in TRIE_POOL. */
  int_t i, j, subnode_count, content_count;
  key_node_t *key_node;
  int_t *compact_node;
  compact_node_t n; /* Pointers to the items of the compactified NODE. */
  
  /* Count the number of entries in NODE->KEY_LIST. */
  subnode_count = content_count = 0;
  FOREACH( key_node, node->key_list )
  {
    if (key_node->subnode != -1) 
      subnode_count++;
    if (key_node->content != -1) 
      content_count++;
  }

  /* Get pool space for the new node. */
  node_size = (INTS( sizeof( u_byte_t ) + sizeof( char_t ) * node->prefix_len 
                     + sizeof( u_byte_t ) + sizeof( char_t ) * subnode_count
                     + sizeof( u_byte_t ) + sizeof( char_t ) * content_count )
               + subnode_count + content_count);
  compact_node = get_pool_space( trie_pool, node_size, &node_index );

  /* Set up the pointers to the items of the node. */
  n.prefix_len = (u_byte_t *) compact_node;
  n.prefix = (char_t *) (n.prefix_len + 1);
  n.subnode_count = (u_byte_t *) (n.prefix + node->prefix_len);
  n.content_count = (u_byte_t *) (n.subnode_count + 1);
  n.subnode_keys = (char_t *) (n.content_count + 1);
  n.content_keys = (char_t *) (n.subnode_keys + subnode_count);
  n.subnodes = (int_t *) ALIGN( n.content_keys + content_count );
  n.contents = (int_t *) ALIGN( n.subnodes + subnode_count );

  /* Copy the items. */
  *n.prefix_len = node->prefix_len;
  for (i = 0; i < node->prefix_len; i++) 
    n.prefix[i] = TO_LOWER( node->prefix[i] );
  *n.subnode_count = subnode_count;
  *n.content_count = content_count;
  i = j = 0;
  FOREACH( key_node, node->key_list ) 
  { 
    if (key_node->subnode != -1)
    {
      n.subnode_keys[i] = key_node->key;
      n.subnodes[i] = key_node->subnode;
      i++;
    }
    if (key_node->content != -1)
    {
      n.content_keys[j] = key_node->key;
      n.contents[j] = key_node->content;
      j++;
    }
  }

  return node_index;
}

/*---------------------------------------------------------------------------*/

static int_t 
new_subtrie( pool_t trie_pool, trie_entry_t entries[], 
             int_t start, int_t end, int_t index )
/* Create a subtrie for ENTRIES[ START ]..ENTRIES[ END - 1 ], 
 * store it in TRIE_POOL and return the index of its root node.
 * The first INDEX chars in the keys will be ignored, so they
 * must match for the keys in ENTRIES[ START ]..ENTRIES[ END - 1 ]. */
{
  trie_node_t node;
  int_t key_idx; /* The key index. */
  int_t node_index;
  int_t sub_start, sub_end; /* Start and end of a subtrie. */
  string_t first_key, last_key; /* Used as abbreviations. */
  key_node_t *key_node;

  if (start >= end) 
    return -1;

  /* Look how many chars from INDEX onward are equal in the first and 
   * the last entry. This will be the prefix of the new node.
   * If the key of the first entry is as long as the prefix, shorten the prefix
   * by 1. */
  first_key = entries[ start ].key;
  last_key = entries[ end - 1 ].key;
  for (key_idx = index; first_key[ key_idx + 1 ] != EOS; key_idx++) 
  { 
    if (TO_LOWER( first_key[ key_idx ] ) != TO_LOWER( last_key[ key_idx ] )) 
      break;
  }
  node.prefix = first_key + index;
  node.prefix_len = key_idx - index;

  /* Scan list of entries for the first char behind the prefix. For each key,
   * find the corresponding lower and upper index in the list of entries. */
  clear_list( &node.key_list );
  for (sub_start = start; sub_start < end; sub_start = sub_end) 
  { 
    /* Create a new node for the current key. */
    key_node = new_node( &node.key_list, sizeof( key_node_t ), LIST_END );
    key_node->key = TO_LOWER( entries[ sub_start ].key[ key_idx ] );
    if (entries[ sub_start ].key[ key_idx + 1 ] == EOS) 
    { 
      key_node->content = entries[ sub_start ].content;
      sub_start++;
    } 
    else 
      key_node->content = -1;

    /* Find last entry with same key. */
    for (sub_end = sub_start; sub_end < end; sub_end++) 
    { 
      if (TO_LOWER( entries[ sub_end ].key[ key_idx ] ) != key_node->key) 
      break;
    }
    key_node->subnode = new_subtrie( trie_pool, entries, sub_start, sub_end, 
                                     key_idx + 1 );
  }

  /* Store compact node and clear the key list. */
  node_index = store_trie_node( trie_pool, &node );
  while (node.key_list.first != NULL) 
    free_first_node( &node.key_list );

  return node_index;
}

/*---------------------------------------------------------------------------*/

void 
new_trie( int_t n, trie_entry_t *entries, 
          pool_t *trie_pool, int_t *root_node_index )
/* Take N ENTRIES and build a trie of them.
 * Return the trie in *TRIE_POOL 
 * and the index of its root node in *ROOT_NODE_INDEX.
 * The keys in *ENTRIES must be non-empty, unique, and sorted. */
{
  *trie_pool = new_pool( sizeof( int_t ) );
  *root_node_index = new_subtrie( *trie_pool, entries, 0, n, 0 );
}

/*---------------------------------------------------------------------------*/

bool_t
lookup_trie( int_t *trie, int_t *node_index, string_t *input, int_t *content )
/* Test if a prefix of *INPUT matches the node at *NODE_INDEX in TRIE.
 * If it does, return TRUE (else return FALSE) and:
 *   *CONTENT contains the associated content,
 *   *NODE contains the subnode for the matched input, and
 *   *INPUT points to the first char behind the prefix. */
{
  int_t lower, upper, middle;
  compact_node_t r; /* Pointers to the items of the root node; */
  char_t c;

  while (*node_index != -1) 
  { 
    /* Test if node's prefix matches the given key. */
    r.prefix_len = (u_byte_t *) (trie + *node_index);
    r.prefix = (char_t *) (r.prefix_len + 1);
    if (strncmp_no_case( *input, r.prefix, *r.prefix_len ) != 0) 
      return FALSE;
    (*input) += *r.prefix_len;

    /* Get the rest of the node. */
    r.subnode_count = (u_byte_t *) (r.prefix + *r.prefix_len);
    r.content_count = (u_byte_t *) (r.subnode_count + 1);
    r.subnode_keys = (char_t *) (r.content_count + 1);
    r.content_keys = (char_t *) (r.subnode_keys + *r.subnode_count);
    r.subnodes = (int_t *) ALIGN( r.content_keys + *r.content_count );
    r.contents = (int_t *) (r.subnodes + *r.subnode_count);

    /* Perform binary search for subnode with given key. */
    c = TO_LOWER( **input );
    *node_index = -1;
    lower = 0;
    upper = *r.subnode_count - 1;
    while (lower <= upper) 
    { 
      middle = (lower + upper) / 2;
      if (ORD(c) < ORD( r.subnode_keys[ middle ] )) 
      upper = middle - 1;
      else if (ORD(c) > ORD( r.subnode_keys[ middle ] )) 
      lower = middle + 1;
      else /* This entry matches. */
      { 
      *node_index = r.subnodes[ middle ];
      break;
      }
    }

    /* Perform binary search for content with given key. */
    *content = -1;
    lower = 0;
    upper = *r.content_count - 1;
    while (lower <= upper) 
    { 
      middle = (lower + upper) / 2;
      if (ORD(c) < ORD( r.content_keys[ middle ] )) 
      upper = middle - 1;
      else if (ORD(c) > ORD( r.content_keys[ middle ] )) 
      lower = middle + 1;
      else /* This entry matches. */
      { 
        *content = r.contents[ middle ];
        break;
      }
    }

    (*input)++;
    if (*content != -1) 
      return TRUE;
  }
  return FALSE;
}

/* End of file. =============================================================*/

Generated by  Doxygen 1.6.0   Back to index