/* File: lex_compiler.c (source code of the malaga package). */

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This module contains data structures and functions related to the generation
 * of the allomorph lexicon. */

/* Includes. ================================================================*/

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <time.h>
#include <setjmp.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "tries.h"
#include "rule_type.h"
#include "rules.h"
#include "scanner.h"
#include "files.h"
#include "malaga_files.h"
#include "symbols.h"
#include "input.h"
#include "commands.h"
#include "avl_trees.h"
#include "options.h"
#include "hangul.h"
#include "lex_compiler.h"

/* Macros. ==================================================================*/

#define MARK_LAST_ENTRY(var) ((var) = - ((var) + 1))
/* Mark last entry of a list in FS_LISTS: the index VAR is replaced by the
 * negative value -(VAR + 1). VAR must be an lvalue; it appears twice in the
 * expansion, so it must not have side effects. */

/* Types. ===================================================================*/

typedef struct /* A feature structure for a given lexicon entry. */
{ 
  /* NEXT must stay the first member: fs nodes are cast to "list_node_t *"
   * when they are linked into a list. */
  list_node_t *next; /* Next feature structure. */
  value_t value; /* Feature structure of this entry as a value. */
} fs_node_t;

typedef struct /* An entry in the lexicon tree. */
{ 
  /* NODE must stay the first member: lexicon nodes are cast to
   * "avln_node_t *" for tree operations. NODE.NAME holds the surface. */
  avln_node_t node; /* The lexicon tree is an AVLN tree. */
  list_t fs_list; /* The list of feature structures for this lexicon entry. */
} lex_node_t;

typedef struct /* A node in the constants tree. */
{ 
  /* NODE must stay the first member: constant nodes are cast to
   * "avln_node_t *" for tree operations. NODE.NAME holds the constant name. */
  avln_node_t node; /* The constants tree is an AVLN tree. */
  value_t value; /* Value of the node (NULL while no value is assigned). */
  bool_t fixed; /* FALSE if value is a default value only. */
} const_node_t;

/* Variables. ===============================================================*/

int_t lex_entry_line_number; /* Line number of lexical entry just parsed. */

string_t lex_entry_file_name; /* Name of lexicon file just parsed. */

rule_sys_t *allo_rule_sys; /* Allomorph rule system, read in
                            * "init_lex_compiler". */

static const_node_t *const_tree; /* Root of the tree of constants. */

static lex_node_t *lex_tree; /* Root of the lexicon tree. */
static pool_t string_pool; /* Pool holding the surface strings. */
static pool_t lex_node_pool; /* Pool holding the lexicon tree nodes. */
static pool_t fs_node_pool; /* Pool holding the feature structure nodes. */
static list_t fs_free_list; /* Feature structure nodes ready for reuse. */

/* The following two counters are -1 as long as they do not apply. */
static int_t prelex_count; /* Number of prelex entries. */
static int_t lex_entry_count; /* Number of lexicon entries. */
static int_t allomorph_count; /* Number of allomorphs. */
static int_t intermediate_count; /* Number of pre-filter allomorphs. */

static lex_node_t *current_lexicon_entry; 
/* Current entry when output filter is being executed. */

/* Forward declarations. ====================================================*/

/* Needed because "parse_simple_value" and "parse_value_local" call each other
 * recursively. */
static void parse_value_local( void );

/* Functions for rule execution. ============================================*/

static void 
free_fs_list( lex_node_t *entry )
/* Release the values of all feature structures of ENTRY and move the emptied
 * list nodes to FS_FREE_LIST so they can be reused. */
{ 
  list_t *fs_list = &entry->fs_list;
  fs_node_t *recycled;

  while (fs_list->first != NULL) 
  { 
    recycled = remove_first_node( fs_list );
    free_mem( &recycled->value );
    add_node( &fs_free_list, (list_node_t *) recycled, LIST_END );
  }
}

/*---------------------------------------------------------------------------*/

static void 
free_lex_tree_local( lex_node_t *lex_node )
/* Free the feature structure lists of all nodes in the subtree LEX_NODE.
 * The tree nodes themselves are not freed here. */
{ 
  if (lex_node != NULL) 
  { 
    free_lex_tree_local( (lex_node_t *) lex_node->node.left );
    free_lex_tree_local( (lex_node_t *) lex_node->node.right );
    free_fs_list( lex_node );
  }
}

/*---------------------------------------------------------------------------*/

static void 
free_lex_tree( void )
/* Empty the lexicon buffer: free all feature structures, forget the tree,
 * clear the pools and reset the statistics counters. */
{ 
  /* Release the values while the tree can still be walked. */
  free_lex_tree_local( lex_tree );
  lex_tree = NULL;

  /* The recycled nodes live in FS_NODE_POOL, which is cleared below. */
  clear_list( &fs_free_list );
  clear_pool( lex_node_pool );
  clear_pool( string_pool );
  clear_pool( fs_node_pool );

  /* Reset statistics; -1 marks a counter that does not apply. */
  lex_entry_count = 0;
  allomorph_count = 0;
  prelex_count = -1;
  intermediate_count = -1;
}

/*---------------------------------------------------------------------------*/

static void 
add_to_fs_list( lex_node_t *entry, value_t value )
/* Append a copy of VALUE to the feature structure list of ENTRY. */
{ 
  fs_node_t *node;

  /* Reuse a recycled node if there is one, else take a new one. */
  if (fs_free_list.first == NULL) 
    node = get_pool_space( fs_node_pool, 1, NULL );
  else 
    node = remove_first_node( &fs_free_list );

  node->value = new_value( value );
  add_node( &entry->fs_list, (list_node_t *) node, LIST_END );
}

/*---------------------------------------------------------------------------*/

static void 
lex_add_allo( string_t surface, value_t fs )
/* Add an allomorph, consisting of SURFACE and FS, to the allomorph lexicon.
 * Complain if SURFACE is the empty string. */
{ 
  lex_node_t *node;

  if (*surface == EOS) 
    complain( "Allomorph surface is empty." );

  /* Look up the lexicon node for SURFACE; create it on first use. */
  node = (lex_node_t *) find_avln_node( surface, (avln_node_t *) lex_tree );
  if (node == NULL) 
  { 
    node = (lex_node_t *) get_pool_space( lex_node_pool, 1, NULL );
    node->node.name = copy_string_to_pool( string_pool, surface, NULL );
    clear_list( &node->fs_list );
    insert_avln_node( (avln_node_t *) node, (avln_node_t **) &lex_tree );
  }

  /* Store the feature structure and count the allomorph. */
  add_to_fs_list( node, fs );
  allomorph_count++;
}

/*---------------------------------------------------------------------------*/

static void 
execute_allo_rule( void )
/* Execute the allomorph rule of ALLO_RULE_SYS. Allomorphs produced by the
 * rule are delivered to "lex_add_allo" via the ADD_ALLO callback.
 * NOTE(review): the lexicon entry the rule works on is presumably the value
 * the caller left on the value stack -- confirm in "execute_rule". */
{ 
  add_allo = lex_add_allo; /* Set callback routine. */
  execute_rule( allo_rule_sys, allo_rule_sys->allo_rule );
}

/*---------------------------------------------------------------------------*/

static void 
lex_add_end_state( value_t fs )
/* Add a filtered allomorph, with feature structure FS, to the allomorph
 * lexicon. The allomorph is stored under CURRENT_LEXICON_ENTRY, which
 * "execute_output_filter_local" sets before it runs the filter rule. */
{ 
  add_to_fs_list( current_lexicon_entry, fs );
  allomorph_count++;
}

/*---------------------------------------------------------------------------*/

static void 
execute_output_filter_local( lex_node_t *entry )
/* Execute the output filter on the lexicon tree with root node ENTRY.
 * The tree is traversed in-order. For each node, the current feature
 * structures are replaced by those the filter rule reports via
 * "lex_add_end_state". Complain if the rule is not successful for a node. */
{ 
  fs_node_t *fs;

  if (entry == NULL) 
    return;

  /* Execute the filter for entries alphabetically before ENTRY. */
  execute_output_filter_local( (lex_node_t *) entry->node.left );

  /* Create a list containing all allomorphs with SURFACE. 
   * The value stack is reset first, so TOP equals the number of pushed 
   * feature structures when the list is built. */
  top = 0;
  FOREACH( fs, entry->fs_list ) 
    push_value( fs->value );
  build_list( top );

  /* Execute the output filter rule for ENTRY. 
   * The old feature structures are freed only after the list has been built;
   * the rule then refills the (now empty) list of ENTRY. */
  free_fs_list( entry );
  current_lexicon_entry = entry;
  execute_rule( allo_rule_sys, allo_rule_sys->output_filter );
  if (! rule_successful) 
  { 
    complain( "Output filter generated no allomorphs for \"%s\".", 
            entry->node.name );
  }

  /* Execute the filter for entries alphabetically behind ENTRY. */
  execute_output_filter_local( (lex_node_t *) entry->node.right );
}

/*---------------------------------------------------------------------------*/

static void 
execute_output_filter( void )
/* Run the lexicon output filter over every entry of the lexicon tree.
 * Do nothing if the rule system has no output filter (rule number -1). */
{ 
  if (allo_rule_sys->output_filter != -1) 
  { 
    /* Filtered allomorphs are reported through this callback. */
    add_end_state = lex_add_end_state;

    /* Keep the unfiltered count and count the filtered allomorphs anew. */
    intermediate_count = allomorph_count;
    allomorph_count = 0;

    execute_output_filter_local( lex_tree );
  }
}

/* Support functions for parsing. ===========================================*/

static void 
free_const_node( const_node_t **const_node_p )
/* Free the constant subtree whose root is *CONST_NODE_P: first both child
 * subtrees, then name, value and the node itself.
 * NOTE(review): callers reuse the root variable as an empty tree afterwards,
 * so "free_mem" is expected to reset the freed pointer to NULL -- confirm. */
{ 
  const_node_t *root = *const_node_p;

  if (root != NULL) 
  { 
    free_const_node( (const_node_t **) &root->node.left );
    free_const_node( (const_node_t **) &root->node.right );
    free_mem( &root->node.name );
    free_mem( &root->value );
    free_mem( const_node_p );
  }
}

/*---------------------------------------------------------------------------*/

static const_node_t *
new_const_node( string_t name )
/* Allocate a constant node named NAME (the name is copied), insert it into
 * the constants tree and return it. Value and FIXED flag are left as
 * "new_mem" delivers them. */
{ 
  const_node_t *fresh = new_mem( sizeof( const_node_t ) );

  fresh->node.name = new_string( name, NULL );
  insert_avln_node( (avln_node_t *) fresh, (avln_node_t **) &const_tree );
  return fresh;
}

/* Parse functions. =========================================================*/

static void 
parse_simple_value( void )
/* Parse a simple value and leave it on the value stack. A simple value is a
 * list "<...>", a record "[...]", a symbol, a string, a number, a constant
 * "@name" or a parenthesised value.
 * Complain if NEXT_TOKEN does not start a value or if a constant is used
 * that has no value. */
{ 
  int_t n; /* Number of values in list or record. */
  const_node_t *const_node;

  switch (next_token) 
  {
  case '<': 
    /* Parse a list. */
    read_next_token();
    n = 0;
    if (next_token != '>') 
    { 
      parse_value_local();
      n++;
      while (next_token == ',') 
      { 
        read_next_token();
        parse_value_local();
        n++;
      }
    }
    parse_token( '>' );
    build_list( n );
    break;
  case '[': 
    /* Parse a record, i.e. a sequence of "attribute: value" pairs. */
    read_next_token();
    n = 0;
    if (next_token != ']') 
    { 
      parse_value_local();
      parse_token( ':' );
      parse_value_local();
      n++;
      while (next_token == ',') 
      { 
        read_next_token();
        parse_value_local();
        parse_token( ':' );
        parse_value_local();
        n++;
      }
    }
    parse_token( ']' );
    build_record( n );
    break;
  case TOK_IDENT: 
    /* Parse a symbol. */
    test_token( TOK_IDENT );
    push_symbol_value( find_symbol( token_name ) );
    read_next_token();
    break;
  case TOK_STRING: 
    /* Parse a string. */
    encode_hangul( &token_string );
    push_string_value( token_string, NULL );
    read_next_token();
    break;
  case TOK_NUMBER: 
    /* Parse a number value. */
    push_number_value( token_number );
    read_next_token();
    break;
  case TOK_CONSTANT: 
    /* Parse a constant. */
    const_node = ((const_node_t *) 
                  find_avln_node( token_name, (avln_node_t *) const_tree ));

    /* A node without a value exists while the constant's own definition is
     * being parsed (e.g. "define @a := @a;") or after a definition failed.
     * Such a constant is as good as undefined; never push a NULL value. */
    if (const_node == NULL || const_node->value == NULL) 
      complain( "Constant \"@%s\" is not defined.", token_name );
    push_value( const_node->value );

    /* The value has been used, so it must not be redefined any more. */
    const_node->fixed = TRUE;
    read_next_token();
    break;
  case '(':
    /* Parse a parenthesised value. */
    read_next_token();
    parse_value_local();
    parse_token( ')' );
    break;
  default:
    complain( "Value expected, not %s.", token_as_text( next_token ) );
  }
}

/*---------------------------------------------------------------------------*/

static void 
parse_dotted_value( void )
/* Parse a simple value that may be followed by any number of ".COMPONENT"
 * selectors, each COMPONENT being a simple value itself. The result is left
 * on the value stack. Complain if a selected component does not exist. */
{ 
  parse_simple_value();
  while (next_token == '.') 
  { 
    read_next_token();
    parse_simple_value();
    dot_operation();

    /* A NULL result on top of the stack means there is no such component. */
    if (value_stack[ top - 1 ] != NULL) 
      continue;
    complain( "Component does not exist." );
  }
}

/*---------------------------------------------------------------------------*/

static void 
parse_term_value( void )
/* Parse a value that may contain the "*" and "/" operator. */
{ 
  int_t operator_token ;

  parse_dotted_value();
  while (next_token == '*' || next_token == '/') 
  { 
    operator_token = next_token;
    read_next_token();
    parse_dotted_value();
    if (operator_token == '*')
      asterisk_operation();
    else 
      slash_operation();
  }
}

/*---------------------------------------------------------------------------*/

static void 
parse_value_local( void )
/* Parse any value. This function is recursive. 
 * To get a value from outside, use "parse_value". */
{ 
  int_t operator_token;

  if (next_token == '-') 
  { 
    read_next_token();
    parse_term_value();
    unary_minus_operation();
  } 
  else 
    parse_term_value();
  while (next_token == '+' || next_token == '-') 
  { 
    operator_token = next_token;  
    read_next_token();
    parse_term_value();
    if (operator_token == '-') 
      minus_operation();
    else 
      plus_operation();
  }
}

/*---------------------------------------------------------------------------*/

static void 
parse_value( void )
/* Parse a value and return it on the value stack. 
 * The value stack is emptied (TOP is reset to 0) before parsing. */
{ 
  top = 0;
  parse_value_local();
}

/*---------------------------------------------------------------------------*/

static void 
parse_lex_value( void )
/* Parse a value and compile it: parse one lexicon entry, run the allomorph
 * rule on it and expect the terminating ";".
 * A warning is printed to stderr if the rule generates no allomorphs.
 * If the rule raises an error, the position of the entry is appended to
 * ERROR_TEXT.
 * NOTE(review): whether the error is propagated after the IF_ERROR block is
 * determined by the TRY/END_TRY macros -- confirm there. */
{ 
  int_t line_number;
  string_t file_name;

  /* Remember position of lexicon entry. */
  line_number = current_line_number();
  file_name = current_file_name();

  parse_value(); /* Parse lexicon entry. */

  /* Set position of lexicon entry for "where" command. */
  lex_entry_file_name = file_name;
  lex_entry_line_number = line_number;

  TRY 
  { 
    lex_entry_count++;
    execute_allo_rule();
    if (! rule_successful) 
    { 
      fprintf( stderr, "Warning: No allomorphs generated. (\"%s\", line %d)\n",
             name_in_path( file_name ), line_number );
    }
  } 
  IF_ERROR 
  { 
    /* Report the start of the entry, not the current scanner position. */
    print_text( error_text, " (\"%s\", line %d)", 
            name_in_path( file_name ), line_number );
  }
  END_TRY;

  /* Clear position of lexicon entry. */
  lex_entry_file_name = NULL;
  lex_entry_line_number = -1;

  parse_token( ';' );
}

/*---------------------------------------------------------------------------*/

static void 
parse_lex_values( void )
/* Read all values in the current file and run the allomorph rules on them. */
{ 
  const_node_t *const_node;
  string_t file_name;
  bool_t fixed;

  while (next_token != EOF) 
  { 
    if (next_token == TOK_DEFINE || next_token == TOK_DEFAULT) 
    { 
      fixed = (next_token == TOK_DEFINE);
      read_next_token();
      test_token( TOK_CONSTANT );
      const_node = ((const_node_t *) 
                find_avln_node( token_name, (avln_node_t *) const_tree ));
      if (const_node == NULL) 
      const_node = new_const_node( token_name );
      if (const_node->fixed) 
      complain( "Constant \"@%s\" is already defined.", token_name );
      if (! fixed && const_node->value != NULL)
      {
      complain( "Constant \"@%s\" already has a default value.", 
              token_name );
      }
      read_next_token();
      parse_token( TOK_ASSIGN );
      parse_value();
      free_mem( &const_node->value );
      const_node->value = new_value( value_stack[ --top ] );
      const_node->fixed = fixed;
      parse_token( ';' );
    } 
    else if (next_token == TOK_INCLUDE) 
    { 
      read_next_token();
      test_token( TOK_STRING );
      file_name = absolute_path( token_string, current_file_name() );
      read_next_token();
      begin_include( file_name );
      parse_lex_values();
      end_include();
      parse_token( ';' );
      free_mem( &file_name );
    } 
    else 
    { 
      check_user_break();
      parse_lex_value();
    }
  }
}

/* Functions for construction of run-time lexicon. ==========================*/

static void 
print_lex_tree_local( FILE *stream, string_t allo_format, lex_node_t *entry,
                      int_t *count )
/* Print all lexicon entries in tree ENTRY to STREAM using allomorph format
 * ALLO_FORMAT. *COUNT is the counter of entries that have been printed.
 * If ALLO_FORMAT == NULL, use line breaking.
 * If ALLO_FORMAT is the empty string, entries are counted but not printed.
 * Complain if writing to STREAM fails, in either output format. */
{ 
  fs_node_t *fs;
  string_t line_number, surface, buffer, value_string;
  int write_errno; /* ERRNO right after the write, before any cleanup. */

  if (entry == NULL) 
    return;
  print_lex_tree_local( stream, allo_format, (lex_node_t *) entry->node.left, 
                        count );
  surface = new_string_readable( entry->node.name, NULL );
  decode_hangul( &surface );
  FOREACH( fs, entry->fs_list ) 
  { 
    (*count)++;
    if (allo_format == NULL) 
    { 
      /* Indent continuation lines behind "SURFACE: ". */
      value_string = value_to_readable( fs->value, FALSE, 
                                        strlen( surface ) + 2 );
      fprintf( stream, "%s: %s\n", surface, value_string );
      write_errno = errno;
      free_mem( &value_string );
      if (ferror( stream )) 
      { 
        /* "complain" does not return, so release SURFACE first. */
        free_mem( &surface );
        complain( "Can't write results: %s.", strerror( write_errno ) );
      }
    } 
    else if (*allo_format != EOS) 
    { 
      value_string = value_to_readable( fs->value, FALSE, -1 );
      line_number = int_to_string( *count );
      buffer = replace_arguments( allo_format, "sfn", 
                                  surface, value_string, line_number );
      fprintf( stream, "%s\n", buffer );
      write_errno = errno;
      free_mem( &line_number ); 
      free_mem( &buffer );
      free_mem( &value_string );
      if (ferror( stream )) 
      { 
        /* "complain" does not return, so release SURFACE first. */
        free_mem( &surface );
        complain( "Can't write results: %s.", strerror( write_errno ) );
      }
    }
  }
  free_mem( &surface );
  print_lex_tree_local( stream, allo_format, 
                        (lex_node_t *) entry->node.right, count );
}

/*---------------------------------------------------------------------------*/

void 
print_lex_tree( FILE *stream, string_t allo_format )
/* Print the whole lexicon tree to STREAM, one line per allomorph, using
 * format ALLO_FORMAT. If ALLO_FORMAT == NULL, format the feature structures.
 * The allomorphs are numbered starting with 1. */
{ 
  int_t printed = 0;

  print_lex_tree_local( stream, allo_format, lex_tree, &printed );
}

/*---------------------------------------------------------------------------*/

static void 
count_trie_entries( lex_node_t *tree, int_t *surf_count, int_t *fs_count )
/* Walk subtree TREE and add to *SURF_COUNT the number of nodes that own at
 * least one feature structure, and to *FS_COUNT the total number of feature
 * structures. */
{ 
  fs_node_t *fs;
  
  if (tree != NULL) 
  { 
    count_trie_entries( (lex_node_t *) tree->node.left, 
                        surf_count, fs_count );

    /* Nodes with an empty feature structure list are not counted. */
    if (tree->fs_list.first != NULL) 
      *surf_count += 1;
    FOREACH( fs, tree->fs_list ) 
      *fs_count += 1;

    count_trie_entries( (lex_node_t *) tree->node.right, 
                        surf_count, fs_count );
  }
}

/*---------------------------------------------------------------------------*/

static void 
fill_trie_entries( lex_node_t *tree, trie_entry_t *trie_entries,
                   int_t *fs_lists, 
                   int_t *surf_count, int_t *fs_count, int_t *cell_count )
/* Enter the nodes of subtree TREE, in tree order, into the vectors
 * TRIE_ENTRIES and FS_LISTS. Each node that owns feature structures gets one
 * trie entry whose content is the index of its first item in FS_LISTS; each
 * item of FS_LISTS is the cell index where the feature structure starts.
 * *SURF_COUNT, *FS_COUNT and *CELL_COUNT are advanced accordingly. */
{ 
  fs_node_t *fs;
  trie_entry_t *slot;

  if (tree == NULL) 
    return;

  fill_trie_entries( (lex_node_t *) tree->node.left, trie_entries, 
                     fs_lists, surf_count, fs_count, cell_count );

  if (tree->fs_list.first != NULL) 
  { 
    slot = &trie_entries[ *surf_count ];
    slot->key = tree->node.name;
    slot->content = *fs_count;
    *surf_count += 1;

    FOREACH( fs, tree->fs_list ) 
    { 
      fs_lists[ *fs_count ] = *cell_count;
      *fs_count += 1;
      *cell_count += length_of_value( fs->value );
    }

    /* The list of this node ends with the item entered last. */
    MARK_LAST_ENTRY( fs_lists[ *fs_count - 1 ] );
  }

  fill_trie_entries( (lex_node_t *) tree->node.right, trie_entries, 
                     fs_lists, surf_count, fs_count, cell_count );
}

/*---------------------------------------------------------------------------*/

static void 
write_fs_table( lex_node_t *tree, FILE *stream, string_t file_name )
/* Write the cells of all feature structures in subtree TREE, in tree order,
 * to STREAM. FILE_NAME is passed on to "write_vector". */
{ 
  fs_node_t *fs;

  if (tree != NULL) 
  { 
    write_fs_table( (lex_node_t *) tree->node.left, stream, file_name );
    FOREACH( fs, tree->fs_list ) 
    { 
      write_vector( fs->value, sizeof( cell_t ), 
                    length_of_value( fs->value ), stream, file_name );
    }
    write_fs_table( (lex_node_t *) tree->node.right, stream, file_name );
  }
}

/*---------------------------------------------------------------------------*/

static void 
write_surfaces( lex_node_t *tree, FILE *stream, string_t file_name )
/* Write the names of all nodes in subtree TREE, in tree order and including
 * their terminating EOS, to STREAM. FILE_NAME is passed on to
 * "write_vector". */
{ 
  if (tree != NULL) 
  { 
    write_surfaces( (lex_node_t *) tree->node.left, stream, file_name );
    write_vector( tree->node.name, sizeof( char_t ), 
                  strlen( tree->node.name ) + 1, stream, file_name );
    write_surfaces( (lex_node_t *) tree->node.right, stream, file_name );
  }
}

/*---------------------------------------------------------------------------*/

void 
write_lex_tree( string_t file_name )
/* Write the lexicon tree to the binary lexicon file FILE_NAME.
 * The file consists of a header, the trie that maps surfaces to feature
 * structure lists, the vector of these lists, and the feature structures. */
{ 
  lexicon_header_t lex_header;
  FILE *out;
  pool_t trie_pool;
  int_t trie_root;
  trie_entry_t *trie_entries; /* Trie entries used to build the trie. */
  int_t *fs_lists;
  int_t surf_count, fs_count, cell_count;

  /* First pass: count surfaces and feature structures to size the vectors. */
  surf_count = 0;
  fs_count = 0;
  count_trie_entries( lex_tree, &surf_count, &fs_count );
  fs_lists = new_vector( sizeof( int_t ), fs_count );
  trie_entries = new_vector( sizeof( trie_entry_t ), surf_count );

  /* Second pass: fill the vectors and build the trie from them. */
  surf_count = 0;
  fs_count = 0;
  cell_count = 0;
  fill_trie_entries( lex_tree, trie_entries, fs_lists, 
                     &surf_count, &fs_count, &cell_count );
  new_trie( surf_count, trie_entries, &trie_pool, &trie_root );
  free_mem( &trie_entries );

  /* Create the binary file. */
  out = open_stream( file_name, "wb" );

  /* Fill in the header. */
  set_header( &lex_header.common_header, LEXICON_FILE, LEXICON_CODE_VERSION );
  lex_header.trie_size = pool_item_count( trie_pool );
  lex_header.trie_root = trie_root;
  lex_header.fs_lists_size = fs_count;
  lex_header.values_size = cell_count;

  /* Write header, trie, feature structure lists and feature structures. */
  write_vector( &lex_header, sizeof( lexicon_header_t ), 1, out, file_name );
  write_pool( trie_pool, out, file_name );
  write_vector( fs_lists, sizeof( int_t ), fs_count, out, file_name );
  write_fs_table( lex_tree, out, file_name );
  close_stream( &out, file_name );

  /* Release the temporary structures. */
  free_pool( &trie_pool );
  free_mem( &fs_lists );
}

/*---------------------------------------------------------------------------*/

static void 
count_prelex_entries( lex_node_t *tree, int_t *entry_count_p, 
                      int_t *values_size_p, int_t *strings_size_p )
/* Walk subtree TREE and add
 * - to *ENTRY_COUNT_P the number of feature structures,
 * - to *VALUES_SIZE_P the number of cells of these feature structures,
 * - to *STRINGS_SIZE_P the number of chars (including EOS) of the surfaces
 *   of all nodes that own at least one feature structure. */
{ 
  fs_node_t *fs;
  
  if (tree != NULL) 
  { 
    count_prelex_entries( (lex_node_t *) tree->node.left, 
                          entry_count_p, values_size_p, strings_size_p );

    if (tree->fs_list.first != NULL) 
      *strings_size_p += strlen( tree->node.name ) + 1;
    FOREACH( fs, tree->fs_list )
    {
      *entry_count_p += 1;
      *values_size_p += length_of_value( fs->value );
    }

    count_prelex_entries( (lex_node_t *) tree->node.right,
                          entry_count_p, values_size_p, strings_size_p );
  }
}

/*---------------------------------------------------------------------------*/

static void 
write_prelex_entries( lex_node_t *tree, int_t *values_size_p, 
                      int_t *strings_size_p, FILE *stream, 
                      string_t file_name )
/* Write one prelex entry to STREAM for every feature structure in subtree
 * TREE. An entry holds the current values of *STRINGS_SIZE_P (as index of
 * its surface) and *VALUES_SIZE_P (as index of its feature structure); both
 * counters are advanced while the tree is walked.
 * Use FILE_NAME for error messages. */
{ 
  fs_node_t *fs;
  prelex_entry_t record;

  if (tree == NULL) 
    return;

  write_prelex_entries( (lex_node_t *) tree->node.left, values_size_p, 
                        strings_size_p, stream, file_name );

  /* All feature structures of a node share the same surface index. */
  FOREACH( fs, tree->fs_list )
  {
    record.surface = *strings_size_p;
    record.fs = *values_size_p;
    write_vector( &record, sizeof( prelex_entry_t ), 1, stream, file_name );
    *values_size_p += length_of_value( fs->value );
  }
  if (tree->fs_list.first != NULL) 
    *strings_size_p += strlen( tree->node.name ) + 1;

  write_prelex_entries( (lex_node_t *) tree->node.right, values_size_p, 
                        strings_size_p, stream, file_name );
}

/*---------------------------------------------------------------------------*/

void 
write_prelex_file( string_t file_name )
/* Write the lexicon tree to the prelex file FILE_NAME.
 * The file consists of a header, the prelex entries, the feature structures
 * and the surfaces. */
{ 
  prelex_header_t prelex_header;
  FILE *out;
  int_t entry_count, values_size, strings_size;

  /* Determine the table sizes that are stored in the header. */
  entry_count = 0;
  values_size = 0;
  strings_size = 0;
  count_prelex_entries( lex_tree, &entry_count, &values_size, &strings_size );

  /* Create the binary file. */
  out = open_stream( file_name, "wb" );

  /* Fill in the header. */
  set_header( &prelex_header.common_header, PRELEX_FILE, 
              PRELEX_CODE_VERSION );
  prelex_header.entry_count = entry_count;
  prelex_header.values_size = values_size;
  prelex_header.strings_size = strings_size;

  /* Write header, entries, feature structures and surfaces. 
   * The size counters are reused as running indices for the entries. */
  write_vector( &prelex_header, sizeof( prelex_header_t ), 1, 
                out, file_name );
  values_size = 0;
  strings_size = 0;
  write_prelex_entries( lex_tree, &values_size, &strings_size, 
                        out, file_name );
  write_fs_table( lex_tree, out, file_name );
  write_surfaces( lex_tree, out, file_name );
  close_stream( &out, file_name );
}

/*---------------------------------------------------------------------------*/

static void 
read_prelex_file( string_t file_name )
/* Read a prelex from FILE_NAME and enter all its entries into the lexicon
 * tree. PRELEX_COUNT is set to the number of entries read; ALLOMORPH_COUNT
 * is reset, so prelex entries do not count as generated allomorphs.
 * Complain if the header is not accepted or if an entry refers to a position
 * outside of the tables announced in the header. The file is unmapped again
 * in any case. */
{
  void *prelex_data;
  int_t prelex_length;
  prelex_header_t *header;
  prelex_entry_t *entries;
  cell_t *values;
  char_t *strings;
  int_t i;

  /* Map stream into main memory. */
  map_file( file_name, &prelex_data, &prelex_length );
  TRY 
  { 
    header = (prelex_header_t *) prelex_data;
    check_header( &header->common_header, file_name,
                  PRELEX_FILE, MIN_PRELEX_CODE_VERSION, PRELEX_CODE_VERSION );

    /* The three tables follow the header in this order. */
    entries = (prelex_entry_t *) (header + 1);
    values = (cell_t *) (entries + header->entry_count);
    strings = (char_t *) (values + header->values_size);

    /* Enter entries into tree. */
    for (i = 0; i < header->entry_count; i++)
    { 
      /* Never follow an index that leaves its table. */
      if (entries[i].surface < 0 
          || entries[i].surface >= header->strings_size
          || entries[i].fs < 0 
          || entries[i].fs >= header->values_size) 
      { 
        complain( "Prelex file \"%s\" is corrupt.", file_name );
      }
      lex_add_allo( strings + entries[i].surface, values + entries[i].fs );
    }

    /* HEADER points into the mapping, so read it before unmapping. */
    prelex_count = header->entry_count;
  } 
  FINALLY 
    unmap_file( &prelex_data, prelex_length );
  END_TRY;
  allomorph_count = 0;
}

/* Interface functions for the lexicon compiler. ============================*/

void 
print_lex_statistics( FILE *stream )
/* Print statistics about the lexicon buffer to STREAM. Counters that do not
 * apply (value -1) are omitted. The allomorph/entry ratio is only printed if
 * no prelex file was read and at least one source entry was read. */
{ 
  double allos_per_entry;

  if (prelex_count != -1)
    fprintf( stream, "Prelex entries read:     %d\n", prelex_count );

  fprintf( stream, "Source entries read:     %d\n", lex_entry_count );

  if (intermediate_count != -1) 
    fprintf( stream, "Intermediates generated: %d\n", intermediate_count );

  fprintf( stream, "Allomorphs generated:    %d\n", allomorph_count );

  if (prelex_count != -1 || lex_entry_count <= 0)
    return;
  allos_per_entry = (double) allomorph_count / (double) lex_entry_count;
  fprintf( stream, "Allomorphs per entry:    %.4G\n", allos_per_entry );
}

/*---------------------------------------------------------------------------*/

void 
generate_allos_for_file( string_t source_name, string_t prelex_name,
                   bool_t use_filter )
/* Parse a lexicon file SOURCE_NAME and a precompiled lexicon file PRELEX_NAME.
 * Generate allomorphs. Write allomorphs into lexicon buffer.
 * Don't use the output filter if USE_FILTER == FALSE. 
 * The lexicon buffer and all constants are discarded first. 
 * PRELEX_NAME may be NULL; then no prelex file is read. */
{ 
  free_lex_tree();
  free_const_node( &const_tree );
  if (prelex_name != NULL) 
    read_prelex_file( prelex_name );
  lex_entry_file_name = NULL;
  begin_include( source_name );
  TRY 
    parse_lex_values();
  IF_ERROR 
  { 
    /* LEX_ENTRY_FILE_NAME is only set while an allomorph rule is executed;
     * in that case "parse_lex_value" has already added the position. 
     * Otherwise the error stems from parsing, so add the scanner position. */
    if (lex_entry_file_name == NULL) 
    { 
      print_text( error_text, " (\"%s\", line %d, column %d)",
              name_in_path( current_file_name() ), 
              current_line_number(), current_column() );
      if (in_emacs_malaga_mode) 
      { 
      printf( "SHOW \"%s\":%d:%d\n", current_file_name(), 
            current_line_number(), current_column() );
      }
    }
  } 
  FINALLY 
    end_includes();
  END_TRY;
  if (use_filter) 
    execute_output_filter();
}

/*---------------------------------------------------------------------------*/

void 
generate_allos_for_line( string_t lexicon_name, int_t line )
/* Read line LINE in lexicon file LEXICON_NAME and generate allomorphs.
 * Write allomorphs into lexicon buffer. 
 * All statements that start before LINE are parsed but not compiled; the
 * first statement at or behind LINE is compiled as a lexicon entry.
 * The lexicon buffer is discarded first; the constants tree is left as it
 * is. Complain if there is no statement at or behind LINE. */
{ 
  free_lex_tree();
  lex_entry_file_name = NULL;
  begin_include( lexicon_name );
  TRY 
  { 
    /* Skip everything in front of LINE. */
    while (next_token != EOF && current_line_number() < line) 
    { 
      check_user_break();
      if (next_token == TOK_INCLUDE) 
      { 
      /* The included file itself is not read. */
      read_next_token();
      parse_token( TOK_STRING );
      parse_token( ';' );
      } 
      else if (next_token == TOK_DEFINE || next_token == TOK_DEFAULT) 
      { 
      /* The value is parsed, but the constant is not changed. */
      read_next_token();
      parse_token( TOK_CONSTANT );
      parse_token( TOK_ASSIGN );
      parse_value();
      parse_token( ';' );
      } 
      else 
      { 
      parse_value();
      parse_token( ';' );
      }
    }
    if (next_token == EOF) 
      complain( "No lexicon entry at or behind line %d.", line );
    parse_lex_value();
  } 
  IF_ERROR 
  { 
    /* LEX_ENTRY_FILE_NAME is only set while an allomorph rule is executed;
     * in that case "parse_lex_value" has already added the position. 
     * Otherwise the error stems from parsing, so add the scanner position. */
    if (lex_entry_file_name == NULL) 
    { 
      print_text( error_text, " (\"%s\", line %d, column %d)",
              name_in_path( current_file_name() ), 
              current_line_number(), current_column() );
      if (in_emacs_malaga_mode) 
      { 
      printf( "SHOW \"%s\":%d:%d\n", current_file_name(), 
            current_line_number(),  current_column() );
      }
    }
  } 
  FINALLY 
    end_includes();
  END_TRY;
  execute_output_filter();
}

/*---------------------------------------------------------------------------*/

static void 
read_lex_constants_local( void )
/* Read all constants in current scanner input. */
{ 
  const_node_t *const_node;
  string_t file_name;

  while (next_token != EOF) 
  { 
    check_user_break(); 
    if (next_token == TOK_INCLUDE) 
    { 
      read_next_token();
      test_token( TOK_STRING );
      file_name = absolute_path( token_string, current_file_name() );
      read_next_token();
      read_lex_constants( file_name );
      parse_token( ';' );
      free_mem( &file_name );
    } 
    else if (next_token == TOK_DEFINE || next_token == TOK_DEFAULT) 
    { 
      read_next_token();
      test_token( TOK_CONSTANT );
      const_node = ((const_node_t *) 
                find_avln_node( token_name, (avln_node_t *) const_tree ));
      if (const_node != NULL) 
      free_mem( &const_node->value );
      else 
      const_node = new_const_node( token_name );
      read_next_token();
      parse_token( TOK_ASSIGN );
      parse_value();
      const_node->value = new_value( value_stack[ --top ] );
      parse_token( ';' );
    } 
    else 
    { 
      parse_value();
      parse_token( ';' );
    }
  }
}

/*---------------------------------------------------------------------------*/

void 
read_lex_constants( string_t lexicon_name )
/* Read all constants in file LEXICON_NAME. 
 * The constants are entered into the constants tree; constants that are 
 * already in the tree are kept or get a new value.
 * On error, the scanner position is appended to ERROR_TEXT. */
{ 
  begin_include( lexicon_name );
  TRY 
    read_lex_constants_local();
  IF_ERROR 
  { 
    print_text( error_text, " (\"%s\", line %d, column %d)",
            name_in_path( current_file_name() ), 
            current_line_number(),  current_column() );
    if (in_emacs_malaga_mode) 
    { 
      /* NOTE(review): presumably asks the Emacs malaga mode to show the
       * error position -- confirm against the Emacs interface. */
      printf( "SHOW \"%s\":%d:%d\n", current_file_name(), 
            current_line_number(), current_column() );
    }
  } 
  FINALLY 
    end_includes();
  END_TRY;
}

/*---------------------------------------------------------------------------*/

void 
generate_allos_for_string( string_t fs_string )
/* Generate allomorphs from FS_STRING, which should contain a readable feature
 * structure.  Write allomorphs into lexicon buffer. 
 * FS_STRING may end with an optional ";"; nothing else may follow the value.
 * The lexicon buffer is discarded first. A warning is printed to stdout if
 * the allomorph rule generates no allomorphs. */
{ 
  free_lex_tree();
  set_scanner_input( fs_string );
  TRY 
  { 
    parse_value();
    if (next_token == ';') 
      read_next_token();
    test_token( EOF );
  } 
  FINALLY 
    /* Detach the scanner from FS_STRING, whether parsing succeeded or not. */
    set_scanner_input( NULL );
  END_TRY;
  execute_allo_rule();
  if (! rule_successful) 
    printf( "Warning: no allomorphs generated.\n" );
  execute_output_filter();
}

/*---------------------------------------------------------------------------*/

void 
init_lex_compiler( string_t allo_rule_file )
/* Initialise the "lex_compiler" module: read the allomorph rules from
 * ALLO_RULE_FILE and set up an empty lexicon buffer. */
{ 
  allo_rule_sys = read_rule_sys( allo_rule_file );

  /* Start with an empty lexicon tree and fresh pools. */
  lex_tree = NULL;
  clear_list( &fs_free_list );
  lex_node_pool = new_pool( sizeof( lex_node_t ) );
  string_pool = new_pool( sizeof( char_t ) );
  fs_node_pool = new_pool( sizeof( fs_node_t ) );

  /* Reset statistics; -1 marks a counter that does not apply. */
  lex_entry_count = 0;
  allomorph_count = 0;
  prelex_count = -1;
  intermediate_count = -1;
}

/*---------------------------------------------------------------------------*/

void 
terminate_lex_compiler( void )
/* Terminate the "lex_compiler" module: free the rule system, the constants
 * and the lexicon buffer with its pools. */
{ 
  free_rule_sys( &allo_rule_sys );
  free_const_node( &const_tree );

  /* Empty the lexicon buffer while its pools still exist, then free them. */
  free_lex_tree();
  free_pool( &string_pool );
  free_pool( &fs_node_pool );
  free_pool( &lex_node_pool );

  /* The recycled nodes were part of FS_NODE_POOL. */
  clear_list( &fs_free_list );
}

/* End of file. =============================================================*/

/* Listing generated by Doxygen 1.6.0. */