Logo Search packages:      
Sourcecode: malaga version File versions  Download package

analysis.c

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This file contains data structures and functions used for grammatical 
 * analysis. */

/* Includes. ================================================================*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include <string.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "rule_type.h"
#include "rules.h"
#include "lexicon.h"
#include "cache.h"
#include "analysis.h"

/* Types. ===================================================================*/

typedef struct tree_node /* A rule application is stored in "tree_node". */
{ 
  struct tree_node *parent; /* Predecessor of this tree node. */
  struct tree_node *first_child; /* First successor of this tree node. */
  struct tree_node *sibling; /* Alternative tree node. */
  tree_node_type_t type; /* Type of this tree node. */
  int_t rule; /* Number of the executed rule. */
  int_t index; /* Index of this tree node. */
  value_t link_fs; /* Feature structure of the link. */
  value_t result_fs; /* Result feature structure of the resulting state. */
  int_t rule_set; /* Successor rules of resulting state (-1 for end state). */
  string_t input; /* The input that is not yet analysed. */
} tree_node_t;

typedef struct /* A state in morphological or syntactical analysis. */
{ 
  list_node_t *next;
  value_t fs; /* Feature structure of input read in so far. */
  string_t input; /* Pointer to input that is analysed next. */
  int_t rule_set; /* Set of rules to be applied. */
  tree_node_t *tree_node; /* Tree node of rule application that created
                           * this state (NULL if no tree). */
  int_t item_index; /* Number of items read in so far. */
} state_t;

typedef struct /* The structure for morphological and syntactical analysis. */
{ 
  pool_t state_pool; /* All states are saved in STATE_POOL. */
  pool_t value_pool; /* All feature structures are saved in VALUE_POOL. */
  list_t running_states; /* States that need further analysis
                    * (in the order of their INPUT indexes). */
  list_t end_states; /* End states */
  list_t free_states; /* States that can be reused. */
} analysis_t;

/* Global variables. ========================================================*/

rule_sys_t *rule_system[2];
grammar_t top_grammar;
int_t state_count;
int_t current_state;
bool_t recognised_by_combi_rules;
bool_t recognised_by_robust_rule; 
string_t last_analysis_input; 
void (*debug_state)( int_t index, bool_t enter );
string_t (*get_surface)( surface_t surface_type );

/* Variables. ===============================================================*/

/* Structures used for LAG analysis, one for morphology and one for syntax. */
static analysis_t *analyses[2];

/* The data structure used to save the analysis tree. */
static tree_node_t *root_tree_node; /* A pointer to the root tree node. */
static pool_t tree_pool; /* Pool where tree nodes are stored. */
static int_t tree_node_count;

static state_t *next_result_state; /* Needed for "next_analysis_result". */
static tree_node_t *next_tree_node; /* Needed for "get_next_analysis_node". */

static string_t state_surface, link_surface, link_surface_end;
/* Start and end position of surfaces when rule is executed. Read only! */

static struct /* Information needed to generate states and tree nodes. */
{ 
  analysis_t *analysis;
  tree_mode_t tree_mode; /* Are we building an analysis tree? */
  int_t rule; /* Rule just executed. */
  value_t link_fs; /* Link's feature structure. */
  tree_node_t *parent; /* Predecessor tree node. */
  int_t item_index; /* Index of item that is added. */
  string_t input; /* End of analysed input. */
} state_info;

static bool_t options[ANALYSIS_OPTION_COUNT];

/* Functions for analysis options. ==========================================*/

void 
set_analysis_option( analysis_option_t selected, bool_t setting )
/* Set analysis option SELECTED to SETTING. */
{ 
  switch (selected) 
  {
  case ROBUST_RULE_OPTION:
    if (rule_system[ MORPHOLOGY ]->robust_rule == -1) 
      complain( "No morphology \"robust_rule\"." );
    break;
  case PRUNING_RULE_OPTION:
    if (rule_system[ SYNTAX ] == NULL 
      || rule_system[ SYNTAX ]->pruning_rule == -1)
    {
      complain( "No syntax \"pruning_rule\"." );
    }
    break;
  case MOR_OUT_FILTER_OPTION:
    if (rule_system[ MORPHOLOGY ]->output_filter == -1) 
      complain( "No morphology \"output_filter\"." );
    break;
  case SYN_OUT_FILTER_OPTION:
    if (rule_system[ SYNTAX ] == NULL 
      || rule_system[ SYNTAX ]->output_filter == -1)
    {
      complain( "No syntax \"output_filter\"." ); 
    }
    break;
  case SYN_IN_FILTER_OPTION:
    if (rule_system[ SYNTAX ] == NULL 
      || rule_system[ SYNTAX ]->input_filter == -1)
    {
      complain( "No syntax \"input_filter\"." );
    }
    break;
  case SYN_INCOMPLETE_OPTION:
    if (rule_system[ SYNTAX ] == NULL)
      complain ( "No syntax rules loaded." );
    break;
  case CACHE_OPTION:
  case MOR_INCOMPLETE_OPTION:
    break;
  default:
    complain( "Internal error." );
  }
  options[ selected ] = setting;
}

/*---------------------------------------------------------------------------*/

bool_t 
get_analysis_option( analysis_option_t selected )
/* Return the current setting of analysis option SELECTED. */
{ 
  return options[ selected ]; 
}

/* Functions for segmentation and preprocessing. ============================*/

void 
preprocess_input( string_t input, bool_t expect_quotes )
/* Delete heading and trailing spaces in INPUT
 * and compress all whitespace sequences to a single space.
 * If EXPECT_QUOTES == TRUE, expect quoted input and remove the quotes. */
{ 
  string_t input_p, output_p;
  int_t code, i;

  output_p = input;

  /* Cut heading spaces. */
  input_p = next_non_space( input );

  if (expect_quotes)
  {
    if (*input_p != '"') 
      complain( "Opening '\"' in input is missing." );
    input_p++;
    while (*input_p != '"')
    {
      if (*input_p == EOS)
      complain( "Closing '\"' in input is missing." );
      else if (*input_p == '\\')
      {
      input_p++;
      if (*input_p == '\\' || *input_p == '\"')
        *output_p++ = *input_p++;
      else if (TO_LOWER( *input_p ) == 'x') 
      { 
        code = 0;
        input_p++;
        for (i = 0; i < 2 * (int_t) sizeof( char_t ); i++) 
        { 
          if (IS_DIGIT( *input_p ))
            code = 16 * code + ORD( *input_p ) - '0';
          else if (TO_LOWER( *input_p ) >= 'a' 
                 && TO_LOWER( *input_p ) <= 'f')
          {
            code = 16 * code + 10 + TO_LOWER( *input_p ) - 'a';
          }
          else 
            complain( "Control char sequence must have 2 digits." );
          input_p++;
        }
        *output_p++ = code;
      } 
      else 
        complain( "Illegal escape sequence." );
      }
      else /* No escape char. */
      *output_p++ = *input_p++;
    }
    input_p++;
    input_p = next_non_space( input_p );
    if (*input_p != EOS)
      complain( "Junk after quoted string in input." );
  }
  else
  {
    while (*input_p != EOS) 
    { 
      if (IS_SPACE( *input_p )) 
      { 
      /* Overread all whitespace and write a single space. */
      input_p = next_non_space( input_p );
      *output_p++ = ' ';
      } 
      else 
      *output_p++ = *input_p++;
    }
  }

  /* Last space may be superfluous. */
  if (output_p > input && IS_SPACE( output_p[-1] )) 
    output_p--;
  *output_p = EOS;
}

/*---------------------------------------------------------------------------*/

static bool_t 
word_may_end_here( string_t string, rule_t *rule )
/* Return whether there may be a word boundary between STRING-1 and STRING. */
{ 
  if (options[ MOR_INCOMPLETE_OPTION ] && top_grammar == MORPHOLOGY)
    return TRUE;
  if (rule->type == END_RULE && rule->param_count == 2) 
    return TRUE;
  return (*string == EOS || *string == ' ');
}

/* Functions for state list processing. =====================================*/

static state_t *
insert_state( analysis_t *analysis,
            list_t *state_list,
            value_t fs,
            string_t input,
            int_t rule_set,
            int_t item_index )
/* Insert a state, composed of FS, INPUT, RULE_SET, and ITEM_INDEX
 * in the list STATE_LIST, in front of all states with a
 * higher INPUT index. Return this state. */
{ 
  state_t *state, *prev_state, *next_state;

  state = remove_first_node( &analysis->free_states );
  if (state == NULL) 
    state = (state_t *) get_pool_space( analysis->state_pool, 1, NULL );

  /* Set values. */
  state->fs = fs;
  state->input = input;
  state->rule_set = rule_set;
  state->item_index = item_index;
  state->tree_node = NULL;

  /* Find position where to insert. */
  FOREACH( prev_state, *state_list ) 
  { 
    next_state = (state_t *) prev_state->next;
    if (next_state == NULL || next_state->input > input) 
      break;
  }

  insert_node( state_list, (list_node_t *) state, (list_node_t *) prev_state );
  return state;
}

/*---------------------------------------------------------------------------*/

static tree_node_t *
add_tree_node( value_t result_fs, 
             string_t input, 
             int_t rule_set,
             tree_node_type_t type )
/* Add a tree node for a rule that created a state (RESULT_FS and RULE_SET),
 * where INPUT is yet to be analysed. */
{ 
  tree_node_t **tree_node_p;
  tree_node_t *tree_node;
  
  /* Get a new tree node. */
  tree_node = (tree_node_t *) get_pool_space( tree_pool, 1, NULL ); 
  tree_node->parent = state_info.parent;
  tree_node->first_child = NULL;
  tree_node->sibling = NULL;
  tree_node->type = type;
  tree_node->rule = state_info.rule;
  tree_node->index = tree_node_count;
  tree_node->link_fs = state_info.link_fs;
  tree_node->result_fs = result_fs;
  tree_node->rule_set = rule_set;
  tree_node->input = input;

  /* Link the tree node into the tree structure. */
  tree_node_p = &state_info.parent->first_child;
  while (*tree_node_p != NULL) 
    tree_node_p = &(*tree_node_p)->sibling;
  *tree_node_p = tree_node;

  /* Increment the number of tree nodes. */
  tree_node_count++;

  return tree_node;
}

/*---------------------------------------------------------------------------*/

static void 
add_state( list_t *list, string_t input, value_t fs, int_t rule_set, 
         tree_node_type_t type )
/* Add state, consisting of INPUT, FS and RULE_SET, to LIST.
 * When STATE_INFO.TREE_MODE == BUILD_TREE, also generate a tree node. */
{ 
  value_t new_fs;
  state_t *state;
  
  /* Preserve the feature structure. */
  new_fs = copy_value_to_pool( state_info.analysis->value_pool, fs, NULL );

  /* Create a new state. */
  state = insert_state( state_info.analysis, list, new_fs, input,
                        rule_set, state_info.item_index );
  if (state_info.tree_mode == BUILD_TREE) 
    state->tree_node = add_tree_node( new_fs, input, rule_set, type );
}

/* Callback functions needed by "rules.c" ===================================*/

static void 
add_allo_local( string_t surface, value_t fs )
/* Add a state, consisting of SURFACE and FS, as an end state. */
{ 
  int_t length;

  length = strlen( surface );
  if (length == 0) 
    complain( "Surface is empty." );
  if (strncmp_no_case( state_surface, surface, length ) != 0) 
    complain( "Surface does not match input." );
  add_state( &state_info.analysis->end_states, 
           state_surface + length, fs, -1, FINAL_NODE );
}

/*---------------------------------------------------------------------------*/

static void 
add_end_state_local( value_t fs )
/* Add a state, consisting of FS, as an end state. */
{ 
  value_t value;
  rule_t *rule;

  rule = executed_rule_sys->rules + executed_rule_number;

  /* Combi-rules and end-rules must check for word boundary. */
  if ((rule->type != COMBI_RULE && rule->type != END_RULE)
      || word_may_end_here( state_info.input, rule ))
  { 
    add_state( &state_info.analysis->end_states,
             state_info.input, fs, -1, FINAL_NODE );
  } 
  else if (state_info.tree_mode == BUILD_TREE) 
  { 
    /* Preserve the feature structure. */
    value = copy_value_to_pool( state_info.analysis->value_pool, fs, NULL );
    add_tree_node( value, state_info.input, -1, UNFINAL_NODE );
  }
}

/*---------------------------------------------------------------------------*/

static void 
add_running_state_local( value_t fs, int_t rule_set )
/* Add a running state, consisting of FS and RULE_SET. */
{ 
  add_state( &state_info.analysis->running_states, 
           state_info.input, fs, rule_set, INTER_NODE );
}

/*---------------------------------------------------------------------------*/

static string_t 
get_surface_local( surface_t surface_type )
/* Return surface SURFACE_TYPE for currently executed rule.
 * The result must be freed after use. */
{ 
  string_t state_surf_end;

  switch (surface_type) 
  {
  case STATE_SURFACE:  
    if (link_surface > state_surface && link_surface[-1] == ' ') 
      state_surf_end = link_surface - 1;
    else 
      state_surf_end = link_surface;
    return new_string_readable( state_surface, state_surf_end );
  case LINK_SURFACE:
    return new_string_readable( link_surface, link_surface_end );
  case RESULT_SURFACE:
    return new_string_readable( state_surface, link_surface_end );
  default: 
    return NULL;
  }
}

/* Analysis functions. ======================================================*/

static analysis_t *
new_analysis( void )
/* Create a new analysis structure. */
{ 
  analysis_t *analysis;

  analysis = new_mem( sizeof( analysis_t ) );
  analysis->state_pool = new_pool( sizeof( state_t ) );
  analysis->value_pool = new_pool( sizeof( cell_t ) );
  clear_list( &analysis->running_states );
  clear_list( &analysis->end_states );
  clear_list( &analysis->free_states );
  return analysis;
}

/*---------------------------------------------------------------------------*/

static void 
free_analysis( analysis_t **analysis )
/* Destroy an analysis structure. */
{ 
  if (*analysis != NULL) 
  { 
    free_pool( &(*analysis)->state_pool );
    free_pool( &(*analysis)->value_pool );
    free_mem( analysis );
  }
}

/*---------------------------------------------------------------------------*/

void 
init_analysis( string_t morphology_file, string_t syntax_file )
/* Initialise the analysis module.
 * MORPHOLOGY_FILE and SYNTAX_FILE are the rule files to load. 
 * SYNTAX_FILE may be NULL. */
{ 
  int_t i;

  /* Read rule files. */
  rule_system[ MORPHOLOGY ] = read_rule_sys( morphology_file );
  if (syntax_file != NULL) 
    rule_system[ SYNTAX ] = read_rule_sys( syntax_file );

  /* Init analysis structure. */
  analyses[ MORPHOLOGY ] = new_analysis();
  analyses[ SYNTAX ] = new_analysis();
  tree_pool = new_pool( sizeof( tree_node_t ) );

  /* Set analysis options to start values. */
  for (i = 0; i < ANALYSIS_OPTION_COUNT; i++) 
    options[i] = FALSE;
  options[ MOR_OUT_FILTER_OPTION ] = 
    (rule_system[ MORPHOLOGY ]->output_filter != -1);
  options[ SYN_IN_FILTER_OPTION ] = 
    (rule_system[ SYNTAX ] != NULL 
     && rule_system[ SYNTAX ]->input_filter != -1);
  options[ SYN_OUT_FILTER_OPTION ] = 
    (rule_system[ SYNTAX ] != NULL 
     && rule_system[ SYNTAX ]->output_filter != -1);
}

/*---------------------------------------------------------------------------*/

void 
terminate_analysis( void )
/* Terminate the analysis module. */
{ 
  free_rule_sys( &rule_system[ MORPHOLOGY ] );
  free_rule_sys( &rule_system[ SYNTAX ] );
  free_analysis( &analyses[ MORPHOLOGY ] );
  free_analysis( &analyses[ SYNTAX ] );
  free_pool( &tree_pool );
  clear_cache();
  free_switches();
}

/*---------------------------------------------------------------------------*/

bool_t
analysis_has_results( void )
/* Return TRUE iff the last analysis has created results. */
{ 
  return (analyses[ top_grammar ]->end_states.first != NULL); 
}

/*---------------------------------------------------------------------------*/

value_t
first_analysis_result( void )
/* Return the feature structure of the first analysis result.
 * Return NULL if there are no results. */
{ 
  next_result_state = (state_t *) analyses[ top_grammar ]->end_states.first;
  return next_analysis_result();
}

/*---------------------------------------------------------------------------*/

value_t 
next_analysis_result( void )
/* Return the feature structure of the next analysis result.
 * Return NULL if there are no more results. */
{ 
  value_t result;

  if (next_result_state == NULL) 
    return NULL;
  result = next_result_state->fs;
  next_result_state = (state_t *) next_result_state->next;
  return result;
}

/*---------------------------------------------------------------------------*/

bool_t
analysis_has_nodes( void )
/* Return TRUE iff the last analysis has created tree nodes. */
{ 
  return (root_tree_node != NULL); 
}

/*---------------------------------------------------------------------------*/

analysis_node_t *
get_first_analysis_node( void )
/* Return the first analysis tree node of the last analysis.
 * Return NULL if there is no node. 
 * The node must be freed with "free_analysis_node" after use. */
{
  next_tree_node = root_tree_node;
  return get_next_analysis_node();
}

/*---------------------------------------------------------------------------*/

analysis_node_t *
get_next_analysis_node( void )
/* Return the next analysis tree node of the last analysis.
 * Return NULL if there is no more node. 
 * The node must be freed with "free_analysis_node" after use. */
{ 
  analysis_node_t *node;
  string_t link_surf;
  rule_sys_t *rule_sys;

  rule_sys = rule_system[ top_grammar ];
  if (next_tree_node == NULL) 
    return NULL;
  node = new_mem( sizeof( analysis_node_t ) );
  node->index = next_tree_node->index;
  node->type = next_tree_node->type;

  /* Set parent index. */
  if (next_tree_node->parent == NULL) 
    node->parent_index = -1;
  else 
    node->parent_index = next_tree_node->parent->index;

  /* Set rule name. */
  if (next_tree_node->rule != -1) 
  { 
    node->rule_name 
      = rule_sys->strings + rule_sys->rules[ next_tree_node->rule ].name;
  } 
  else if (next_tree_node->parent == NULL) 
    node->rule_name = "(initial)";
  else 
    node->rule_name = NULL;

  /* Set link surface and feature structure. */
  if (next_tree_node->parent == NULL) 
    link_surf = last_analysis_input; 
  else 
    link_surf = next_non_space( next_tree_node->parent->input );
  if (link_surf < next_tree_node->input) 
    node->link_surf = new_string( link_surf, next_tree_node->input );
  node->link_fs = next_tree_node->link_fs;

  /* Set result surface and feature structure. */
  node->result_surf = new_string( last_analysis_input, next_tree_node->input );
  node->result_fs = next_tree_node->result_fs;

  /* Set rule set. */
  if (next_tree_node->result_fs != NULL) 
    node->rule_set = rule_set_readable( rule_sys, next_tree_node->rule_set );

  /* Update NEXT_TREE_NODE. */
  if (next_tree_node->first_child != NULL) 
    next_tree_node = next_tree_node->first_child;
  else
  {
    while (next_tree_node != NULL && next_tree_node->sibling == NULL) 
      next_tree_node = next_tree_node->parent;
    if (next_tree_node != NULL) 
      next_tree_node = next_tree_node->sibling;
  }
  return node;
}

/*---------------------------------------------------------------------------*/

void 
free_analysis_node( analysis_node_t **node )
/* Free the memory occupied by NODE. */
{ 
  if (*node != NULL) 
  { 
    free_mem( &(*node)->link_surf );
    free_mem( &(*node)->result_surf );
    free_mem( &(*node)->rule_set );
    free_mem( node );
  }
}

/*---------------------------------------------------------------------------*/

static string_t 
get_word_end( string_t input )
/* Return the end of the word that starts at INPUT. */
{
  string_t input_end;

  input_end = input;
  while (*input_end != EOS && *input_end != ' ') 
    input_end++;
  return input_end;
}

/*---------------------------------------------------------------------------*/

static bool_t 
get_from_cache( analysis_t *analysis, string_t input )
/* If first word at INPUT is in analysis cache, 
 * enter its results in ANALYSIS, and return TRUE. Otherwise return FALSE. */
{ 
  string_t input_end;
  value_t result;

  input_end = get_word_end( input );
  if (word_in_cache( input, input_end )) 
  { 
    while (TRUE) 
    { 
      result = next_result_in_cache();
      if (result == NULL) 
      break;
      insert_state( analysis, &analysis->end_states, result, 
                input_end, -1, 0 );
    }
    return TRUE;
  } 
  else 
    return FALSE;
}

/*---------------------------------------------------------------------------*/

static void 
put_into_cache( analysis_t *analysis, string_t input )
/* Store the results in ANALYSIS for the word form that starts at INPUT
 * in the cache. */
{
  value_t *fs_vector;
  int_t i;
  state_t *state;
  string_t input_end;

  input_end = get_word_end( input );

  /* Count feature structures. */
  i = 0;
  FOREACH( state, analysis->end_states ) 
  { 
    /* Only put into cache if all entries will be found in cache. */
    if (state->input != input_end) 
      return;

    i++;
  }

  /* Allocate a new vector which takes the feature structures. */
  fs_vector = new_vector( sizeof( value_t ), i );
  i = 0;
  FOREACH( state, analysis->end_states ) 
  { 
    fs_vector[i] = new_value( state->fs );
    i++;
  }

  enter_in_cache( input, input_end, i, fs_vector );
}

/*---------------------------------------------------------------------------*/

static void 
execute_robust_rule( analysis_t *analysis,
                     rule_sys_t *rule_sys,
                     string_t input )
/* Execute robust_rule in RULE_SYS for the first word in INPUT and enter
 * results into ANALYSIS. */
{
  string_t input_end;
  rule_t *rule;

  input_end = get_word_end( input );

  /* Set debugging information. */
  state_surface = input;
  link_surface = input;
  link_surface_end = input_end;

  /* Setup STATE_INFO. */
  state_info.analysis = analysis;
  state_info.tree_mode = NO_TREE;
  state_info.item_index = 1;
  state_info.input = input_end;

  /* Execute rule. */
  rule = rule_sys->rules + rule_sys->robust_rule;
  top = 0;
  push_string_value( input, input_end );
  if (rule->param_count >= 2) 
    push_string_value( input, NULL );
  execute_rule( rule_sys, rule_sys->robust_rule );
}

/*---------------------------------------------------------------------------*/

static void 
execute_filter_rule( analysis_t *analysis, 
                     rule_sys_t *rule_sys,
                     int_t filter_rule )
/* Execute FILTER_RULE in RULE_SYS for ANALYSIS. */
{
  list_t old_end_states;
  state_t *state;
  string_t input;

  /* Go through all results with the same length. */
  old_end_states = analysis->end_states;
  clear_list( &analysis->end_states );
  while (old_end_states.first != NULL) 
  { 
    state = (state_t *) old_end_states.first;
    input = state->input;

    /* Create a list with the results of all states and remove states. */
    top = 0;
    while (old_end_states.first != NULL 
         && ((state_t *) old_end_states.first)->input == input) 
    { 
      state = remove_first_node( &old_end_states );
      add_node( &analysis->free_states, (list_node_t *) state, LIST_END );
      push_value( state->fs );
    }
    build_list( top );

    link_surface = link_surface_end = input; /* Set debugging information. */

    /* Execute filter rule. */
    state_info.analysis = analysis;
    state_info.tree_mode = NO_TREE;
    state_info.item_index = 0;
    state_info.input = input;
    execute_rule( rule_sys, filter_rule );
  }
}

/*---------------------------------------------------------------------------*/

static void 
execute_pruning_rule( analysis_t *analysis, grammar_t grammar )
/* Execute pruning_rule in GRAMMAR for the running states in ANALYSIS. */
{ 
  int_t result_count, i;
  state_t *state, *next_state;
  string_t input;
  rule_sys_t *rule_sys;
  value_t list;
  symbol_t symbol;

  state = (state_t *) analysis->running_states.first;
  input = state->input;
  rule_sys = rule_system[ grammar ];

  /* Create a list that contains the results. */
  top = 0;
  result_count = 0;
  FOREACH( state, analysis->running_states ) 
  { 
    if (state->input != input) 
      break;
    result_count++;
    push_value( state->fs );
  }
  build_list( result_count );

  link_surface = link_surface_end = input; /* Set debugging information. */
  execute_rule( rule_sys, rule_sys->pruning_rule ); /* Execute pruning rule. */

  /* Interprete the result. */
  list = value_stack[ top - 1 ];
  if (get_value_type( list ) != LIST_SYMBOL) 
    complain( "Pruning rule result must be a list." );
  if (get_list_length( list ) != result_count) 
    complain( "Pruning rule result must have as many elements as argument." );
  state = (state_t *) analysis->running_states.first;
  for (i = 0; i < result_count; i++) 
  { 
    next_state = (state_t *) state->next;
    symbol = value_to_symbol( get_element( list, i + 1 ) );
    if (symbol == NO_SYMBOL) 
    { 
      if (state->tree_node != NULL) 
      state->tree_node->type = PRUNED_NODE;
      remove_node( &analysis->running_states, (list_node_t *) state );
      add_node( &analysis->free_states, (list_node_t *) state, LIST_END );
    } 
    else if (symbol != YES_SYMBOL) 
      complain( "Pruning rule result list may only contain yes/no symbols." );
    state = next_state;
  }
}

/*---------------------------------------------------------------------------*/

static void 
execute_rules( analysis_t *analysis, 
               rule_sys_t *rule_sys, 
               state_t *state, 
               value_t link_fs,
               string_t link_surf,
               string_t link_surf_end, 
               tree_mode_t tree_mode,
               rule_type_t rule_type )
/* Execute the successor rules in RULE_SYS for STATE in ANALYSIS.
 * Consume the segment from LINK_SURF to LINK_SURF_END with feature structure
 * LINK_FS. 
 * Enter a tree node if TREE_MODE == BUILD_TREE. */
{ 
  int_t *rule_p;
  bool_t rules_successful;
  rule_t *rule;

  /* Setup STATE_INFO. */
  state_info.analysis = analysis;
  state_info.tree_mode = tree_mode;
  state_info.link_fs = link_fs;
  state_info.parent = state->tree_node;
  state_info.item_index = state->item_index + 1;
  state_info.input = link_surf_end;

  /* Set debugging information. */
  link_surface = link_surf;
  link_surface_end = link_surf_end;
  if (state->tree_node != NULL) 
    current_state = state->tree_node->index;

  /* Check if we are now executing the rules for a state to be debugged */
  if (debug_state != NULL && state->tree_node != NULL) 
    debug_state( state->tree_node->index, TRUE );

  /* Execute rules in rule set. */
  rules_successful = FALSE;
  for (rule_p = rule_sys->rule_sets + state->rule_set; *rule_p != -1; rule_p++)
  { 
    if (*rule_p == -2) 
    { 
      if (rule_type == END_RULE || rules_successful) 
      break;
    } 
    else 
    { 
      rule = rule_sys->rules + *rule_p;
      if (rule->type == rule_type
        && (rule->type == COMBI_RULE 
            || word_may_end_here( link_surf, rule )))
      { 
      state_info.rule = *rule_p;
      top = 0;
      push_value( state->fs );
      if (rule->type == COMBI_RULE) 
      { 
        push_value( link_fs );
        if (rule->param_count >= 3) 
          push_string_value( link_surf, link_surf_end );
        if (rule->param_count >= 4) 
          push_number_value( state_info.item_index );
      } 
      else /* rule->type == END_RULE */
      { 
        if (rule->param_count >= 2) 
          push_string_value( link_surf, NULL );
      }
      execute_rule( rule_sys, *rule_p );
      rules_successful |= rule_successful;
      }
    }
  }
  current_state = -1;
  
  /* Save the current debug mode if we are leaving a debug state. */
  if (debug_state != NULL && state->tree_node != NULL) 
    debug_state( state->tree_node->index, FALSE );

  /* Enter a tree node for a rule set that did not fire. */
  if (! rules_successful && tree_mode == BUILD_TREE && rule_type != END_RULE) 
  { 
    state_info.rule = -1;
    add_tree_node( NULL, link_surf_end, -1, BREAK_NODE );
  }
}

/*---------------------------------------------------------------------------*/

static void
check_end_states( analysis_t *analysis, 
              grammar_t grammar, 
              analysis_mode_t mode )
/* If MODE == ANALYSE_ALL, 
 * delete all states in ANALYSIS that didn't consume all the input. */
{ 
  state_t *state;

  if (mode != ANALYSE_ALL
      || (grammar == MORPHOLOGY && options[ MOR_INCOMPLETE_OPTION ])
      || (grammar == SYNTAX && options[ SYN_INCOMPLETE_OPTION ]))
  { 
    return;
  }
  while (TRUE) 
  { 
    state = (state_t *) analysis->end_states.first;
    if (state == NULL || *state->input == EOS) 
      break;
    if (state->tree_node != NULL) 
      state->tree_node->type = UNFINAL_NODE;
    remove_first_node( &analysis->end_states );
    add_node( &analysis->free_states, (list_node_t *) state, LIST_END );
  }
}

/*---------------------------------------------------------------------------*/

void 
analyse( grammar_t grammar, 
         string_t input, 
         tree_mode_t tree_mode,
         analysis_mode_t mode )
/* Perform a LAG analysis of INPUT using GRAMMAR (MORPHOLOGY or SYNTAX).
 * An analysis tree will be built if TREE_MODE == BUILD_TREE.
 * The whole input will be analysed if MODE == ANALYSE_ALL. */
{ 
  rule_sys_t *rule_sys;
  state_t *initial_state;
  analysis_t *analysis;
  state_t *state;
  string_t current_input;
  value_t link_fs;
  string_t link_surf_end; /* End of the link's surface. */
  string_t input_behind_space;
  state_t *mor_state; /* Morphology end state. */
  bool_t use_cache;

  if (mode == ANALYSE_ALL) 
  { 
    top_grammar = grammar;
    root_tree_node = NULL;
    state_count = 0;
    last_analysis_input = input;
    recognised_by_robust_rule = recognised_by_combi_rules = FALSE;
  }
  analysis = analyses[ grammar ];
  rule_sys = rule_system[ grammar ];
  if (rule_sys == NULL) 
    complain( "Missing rule system." );

  /* Set callback functions for "execute_rules". */
  add_running_state = add_running_state_local;
  add_end_state = add_end_state_local;
  add_allo = add_allo_local;

  /* Reset the analysis data structures */
  clear_list( &analysis->running_states );
  clear_list( &analysis->end_states );
  clear_list( &analysis->free_states );
  clear_pool( analysis->state_pool );
  clear_pool( analysis->value_pool );

  /* Set debug information. */
  get_surface = get_surface_local;
  state_surface = input;
  current_state = -1;

  /* Try to get analysis result from cache. */
  use_cache = (grammar == MORPHOLOGY 
             && options[ CACHE_OPTION ] 
             && tree_mode == NO_TREE 
             && (mode == ANALYSE_WORD || *get_word_end( input ) == EOS));
  if (use_cache && get_from_cache( analysis, input )) 
    return;

  /* Enter the initial state. */
  initial_state = insert_state( analysis, &analysis->running_states,
                                rule_sys->values + rule_sys->initial_fs,
                                input, rule_sys->initial_rule_set, 0 );
  if (tree_mode == BUILD_TREE) 
  { 
    /* Clear all tree nodes and setup ROOT_TREE_NODE. */
    clear_pool( tree_pool );
    root_tree_node = (tree_node_t *) get_pool_space( tree_pool, 1, NULL );
    root_tree_node->parent = NULL;
    root_tree_node->first_child = NULL;
    root_tree_node->sibling = NULL;
    root_tree_node->type = INTER_NODE;
    root_tree_node->rule = -1;
    root_tree_node->index = 0;
    root_tree_node->link_fs = NULL;
    root_tree_node->result_fs = rule_sys->values + rule_sys->initial_fs;
    root_tree_node->rule_set = rule_sys->initial_rule_set;
    root_tree_node->input = input;
    initial_state->tree_node = root_tree_node;
    tree_node_count = 1;
  }

  /* Analyse while there are running states. */
  while (analysis->running_states.first != NULL) 
  { 
    state = (state_t *) analysis->running_states.first;
    current_input = state->input;
    if (options[ PRUNING_RULE_OPTION ] && current_input > input 
        && rule_sys->pruning_rule != -1) 
    { 
      execute_pruning_rule( analysis, grammar ); 
    }
    if (current_input > input) /* Apply end_rules if any input was parsed. */
    { 
      /* Apply all end_rules to states at CURRENT_INPUT. */
      FOREACH( state, analysis->running_states ) 
      { 
      if (state->input != current_input) 
        break;
        execute_rules( analysis, rule_sys, state, NULL, current_input, 
                       current_input, tree_mode, END_RULE );
      }
    }
    if (*current_input == EOS) 
      break; /* If analysis ate all input, leave. */
    if (grammar == MORPHOLOGY) 
    { 
      /* Look for prefixes of increasing length
       * that match the string at CURRENT_INPUT. */
      search_for_prefix( current_input );
      while (get_next_prefix( &link_surf_end, &link_fs )) 
      { 
      /* Combine that link with all morphological states. */
        FOREACH( state, analysis->running_states ) 
        { 
        if (state->input != current_input) 
          break;
          execute_rules( analysis, rule_sys, state, link_fs, current_input,
                         link_surf_end, tree_mode, COMBI_RULE );
        }
      }
    } 
    else /* GRAMMAR == SYNTAX */ 
    { 
      input_behind_space = next_non_space( current_input );

      /* Call morphological analysis to get feature structures for the link. */
      analyse( MORPHOLOGY, input_behind_space, NO_TREE, ANALYSE_WORD );

      /* Execution of morphology rules has changed STATE_SURFACE. */
      state_surface = input;

      /* Step through all morphological end states. */
      FOREACH( mor_state, analyses[ MORPHOLOGY ]->end_states ) 
      { 
      /* The morphology will be cleared by next morphological analysis,
         * so copy STATE->FS to the syntax pool. */ 
        link_fs = copy_value_to_pool( analysis->value_pool, mor_state->fs, 
                              NULL );

        /* Combine the link with all syntactic states. */
        FOREACH( state, analysis->running_states ) 
      { 
        if (state->input != current_input) 
          break;
        execute_rules( analysis, rule_sys, state, link_fs,
                   input_behind_space, mor_state->input, 
                   tree_mode, COMBI_RULE );
        }
      }
    }

    /* We have combined all analyses at CURRENT_INPUT with all states
     * that were at CURRENT_INPUT, so we can kill these states. */
    while (TRUE) 
    { 
      state = (state_t *) analysis->running_states.first;
      if (state == NULL || state->input != current_input) 
      break;
      remove_first_node( &analysis->running_states );
      if (mode == ANALYSE_ALL) 
      state_count++;
      add_node( &analysis->free_states, (list_node_t *) state, LIST_END );
    }
  } /* End of loop that consumes all running states. */

  check_end_states( analysis, grammar, mode );
  if (mode == ANALYSE_ALL && analysis->end_states.first != NULL) 
    recognised_by_combi_rules = TRUE;
  if (grammar == MORPHOLOGY) 
  { 
    if (analysis->end_states.first == NULL && options[ ROBUST_RULE_OPTION ]) 
    { 
      execute_robust_rule( analysis, rule_sys, input );
      check_end_states( analysis, grammar, mode );
      if (mode == ANALYSE_ALL && analysis->end_states.first != NULL) 
      recognised_by_robust_rule = TRUE;
    }
    if (options[ MOR_OUT_FILTER_OPTION ]) 
    { 
      execute_filter_rule( analysis, rule_system[ MORPHOLOGY ],
                           rule_system[ MORPHOLOGY ]->output_filter );
    }
    if (options[ SYN_IN_FILTER_OPTION ]) 
    { 
      execute_filter_rule( analysis, rule_system[ SYNTAX ],
                           rule_system[ SYNTAX ]->input_filter );
    }
    if (use_cache) 
      put_into_cache( analysis, input );
  } 
  else /* grammar == SYNTAX */
  { 
    if (options[ SYN_OUT_FILTER_OPTION ]) 
    { 
      execute_filter_rule( analysis, rule_system[ SYNTAX ],
                           rule_system[ SYNTAX ]->output_filter );
    }
  }
}

/* End of file. =============================================================*/

Generated by  Doxygen 1.6.0   Back to index