/* Logo Search packages:
 * Sourcecode: malaga version File versions  Download package
 *
 * generation.c */

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* The generation commands for malaga. */

/* Includes. ================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "input.h"
#include "commands.h"
#include "rule_type.h"
#include "rules.h"
#include "lexicon.h"
#include "analysis.h"
#include "debugger.h"
#include "hangul.h"
#include "generation.h"

/* Types. ===================================================================*/

/* One entry in the STATES list of a segment_t. Entries are appended by
 * "add_running_state_local" and released by "pop_segment". */
typedef struct /* A running generation LAG state. */
{
  list_node_t *next; /* Next segment state. */
  value_t fs; /* Feature structure of this state (a private copy made with
               * "new_value", freed with "free_mem"). */
  int_t rule_set; /* Rule set of this state. */
} segment_state_t;

/* One entry of the SEGMENTS list. "push_segment" inserts new entries at the
 * list start, so the most recently added segment is always SEGMENTS.FIRST. */
typedef struct /* A segment of generation output. */
{
  list_node_t *next; /* Next segment. */
  string_t surface; /* Surface of this segment. Only the pointer is stored;
                     * "pop_segment" does not free it. */
  list_t states; /* List of running states after combination. */
} segment_t;

/* One entry in the FS_LIST of an item_t. */
typedef struct /* A feature structure node for an item. */
{
  list_node_t *next; /* Next item_fs. */
  value_t value; /* Feature structure (a copy made with "new_value", freed in
                  * "free_item_fs_list"). */
} item_fs_t;

/* One entry of the ITEMS list, created by "generate_command" for each word
 * given on the command line. */
typedef struct /* Segment of which a word or sentence may consist. */
{
  list_node_t *next; /* Next item. */
  string_t surf; /* Surface of this item (freed in "free_items"). */
  list_t fs_list; /* Feature structures for this item. */
} item_t;

/* Variables. ===============================================================*/

static grammar_t grammar; /* Grammar used for generation. */
static int_t result_count; /* Index of the current word form. */

/* Number of segments currently pushed; incremented by "push_segment" and
 * decremented by "pop_segment". */
static int_t segment_count;
static int_t max_segment_count; /* User limit on segments in a word form. */
/* Segments of the output under construction, most recent segment first. */
static list_t segments;
/* Type of the rule that is about to be executed; set before each call of
 * "execute_rule" and read by "get_surface_local". */
static rule_type_t current_rule_type;

/* Items (of type item_t) that generated output may be composed of. */
static list_t items;

/* Functions. ===============================================================*/

static void 
combine_surface( text_t *text, segment_t *segment )
/* Append to TEXT the surfaces of SEGMENT and of every segment linked after
 * it. The segments that follow SEGMENT in the list are emitted first, so the
 * surfaces appear in the reverse of list order. When GRAMMAR is SYNTAX, a
 * single blank separates adjacent surfaces. Does nothing if SEGMENT is
 * NULL. */
{ 
  segment_t *rest;

  if (segment == NULL) 
    return;

  /* Emit everything behind SEGMENT before SEGMENT itself. */
  rest = (segment_t *) segment->next;
  combine_surface( text, rest );

  /* A separator is only needed if something has been emitted already. */
  if (rest != NULL && grammar == SYNTAX) 
    add_char_to_text( text, ' ' );
  add_to_text( text, segment->surface );
}

/*---------------------------------------------------------------------------*/

static string_t 
get_surface_local( surface_t surface_type )
/* Build the surface SURFACE_TYPE for the rule that is currently executed:
 * RESULT_SURFACE: the combined surface of all segments.
 * STATE_SURFACE: for a combi rule, the combined surface of all segments
 *   except the most recent one; for any other rule, of all segments.
 * LINK_SURFACE: for a combi rule, the surface of the most recent segment;
 *   for any other rule, nothing is added to the text.
 * Any other SURFACE_TYPE is reported with "complain".
 * The result must be freed after use. */
{
  text_t *buffer;
  string_t result;

  buffer = new_text();
  if (surface_type == RESULT_SURFACE) 
    combine_surface( buffer, (segment_t *) segments.first );
  else if (surface_type == STATE_SURFACE) 
  {
    /* For a combi rule the most recent segment is the link, so it is not
     * part of the state surface. */
    if (current_rule_type == COMBI_RULE) 
      combine_surface( buffer, (segment_t *) segments.first->next );
    else 
      combine_surface( buffer, (segment_t *) segments.first );
  }
  else if (surface_type == LINK_SURFACE) 
  {
    if (current_rule_type == COMBI_RULE) 
      add_to_text( buffer, ((segment_t *) segments.first)->surface );
  }
  else 
    complain( "Internal error." );

  result = new_string_readable( buffer->buffer, NULL );
  free_text( &buffer );
  return result;
}

/*---------------------------------------------------------------------------*/

static void 
add_end_state_local( value_t fs )
/* Report an end state: increment RESULT_COUNT and print it together with the
 * surface of the complete result. FS is part of the callback signature but
 * is not used here; only the surface is printed. */
{
  string_t result_surface;

  result_surface = get_surface_local( RESULT_SURFACE );
  result_count++;

  /* The surface is converted with "decode_hangul" before it is printed. */
  decode_hangul( &result_surface );
  printf( "%d: %s\n", result_count, result_surface );

  free_mem( &result_surface );
}

/*---------------------------------------------------------------------------*/

static void 
add_running_state_local( value_t fs, int_t rule_set )
/* Append a running state to the state list of the most recent segment.
 * The state stores a copy of FS together with RULE_SET. */
{ 
  segment_state_t *new_state;

  new_state = new_node( &((segment_t *) segments.first)->states, 
                        sizeof( segment_state_t ), LIST_END );
  new_state->fs = new_value( fs );
  new_state->rule_set = rule_set;
}

/*---------------------------------------------------------------------------*/

static void 
push_segment( string_t surface )
/* Put a new segment with surface SURFACE in front of SEGMENTS and count it.
 * SURFACE is not copied; the new segment starts with an empty state list. */
{ 
  segment_t *pushed;

  pushed = new_node( &segments, sizeof( segment_t ), LIST_START );
  pushed->surface = surface;
  clear_list( &pushed->states );

  segment_count++;
}

/*---------------------------------------------------------------------------*/

static void 
pop_segment( void )
/* Pop the topmost segment. */
{
  segment_state_t *state;
  segment_t *segment;

  segment = (segment_t *) segments.first;
  segment_count--;
  FOREACH_FREE( state, segment->states ) 
    free_mem( &state->fs );
  free_first_node( &segments );
}

/*---------------------------------------------------------------------------*/

static void 
execute_rules( value_t state_fs, int_t rule_set, item_t *item )
/* Combine the LAG state (STATE_FS, RULE_SET) with ITEM: for each feature
 * structure of ITEM, execute every combi rule found in RULE_SET.
 * Each rule gets STATE_FS and the item's feature structure; rules with at
 * least three parameters also get the item's surface, and rules with at
 * least four parameters also get the current segment count.
 * Resulting states are delivered through the callbacks installed by
 * "generate" (presumably invoked from within "execute_rule"). */
{ 
  rule_sys_t *rule_sys;
  item_fs_t *link_fs;
  int_t *rule_p;
  rule_t *rule;

  rule_sys = rule_system[ grammar ];
  FOREACH( link_fs, item->fs_list ) 
  { 
    /* The rule set is a list of rule indices ended by a negative entry. */
    for (rule_p = rule_sys->rule_sets + rule_set; *rule_p >= 0; rule_p++)
    { 
      rule = rule_sys->rules + *rule_p;
      if (rule->type != COMBI_RULE) 
        continue;

      current_rule_type = COMBI_RULE;
      top = 0;
      push_value( state_fs );
      push_value( link_fs->value );
      if (rule->param_count >= 3) 
        push_string_value( item->surf, NULL );
      if (rule->param_count >= 4) 
        push_number_value( segment_count );
      execute_rule( rule_sys, *rule_p );
    }
  }
}

/*---------------------------------------------------------------------------*/

static void 
generate_local( void )
/* Generate all word forms or sentences (according to GRAMMAR)
 * that are successors of STATES and print them immediately. */
{ 
  item_t *item;
  segment_t *segment;
  segment_state_t *state;
  int_t *rule_p;
  rule_sys_t *rule_sys;
  rule_t *rule;

  rule_sys = rule_system[ grammar ];
  segment = (segment_t *) segments.first;
  check_user_break();

  /* Execute end rules first. */
  FOREACH( state, segment->states ) 
  { 
    for (rule_p = rule_sys->rule_sets + state->rule_set; 
       *rule_p >= 0; 
       rule_p++) 
    { 
      rule = rule_sys->rules + *rule_p;
      if (rule->type == END_RULE) 
      { 
      current_rule_type = END_RULE;
        top = 0;
        push_value( state->fs );
      if (rule->param_count >= 2) 
        push_string_value( "", NULL );
        execute_rule( rule_sys, *rule_p );
      }
    }
  }

  /* Don't execute combi_rules if too many segments are to be combined. */
  if (segment_count >= max_segment_count) 
    return;

  /* Execute rules with all ITEMS. */
  FOREACH( item, items ) 
  { 
    push_segment( item->surf );
    FOREACH( state, segment->states ) 
      execute_rules( state->fs, state->rule_set, item );
    if (((segment_t *) segments.first)->states.first != NULL) 
      generate_local();
    pop_segment();
  }
}

/*---------------------------------------------------------------------------*/

static void 
generate( void )
/* Generate a sentence or a word form */
{ 
  item_t *item;
  rule_sys_t *rule_sys;
  
  rule_sys = rule_system[ grammar ];
  while (segments.first != NULL) 
    pop_segment();
  segment_count = result_count = 0;
  add_running_state = add_running_state_local;
  add_end_state = add_end_state_local;
  get_surface = get_surface_local;
  set_debug_mode( RUN_MODE, NULL );

  /* Execute all rules that add the first item to the empty start. */
  FOREACH( item, items ) 
  { 
    push_segment( item->surf );
    execute_rules( rule_sys->values + rule_sys->initial_fs,
                   rule_sys->initial_rule_set, item );
    if (((segment_t *) segments.first)->states.first != NULL) 
      generate_local();
    pop_segment();
  }
}

/*---------------------------------------------------------------------------*/

static void 
free_item_fs_list( item_t *item )
/* Release every feature structure stored in ITEM's FS_LIST, together with
 * the list nodes. ITEM itself and its surface are kept. */
{
  item_fs_t *fs_node;

  FOREACH_FREE( fs_node, item->fs_list ) 
  {
    free_mem( &fs_node->value );
  }
}

/*---------------------------------------------------------------------------*/

static void 
free_items( void )
/* Free the item list. */
{ 
  item_t *item;

  FOREACH_FREE( item, items ) 
  {
    free_item_fs_list( item ); 
    free_mem( &item->surf ); 
  }
}

/*---------------------------------------------------------------------------*/

static void 
generate_command( string_t arguments )
/* Generate sentences or words from items, depending on GRAMMAR.
 * ARGUMENTS starts with the maximum number of segments, optionally followed
 * by the items to use. If no items are given, the items of the previous
 * generation command are used again. */
{
  item_t *item;
  item_fs_t *fs_node;
  value_t fs;
  string_t rest;
    
  assert_not_in_debug_mode();
  if (rule_system[ grammar ] == NULL) 
    complain( "%s rule file not loaded.", 
              grammar == SYNTAX ? "Syntax": "Morphology" );

  max_segment_count = parse_int( &arguments );
  if (max_segment_count < 1) 
    complain( "Must generate one segment at least." );

  if (*arguments != EOS) 
  { 
    /* Replace the old items by the words given on the command line. */
    free_items();
    do 
    { 
      item = new_node( &items, sizeof( item_t ), LIST_END );
      item->surf = parse_word( &arguments );
      encode_hangul( &item->surf );
      clear_list( &item->fs_list );
    } while (*arguments != EOS);
  }

  /* Create feature structures for items. */
  FOREACH( item, items ) 
  { 
    free_item_fs_list( item );
    if (grammar != MORPHOLOGY) 
    { 
      /* Syntax generation: each morphological analysis result of the word
       * form becomes one feature structure of the item. */
      analyse( MORPHOLOGY, item->surf, NO_TREE, ANALYSE_ALL );
      fs = first_analysis_result();
      while (fs != NULL) 
      { 
        fs_node = new_node( &item->fs_list, sizeof( item_fs_t ), LIST_END );
        fs_node->value = new_value( fs );
        fs = next_analysis_result();
      }
    } 
    else 
    { 
      /* Morphology generation: look the allomorph up via prefix search. */
      search_for_prefix( item->surf );
      while (get_next_prefix( &rest, &fs )) 
      { 
        /* Only use prefixes that cover the whole string. */
        if (*rest != EOS) 
          continue;
        fs_node = new_node( &item->fs_list, sizeof( item_fs_t ), LIST_END );
        fs_node->value = new_value( fs );
      }
    }
  }

  generate();

  /* The feature structures are rebuilt on every call, so drop them now. */
  FOREACH( item, items ) 
  {
    free_item_fs_list( item );
  }
}

/*---------------------------------------------------------------------------*/

static void 
do_mg( string_t arguments )
/* Handler of the "mg" command: select the morphology grammar and run the
 * generation command on ARGUMENTS. */
{
  grammar = MORPHOLOGY;

  generate_command( arguments );
}

/* Command entry for "mg": the command name, its handler "do_mg" and the
 * help text (field meanings presumed from the values; command_t is declared
 * elsewhere). */
command_t mg_command = 
{ 
  "mg", do_mg,
  "Generate all word forms that consist only of the given allomorphs.\n"
  "Usage:\n"
  "  mg MAX_ALLO_COUNT ALLOMORPHS -- Use ALLOMORPHS\n"
  "  mg MAX_ALLO_COUNT -- Use allomorphs of last generation command.\n"
  "\"mg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_sg( string_t arguments )
/* Handler of the "sg" command: select the syntax grammar and run the
 * generation command on ARGUMENTS. */
{
  grammar = SYNTAX;

  generate_command( arguments );
}

/* Command entry for "sg": the command name, its handler "do_sg" and the
 * help text (field meanings presumed from the values; command_t is declared
 * elsewhere). */
command_t sg_command = 
{ 
  "sg", do_sg,
  "Generate all sentences that consist only of the given word forms.\n"
  "Usage:\n"
  "  sg MAX_WORD_COUNT WORDS -- use WORDS\n"
  "  sg MAX_WORD_COUNT -- Use words of last generation command.\n"
  "\"sg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

void 
init_generation( void )
/* Initialise this module. Nothing needs to be set up at present. */
{
}

/*---------------------------------------------------------------------------*/

void 
terminate_generation( void )
/* Terminate this module: release all remaining segments and all items. */
{ 
  for (;;) 
  {
    if (segments.first == NULL) 
      break;
    pop_segment();
  }

  free_items();
}

/* End of file. =============================================================*/

/* Generated by  Doxygen 1.6.0   Back to index */