/* Logo Search packages:
 * Sourcecode: malaga version File versions  Download package
 *
 * mallex.c */

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This program takes a lexicon file and compiles it to binary format. 
 * It also includes an interactive allomorph rules debugger. */

/* Includes. ================================================================*/

#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#include <time.h>
#include <locale.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "symbols.h"
#include "scanner.h"
#include "rule_type.h"
#include "rules.h"
#include "files.h"
#include "lex_compiler.h"
#include "input.h"
#include "commands.h"
#include "options.h"
#include "breakpoints.h"
#include "debugger.h"
#include "display.h"
#include "transmit.h"
#include "patterns.h"
#include "hangul.h"

/* Variables. ===============================================================*/

static string_t allo_format; /* Format of allomorph output. */
/* Set to a default in "main", changed by the "allo-format" option and
 * passed to "print_lex_tree" when allomorphs are written in readable form. */

static bool_t lex_tree_to_output = FALSE;
/* Indicates whether the lexicon tree can be printed. */
/* Cleared before each allomorph generation and set again once the generation
 * call has returned; "do_result" refuses to print while it is FALSE. */

static string_t lexicon_file, rule_file, symbol_file, project_file;
static string_t prelex_file;
/* File names of the grammar. They are filled in from the command line in
 * "main" and, for names that are still unset, from the project file in
 * "read_project_file". */

static string_t base_fs_string; /* Last base feature structure. */
/* Copy of the last non-empty argument given to "generate_allomorphs";
 * reused when that function is called with an empty argument. */

/* Functions. ===============================================================*/

static void 
display_where( void )
/* Print where rule execution currently is: source file, line and rule name
 * of the instruction at PC in EXECUTED_RULE_SYS, as reported by
 * "source_of_instr". 
 * If LEX_ENTRY_FILE_NAME is set, the lexicon entry position is appended.
 * If IN_EMACS_MALAGA_MODE is set, an additional "SHOW" line with the
 * unshortened file name and the line number is printed. */
{ 
  string_t rule_name, file_name;
  int_t line_number;

  source_of_instr( executed_rule_sys, pc, 
                   &line_number, &file_name, &rule_name );

  /* Rule position; the file name is shortened by "name_in_path". */
  printf( "At \"%s\", line %d, rule \"%s\".", 
          name_in_path( file_name ), line_number, rule_name );

  /* Optional position of the lexicon entry. */
  if (lex_entry_file_name != NULL) 
    printf( " (\"%s\", line %d)", 
            name_in_path( lex_entry_file_name ), lex_entry_line_number );
  putchar( '\n' );

  if (in_emacs_malaga_mode) 
    printf( "SHOW \"%s\":%d:0\n", file_name, line_number );
}

/*---------------------------------------------------------------------------*/

static void 
display_result( void )
/* Output the lexicon tree of the last allomorph generation.
 * If USE_DISPLAY is off, the tree is printed on stdout with a NULL format.
 * Otherwise the display process is started and the tree is written to
 * DISPLAY_STREAM, framed by an "allomorph" and an "end" line. */
{ 
  if (! use_display) 
  { 
    print_lex_tree( stdout, NULL );
    return;
  }

  start_display_process();
  fputs( "allomorph\n", display_stream );
  print_lex_tree( display_stream, "%n %s {%f}" );
  fputs( "end\n", display_stream );
  fflush( display_stream );
}

/*---------------------------------------------------------------------------*/

static void 
do_result( string_t arguments )
/* Command "result": display the lexicon tree produced by the last allomorph
 * generation. ARGUMENTS is handed to "parse_end" before anything else.
 * Complains if LEX_TREE_TO_OUTPUT is not set. */
{ 
  parse_end( &arguments );

  /* There is nothing to show unless a generation has completed. */
  if (lex_tree_to_output == FALSE) 
    complain( "No previous allomorph generation." );

  display_result();
}

/* Command table entry for "result" (short form "res"). */
static command_t result_command = 
{ 
  "result res", do_result,
  "Show result of last allomorph generation.\n"
  "Usage: result\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_read_constants( string_t arguments )
/* Command "read-constants": read the constants of the lexicon file whose
 * name is given in ARGUMENTS.
 * ARGUMENTS must consist of exactly one path; it is parsed by
 * "parse_absolute_path" and the rest of the line is checked by "parse_end".
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  string_t lexicon_name;

  assert_not_in_debug_mode();

  /* The path string is owned by this function. Release it in a FINALLY
   * block (same scheme as in "do_debug_ga_file") so that it is not leaked 
   * when "parse_end" or "read_lex_constants" raises an error. */
  lexicon_name = NULL;
  TRY 
  { 
    lexicon_name = parse_absolute_path( &arguments, NULL );
    parse_end( &arguments );
    read_lex_constants( lexicon_name );
  }
  FINALLY 
    free_mem( &lexicon_name );
  END_TRY;
}

/* Command table entry for "read-constants". */
static command_t read_constants_command = 
{ 
  "read-constants", do_read_constants,
  "Read the constants from the definitions in a lexicon file.\n"
  "Usage: read-constants LEXICON_FILE\n"
  "\"read-constants\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_ga_file( string_t arguments )
/* Generate allomorphs of a base lexicon.
 * Write the allomorphs readably into file. 
 * ARGUMENTS holds the lexicon file name, optionally followed by the name of
 * the output file. Without a second name, the output file is the lexicon
 * file name with ".out" appended.
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  string_t lexicon_name, output_name;
  FILE *output_stream;

  assert_not_in_debug_mode();

  /* Start with NULL everywhere so the FINALLY block can release whatever
   * has been acquired up to the point of an error. */
  output_stream = NULL;
  output_name = lexicon_name = NULL;
  TRY 
  { 
    lexicon_name = parse_absolute_path( &arguments, NULL );
    if (*arguments != EOS) 
      output_name = parse_absolute_path( &arguments, NULL );
    else 
      output_name = concat_strings( lexicon_name, ".out", NULL );
    parse_end( &arguments );

    set_debug_mode( RUN_MODE, NULL );
    /* The flag is FALSE while the generation call is running and only set
     * again after it has returned; "do_result" checks it. */
    lex_tree_to_output = FALSE;
    generate_allos_for_file( lexicon_name, NULL, TRUE );
    lex_tree_to_output = TRUE;

    /* Write the tree in ALLO_FORMAT to the output file; the statistics go
     * to stdout. */
    output_stream = open_stream( output_name, "w" );
    print_lex_tree( output_stream, allo_format );
    print_lex_statistics( stdout );
    close_stream( &output_stream, output_name );
  } 
  FINALLY 
  { 
    /* NOTE(review): on the success path the stream has been closed above
     * already; this presumably relies on "close_stream" resetting the
     * pointer and accepting a NULL stream - confirm in "files.h". */
    close_stream( &output_stream, NULL );
    free_mem( &output_name );
    free_mem( &lexicon_name );
  }
  END_TRY;
}

/* Command table entry for "ga-file" (short form "gaf"). */
static command_t ga_file_command = 
{ 
  "ga-file gaf", do_ga_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Usage: ga-file LEXICON_FILE [ALLO_FILE]\n"
  "The results are written to \"ALLO_FILE\".\n"
  "If ALLO_FILE is missing, they are written to \"LEXICON_FILE.out\".\n"
  "\"ga-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_debug_ga_file( string_t arguments )
/* Generate allomorphs of the base lexicon with name in ARGUMENTS.
 * Execute rules in debug mode. 
 * ARGUMENTS must consist of exactly one path.
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  string_t lexicon_name;

  assert_not_in_debug_mode();
  /* NULL first, so the FINALLY block is safe if path parsing fails. */
  lexicon_name = NULL;
  TRY 
  { 
    lexicon_name = parse_absolute_path( &arguments, NULL );
    parse_end( &arguments );
    /* Unlike "do_ga_file", this selects WALK_MODE on the allomorph rule
     * system before generating. */
    set_debug_mode( WALK_MODE, allo_rule_sys );
    /* The flag is FALSE while the generation call is running and only set
     * again after it has returned; "do_result" checks it. */
    lex_tree_to_output = FALSE;
    generate_allos_for_file( lexicon_name, NULL, TRUE );
    lex_tree_to_output = TRUE;
  }
  FINALLY 
    free_mem( &lexicon_name );
  END_TRY;
}

/* Command table entry for "debug-ga-file" (short form "dgaf"). */
static command_t debug_ga_file_command = 
{ 
  "debug-ga-file dgaf", do_debug_ga_file,
  "Generate allomorphs from the entries in a lexicon file.\n"
  "Execute the rules in debug mode.\n"
  "Usage: debug-ga-file LEXICON_FILE\n"
  "\"debug-ga-file\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
generate_allomorphs_for_line( string_t arguments )
/* Generate allomorphs for ARGUMENTS, which should consist
 * of a file name and a line number. 
 * The file name is parsed by "parse_absolute_path", the line number by
 * "parse_int"; the rest of the line is checked by "parse_end".
 * LEX_TREE_TO_OUTPUT is FALSE while the generation call is running and is
 * only set again after it has returned.
 * The debug mode must have been selected by the caller. */
{ 
  string_t lexicon_name;
  int_t line;

  /* The path string is owned by this function. Release it in a FINALLY
   * block (same scheme as in "do_debug_ga_file") so that it is not leaked 
   * when argument parsing or the generation itself raises an error. */
  lexicon_name = NULL;
  TRY 
  { 
    lexicon_name = parse_absolute_path( &arguments, NULL );
    line = parse_int( &arguments );
    parse_end( &arguments );
    lex_tree_to_output = FALSE;
    generate_allos_for_line( lexicon_name, line );
    lex_tree_to_output = TRUE;
  }
  FINALLY 
    free_mem( &lexicon_name );
  END_TRY;
}

/*---------------------------------------------------------------------------*/

static void 
do_ga_line( string_t arguments )
/* Generate allomorphs for ARGUMENTS, which should consist
 * of a file name and a line number. 
 * Selects RUN_MODE, generates via "generate_allomorphs_for_line" and then
 * shows the result with "display_result".
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  assert_not_in_debug_mode();
  set_debug_mode( RUN_MODE, NULL );
  generate_allomorphs_for_line( arguments );
  display_result();
}

/* Command table entry for "ga-line" (short form "gal"). */
static command_t ga_line_command = 
{ 
  "ga-line gal", do_ga_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Usage: ga-line FILE LINE\n"
  "The first lexicon entry at or behind LINE in FILE is read in.\n"
  "\"ga-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_debug_ga_line( string_t arguments )
/* Generate an allomorph for ARGUMENTS, which should consist
 * of a file name and a line number, in debugger mode. 
 * Selects WALK_MODE on the allomorph rule system and generates via
 * "generate_allomorphs_for_line". Unlike "do_ga_line", it does not call
 * "display_result" afterwards.
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  assert_not_in_debug_mode();
  set_debug_mode( WALK_MODE, allo_rule_sys );
  generate_allomorphs_for_line( arguments );
}

/* Command table entry for "debug-ga-line" (short form "dgal").
 * The last help line names the command as it is registered here. */
static command_t debug_ga_line_command = 
{ 
  "debug-ga-line dgal", do_debug_ga_line,
  "Generate allomorphs from a single entry in a file.\n"
  "Execute allomorph rules in debug mode.\n"
  "Usage: debug-ga-line FILE LINE\n"
  "The first lexicon entry at or behind LINE in FILE is read in.\n"
  "Allomorph rule execution stops at the first statement.\n"
  "\"debug-ga-line\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
generate_allomorphs( string_t arguments )
/* Generate allomorphs for the lexicon entry given as text in ARGUMENTS.
 * A non-empty ARGUMENTS is copied to BASE_FS_STRING, replacing the previous
 * copy. An empty ARGUMENTS reuses BASE_FS_STRING and complains if there is
 * none yet.
 * LEX_TREE_TO_OUTPUT is FALSE while the generation call is running and is
 * only set again after it has returned. */
{ 
  if (*arguments != EOS) 
  { 
    /* Remember the new argument for later re-generation. */
    free_mem( &base_fs_string );
    base_fs_string = new_string( arguments, NULL );
  } 
  else if (base_fs_string == NULL) 
    complain( "No previous base feature structure." );

  lex_tree_to_output = FALSE;
  generate_allos_for_string( base_fs_string );
  lex_tree_to_output = TRUE;
}

/*---------------------------------------------------------------------------*/

static void 
do_ga( string_t arguments )
/* Generate allomorphs for ARGUMENTS. 
 * Selects RUN_MODE, generates via "generate_allomorphs" and then shows the
 * result with "display_result".
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{ 
  assert_not_in_debug_mode();
  set_debug_mode( RUN_MODE, NULL );
  generate_allomorphs( arguments );
  display_result();
}

/* Command table entry for "ga". */
static command_t ga_command = 
{ 
  "ga", do_ga,
  "Generate allomorphs from a feature structure argument.\n"
  "Usage:\n"
  "  ga FS -- Generate allomorphs for feature structure FS.\n"
  "  ga -- Re-generate allomorphs for the last argument.\n"
  "The allomorphs are printed on screen.\n"
  "\"ga\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_debug_ga( string_t arguments )
/* Generate allomorphs for ARGUMENTS.
 * Execute allomorph rules in debug mode. 
 * Selects WALK_MODE on the allomorph rule system and generates via
 * "generate_allomorphs". Unlike "do_ga", it does not call "display_result"
 * afterwards.
 * Must not be called in debug mode ("assert_not_in_debug_mode"). */
{
  assert_not_in_debug_mode();
  set_debug_mode( WALK_MODE, allo_rule_sys );
  generate_allomorphs( arguments );
}

/* Command table entry for "debug-ga" (other names: "dga", "ga-debug",
 * "gad"). */
static command_t debug_ga_command = 
{ 
  "debug-ga dga ga-debug gad", do_debug_ga,
  "Generate allomorphs from the feature structure argument. "
  "Execute allomorph rules in debug mode.\n"
  "Usage:\n"
  "  debug-ga FS -- Generate allomorphs for feature structure FS.\n"
  "  debug-ga -- Re-generate allomorphs for the last argument.\n"
  "Rule execution stops at the first statement.\n"
  "The allomorphs are printed on screen.\n"
  "\"debug-ga\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

static void 
do_allo_format_option( string_t arguments ) 
/* Option "allo-format": show or change the allomorph output format.
 * With empty ARGUMENTS, the current ALLO_FORMAT is printed in the form
 * produced by "new_string_readable". Otherwise the first word of ARGUMENTS
 * replaces ALLO_FORMAT. */
{ 
  string_t text;

  if (*arguments != EOS) 
  { 
    /* Parse the new value first, then drop the old one. */
    text = parse_word( &arguments );
    free_mem( &allo_format );
    allo_format = text;
    return;
  }

  /* No argument: just report the current setting. */
  text = new_string_readable( allo_format, NULL );
  printf( "allo-format: %s\n", text );
  free_mem( &text );
}

/* Option table entry for "allo-format". */
static command_t allo_format_option = 
{ 
  "allo-format", do_allo_format_option,
  "Describe the format in which generated allomorphs will be printed.\n"
  "Usage: allo-format STRING\n"
  "STRING may contain the following special sequences:\n"
  "  %f -- Allomorph feature structure.\n"
  "  %n -- Allomorph number.\n"
  "  %s -- Allomorph surface.\n"
};

/* Commands. ================================================================*/

/* NULL-terminated table of the options of mallex. 
 * "main" installs it in OPTIONS. Only "allo_format_option" is defined in
 * this file; the other entries come from the included modules. */
static command_t *mallex_options[] = 
{ 
  &alias_option, &allo_format_option, &auto_variables_option, 
  &display_line_option, &hidden_option, &sort_records_option, &switch_option, 
  &transmit_line_option, &use_display_option, &use_ksc_option,
  NULL
};

/* NULL-terminated table of the commands of interactive mallex. 
 * "main" passes it to "init_debugger" and to "command_loop". It combines
 * the allomorph generation commands defined in this file with commands
 * from the included modules. */
static command_t *mallex_commands[] = 
{ 
  &backtrace_command, &break_command, &continue_command, &debug_ga_command, 
  &debug_ga_file_command, &debug_ga_line_command, &delete_command, 
  &down_command, &finish_command, &frame_command, &ga_command, 
  &ga_file_command, &ga_line_command, &get_command, &help_command, 
  &list_command, &next_command, &print_command, &quit_command, 
  &read_constants_command, &result_command, &run_command, &set_command, 
  &step_command, &transmit_command, &up_command, &variables_command, 
  &walk_command, &where_command,
  NULL
};

/*---------------------------------------------------------------------------*/

static void 
read_project_file( string_t file_name )
/* Read the project file FILE_NAME. 
 * Each line is handed to "cut_comment" and, if something is left, its first
 * word is taken as a keyword (compared without regard to case):
 *   "sym:", "lex:", "all:", "prelex:" -- name of a grammar file,
 *   "include:" -- read another project file (recursive call),
 *   "char-set:" or "char_set:" -- value for CHAR_SET.
 * Lines with any other keyword are skipped without complaint.
 * A file name is only taken if the corresponding variable is still NULL, so
 * names that are already set (e.g. by "main" from the command line) win. */
{ 
  FILE *project_stream;
  string_t include_file;
  string_t project_line, project_line_p, argument, extension;
  string_t *name_p;
  volatile bool_t binary = FALSE;
  volatile int_t line_count;
  /* Static, i.e. shared by all recursion levels: it is reset on entry of
   * each call and set once a position has been added to ERROR_TEXT, so an
   * enclosing call does not add its own position for the same error. */
  static bool_t err_pos_printed;

  err_pos_printed = FALSE;
  project_stream = open_stream( file_name, "r" );
  line_count = 0;
  while (TRUE) 
  { 
    /* "read_line" returning NULL ends the loop. */
    project_line = read_line( project_stream );
    if (project_line == NULL) 
      break;
    line_count++;
    cut_comment( project_line );
    project_line_p = project_line;

    if (*project_line_p != EOS) 
    { 
      argument = NULL;
      TRY
      {
      argument = parse_word( &project_line_p );
      /* NAME_P stays NULL for keywords that do not name a grammar file. */
      extension = NULL;
      name_p = NULL;
      if (strcmp_no_case( argument, "sym:" ) == 0) 
      { 
        name_p = &symbol_file;
        extension = "sym";
        binary = TRUE;
      } 
      else if (strcmp_no_case( argument, "lex:" ) == 0) 
      {
        name_p = &lexicon_file;
        extension = "lex";
        binary = FALSE;
      }
      else if (strcmp_no_case( argument, "all:" ) == 0) 
      { 
        name_p = &rule_file;
        extension = "all";
        binary = TRUE;
      } 
      else if (strcmp_no_case( argument, "prelex:" ) == 0)
      {
        /* Unlike the other file keywords, a prelex name that is already
         * set is reported as an error instead of being kept silently. */
        if (prelex_file != NULL) 
          complain( "Prelex file already defined." );
        name_p = &prelex_file;
        extension = "prelex";
        binary = TRUE;
      }
      else if (strcmp_no_case( argument, "include:" ) == 0) 
      { 
        /* FILE_NAME is passed along when the path is parsed - presumably
         * so that relative paths refer to this project file; confirm in
         * "parse_absolute_path". */
        include_file = parse_absolute_path( &project_line_p, file_name );
        parse_end( &project_line_p );
        read_project_file( include_file );
        free_mem( &include_file );
      }
      else if (strcmp_no_case( argument, "char-set:" ) == 0
             || strcmp_no_case( argument, "char_set:" ) == 0)
      {
        if (char_set != NULL) 
          complain( "Char set already defined." );
        char_set = parse_word( &project_line_p );
        parse_end( &project_line_p );
      }
      free_mem( &argument );

      /* Take the file name only for a file keyword whose variable is still
       * unset and only if the line has something after the keyword. */
      if (name_p != NULL && *name_p == NULL && *project_line_p != EOS) 
      { 
        argument = parse_absolute_path( &project_line_p, file_name );
        if (! has_extension( argument, extension ))
        {
          complain( "\"%s\" should have extension \"%s\".", 
                  name_in_path( argument ), extension );
        }
        /* BINARY selects which of the two name setters is used. */
        if (binary) 
          set_binary_file_name( name_p, argument );
        else 
          set_file_name( name_p, argument );
        free_mem( &argument );
      }
      }
      IF_ERROR
      {
      /* Add file name and line number to the error text, once per error.
       * NOTE(review): the error is presumably passed on by END_TRY, since
       * there is no RESUME here - confirm in "basic.h". */
      if (! err_pos_printed)
      {
        print_text( error_text, " (\"%s\", line %d)",
                  name_in_path( file_name ), line_count );
        err_pos_printed = TRUE;
      }
      }
      END_TRY;
    }
    free_mem( &project_line );
  }
  close_stream( &project_stream, file_name );
}

/*---------------------------------------------------------------------------*/

int 
main( int argc, char *argv[] )
/* The main function of "mallex". */
{ 
  volatile enum {INTERACTIVE_MODE, BINARY_MODE, TEXT_MODE, 
             PRELEX_MODE} mallex_mode;
  int_t i;
  string_t malagarc_path, s;
  rule_sys_name_t rule_systems[1]; /* Rule system for debugger. */
  string_t object_file = NULL; /* Object file for binary and prelex mode. */
    
  mallex_mode = INTERACTIVE_MODE;
  init_basic( "mallex" );
  init_input();

  /* Parse arguments. */
  if (argc == 2) 
  { 
    if (strcmp_no_case( argv[1], "--version" ) == 0
      || strcmp_no_case( argv[1], "-version" ) == 0
      || strcmp_no_case( argv[1], "-v" ) == 0)  
    { 
      program_message();
      exit( 0 );
    } 
    else if (strcmp_no_case( argv[1], "--help" ) == 0
           || strcmp_no_case( argv[1], "-help" ) == 0
           || strcmp_no_case( argv[1], "-h" ) == 0) 
    { 
      printf( "Apply the allomorph rules on the entries of a Malaga lexicon.\n"
            "\n"
            "Usage:\n"
            "mallex GRAMMAR             "
            "-- Start interactive mallex.\n"
            "mallex GRAMMAR -b[inary]   "
            "-- Create binary allomorph lexicon.\n"
            "mallex GRAMMAR -r[eadable] "
            "-- Output readable allomorph lexicon.\n"
            "mallex GRAMMAR -p[relex]   "
            "-- Output precompiled lexicon.\n"
            "mallex -v[ersion]          "
            "-- Print version information.\n"
            "mallex -h[elp]             "
            "-- Print this help.\n\n"
            "GRAMMAR may be \"PROJECT_FILE\" "
            "or \"SYM_FILE ALLO_FILE LEX_FILE [PRELEX_FILE]\".\n"
            "PROJECT_FILE must end on \".pro\".\n"
            "SYM_FILE must end on \".sym\".\n"
            "ALLO_FILE must end on \".all\".\n"
            "LEX_FILE must end on \".lex\".\n"
            "PRELEX_FILE must end on \".prelex\".\n" );
      exit( 0 );
    }
  }
  for (i = 1; i < argc; i++) 
  { 
    if (has_extension( argv[i], "pro" )) 
      set_file_name( &project_file, argv[i] ); 
    else if (has_extension( argv[i], "lex" )) 
      set_file_name( &lexicon_file, argv[i] );
    else if (has_extension( argv[i], "all" )) 
      set_binary_file_name( &rule_file, argv[i] );
    else if (has_extension( argv[i], "sym" )) 
      set_binary_file_name( &symbol_file, argv[i] );
    else if (has_extension( argv[i], "prelex") )
      set_binary_file_name( &prelex_file, argv[i] );
    else if (strcmp_no_case( argv[i], "-binary" ) == 0
           || strcmp_no_case( argv[i], "-b" ) == 0) 
    { 
      mallex_mode = BINARY_MODE; 
    } 
    else if (strcmp_no_case( argv[i], "-readable" ) == 0
           || strcmp_no_case( argv[i], "-r" ) == 0) 
    { 
      mallex_mode = TEXT_MODE; 
    } 
    else if (strcmp_no_case( argv[i], "-prelex" ) == 0
           || strcmp_no_case( argv[i], "-p" ) == 0) 
    { 
      mallex_mode = PRELEX_MODE; 
    } 
    else 
      complain( "Illegal argument \"%s\".", argv[i] );
  }
  if (project_file != NULL) 
    read_project_file( project_file );
  if (char_set == NULL) 
    char_set = new_string( "iso8859-1", NULL );
  if (rule_file == NULL) 
    complain( "Missing allomorph rule file name." );
  if (symbol_file == NULL) 
    complain( "Missing symbol file name." );

  /* Init modules. */
  init_values();
  init_symbols( symbol_file );
  init_hangul();
  init_transmit();
  init_lex_compiler( rule_file );
  init_scanner();

  /* Set mallex options to default values. */
  options = mallex_options;
  allo_format = new_string( "%s: %f", NULL );
  use_display = FALSE;

  /* Set mallex options by user scripts. */
  if (project_file != NULL) 
    execute_set_commands( project_file, "mallex:" );
  malagarc_path = NULL;
#ifdef UNIX
  TRY 
    malagarc_path = absolute_path( "~/.malagarc", NULL );
  IF_ERROR 
    RESUME;
  END_TRY;
#endif
#ifdef WINDOWS
  TRY 
    malagarc_path = absolute_path( "~\\malaga.ini", NULL );
  IF_ERROR 
    RESUME;
  END_TRY;
#endif
  if (malagarc_path != NULL && file_exists( malagarc_path ))
    execute_set_commands( malagarc_path, "mallex:" );
  free_mem( &malagarc_path );

  if (mallex_mode == INTERACTIVE_MODE) 
  { 
    init_debugger( display_where, mallex_commands );
    rule_systems[0].rule_sys = allo_rule_sys;
    rule_systems[0].name = "all";
    init_breakpoints( 1, rule_systems );
    program_message();
    command_loop( program_name, mallex_commands );
    terminate_breakpoints();
    terminate_debugger();
  } 
  else 
  { 
    if (lexicon_file == NULL) 
      complain( "missing lexicon file name" );
    switch (mallex_mode)
    {
    case TEXT_MODE:
      generate_allos_for_file( lexicon_file, NULL, TRUE );
      print_lex_tree( stdout, allo_format );
      break;
    case BINARY_MODE:
      generate_allos_for_file( lexicon_file, prelex_file, TRUE );
      set_binary_file_name( &object_file, lexicon_file );
      write_lex_tree( object_file );
      free_mem( &object_file );
      break;
    case PRELEX_MODE:
      generate_allos_for_file( lexicon_file, prelex_file, FALSE );
      s = replace_extension( lexicon_file, "prelex" );
      set_binary_file_name( &object_file, s );
      free_mem( &s );
      write_prelex_file( object_file );
      free_mem( &object_file );
      break;
    default:
      complain( "Internal error." );
    }
    print_lex_statistics( stderr );
  }

  free_aliases();
  free_mem( &base_fs_string );
  free_mem( &allo_format );
  stop_display_process();
  terminate_lex_compiler();
  terminate_hangul();
  terminate_symbols();
  terminate_transmit();
  terminate_values();
  terminate_scanner();
  terminate_patterns();
  free_switches();
  free_mem( &rule_file );
  free_mem( &symbol_file );
  free_mem( &lexicon_file );
  free_mem( &project_file );
  free_mem( &char_set );
  terminate_input();
  terminate_basic();
  return 0;
}

/* End of file. =============================================================*/

/* Generated by  Doxygen 1.6.0   Back to index */