/* scanner.c -- source file of the malaga package. */

/* Copyright (C) 1995 Bjoern Beutel. */

/* Description. =============================================================*/

/* This module supports scanning (lexical analysis) of malaga source files. */

/* Includes. ================================================================*/

#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <setjmp.h>
#include "basic.h"
#include "files.h"
#include "scanner.h"

/* Constants. ===============================================================*/

/* List of all keywords and their token codes.
 * This list must be maintained in alphabetical order, because "keyword"
 * looks names up by binary search using "strcmp_no_case".
 * "token_as_text" scans the same table linearly to map a code to its name.
 * NOTE(review): NUMBER_OF_KEYWORDS is defined outside this file; it
 * presumably must equal the number of entries below (currently 40) -
 * confirm when adding or removing a keyword. */
static struct { string_t name; int_t code; } keywords[ NUMBER_OF_KEYWORDS ] = 
{ 
  { "accept", TOK_ACCEPT },
  { "allo_rule", TOK_ALLO_RULE },
  { "and", TOK_AND },
  { "assert", TOK_ASSERT },
  { "break", TOK_BREAK },
  { "choose", TOK_CHOOSE },
  { "combi_rule", TOK_COMBI_RULE },
  { "default", TOK_DEFAULT },
  { "define", TOK_DEFINE },
  { "else", TOK_ELSE },
  { "elseif", TOK_ELSEIF },
  { "end", TOK_END },
  { "end_rule", TOK_END_RULE },
  { "error", TOK_ERROR },
  { "foreach", TOK_FOREACH },
  { "greater", TOK_GREATER },
  { "greater_equal", TOK_GREATER_EQUAL },
  { "if", TOK_IF },
  { "in", TOK_IN },
  { "include", TOK_INCLUDE },
  { "initial", TOK_INITIAL },
  { "input_filter", TOK_INPUT_FILTER },
  { "less", TOK_LESS },
  { "less_equal", TOK_LESS_EQUAL },
  { "matches", TOK_MATCHES },
  { "not", TOK_NOT },
  { "or", TOK_OR },
  { "output_filter", TOK_OUTPUT_FILTER },
  { "parallel", TOK_PARALLEL },
  { "pruning_rule", TOK_PRUNING_RULE },
  { "repeat", TOK_REPEAT },
  { "require", TOK_REQUIRE },
  { "result", TOK_RESULT },
  { "return", TOK_RETURN },
  { "robust_rule", TOK_ROBUST_RULE },
  { "rules", TOK_RULES },
  { "stop", TOK_STOP },
  { "subrule", TOK_SUBRULE },
  { "then", TOK_THEN },
  { "while", TOK_WHILE }
};

/* Types. ===================================================================*/

/* One entry of the SOURCES list. A new entry is put at the start of the list
 * by "begin_include" and removed again by "end_include". */
typedef struct /* A source stream for lexical analysis. */
{ 
  list_node_t *next; /* The next source stream in the list, i.e. the one that
                      * includes this one.
                      * NOTE(review): presumably this must stay the first
                      * member so that the struct can be used as a list
                      * node - confirm against the list functions. */
  FILE *stream; /* The input stream for this include level. */
  string_t file_name; /* The name of the input file (the pointer that was
                       * passed to "begin_include"; not copied here). */
  int_t column; /* Column that has been read (a tab advances to the next
                 * multiple of 8, a newline resets it to 0). */
  int_t line_number; /* Number of the line that has been read
                      * (starts with 1). */
  int_t next_char; /* Saved copy of NEXT_CHAR while this source is backed up
                    * by an included file or by a scanner input string. */
  int_t next_token; /* Saved copy of NEXT_TOKEN while this source is backed
                     * up by an included file or by a scanner input string. */
} source_t;

/* Global variables. ========================================================*/

/* The token that has been read last by "read_next_token": EOF, one of the
 * TOK_... codes, or the code of a single character. */
int_t next_token;

/* The name that has been read last by "read_name" (identifier, keyword,
 * variable or constant name). It points into the buffer of TOKEN_TEXT and is
 * reset to NULL when a string or a number is read. */
string_t token_name;

/* Copy of the contents of the last string token. It is freed and replaced
 * whenever the next string token is read. */
string_t token_string;

/* The value of the last number token (set by "read_number"). */
double token_number;

/* Variables. ===============================================================*/

static list_t sources; /* The list of sources, current source first. */

static string_t scanner_input;
/* While SCANNER_INPUT is not NULL, "read_next_char" reads its input from
 * this string instead of from the current source file.
 * It is set by "set_scanner_input" and advanced char by char. */

static int_t next_char; /* The next char to be read (or EOF). */

static text_t *token_text; /* The text of the next token.
                            * Created by "init_scanner" and freed by
                            * "terminate_scanner". */

/* Functions. ===============================================================*/

static void 
read_next_char( void )
/* Read the next char from input into NEXT_CHAR.
 * If SCANNER_INPUT is set, read from that string; at its end, set NEXT_CHAR
 * to EOF (the string pointer is not advanced beyond the terminator).
 * Else, if a source file is open, read from its stream, complain about read
 * errors and update the line and column information of the source.
 * If there is no input at all, set NEXT_CHAR to EOF. */
{ 
  source_t *source;

  if (scanner_input != NULL) 
  { 
    if (*scanner_input == EOS) 
      next_char = EOF;
    else 
    { 
      /* Convert via "unsigned char", exactly as "getc" does for files.
       * Otherwise, where "char" is signed, a char with the high bit set
       * would become negative and the char 0xFF would be taken for EOF. */
      next_char = (unsigned char) *scanner_input++;
    }
    return;
  }

  source = (source_t *) sources.first;
  if (source == NULL) 
  { 
    next_char = EOF;
    return;
  }

  next_char = getc( source->stream );
  if (next_char == EOF && ferror( source->stream )) 
  { 
    complain( "Can't read from \"%s\": %s.", 
              source->file_name, strerror( errno ) );
  }

  /* Keep track of the position in the source file. */
  if (next_char == '\t') 
    source->column = (source->column + 8) & ~7; /* Next multiple of 8. */
  else if (next_char == '\n') 
  { 
    source->column = 0;
    source->line_number++;
  } 
  else if (next_char != EOF) 
    source->column++;
}

/*---------------------------------------------------------------------------*/

string_t 
current_file_name( void )
/* Return the name of the file reading from or NULL. */
{
  source_t *source;

  source = (source_t *) sources.first;
  if (source == NULL) 
    return NULL;
  return source->file_name;
}

/*---------------------------------------------------------------------------*/

int_t 
current_line_number( void )
/* Return the line number where the last char has been read or -1. */
{
  source_t *source;

  source = (source_t *) sources.first;
  if (source == NULL) 
    return -1;
  return source->line_number;
}

/*---------------------------------------------------------------------------*/

int_t 
current_column( void )
/* Return the column where the last char has been read or -1. */
{
  source_t *source;

  source = (source_t *) sources.first;
  if (source == NULL) 
    return -1;
  if (source->column == 0) 
    return 0;
  return source->column - 1; /* Let columns start with 0. */
}

/*---------------------------------------------------------------------------*/

void 
set_scanner_input( string_t input )
/* Let the scanner read from the string INPUT instead of from the current
 * source file, until "set_scanner_input( NULL )" is called.
 * INPUT is not copied, so it must remain valid until then.
 * When INPUT is installed, the first token of INPUT is read immediately;
 * the lookahead of the current source file, if any, is saved and it is
 * restored by the call with NULL. */
{
  source_t *current = (source_t *) sources.first;

  scanner_input = input;

  if (input == NULL) 
  { 
    /* Back to file input: restore the saved lookahead. */
    if (current != NULL) 
    { 
      next_char = current->next_char;
      next_token = current->next_token;
    }
    return;
  }

  /* Save the lookahead of the file before the string overwrites it. */
  if (current != NULL) 
  { 
    current->next_char = next_char;
    current->next_token = next_token;
  }
  read_next_char();
  read_next_token();
}

/*---------------------------------------------------------------------------*/

void 
begin_include( string_t file_name )
/* Open the file FILE_NAME as a new level of inclusion and read its first
 * token. The lookahead of the including source, if any, is saved in its
 * source description.
 * FILE_NAME itself is stored, not a copy of it.
 * NOTE(review): so it presumably has to stay valid until the matching
 * "end_include" - confirm against the callers. */
{
  FILE *input_stream;
  source_t *outer, *inner;

  outer = (source_t *) sources.first;
  input_stream = open_stream( file_name, "r" );

  /* The lookahead of the including source must be read again later. */
  if (outer != NULL) 
  { 
    outer->next_char = next_char;
    outer->next_token = next_token;
  }

  /* Put a description of the new source in front of the source list. */
  inner = new_node( &sources, sizeof( source_t ), LIST_START );
  inner->stream = input_stream;
  inner->file_name = file_name;
  inner->column = 0;
  inner->line_number = 1;

  /* Fill the lookahead from the new source. */
  read_next_char();
  read_next_token();
}

/*---------------------------------------------------------------------------*/

void 
end_include( void )
/* Stop reading from current source stream and read from former stream. */
{
  source_t *source;

  source = (source_t *) sources.first;
  close_stream( &source->stream, source->file_name );
  free_first_node( &sources );
  if (sources.first != NULL) 
  { 
    source = (source_t *) sources.first;
    next_char = source->next_char;
    next_token = source->next_token;
  }
}

/*---------------------------------------------------------------------------*/

void 
end_includes( void )
/* Stop reading from all nested source streams.
 * Calls "end_include" until the list of sources is empty. */
{
  while (sources.first != NULL) 
    end_include();
}

/*---------------------------------------------------------------------------*/

void 
init_scanner( void )
/* Initialise the scanner.
 * Creates the text buffer TOKEN_TEXT in which names, numbers and strings are
 * collected; "terminate_scanner" frees it again. */
{
  token_text = new_text();
}

/*---------------------------------------------------------------------------*/

void 
terminate_scanner( void )
/* Terminate the scanner, even when it's scanning.
 * Drops the scanner input string, closes the streams of all sources and
 * frees the token buffers. */
{
  source_t *source;

  scanner_input = NULL;
  /* NOTE(review): FOREACH_FREE is defined outside this file; presumably it
   * runs the statement below for each node of SOURCES and frees the node
   * afterwards - confirm. */
  FOREACH_FREE( source, sources ) 
    close_stream( &source->stream, NULL );
  /* TOKEN_NAME may point into the buffer of TOKEN_TEXT (see "read_name"),
   * so it is reset before that buffer is freed. */
  token_name = NULL;
  free_text( &token_text );
  free_mem( &token_string );
}

/*---------------------------------------------------------------------------*/

static void 
read_name( void )
/* Read the name of a rule, variable, constant or keyword.
 * The name consists of all following letters, digits and the chars "_", "&"
 * and "|". It is collected in TOKEN_TEXT, and TOKEN_NAME is set to point to
 * it. Complain if the name is empty. */
{
  token_name = NULL;
  clear_text( token_text );

  for (;;) 
  { 
    if (next_char == EOF) 
      break;
    if (! (next_char == '_' || next_char == '&' || next_char == '|' 
           || IS_ALPHA( next_char ) || IS_DIGIT( next_char ))) 
    { 
      break;
    }
    add_char_to_text( token_text, next_char );
    read_next_char();
  }

  token_name = token_text->buffer;
  if (*token_name == EOS) 
    complain( "Illegal character in name." );
}

/*---------------------------------------------------------------------------*/

static int_t 
keyword( string_t name )
/* Return the token code of the keyword NAME.
 * NAME is compared by "strcmp_no_case".
 * If NAME is not in the keyword table, return TOK_IDENT. */
{
  int_t first, last, probe, order;

  /* Binary search: if NAME is a keyword, it is in keywords[ first..last ].
   * This relies on the table being sorted alphabetically. */
  for (first = 0, last = NUMBER_OF_KEYWORDS - 1; first <= last; ) 
  { 
    probe = (first + last) / 2;
    order = strcmp_no_case( name, keywords[ probe ].name );
    if (order == 0) 
      return keywords[ probe ].code;
    if (order < 0) 
      last = probe - 1;
    else 
      first = probe + 1;
  }
  return TOK_IDENT;
}

/*---------------------------------------------------------------------------*/

static void 
add_digits_to_token( void )
/* Append all digits that follow in the input to TOKEN_TEXT. */
{
  while (IS_DIGIT( next_char )) 
  { 
    add_char_to_text( token_text, next_char );
    read_next_char();
  }
}

static void 
read_number( void )
/* Read a number and save its value in TOKEN_NUMBER.
 * A number is a sequence of digits, followed by one of:
 * - the letter "l" or "L", which is just read over,
 * - the letter "r" or "R", which is read over and negates the number,
 * - an optional fraction (".", digits) and an optional exponent
 *   ("e" or "E", optional sign, digits).
 * NOTE(review): "l" and "r" presumably mark indexes counted from the left
 * and from the right - confirm against the language definition. */
{
  token_name = NULL;
  clear_text( token_text );
  add_digits_to_token();

  if (TO_LOWER( next_char ) == 'l') 
  { 
    read_next_char();
  } 
  else if (TO_LOWER( next_char ) == 'r') 
  { 
    insert_in_text( token_text, "-", 0 );
    read_next_char();
  } 
  else 
  { 
    if (next_char == '.') 
    { /* Read a fraction. */
      add_char_to_text( token_text, next_char );
      read_next_char();
      if (! IS_DIGIT( next_char )) 
        complain( "Missing digits after \".\"." );
      add_digits_to_token();
    }
    if (next_char == 'E' || next_char == 'e') 
    { /* Read an exponent. */
      add_char_to_text( token_text, next_char );
      read_next_char();
      if (next_char == '-' || next_char == '+') 
      { 
        add_char_to_text( token_text, next_char );
        read_next_char();
      }
      if (! IS_DIGIT( next_char )) 
        complain( "Missing exponent." );
      add_digits_to_token();
    }
  }

  /* Convert the collected text to its value. */
  if (sscanf( token_text->buffer, "%lf", &token_number ) != 1) 
    complain( "Illegal number." );
}

/*---------------------------------------------------------------------------*/

static void 
scan_string_literal( void )
/* Read a string that is enclosed in double quotes. NEXT_CHAR must be the
 * opening '"'. The contents are collected in TOKEN_TEXT, and a copy of them
 * is stored in TOKEN_STRING (its former value is freed).
 * Escape sequences are "\\", "\"" and "\x" followed by two hex digits per
 * byte of "char_t".
 * Complain if the string is not closed in the same line or if it contains an
 * illegal escape sequence. */
{
  int_t i, code;

  token_name = NULL;
  clear_text( token_text );
  read_next_char(); /* Read over the opening '"'. */
  while (next_char != '\"') 
  { 
    if (next_char == EOF || next_char == '\n') 
      complain( "Unterminated string at end of line." );
    if (next_char != '\\') 
      add_char_to_text( token_text, next_char );
    else 
    { 
      read_next_char();
      if (next_char == '\\' || next_char == '\"') 
        add_char_to_text( token_text, next_char );
      else if (TO_LOWER( next_char ) == 'x') 
      { /* Read a char that is given by its hexadecimal code. */
        code = 0;
        for (i = 0; i < 2 * (int_t) sizeof( char_t ); i++) 
        { 
          read_next_char();
          if (IS_DIGIT( next_char )) 
            code = 16 * code + ORD( next_char ) - '0';
          else if (TO_LOWER( next_char ) >= 'a' 
                   && TO_LOWER( next_char ) <= 'f') 
          { 
            code = 16 * code + 10 + TO_LOWER( next_char ) - 'a';
          } 
          else 
            complain( "Control char sequence must have 2 digits." );
        }
        add_char_to_text( token_text, code );
      } 
      else 
        complain( "Illegal escape sequence." );
    }
    read_next_char();
  }
  read_next_char(); /* Read over the closing '"'. */
  free_mem( &token_string ); /* Free old token string. */
  token_string = new_string( token_text->buffer, NULL );
}

void 
read_next_token( void )
/* Read the next token from the current input into NEXT_TOKEN.
 * Whitespace and comments ("#" up to the end of the line) are skipped.
 * If the end of input is reached, NEXT_TOKEN is set to EOF.
 * For names, TOKEN_NAME is set; for strings, TOKEN_STRING is set; for
 * numbers, TOKEN_NUMBER is set. Any char that does not start a longer token
 * is a token of its own, with its char code as token code. */
{
  /* Read chars until a token has been recognised. */
  for (;;) 
  { 
    switch (next_char) 
    {
    case EOF:
      next_token = EOF;
      return;

    case ' ': 
    case '\t': 
    case '\n': 
    case '\r': 
      /* Read over whitespace. The carriage return is included so that
       * files with CR/LF line ends do not yield stray tokens. */
      read_next_char();
      break;

    case '#': /* Read over a comment. */
      do 
      { 
        read_next_char();
      } while (next_char != '\n' && next_char != EOF);
      break;

    case '\"': /* Read a string. */
      scan_string_literal();
      next_token = TOK_STRING;
      return;

    case ':': /* Read a ":", ":=", ":=+", ":=-", ":=*", ":=/". */
      read_next_char();
      if (next_char != '=') 
      { 
        next_token = ':';
        return;
      }
      read_next_char();
      switch (next_char) 
      {
      case '+': 
        next_token = TOK_ASSIGN_PLUS;
        break;
      case '-': 
        next_token = TOK_ASSIGN_MINUS;
        break;
      case '*': 
        next_token = TOK_ASSIGN_ASTERISK;
        break;
      case '/': 
        next_token = TOK_ASSIGN_SLASH;
        break;
      default: /* A plain ":="; the current char is not part of it. */
        next_token = TOK_ASSIGN;
        return;
      }
      read_next_char(); /* Read over the operator char behind ":=". */
      return;

    case '/': /* Read a "/", a "/=" or a "/~". */
      read_next_char();
      if (next_char == '=') 
      { 
        next_token = TOK_NOT_EQUAL;
        read_next_char();
      } 
      else if (next_char == '~') 
      { 
        next_token = TOK_NOT_CONGRUENT;
        read_next_char();
      } 
      else 
        next_token = '/';
      return;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9': 
      /* Read a number. */
      read_number();
      next_token = TOK_NUMBER;
      return;

    case '$': /* Read a variable name. */
      read_next_char();
      read_name();
      next_token = TOK_VARIABLE;
      return;

    case '@': /* Read a constant name. */
      read_next_char();
      read_name();
      next_token = TOK_CONSTANT;
      return;

    default: 
      if (IS_ALPHA( next_char ) 
          || next_char == '_' || next_char == '&' || next_char == '|') 
      { /* Read a keyword or an identifier. */
        read_name();
        next_token = keyword( token_name );
      } 
      else 
      { /* Any other char is a token of its own. */
        next_token = next_char;
        read_next_char();
      }
      return;
    }
  }
}

/*---------------------------------------------------------------------------*/

string_t 
token_as_text( int_t token )
/* Return a description of TOKEN that is readable for humans:
 * - a keyword is returned as its name in double quotes,
 * - EOF and the token classes (string, identifier, ...) are described by a
 *   fixed word,
 * - operators and single chars are returned via "new_string_readable".
 * The result is a new string that must be freed after use. */
{
  int_t index;
  char single_char[2];
  string_t description = NULL; /* Fixed description, copied as it is. */
  string_t symbol = NULL; /* Symbol, copied by "new_string_readable". */

  /* Look if TOKEN is a keyword. */
  index = 0;
  while (index < NUMBER_OF_KEYWORDS) 
  { 
    if (keywords[ index ].code == token) 
      return concat_strings( "\"", keywords[ index ].name, "\"", NULL );
    index++;
  }

  switch (token) 
  {
  case EOF: description = "end of input"; break;
  case TOK_STRING: description = "string"; break;
  case TOK_IDENT: description = "identifier"; break;
  case TOK_VARIABLE: description = "variable"; break;
  case TOK_CONSTANT: description = "constant"; break;
  case TOK_NUMBER: description = "number"; break;
  case TOK_ASSIGN: symbol = ":="; break;
  case TOK_ASSIGN_PLUS: symbol = ":=+"; break;
  case TOK_ASSIGN_MINUS: symbol = ":=-"; break;
  case TOK_ASSIGN_ASTERISK: symbol = ":=*"; break;
  case TOK_ASSIGN_SLASH: symbol = ":=/"; break;
  case TOK_NOT_EQUAL: symbol = "/="; break;
  case TOK_NOT_CONGRUENT: symbol = "/~"; break;
  default: /* TOKEN is the code of a single char. */
    single_char[0] = token;
    single_char[1] = EOS;
    symbol = single_char;
    break;
  }

  if (description != NULL) 
    return new_string( description, NULL );
  return new_string_readable( symbol, NULL );
}

/*---------------------------------------------------------------------------*/

void 
test_token( int_t token )
/* Check that TOKEN is the next token. If it is not, complain and name the
 * expected token and the token that has been found instead.
 * NOTE(review): the two strings returned by "token_as_text" are not freed
 * here, although they must be freed after use; presumably "complain" does
 * not return, so they are lost on each such error - confirm. */
{
  if (next_token == token) 
    return;

  complain( "Expected %s, not %s.", 
            token_as_text( token ), token_as_text( next_token ) );
}

/*---------------------------------------------------------------------------*/

void 
parse_token( int_t token )
/* Test if TOKEN is the next token and read next token.
 * If the next token is not TOKEN, "test_token" reports an error. */
{
  test_token( token );
  read_next_token();
}

/* End of file. =============================================================*/

/* (This source listing was rendered by Doxygen 1.6.0.) */