[deliverable/titan.core.git] / langviz / bison_la.l

/******************************************************************************
 * Copyright (c) 2000-2015 Ericsson Telecom AB
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 ******************************************************************************/
 
/*
 * bison lexical analyzer
 *
 * Written by Matyas Forstner using bison's scan-gram.l
 * 20050908
 */

/* %option debug */
%option nodefault nounput noyywrap never-interactive
%option yylineno
%option prefix="bison_"

%{ /* ****************** C declarations ***************** */

#include "../compiler2/string.hh"
#include "Symbol.hh"
#include "Rule.hh"
#include "bison_p.tab.hh"
#include <limits.h>
#include <errno.h>

#define lval bison_lval 

static int convert_ucn_to_byte (char const *hex_text);

%} /* ***************** definitions ***************** */
 
/* Start conditions used by the rules below.  They are declared with %x
   (exclusive), so while one of them is active only the rules tagged
   with it are candidates for matching.  */
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_AFTER_IDENTIFIER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

/* Named patterns.  Note that `.' is in the letter class, so an
   identifier may start with or contain dots.  A directive is `%'
   followed by an identifier-like word that may also contain `-'.  */
letter	  [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id	  {letter}({letter}|[0-9])*
directive %{letter}({letter}|[0-9]|-)*
int	  [0-9]+

/* The text between `<' and `>' of a type tag: one or more characters
   other than NUL, newline and `>'.  */
tag	 [^\0\n>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  Because of the
   trailing `*' this pattern can also match the empty string.  */
splice	 (\\[ \f\t\v]*\n)*

%% /* ***************** rules ************************* */ 

%{
  /* These declarations come before the first rule, so flex copies them
     to the top of the generated scanning routine: they are ordinary
     local variables, set back to 0 each time the routine is entered.
     NOTE(review): in the rules below every value is consumed before
     the action that set it is followed by a `return'; keep it that way
     or make these static.  */

  /* Nesting level of the current code in braces.  */
  int braces_level=0;

  /* Parent context state, when applicable.  */
  int context_state=0;

  /* Token type to return, when applicable.  */
  int token_type=0;

%}

  /*-----------------------.
  | Scanning white space.  |
  `-----------------------*/

<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
{
  /* Comments and white space.  Everything matched in this scope is
     discarded without returning a token.  A stray comma is dropped
     silently because the upstream warning is commented out.  The
     white-space rule shares, through `|', the empty action of the
     line-comment rule that follows it.  */
  ","	       /*warn_at (*loc, _("stray `,' treated as white space"));*/
  [ \f\n\t\v]  |
  "//".*       ;
  "/*" {
    /* Remember which start condition to resume once the comment ends.  */
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  Here they are matched only
     at the beginning of a line and ignored.  */
  ^"#line "{int}" \"".*"\"\n" {
    ;
  }
}

  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/
<INITIAL>
{
  /* Rules for the top-level state: directive keywords, punctuation,
     identifiers, numbers, and the openers of literals, prologue and
     braced code.

     Directives that are followed by a code block (%destructor,
     %initial-action, %lex-param, %parse-param, %printer, %union) do
     not return here: they store their token in token_type and switch
     to SC_PRE_CODE; the token is returned when the closing brace of
     the code is reached in SC_BRACED_CODE.

     The keyword rules must stay ahead of the generic directive rule:
     both can match the same text and the earlier rule is the one
     chosen.  */
  "%binary"               return PERCENT_NONASSOC;
  "%debug"                return PERCENT_DEBUG;
  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
  "%define"               return PERCENT_DEFINE;
  "%defines"              return PERCENT_DEFINES;
  "%destructor"		  token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
  "%dprec"		  return PERCENT_DPREC;
  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
  "%expect"               return PERCENT_EXPECT;
  "%expect"[-_]"rr"	  return PERCENT_EXPECT_RR;
  "%file-prefix"          return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
  "%glr-parser"           return PERCENT_GLR_PARSER;
  "%left"                 return PERCENT_LEFT;
  "%lex-param"		  token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
  "%locations"            return PERCENT_LOCATIONS;
  "%merge"		  return PERCENT_MERGE;
  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"	return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"        return PERCENT_NO_LINES;
  "%nonassoc"             return PERCENT_NONASSOC;
  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                return PERCENT_NTERM;
  "%output"               return PERCENT_OUTPUT;
  "%parse-param"	  token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
  "%prec"                 return PERCENT_PREC;
  "%printer"              token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
  "%right"                return PERCENT_RIGHT;
  "%skeleton"             return PERCENT_SKELETON;
  "%start"                return PERCENT_START;
  "%term"                 return PERCENT_TOKEN;
  "%token"                return PERCENT_TOKEN;
  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
  "%type"                 return PERCENT_TYPE;
  "%union"		  token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
  "%verbose"              return PERCENT_VERBOSE;
  "%yacc"                 return PERCENT_YACC;

  {directive} {
    /* Any other directive is skipped silently; the upstream
       diagnostic is disabled.  */
    /*complain_at (*loc, _("invalid directive: %s"), quote (yytext));*/
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  {id} {
    /* Store a heap-allocated copy of the identifier text and let
       SC_AFTER_IDENTIFIER decide whether the token is ID or ID_COLON.
       The scanner does not free this copy on that path; presumably
       the parser takes ownership -- verify against the grammar.  */
    //val->symbol = symbol_get (yytext, *loc);
    lval.string_val=new string(yytext);
    BEGIN SC_AFTER_IDENTIFIER;
  }

  {int} {
    /* The numeric value is not stored; only the token kind is
       reported.  */
    //lval.integer = scan_integer (yytext, 10);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    /* Hexadecimal literal; as above, no value is stored.  */
    //lval.integer = scan_integer (yytext, 16);
    return INT;
  }

  /* Characters.  We don't check there is only one.  The collected
     text starts with the opening quote.  */
  "'"	      /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;

  /* Strings.  The collected text starts with the opening quote.  */
  "\""	      lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    //STRING_GROW;
    token_type = BRACED_CODE;
    braces_level = 0;
    BEGIN SC_BRACED_CODE;
  }

  /* A type.  The tag text itself is not stored.  */
  "<"{tag}">" {
    /*
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    */
    return TYPE;
  }

  "%%" {
    /* The counter is static, so it survives across calls of the
       scanning routine and is never reset: the second `%%' ever seen
       switches to the epilogue.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  . {
    /* Any other character is dropped without a diagnostic.  */
    //complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    yyterminate ();
  }
}


  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>
{
  /* Entered right after an identifier was stored in lval.string_val.
     White space and comments in between are skipped by the shared
     white-space rules.  */
  ":" {
    /* The identifier is followed by a colon.  */
    BEGIN INITIAL;
    return ID_COLON;
  }
  . {
    /* Anything else: push the character back so that INITIAL rescans
       it, and report a plain identifier.  */
    yyless (0);
    BEGIN INITIAL;
    return ID;
  }
  <<EOF>> {
    /* The identifier was the last token of the input.  */
    BEGIN INITIAL;
    return ID;
  }
}


  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>
{
  /* The comment text is discarded.  At the terminator, or at end of
     input inside an unterminated comment (no diagnostic is issued),
     resume the start condition saved in context_state.  */
  "*/"     BEGIN context_state;
  .|\n	   ;
  <<EOF>>  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial `/ *' is already eaten.  |
  `------------------------------------------------------------*/

<SC_COMMENT>
{
  /* A C comment inside user code.  The closing star and slash may be
     separated by backslash-newline splices.  All other characters are
     consumed by the default rules at the end of the rules section.
     Both the terminator and an unexpected end of input resume the
     start condition saved in context_state.  */
  "*"{splice}"/"  /*STRING_GROW;*/ BEGIN context_state;
  <<EOF>>	  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial `//' is already eaten.  |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>
{
  /* A line comment inside user code ends at the first newline that is
     not part of a backslash-newline splice: a splice is a longer
     match than the bare newline, so it is consumed and the comment
     goes on.  All other characters are consumed by the default rules
     at the end of the rules section.  */
  "\n"		 /*STRING_GROW;*/ BEGIN context_state;
  {splice}	 /*STRING_GROW;*/
  <<EOF>>	 BEGIN context_state;
}


  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  /* Closing quote: append it to the collected text (which already
     starts with the opening quote) and hand the string to the parser
     as a STRING token.  */
  "\"" {
    /*
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    rule_length++;
    */
    *lval.string_val+="\"";
    BEGIN INITIAL;
    return STRING;
  }

  /* Unterminated string (newline or end of input before the closing
     quote): the partial text is thrown away and no token is returned.
     The pointer is reset after the delete so that the shared semantic
     value never holds a dangling pointer that later code could read
     or free again.  */
  \n		{ /*unexpected_newline (token_start, "\"");*/ delete lval.string_val; lval.string_val = 0; BEGIN INITIAL; }

  <<EOF>>	{ /*unexpected_eof (token_start, "\"");*/ delete lval.string_val; lval.string_val = 0; BEGIN INITIAL; }

  /* Any other single character is copied verbatim.  Escape sequences
     are longer matches and are handled by the escape-decoding rules.  */
  .     *lval.string_val+=yytext;

}


  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.			      |
  `----------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  /* Closing quote: append it to the collected text (which already
     starts with the opening quote).  The character literal is reported
     to the parser as an ID token.  */
  "'" {
    /*
    unsigned char last_string_1;
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->symbol = symbol_get (last_string, *loc);
    symbol_class_set (val->symbol, token_sym, *loc);
    last_string_1 = last_string[1];
    symbol_user_token_number_set (val->symbol, last_string_1, *loc);
    STRING_FREE;
    rule_length++;
    */
    *lval.string_val+="'";
    BEGIN INITIAL;
    return ID;
  }

  /* Unterminated literal (newline or end of input before the closing
     quote): the partial text is thrown away and no token is returned.
     The pointer is reset after the delete so that the shared semantic
     value never holds a dangling pointer that later code could read
     or free again.  */
  \n		{ /*unexpected_newline (token_start, "'");*/ delete lval.string_val; lval.string_val = 0; BEGIN INITIAL; }

  <<EOF>>	{ /*unexpected_eof (token_start, "'");*/ delete lval.string_val; lval.string_val = 0; BEGIN INITIAL; }

  /* Any other single character is copied verbatim.  Escape sequences
     are longer matches and are handled by the escape-decoding rules.  */
  .     *lval.string_val+=yytext;
}

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
{
  /* A literal NUL byte inside a string or character literal is
     ignored; the upstream diagnostic is disabled.
     NOTE(review): the single-character `.' rules of both literal
     scopes appear earlier in the file and match a NUL byte as well,
     so this rule looks unreachable -- confirm with flex's warning
     about rules that cannot be matched.  */
  \0	    /*complain_at (*loc, _("invalid null character"));*/
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Each rule below decodes one escape sequence and appends the
     resulting character to lval.string_val.  Sequences that decode to
     NUL or to a value above UCHAR_MAX are dropped without a
     diagnostic, because the upstream complaints are commented out.  */

  /* Octal escape: one to three octal digits after the backslash.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, 0, 8);
    if (UCHAR_MAX < c)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c) 
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  /* Hexadecimal escape with any number of digits; errno is cleared
     first so that an error reported by strtoul can be detected.  */
  \\x[0-9abcdefABCDEF]+ {
    unsigned long int c;
    errno=0;
    c = strtoul (yytext + 2, 0, 16);
    if (UCHAR_MAX < c || errno)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  /* The simple one-letter escapes.  */
  \\a	/*obstack_1grow (&obstack_for_string, '\a');*/ *lval.string_val+='\a';
  \\b	/*obstack_1grow (&obstack_for_string, '\b');*/ *lval.string_val+='\b';
  \\f	/*obstack_1grow (&obstack_for_string, '\f');*/ *lval.string_val+='\f';
  \\n	/*obstack_1grow (&obstack_for_string, '\n');*/ *lval.string_val+='\n';
  \\r	/*obstack_1grow (&obstack_for_string, '\r');*/ *lval.string_val+='\r';
  \\t	/*obstack_1grow (&obstack_for_string, '\t');*/ *lval.string_val+='\t';
  \\v	/*obstack_1grow (&obstack_for_string, '\v');*/ *lval.string_val+='\v';

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  /*obstack_1grow (&obstack_for_string, yytext[1]);*/ *lval.string_val+=yytext[1];

  /* Universal character name with four or eight hex digits.
     convert_ucn_to_byte yields a negative value when the name has no
     single-byte equivalent.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }
  /* Catch-all for any other backslash pair: it is kept verbatim,
     backslash included.  This rule has to stay last, because it ties
     in length with the two-character escapes above and the earlier
     rule is the one chosen.  */
  \\(.|\n)	{
    /*
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
    */
    *lval.string_val+=yytext;
  }
}


  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/

<SC_CHARACTER,SC_STRING>
{
  /* Inside a character or string literal of user code, consume line
     splices and backslash escapes (a backslash, optional splices, then
     one character other than newline, `$', `@', `[' or `]') as a unit,
     so that an escaped quote is not taken for the end of the literal.
     The matched text is discarded.  */
  {splice}|\\{splice}[^\n$@\[\]]	/*STRING_GROW;*/
}

<SC_CHARACTER>
{
  /* A character literal in user code ends at the closing quote.  A
     raw newline or the end of input also ends it, without any
     diagnostic.  In every case scanning resumes in the start condition
     saved in context_state.  */
  "'"		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "'");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "'");*/ BEGIN context_state;
}

<SC_STRING>
{
  /* A string literal in user code ends at the closing quote.  A raw
     newline or the end of input also ends it, without any diagnostic.
     In every case scanning resumes in the start condition saved in
     context_state.  */
  "\""		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "\"");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "\"");*/ BEGIN context_state;
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* Openers of literals and comments inside user code.  Each action
     saves the current start condition in context_state, so that the
     sub-scanner can return to it, and then enters the sub-scanner.  */
  "'" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_STRING;
  }
  /* The two characters of a comment opener may be separated by
     backslash-newline splices.  */
  "/"{splice}"*" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    /*STRING_GROW;*/
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}


  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | For %union only, allow arbitrary C code to appear before the   |
  | following brace, as an extension to POSIX.			   |
  `---------------------------------------------------------------*/

<SC_PRE_CODE>
{
  /* First character after the directive that is neither white space
     nor a comment (those are skipped by the shared white-space rules).

     The scanner this file derives from accepted only an opening brace
     here, or arbitrary text for %union, and otherwise reported a
     missing brace and returned the directive token at once:

       bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;

     That check is disabled in this scanner -- every character is
     accepted -- so the branch handling the invalid case could never
     run and has been removed.  */
  . {
    /* Push the character back so that SC_BRACED_CODE scans it.  */
    yyless (0);

    /* Start one level below zero: the opening brace that follows
       raises the level to 0 and its matching closing brace drops it
       below 0 again, which is what ends the code block.  */
    braces_level = -1;
    //code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  /* End of input right after the directive: no token is returned for
     it and no diagnostic is issued.  */
  <<EOF>>  /*unexpected_eof (scanner_cursor, "{}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions). The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/

<SC_BRACED_CODE>
{
  /* Track the nesting depth.  The digraphs `<%' and `%>' count as an
     opening and a closing brace for the depth, but only a real closing
     brace can end the code block.  The code text itself is not
     collected; anything not matched here is consumed by the default
     rules at the end of the rules section.  */
  "{"|"<"{splice}"%"  /*STRING_GROW;*/ braces_level++;
  "%"{splice}">"      /*STRING_GROW;*/ braces_level--;
  "}" {
    /* True when this brace closes the outermost level.  */
    bool outer_brace = --braces_level < 0;

    /* As an undocumented Bison extension, append `;' before the last
       brace in braced code, so that the user code can omit trailing
       `;'.  But do not append `;' if emulating Yacc, since Yacc does
       not append one.

       FIXME: Bison should warn if a semicolon seems to be necessary
       here, and should omit the semicolon if it seems unnecessary
       (e.g., after ';', '{', or '}', each followed by comments or
       white space).  Such a warning shouldn't depend on --yacc; it
       should depend on a new --pedantic option, which would cause
       Bison to warn if it detects an extension to POSIX.  --pedantic
       should also diagnose other Bison extensions like %yacc.
       Perhaps there should also be a GCC-style --pedantic-errors
       option, so that such warnings are diagnosed as errors.  */

    /*
    if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
      obstack_1grow (&obstack_for_string, ';');

    obstack_1grow (&obstack_for_string, '}');
    */

    if (outer_brace)
      {
        /*
	STRING_FINISH;
	rule_length++;
	loc->start = code_start;
	val->chars = last_string;
        */
	/* Report the token recorded when the block was entered.  */
	BEGIN INITIAL;
	return token_type;
      }
  }

  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
  "<"{splice}"<"  /*STRING_GROW;*/
  /* References to semantic values and locations are recognized but
     not processed.  */
  "$"("<"{tag}">")?(-?[0-9]+|"$")  /*handle_dollar (token_type, yytext, *loc);*/
  "@"(-?[0-9]+|"$")		/* handle_at (token_type, yytext, *loc);*/

  /* End of input inside the code: no token is returned for the
     unfinished block.  */
  <<EOF>>  /*unexpected_eof (code_start, "}");*/ BEGIN INITIAL;
}


  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  /* The prologue text is not collected: only the PROLOGUE token is
     reported once the closing marker is reached.  */
  "%}" {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return PROLOGUE;
  }

  /* End of input before the closing marker: no PROLOGUE token is
     returned and no diagnostic is issued.  */
  <<EOF>>  /*unexpected_eof (code_start, "%}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  /* The epilogue runs to the end of the input.  Its text is not
     collected; a single EPILOGUE token is reported at end of input,
     after switching back to INITIAL.  */
  <<EOF>> {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return EPILOGUE;
  }
}


 /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/

  /* In this scanner the obstack growth is commented out, so these
     rules simply consume any character that no other rule of the
     listed start conditions has matched.  SC_STRING and SC_CHARACTER
     are left out of the newline rule because a raw newline in those
     states is handled by their own rules.  The scanner is built with
     the nodefault option, so this is what covers the remaining
     input.  */

<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>.	|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	/*STRING_GROW;*/


%%

/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.					    |
`------------------------------------------------------------------*/

/* UCN points at the backslash that starts the escape; the hexadecimal
   digits begin two characters later, after the backslash and the
   letter that follows it.  The result is either -1 or a value in the
   range 0 .. UCHAR_MAX; note that 0 is a possible result and is left
   to the caller to reject.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Produce the answer as an int right away.  Storing -1 back into
       the unsigned CODE and narrowing it on return would rely on an
       implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it always fits in an int.  */
  return (int) code;
#endif
}
Commit	Line	Data
	1	/******************************************************************************
	2	* Copyright (c) 2000-2015 Ericsson Telecom AB
	3	* All rights reserved. This program and the accompanying materials
	4	* are made available under the terms of the Eclipse Public License v1.0
	5	* which accompanies this distribution, and is available at
	6	* http://www.eclipse.org/legal/epl-v10.html
	7	******************************************************************************/
	8
	9	/*
	10	* bison lexical analyzer
	11	*
	12	* Written by Matyas Forstner using bison's scan-gram.l
	13	* 20050908
	14	*/
	15
	16	/* %option debug */
	17	%option nodefault nounput noyywrap never-interactive
	18	%option yylineno
	19	%option prefix="bison_"
	20
	21	%{ /* **************** C declarations *************** */
	22
	23	#include "../compiler2/string.hh"
	24	#include "Symbol.hh"
	25	#include "Rule.hh"
	26	#include "bison_p.tab.hh"
	27	#include <limits.h>
	28	#include <errno.h>
	29
	30	#define lval bison_lval
	31
	32	static int convert_ucn_to_byte (char const *hex_text);
	33
	34	%} /* *************** definitions *************** */
	35
	36	%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
	37	%x SC_STRING SC_CHARACTER
	38	%x SC_AFTER_IDENTIFIER
	39	%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
	40	%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
	41
	42	letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
	43	id {letter}({letter}\|[0-9])*
	44	directive %{letter}({letter}\|[0-9]\|-)*
	45	int [0-9]+
	46
	47	tag [^\0\n>]+
	48
	49	/* Zero or more instances of backslash-newline. Following GCC, allow
	50	white space between the backslash and the newline. */
	51	splice (\\[ \f\t\v]\n)
	52
	53	%% /* *************** rules *********************** */
	54
	55	%{
	56	/* Nesting level of the current code in braces. */
	57	int braces_level=0;
	58
	59	/* Parent context state, when applicable. */
	60	int context_state=0;
	61
	62	/* Token type to return, when applicable. */
	63	int token_type=0;
	64
	65	%}
	66
	67	/*-----------------------.
	68	\| Scanning white space. \|
	69	`-----------------------*/
	70
	71	<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
	72	{
	73	/* Comments and white space. */
	74	"," /warn_at (loc, _("stray `,' treated as white space"));*/
	75	[ \f\n\t\v] \|
	76	"//".* ;
	77	"/*" {
	78	context_state = YY_START;
	79	BEGIN SC_YACC_COMMENT;
	80	}
	81
	82	/* #line directives are not documented, and may be withdrawn or
	83	modified in future versions of Bison. */
	84	^"#line "{int}" \"".*"\"\n" {
	85	;
	86	}
	87	}
	88
	89	/*----------------------------.
	90	\| Scanning Bison directives. \|
	91	`----------------------------*/
	92	<INITIAL>
	93	{
	94	"%binary" return PERCENT_NONASSOC;
	95	"%debug" return PERCENT_DEBUG;
	96	"%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
	97	"%define" return PERCENT_DEFINE;
	98	"%defines" return PERCENT_DEFINES;
	99	"%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
	100	"%dprec" return PERCENT_DPREC;
	101	"%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
	102	"%expect" return PERCENT_EXPECT;
	103	"%expect"[-_]"rr" return PERCENT_EXPECT_RR;
	104	"%file-prefix" return PERCENT_FILE_PREFIX;
	105	"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
	106	"%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
	107	"%glr-parser" return PERCENT_GLR_PARSER;
	108	"%left" return PERCENT_LEFT;
	109	"%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
	110	"%locations" return PERCENT_LOCATIONS;
	111	"%merge" return PERCENT_MERGE;
	112	"%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
	113	"%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
	114	"%no"[-_]"lines" return PERCENT_NO_LINES;
	115	"%nonassoc" return PERCENT_NONASSOC;
	116	"%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
	117	"%nterm" return PERCENT_NTERM;
	118	"%output" return PERCENT_OUTPUT;
	119	"%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
	120	"%prec" return PERCENT_PREC;
	121	"%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
	122	"%pure"[-_]"parser" return PERCENT_PURE_PARSER;
	123	"%right" return PERCENT_RIGHT;
	124	"%skeleton" return PERCENT_SKELETON;
	125	"%start" return PERCENT_START;
	126	"%term" return PERCENT_TOKEN;
	127	"%token" return PERCENT_TOKEN;
	128	"%token"[-_]"table" return PERCENT_TOKEN_TABLE;
	129	"%type" return PERCENT_TYPE;
	130	"%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
	131	"%verbose" return PERCENT_VERBOSE;
	132	"%yacc" return PERCENT_YACC;
	133
	134	{directive} {
	135	/complain_at (loc, _("invalid directive: %s"), quote (yytext));*/
	136	}
	137
	138	"=" return EQUAL;
	139	"\|" return PIPE;
	140	";" return SEMICOLON;
	141
	142	{id} {
	143	//val->symbol = symbol_get (yytext, *loc);
	144	lval.string_val=new string(yytext);
	145	BEGIN SC_AFTER_IDENTIFIER;
	146	}
	147
	148	{int} {
	149	//lval.integer = scan_integer (yytext, 10);
	150	return INT;
	151	}
	152	0[xX][0-9abcdefABCDEF]+ {
	153	//lval.integer = scan_integer (yytext, 16);
	154	return INT;
	155	}
	156
	157	/* Characters. We don't check there is only one. */
	158	"'" /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;
	159
	160	/* Strings. */
	161	"\"" lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;
	162
	163	/* Prologue. */
	164	"%{" BEGIN SC_PROLOGUE;
	165
	166	/* Code in between braces. */
	167	"{" {
	168	//STRING_GROW;
	169	token_type = BRACED_CODE;
	170	braces_level = 0;
	171	BEGIN SC_BRACED_CODE;
	172	}
	173
	174	/* A type. */
	175	"<"{tag}">" {
	176	/*
	177	obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
	178	STRING_FINISH;
	179	val->uniqstr = uniqstr_new (last_string);
	180	STRING_FREE;
	181	*/
	182	return TYPE;
	183	}
	184
	185	"%%" {
	186	static int percent_percent_count;
	187	if (++percent_percent_count == 2)
	188	BEGIN SC_EPILOGUE;
	189	return PERCENT_PERCENT;
	190	}
	191
	192	. {
	193	//complain_at (*loc, _("invalid character: %s"), quote (yytext));
	194	}
	195
	196	<<EOF>> {
	197	yyterminate ();
	198	}
	199	}
	200
	201
	202	/*-----------------------------------------------------------------.
	203	\| Scanning after an identifier, checking whether a colon is next. \|
	204	`-----------------------------------------------------------------*/
	205
	206	<SC_AFTER_IDENTIFIER>
	207	{
	208	":" {
	209	BEGIN INITIAL;
	210	return ID_COLON;
	211	}
	212	. {
	213	yyless (0);
	214	BEGIN INITIAL;
	215	return ID;
	216	}
	217	<<EOF>> {
	218	BEGIN INITIAL;
	219	return ID;
	220	}
	221	}
	222
	223
	224
	225	/*---------------------------------------------------------------.
	226	\| Scanning a Yacc comment. The initial `/ *' is already eaten. \|
	227	`---------------------------------------------------------------*/
	228
	229	<SC_YACC_COMMENT>
	230	{
	231	"*/" BEGIN context_state;
	232	.\|\n ;
	233	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
	234	}
	235
	236
	237	/*------------------------------------------------------------.
	238	\| Scanning a C comment. The initial `/ *' is already eaten. \|
	239	`------------------------------------------------------------*/
	240
	241	<SC_COMMENT>
	242	{
	243	""{splice}"/" /STRING_GROW;*/ BEGIN context_state;
	244	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
	245	}
	246
	247
	248	/*--------------------------------------------------------------.
	249	\| Scanning a line comment. The initial `//' is already eaten. \|
	250	`--------------------------------------------------------------*/
	251
	252	<SC_LINE_COMMENT>
	253	{
	254	"\n" /STRING_GROW;/ BEGIN context_state;
	255	{splice} /STRING_GROW;/
	256	<<EOF>> BEGIN context_state;
	257	}
	258
	259
	260	/*------------------------------------------------.
	261	\| Scanning a Bison string, including its escapes. \|
	262	\| The initial quote is already eaten. \|
	263	`------------------------------------------------*/
	264
	265	<SC_ESCAPED_STRING>
	266	{
	267	"\"" {
	268	/*
	269	STRING_GROW;
	270	STRING_FINISH;
	271	loc->start = token_start;
	272	val->chars = last_string;
	273	rule_length++;
	274	*/
	275	*lval.string_val+="\"";
	276	BEGIN INITIAL;
	277	return STRING;
	278	}
	279
	280	\n /unexpected_newline (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
	281
	282	<<EOF>> /unexpected_eof (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
	283
	284	. *lval.string_val+=yytext;
	285
	286	}
	287
	288
	289	/*----------------------------------------------------------.
	290	\| Scanning a Bison character literal, decoding its escapes. \|
	291	\| The initial quote is already eaten. \|
	292	`----------------------------------------------------------*/
	293
	294	<SC_ESCAPED_CHARACTER>
	295	{
	296	"'" {
	297	/*
	298	unsigned char last_string_1;
	299	STRING_GROW;
	300	STRING_FINISH;
	301	loc->start = token_start;
	302	val->symbol = symbol_get (last_string, *loc);
	303	symbol_class_set (val->symbol, token_sym, *loc);
	304	last_string_1 = last_string[1];
	305	symbol_user_token_number_set (val->symbol, last_string_1, *loc);
	306	STRING_FREE;
	307	rule_length++;
	308	*/
	309	*lval.string_val+="'";
	310	BEGIN INITIAL;
	311	return ID;
	312	}
	313
	314	\n /unexpected_newline (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
	315
	316	<<EOF>> /unexpected_eof (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
	317
	318	. *lval.string_val+=yytext;
	319	}
	320
	321	<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
	322	{
	323	\0 /complain_at (loc, _("invalid null character"));*/
	324	}
	325
	326
	327	/*----------------------------.
	328	\| Decode escaped characters. \|
	329	`----------------------------*/
	330
	331	<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
	332	{
	333	\\[0-7]{1,3} {
	334	unsigned long int c = strtoul (yytext + 1, 0, 8);
	335	if (UCHAR_MAX < c)
	336	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
	337	else if (! c)
	338	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
	339	else
	340	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
	341	}
	342
	343	\\x[0-9abcdefABCDEF]+ {
	344	unsigned long int c;
	345	errno=0;
	346	c = strtoul (yytext + 2, 0, 16);
	347	if (UCHAR_MAX < c \|\| errno)
	348	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
	349	else if (! c)
	350	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
	351	else
	352	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
	353	}
	354
	355	\\a /obstack_1grow (&obstack_for_string, '\a');/ *lval.string_val+='\a';
	356	\\b /obstack_1grow (&obstack_for_string, '\b');/ *lval.string_val+='\b';
	357	\\f /obstack_1grow (&obstack_for_string, '\f');/ *lval.string_val+='\f';
	358	\\n /obstack_1grow (&obstack_for_string, '\n');/ *lval.string_val+='\n';
	359	\\r /obstack_1grow (&obstack_for_string, '\r');/ *lval.string_val+='\r';
	360	\\t /obstack_1grow (&obstack_for_string, '\t');/ *lval.string_val+='\t';
	361	\\v /obstack_1grow (&obstack_for_string, '\v');/ *lval.string_val+='\v';
	362
	363	/* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
	364	\\("\""\|"'"\|"?"\|"\\") /obstack_1grow (&obstack_for_string, yytext[1]);/ *lval.string_val+=yytext[1];
	365
	366	\\(u\|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
	367	int c = convert_ucn_to_byte (yytext);
	368	if (c < 0)
	369	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
	370	else if (! c)
	371	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
	372	else
	373	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
	374	}
	375	\\(.\|\n) {
	376	/*
	377	complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
	378	STRING_GROW;
	379	*/
	380	*lval.string_val+=yytext;
	381	}
	382	}
	383
	384
	385	/*--------------------------------------------.
	386	\| Scanning user-code characters and strings. \|
	387	`--------------------------------------------*/
	388
	389	<SC_CHARACTER,SC_STRING>
	390	{
	391	{splice}\|\\{splice}[^\n$@\[\]] /STRING_GROW;/
	392	}
	393
	394	<SC_CHARACTER>
	395	{
	396	"'" /STRING_GROW;/ BEGIN context_state;
	397	\n /unexpected_newline (token_start, "'");/ BEGIN context_state;
	398	<<EOF>> /unexpected_eof (token_start, "'");/ BEGIN context_state;
	399	}
	400
	401	<SC_STRING>
	402	{
	403	"\"" /STRING_GROW;/ BEGIN context_state;
	404	\n /unexpected_newline (token_start, "\"");/ BEGIN context_state;
	405	<<EOF>> /unexpected_eof (token_start, "\"");/ BEGIN context_state;
	406	}
	407
	408
	409	/*---------------------------------------------------.
	410	\| Strings, comments etc. can be found in user code. \|
	411	`---------------------------------------------------*/
	412
	413	<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
	414	{
	415	"'" {
	416	/*
	417	STRING_GROW;
	418	token_start = loc->start;
	419	*/
	420	context_state = YY_START;
	421	BEGIN SC_CHARACTER;
	422	}
	423	"\"" {
	424	/*
	425	STRING_GROW;
	426	token_start = loc->start;
	427	*/
	428	context_state = YY_START;
	429	BEGIN SC_STRING;
	430	}
	431	"/"{splice}"*" {
	432	/*
	433	STRING_GROW;
	434	token_start = loc->start;
	435	*/
	436	context_state = YY_START;
	437	BEGIN SC_COMMENT;
	438	}
	439	"/"{splice}"/" {
	440	/STRING_GROW;/
	441	context_state = YY_START;
	442	BEGIN SC_LINE_COMMENT;
	443	}
	444	}
	445
	446
  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | For %union only, allow arbitrary C code to appear before the   |
  | following brace, as an extension to POSIX.                     |
  `---------------------------------------------------------------*/
	452
	453	<SC_PRE_CODE>
	454	{
	455	. {
	456	/*
	457	bool valid = yytext[0] == '{' \|\| token_type == PERCENT_UNION;
	458	scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
	459	*/
	460	bool valid=1;
	461	yyless (0);
	462
	463	if (valid)
	464	{
	465	braces_level = -1;
	466	//code_start = loc->start;
	467	BEGIN SC_BRACED_CODE;
	468	}
	469	else
	470	{
	471	/*
	472	complain_at (*loc, _("missing `{' in `%s'"),
	473	token_name (token_type));
	474	obstack_sgrow (&obstack_for_string, "{}");
	475	STRING_FINISH;
	476	val->chars = last_string;
	477	*/
	478	BEGIN INITIAL;
	479	return token_type;
	480	}
	481	}
	482
	483	<<EOF>> /unexpected_eof (scanner_cursor, "{}");/ BEGIN INITIAL;
	484	}
	485
	486
  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions). The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/
	491
	492	<SC_BRACED_CODE>
	493	{
	494	"{"\|"<"{splice}"%" /STRING_GROW;/ braces_level++;
	495	"%"{splice}">" /STRING_GROW;/ braces_level--;
	496	"}" {
	497	bool outer_brace = --braces_level < 0;
	498
	499	/* As an undocumented Bison extension, append `;' before the last
	500	brace in braced code, so that the user code can omit trailing
	501	`;'. But do not append `;' if emulating Yacc, since Yacc does
	502	not append one.
	503
	504	FIXME: Bison should warn if a semicolon seems to be necessary
	505	here, and should omit the semicolon if it seems unnecessary
	506	(e.g., after ';', '{', or '}', each followed by comments or
	507	white space). Such a warning shouldn't depend on --yacc; it
	508	should depend on a new --pedantic option, which would cause
	509	Bison to warn if it detects an extension to POSIX. --pedantic
	510	should also diagnose other Bison extensions like %yacc.
	511	Perhaps there should also be a GCC-style --pedantic-errors
	512	option, so that such warnings are diagnosed as errors. */
	513
	514	/*
	515	if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
	516	obstack_1grow (&obstack_for_string, ';');
	517
	518	obstack_1grow (&obstack_for_string, '}');
	519	*/
	520
	521	if (outer_brace)
	522	{
	523	/*
	524	STRING_FINISH;
	525	rule_length++;
	526	loc->start = code_start;
	527	val->chars = last_string;
	528	*/
	529	BEGIN INITIAL;
	530	return token_type;
	531	}
	532	}
	533
	534	/* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
	535	(as `<' `<%'). */
	536	"<"{splice}"<" /STRING_GROW;/
	537	"$"("<"{tag}">")?(-?[0-9]+\|"$") /handle_dollar (token_type, yytext, loc);*/
	538	"@"(-?[0-9]+\|"$") /* handle_at (token_type, yytext, loc);/
	539
	540	<<EOF>> /unexpected_eof (code_start, "}");/ BEGIN INITIAL;
	541	}
	542
	543
  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/
	547
	548	<SC_PROLOGUE>
	549	{
	550	"%}" {
	551	/*
	552	STRING_FINISH;
	553	loc->start = code_start;
	554	val->chars = last_string;
	555	*/
	556	BEGIN INITIAL;
	557	return PROLOGUE;
	558	}
	559
	560	<<EOF>> /unexpected_eof (code_start, "%}");/ BEGIN INITIAL;
	561	}
	562
	563
  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/
	568
	569	<SC_EPILOGUE>
	570	{
	571	<<EOF>> {
	572	/*
	573	STRING_FINISH;
	574	loc->start = code_start;
	575	val->chars = last_string;
	576	*/
	577	BEGIN INITIAL;
	578	return EPILOGUE;
	579	}
	580	}
	581
	582
	583	/*-----------------------------------------------------.
	584	\| By default, grow the string obstack with the input. \|
	585	`-----------------------------------------------------*/
	586
	587	<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>. \|
	588	<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n /STRING_GROW;/
	589
	590
	591	%%
	592
	593	/*------------------------------------------------------------------.
	594	\| Convert universal character name UCN to a single-byte character, \|
	595	\| and return that character. Return -1 if UCN does not correspond \|
	596	\| to a single-byte character. \|
	597	`------------------------------------------------------------------*/
	598
	599	static int
	600	convert_ucn_to_byte (char const *ucn)
	601	{
	602	unsigned long int code = strtoul (ucn + 2, 0, 16);
	603
	604	/* FIXME: Currently we assume Unicode-compatible unibyte characters
	605	on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On
	606	non-ASCII hosts we support only the portable C character set.
	607	These limitations should be removed once we add support for
	608	multibyte characters. */
	609
	610	if (UCHAR_MAX < code)
	611	return -1;
	612
	613	#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
	614	{
	615	/* A non-ASCII host. Use CODE to index into a table of the C
	616	basic execution character set, which is guaranteed to exist on
	617	all Standard C platforms. This table also includes '$', '@',
	618	and '`', which are not in the basic execution character set but
	619	which are unibyte characters on all the platforms that we know
	620	about. */
	621	static signed char const table[] =
	622	{
	623	'\0', -1, -1, -1, -1, -1, -1, '\a',
	624	'\b', '\t', '\n', '\v', '\f', '\r', -1, -1,
	625	-1, -1, -1, -1, -1, -1, -1, -1,
	626	-1, -1, -1, -1, -1, -1, -1, -1,
	627	' ', '!', '"', '#', '$', '%', '&', '\'',
	628	'(', ')', '*', '+', ',', '-', '.', '/',
	629	'0', '1', '2', '3', '4', '5', '6', '7',
	630	'8', '9', ':', ';', '<', '=', '>', '?',
	631	'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	632	'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	633	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	634	'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
	635	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	636	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	637	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	638	'x', 'y', 'z', '{', '\|', '}', '~'
	639	};
	640
	641	code = code < sizeof table ? table[code] : -1;
	642	}
	643	#endif
	644
	645	return code;
	646	}
	647