[deliverable/titan.core.git] / langviz / bison_la.l

/******************************************************************************
 * Copyright (c) 2000-2016 Ericsson Telecom AB
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Balasko, Jeno
 *   Forstner, Matyas
 *
 ******************************************************************************/
 
/*
 * bison lexical analyzer
 *
 * Written by Matyas Forstner using bison's scan-gram.l
 * 20050908
 */

/* %option debug */
%option nodefault nounput noyywrap never-interactive
%option yylineno
%option prefix="bison_"

%{ /* ****************** C declarations ***************** */

#include "../compiler2/string.hh"
#include "Symbol.hh"
#include "Rule.hh"
#include "bison_p.tab.hh"
#include <limits.h>
#include <errno.h>

#define lval bison_lval 

static int convert_ucn_to_byte (char const *hex_text);

%} /* ***************** definitions ***************** */
 
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_AFTER_IDENTIFIER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

letter	  [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id	  {letter}({letter}|[0-9])*
directive %{letter}({letter}|[0-9]|-)*
int	  [0-9]+

tag	 [^\0\n>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
splice	 (\\[ \f\t\v]*\n)*

%% /* ***************** rules ************************* */ 

%{
  /* Flex copies the declarations below to the top of the scanning
     routine, so they are ordinary (non-static) locals: each is reset to
     0 whenever the scanner is entered and only carries information
     between rules that run within the same call.  */

  /* Nesting level of the current code in braces.  Set to 0 or -1 when
     SC_BRACED_CODE is entered and adjusted by the brace rules there.  */
  int braces_level=0;

  /* Parent context state, when applicable: the start condition (saved
     from YY_START) to return to once a nested comment, string or
     character literal has been scanned.  */
  int context_state=0;

  /* Token type to return, when applicable: the token handed to the
     parser when the code block being scanned ends.  */
  int token_type=0;

%}

  /*-----------------------.
  | Scanning white space.  |
  `-----------------------*/

<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
{
  /* Comments and white space.  */
  ","	       /*warn_at (*loc, _("stray `,' treated as white space"));*/
  [ \f\n\t\v]  |
  "//".*       ;
  "/*" {
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  */
  ^"#line "{int}" \"".*"\"\n" {
    ;
  }
}

  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/
<INITIAL>
{
  "%binary"               return PERCENT_NONASSOC;
  "%debug"                return PERCENT_DEBUG;
  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
  "%define"               return PERCENT_DEFINE;
  "%defines"              return PERCENT_DEFINES;
  "%destructor"		  token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
  "%dprec"		  return PERCENT_DPREC;
  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
  "%expect"               return PERCENT_EXPECT;
  "%expect"[-_]"rr"	  return PERCENT_EXPECT_RR;
  "%file-prefix"          return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
  "%glr-parser"           return PERCENT_GLR_PARSER;
  "%left"                 return PERCENT_LEFT;
  "%lex-param"		  token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
  "%locations"            return PERCENT_LOCATIONS;
  "%merge"		  return PERCENT_MERGE;
  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"	return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"        return PERCENT_NO_LINES;
  "%nonassoc"             return PERCENT_NONASSOC;
  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                return PERCENT_NTERM;
  "%output"               return PERCENT_OUTPUT;
  "%parse-param"	  token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
  "%prec"                 return PERCENT_PREC;
  "%printer"              token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
  "%right"                return PERCENT_RIGHT;
  "%skeleton"             return PERCENT_SKELETON;
  "%start"                return PERCENT_START;
  "%term"                 return PERCENT_TOKEN;
  "%token"                return PERCENT_TOKEN;
  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
  "%type"                 return PERCENT_TYPE;
  "%union"		  token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
  "%verbose"              return PERCENT_VERBOSE;
  "%yacc"                 return PERCENT_YACC;

  {directive} {
    /*complain_at (*loc, _("invalid directive: %s"), quote (yytext));*/
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  {id} {
    //val->symbol = symbol_get (yytext, *loc);
    /* Keep a heap-allocated copy of the identifier text in the semantic
       value.  No token is returned yet: SC_AFTER_IDENTIFIER decides
       between ID and ID_COLON depending on whether a colon follows.  */
    lval.string_val=new string(yytext);
    BEGIN SC_AFTER_IDENTIFIER;
  }

  {int} {
    //lval.integer = scan_integer (yytext, 10);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    //lval.integer = scan_integer (yytext, 16);
    return INT;
  }

  /* Characters.  We don't check there is only one.  */
  "'"	      /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;

  /* Strings. */
  "\""	      lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    //STRING_GROW;
    token_type = BRACED_CODE;
    braces_level = 0;
    BEGIN SC_BRACED_CODE;
  }

  /* A type: "<tag>".  Only the TYPE token is returned; the tag text
     itself is not stored in the semantic value.  */
  "<"{tag}">" {
    /*
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    */
    return TYPE;
  }

  /* Section separator.  The second one switches to the epilogue.  */
  "%%" {
    /* Counts the "%%" separators seen so far.  Being static, the counter
       survives across calls of the scanner and is never reset.
       NOTE(review): scanning a second grammar in the same process would
       therefore never reach the value 2 again and would not switch to
       SC_EPILOGUE -- confirm that only one file is scanned per run.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  . {
    //complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    yyterminate ();
  }
}


  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>
{
  /* The identifier saved by the {id} rule is followed by a colon
     (white space and comments in between are skipped by the shared
     white-space rules): report it as ID_COLON.  */
  ":" {
    BEGIN INITIAL;
    return ID_COLON;
  }
  /* Any other character belongs to the next token: push it back with
     yyless (0) so that INITIAL rescans it, and report a plain ID.  */
  . {
    yyless (0);
    BEGIN INITIAL;
    return ID;
  }
  /* End of input right after the identifier: still report the ID.  */
  <<EOF>> {
    BEGIN INITIAL;
    return ID;
  }
}


  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>
{
  "*/"     BEGIN context_state;
  .|\n	   ;
  <<EOF>>  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial `/ *' is already eaten.  |
  `------------------------------------------------------------*/

<SC_COMMENT>
{
  "*"{splice}"/"  /*STRING_GROW;*/ BEGIN context_state;
  <<EOF>>	  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial `//' is already eaten.  |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>
{
  "\n"		 /*STRING_GROW;*/ BEGIN context_state;
  {splice}	 /*STRING_GROW;*/
  <<EOF>>	 BEGIN context_state;
}


  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  /* Closing quote: complete the text collected in lval.string_val (it
     keeps both quotes) and hand it to the parser as a STRING.  */
  "\"" {
    *lval.string_val+="\"";
    BEGIN INITIAL;
    return STRING;
  }

  /* Unterminated string (newline or end of input).  No token is
     returned for it, so the partial text is discarded.  The pointer is
     cleared as well, so that the semantic value does not keep referring
     to freed memory while the following tokens are scanned.  */
  \n {
    /*unexpected_newline (token_start, "\"");*/
    delete lval.string_val;
    lval.string_val=0;
    BEGIN INITIAL;
  }

  <<EOF>> {
    /*unexpected_eof (token_start, "\"");*/
    delete lval.string_val;
    lval.string_val=0;
    BEGIN INITIAL;
  }

  /* Any other character is copied verbatim.  */
  .     *lval.string_val+=yytext;

}


  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.                       |
  `----------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  /* Closing quote: complete the text collected in lval.string_val (it
     keeps both quotes) and hand it to the parser as an ID.  The number
     of characters between the quotes is not checked.  */
  "'" {
    *lval.string_val+="'";
    BEGIN INITIAL;
    return ID;
  }

  /* Unterminated literal (newline or end of input).  No token is
     returned for it, so the partial text is discarded.  The pointer is
     cleared as well, so that the semantic value does not keep referring
     to freed memory while the following tokens are scanned.  */
  \n {
    /*unexpected_newline (token_start, "'");*/
    delete lval.string_val;
    lval.string_val=0;
    BEGIN INITIAL;
  }

  <<EOF>> {
    /*unexpected_eof (token_start, "'");*/
    delete lval.string_val;
    lval.string_val=0;
    BEGIN INITIAL;
  }

  /* Any other character is copied verbatim.  */
  .     *lval.string_val+=yytext;
}

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
{
  \0	    /*complain_at (*loc, _("invalid null character"));*/
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, 0, 8);
    if (UCHAR_MAX < c)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c) 
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  \\x[0-9abcdefABCDEF]+ {
    unsigned long int c;
    errno=0;
    c = strtoul (yytext + 2, 0, 16);
    if (UCHAR_MAX < c || errno)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  \\a	/*obstack_1grow (&obstack_for_string, '\a');*/ *lval.string_val+='\a';
  \\b	/*obstack_1grow (&obstack_for_string, '\b');*/ *lval.string_val+='\b';
  \\f	/*obstack_1grow (&obstack_for_string, '\f');*/ *lval.string_val+='\f';
  \\n	/*obstack_1grow (&obstack_for_string, '\n');*/ *lval.string_val+='\n';
  \\r	/*obstack_1grow (&obstack_for_string, '\r');*/ *lval.string_val+='\r';
  \\t	/*obstack_1grow (&obstack_for_string, '\t');*/ *lval.string_val+='\t';
  \\v	/*obstack_1grow (&obstack_for_string, '\v');*/ *lval.string_val+='\v';

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  /*obstack_1grow (&obstack_for_string, yytext[1]);*/ *lval.string_val+=yytext[1];

  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }
  \\(.|\n)	{
    /*
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
    */
    *lval.string_val+=yytext;
  }
}


  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/

<SC_CHARACTER,SC_STRING>
{
  {splice}|\\{splice}[^\n$@\[\]]	/*STRING_GROW;*/
}

<SC_CHARACTER>
{
  "'"		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "'");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "'");*/ BEGIN context_state;
}

<SC_STRING>
{
  "\""		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "\"");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "\"");*/ BEGIN context_state;
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  "'" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    /*STRING_GROW;*/
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}


  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | Bison allows arbitrary C code before the following brace for   |
  | %union only (an extension to POSIX); this analyzer allows it   |
  | after every directive that enters SC_PRE_CODE.                 |
  `---------------------------------------------------------------*/

<SC_PRE_CODE>
{
  . {
    /* First character after the directive that is neither white space
       nor part of a comment.  Bison accepts it only when it is `{' (or,
       for %union, whatever it is):

	 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;

       and complains about a missing `{' otherwise.  This analyzer
       reports no errors and accepts the character unconditionally, so
       there is no rejection branch.

       The character is pushed back and rescanned as braced code.
       Starting at level -1 makes the directive's own `{' bring the
       nesting level to 0, so the matching `}' is recognized as the
       outermost brace and token_type is returned from there.  */
    yyless (0);
    braces_level = -1;
    //code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  <<EOF>>  /*unexpected_eof (scanner_cursor, "{}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions). The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/

<SC_BRACED_CODE>
{
  "{"|"<"{splice}"%"  /*STRING_GROW;*/ braces_level++;
  "%"{splice}">"      /*STRING_GROW;*/ braces_level--;
  "}" {
    bool outer_brace = --braces_level < 0;

    /* As an undocumented Bison extension, append `;' before the last
       brace in braced code, so that the user code can omit trailing
       `;'.  But do not append `;' if emulating Yacc, since Yacc does
       not append one.

       FIXME: Bison should warn if a semicolon seems to be necessary
       here, and should omit the semicolon if it seems unnecessary
       (e.g., after ';', '{', or '}', each followed by comments or
       white space).  Such a warning shouldn't depend on --yacc; it
       should depend on a new --pedantic option, which would cause
       Bison to warn if it detects an extension to POSIX.  --pedantic
       should also diagnose other Bison extensions like %yacc.
       Perhaps there should also be a GCC-style --pedantic-errors
       option, so that such warnings are diagnosed as errors.  */

    /*
    if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
      obstack_1grow (&obstack_for_string, ';');

    obstack_1grow (&obstack_for_string, '}');
    */

    if (outer_brace)
      {
        /*
	STRING_FINISH;
	rule_length++;
	loc->start = code_start;
	val->chars = last_string;
        */
	BEGIN INITIAL;
	return token_type;
      }
  }

  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
  "<"{splice}"<"  /*STRING_GROW;*/
  "$"("<"{tag}">")?(-?[0-9]+|"$")  /*handle_dollar (token_type, yytext, *loc);*/
  "@"(-?[0-9]+|"$")		/* handle_at (token_type, yytext, *loc);*/

  <<EOF>>  /*unexpected_eof (code_start, "}");*/ BEGIN INITIAL;
}


  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  "%}" {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return PROLOGUE;
  }

  <<EOF>>  /*unexpected_eof (code_start, "%}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  <<EOF>> {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return EPILOGUE;
  }
}


 /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/

<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>.	|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	/*STRING_GROW;*/


%%

/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
`------------------------------------------------------------------*/

/* UCN points at the complete escape sequence as matched by the scanner
   ("\uXXXX" or "\UXXXXXXXX"); the two-character prefix is skipped and
   the remaining hexadecimal digits are parsed.  The result is kept in
   an `int' so that the -1 failure value is returned as such and never
   takes a detour through an unsigned type.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is now known to be in the range 0..UCHAR_MAX.  */
  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  Entries of -1 mark code points without a counterpart.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Code points past the end of the table have no counterpart.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
Commit	Line	Data
970ed795	1	/******************************************************************************
d44e3c4f	2	* Copyright (c) 2000-2016 Ericsson Telecom AB
970ed795 EL	3	* All rights reserved. This program and the accompanying materials
	4	* are made available under the terms of the Eclipse Public License v1.0
	5	* which accompanies this distribution, and is available at
	6	* http://www.eclipse.org/legal/epl-v10.html
d44e3c4f	7	*
	8	* Contributors:
	9	* Balasko, Jeno
	10	* Forstner, Matyas
	11	*
970ed795 EL	12	******************************************************************************/
	13
	14	/*
	15	* bison lexical analyzer
	16	*
	17	* Written by Matyas Forstner using bison's scan-gram.l
	18	* 20050908
	19	*/
	20
	21	/* %option debug */
	22	%option nodefault nounput noyywrap never-interactive
	23	%option yylineno
	24	%option prefix="bison_"
	25
	26	%{ /* **************** C declarations *************** */
	27
	28	#include "../compiler2/string.hh"
	29	#include "Symbol.hh"
	30	#include "Rule.hh"
	31	#include "bison_p.tab.hh"
	32	#include <limits.h>
	33	#include <errno.h>
	34
	35	#define lval bison_lval
	36
	37	static int convert_ucn_to_byte (char const *hex_text);
	38
	39	%} /* *************** definitions *************** */
	40
	41	%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
	42	%x SC_STRING SC_CHARACTER
	43	%x SC_AFTER_IDENTIFIER
	44	%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
	45	%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
	46
	47	letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
	48	id {letter}({letter}\|[0-9])*
	49	directive %{letter}({letter}\|[0-9]\|-)*
	50	int [0-9]+
	51
	52	tag [^\0\n>]+
	53
	54	/* Zero or more instances of backslash-newline. Following GCC, allow
	55	white space between the backslash and the newline. */
	56	splice (\\[ \f\t\v]\n)
	57
	58	%% /* *************** rules *********************** */
	59
	60	%{
	61	/* Nesting level of the current code in braces. */
	62	int braces_level=0;
	63
	64	/* Parent context state, when applicable. */
	65	int context_state=0;
	66
	67	/* Token type to return, when applicable. */
	68	int token_type=0;
	69
	70	%}
	71
	72	/*-----------------------.
	73	\| Scanning white space. \|
	74	`-----------------------*/
	75
76	<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
77	{
78	/* Comments and white space. */
79	"," /warn_at (loc, _("stray `,' treated as white space"));*/
80	[ \f\n\t\v] \|
81	"//".* ;
82	"/*" {
83	context_state = YY_START;
84	BEGIN SC_YACC_COMMENT;
85	}
86
87	/* #line directives are not documented, and may be withdrawn or
88	modified in future versions of Bison. */
89	^"#line "{int}" \"".*"\"\n" {
90	;
91	}
92	}
93
94	/*----------------------------.
95	\| Scanning Bison directives. \|
96	`----------------------------*/
97	<INITIAL>
98	{
99	"%binary" return PERCENT_NONASSOC;
100	"%debug" return PERCENT_DEBUG;
101	"%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
102	"%define" return PERCENT_DEFINE;
103	"%defines" return PERCENT_DEFINES;
104	"%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
105	"%dprec" return PERCENT_DPREC;
106	"%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
107	"%expect" return PERCENT_EXPECT;
108	"%expect"[-_]"rr" return PERCENT_EXPECT_RR;
109	"%file-prefix" return PERCENT_FILE_PREFIX;
110	"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
111	"%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
112	"%glr-parser" return PERCENT_GLR_PARSER;
113	"%left" return PERCENT_LEFT;
114	"%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
115	"%locations" return PERCENT_LOCATIONS;
116	"%merge" return PERCENT_MERGE;
117	"%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
118	"%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
119	"%no"[-_]"lines" return PERCENT_NO_LINES;
120	"%nonassoc" return PERCENT_NONASSOC;
121	"%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
122	"%nterm" return PERCENT_NTERM;
123	"%output" return PERCENT_OUTPUT;
124	"%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
125	"%prec" return PERCENT_PREC;
126	"%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
127	"%pure"[-_]"parser" return PERCENT_PURE_PARSER;
128	"%right" return PERCENT_RIGHT;
129	"%skeleton" return PERCENT_SKELETON;
130	"%start" return PERCENT_START;
131	"%term" return PERCENT_TOKEN;
132	"%token" return PERCENT_TOKEN;
133	"%token"[-_]"table" return PERCENT_TOKEN_TABLE;
134	"%type" return PERCENT_TYPE;
135	"%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
136	"%verbose" return PERCENT_VERBOSE;
137	"%yacc" return PERCENT_YACC;
138
139	{directive} {
140	/complain_at (loc, _("invalid directive: %s"), quote (yytext));*/
141	}
142
143	"=" return EQUAL;
144	"\|" return PIPE;
145	";" return SEMICOLON;
146
147	{id} {
148	//val->symbol = symbol_get (yytext, *loc);
149	lval.string_val=new string(yytext);
150	BEGIN SC_AFTER_IDENTIFIER;
151	}
152
153	{int} {
154	//lval.integer = scan_integer (yytext, 10);
155	return INT;
156	}
157	0[xX][0-9abcdefABCDEF]+ {
158	//lval.integer = scan_integer (yytext, 16);
159	return INT;
160	}
161
162	/* Characters. We don't check there is only one. */
163	"'" /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;
164
165	/* Strings. */
166	"\"" lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;
167
168	/* Prologue. */
169	"%{" BEGIN SC_PROLOGUE;
170
171	/* Code in between braces. */
172	"{" {
173	//STRING_GROW;
174	token_type = BRACED_CODE;
175	braces_level = 0;
176	BEGIN SC_BRACED_CODE;
177	}
178
179	/* A type. */
180	"<"{tag}">" {
181	/*
182	obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
183	STRING_FINISH;
184	val->uniqstr = uniqstr_new (last_string);
185	STRING_FREE;
186	*/
187	return TYPE;
188	}
189
190	"%%" {
191	static int percent_percent_count;
192	if (++percent_percent_count == 2)
193	BEGIN SC_EPILOGUE;
194	return PERCENT_PERCENT;
195	}
196
197	. {
198	//complain_at (*loc, _("invalid character: %s"), quote (yytext));
199	}
200
201	<<EOF>> {
202	yyterminate ();
203	}
204	}
205
206
207	/*-----------------------------------------------------------------.
208	\| Scanning after an identifier, checking whether a colon is next. \|
209	`-----------------------------------------------------------------*/
210
211	<SC_AFTER_IDENTIFIER>
212	{
213	":" {
214	BEGIN INITIAL;
215	return ID_COLON;
216	}
217	. {
218	yyless (0);
219	BEGIN INITIAL;
220	return ID;
221	}
222	<<EOF>> {
223	BEGIN INITIAL;
224	return ID;
225	}
226	}
227
228
229
230	/*---------------------------------------------------------------.
231	\| Scanning a Yacc comment. The initial `/ *' is already eaten. \|
232	`---------------------------------------------------------------*/
233
234	<SC_YACC_COMMENT>
235	{
236	"*/" BEGIN context_state;
237	.\|\n ;
238	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
239	}
240
241
242	/*------------------------------------------------------------.
243	\| Scanning a C comment. The initial `/ *' is already eaten. \|
244	`------------------------------------------------------------*/
245
246	<SC_COMMENT>
247	{
248	""{splice}"/" /STRING_GROW;*/ BEGIN context_state;
249	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
250	}
251
252
253	/*--------------------------------------------------------------.
254	\| Scanning a line comment. The initial `//' is already eaten. \|
255	`--------------------------------------------------------------*/
256
257	<SC_LINE_COMMENT>
258	{
259	"\n" /STRING_GROW;/ BEGIN context_state;
260	{splice} /STRING_GROW;/
261	<<EOF>> BEGIN context_state;
262	}
263
264
265	/*------------------------------------------------.
266	\| Scanning a Bison string, including its escapes. \|
267	\| The initial quote is already eaten. \|
268	`------------------------------------------------*/
269
270	<SC_ESCAPED_STRING>
271	{
272	"\"" {
273	/*
274	STRING_GROW;
275	STRING_FINISH;
276	loc->start = token_start;
277	val->chars = last_string;
278	rule_length++;
279	*/
280	*lval.string_val+="\"";
281	BEGIN INITIAL;
282	return STRING;
283	}
284
285	\n /unexpected_newline (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
286
287	<<EOF>> /unexpected_eof (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
288
289	. *lval.string_val+=yytext;
290
291	}
292
293
294	/*----------------------------------------------------------.
295	\| Scanning a Bison character literal, decoding its escapes. \|
296	\| The initial quote is already eaten. \|
297	`----------------------------------------------------------*/
298
299	<SC_ESCAPED_CHARACTER>
300	{
301	"'" {
302	/*
303	unsigned char last_string_1;
304	STRING_GROW;
305	STRING_FINISH;
306	loc->start = token_start;
307	val->symbol = symbol_get (last_string, *loc);
308	symbol_class_set (val->symbol, token_sym, *loc);
309	last_string_1 = last_string[1];
310	symbol_user_token_number_set (val->symbol, last_string_1, *loc);
311	STRING_FREE;
312	rule_length++;
313	*/
314	*lval.string_val+="'";
315	BEGIN INITIAL;
316	return ID;
317	}
318
319	\n /unexpected_newline (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
320
321	<<EOF>> /unexpected_eof (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
322
323	. *lval.string_val+=yytext;
324	}
325
326	<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
327	{
328	\0 /complain_at (loc, _("invalid null character"));*/
329	}
330
331
332	/*----------------------------.
333	\| Decode escaped characters. \|
334	`----------------------------*/
335
336	<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
337	{
338	\\[0-7]{1,3} {
339	unsigned long int c = strtoul (yytext + 1, 0, 8);
340	if (UCHAR_MAX < c)
341	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
342	else if (! c)
343	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
344	else
345	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
346	}
347
348	\\x[0-9abcdefABCDEF]+ {
349	unsigned long int c;
350	errno=0;
351	c = strtoul (yytext + 2, 0, 16);
352	if (UCHAR_MAX < c \|\| errno)
353	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
354	else if (! c)
355	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
356	else
357	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
358	}
359
360	\\a /obstack_1grow (&obstack_for_string, '\a');/ *lval.string_val+='\a';
361	\\b /obstack_1grow (&obstack_for_string, '\b');/ *lval.string_val+='\b';
362	\\f /obstack_1grow (&obstack_for_string, '\f');/ *lval.string_val+='\f';
363	\\n /obstack_1grow (&obstack_for_string, '\n');/ *lval.string_val+='\n';
364	\\r /obstack_1grow (&obstack_for_string, '\r');/ *lval.string_val+='\r';
365	\\t /obstack_1grow (&obstack_for_string, '\t');/ *lval.string_val+='\t';
366	\\v /obstack_1grow (&obstack_for_string, '\v');/ *lval.string_val+='\v';
367
368	/* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
369	\\("\""\|"'"\|"?"\|"\\") /obstack_1grow (&obstack_for_string, yytext[1]);/ *lval.string_val+=yytext[1];
370
371	\\(u\|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
372	int c = convert_ucn_to_byte (yytext);
373	if (c < 0)
374	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
375	else if (! c)
376	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
377	else
378	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
379	}
380	\\(.\|\n) {
381	/*
382	complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
383	STRING_GROW;
384	*/
385	*lval.string_val+=yytext;
386	}
387	}
388
389
390	/*--------------------------------------------.
391	\| Scanning user-code characters and strings. \|
392	`--------------------------------------------*/
393
394	<SC_CHARACTER,SC_STRING>
395	{
396	{splice}\|\\{splice}[^\n$@\[\]] /STRING_GROW;/
397	}
398
399	<SC_CHARACTER>
400	{
401	"'" /STRING_GROW;/ BEGIN context_state;
402	\n /unexpected_newline (token_start, "'");/ BEGIN context_state;
403	<<EOF>> /unexpected_eof (token_start, "'");/ BEGIN context_state;
404	}
405
406	<SC_STRING>
407	{
408	"\"" /STRING_GROW;/ BEGIN context_state;
409	\n /unexpected_newline (token_start, "\"");/ BEGIN context_state;
410	<<EOF>> /unexpected_eof (token_start, "\"");/ BEGIN context_state;
411	}
412
413
414	/*---------------------------------------------------.
415	\| Strings, comments etc. can be found in user code. \|
416	`---------------------------------------------------*/
417
418	<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
419	{
420	"'" {
421	/*
422	STRING_GROW;
423	token_start = loc->start;
424	*/
425	context_state = YY_START;
426	BEGIN SC_CHARACTER;
427	}
428	"\"" {
429	/*
430	STRING_GROW;
431	token_start = loc->start;
432	*/
433	context_state = YY_START;
434	BEGIN SC_STRING;
435	}
436	"/"{splice}"*" {
437	/*
438	STRING_GROW;
439	token_start = loc->start;
440	*/
441	context_state = YY_START;
442	BEGIN SC_COMMENT;
443	}
444	"/"{splice}"/" {
445	/STRING_GROW;/
446	context_state = YY_START;
447	BEGIN SC_LINE_COMMENT;
448	}
449	}
450
451
452	/*---------------------------------------------------------------.
453	\| Scanning after %union etc., possibly followed by white space. \|
454	\| For %union only, allow arbitrary C code to appear before the \|
455	\| following brace, as an extension to POSIX. \|
456	`---------------------------------------------------------------*/
457
458	<SC_PRE_CODE>
459	{
460	. {
461	/*
462	bool valid = yytext[0] == '{' \|\| token_type == PERCENT_UNION;
463	scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
464	*/
465	bool valid=1;
466	yyless (0);
467
468	if (valid)
469	{
470	braces_level = -1;
471	//code_start = loc->start;
472	BEGIN SC_BRACED_CODE;
473	}
474	else
475	{
476	/*
477	complain_at (*loc, _("missing `{' in `%s'"),
478	token_name (token_type));
479	obstack_sgrow (&obstack_for_string, "{}");
480	STRING_FINISH;
481	val->chars = last_string;
482	*/
483	BEGIN INITIAL;
484	return token_type;
485	}
486	}
487
488	<<EOF>> /unexpected_eof (scanner_cursor, "{}");/ BEGIN INITIAL;
489	}
490
491
492	/*---------------------------------------------------------------.
493	\| Scanning some code in braces (%union and actions). The initial \|
494	\| "{" is already eaten. \|
495	`---------------------------------------------------------------*/
496
497	<SC_BRACED_CODE>
498	{
499	"{"\|"<"{splice}"%" /STRING_GROW;/ braces_level++;
500	"%"{splice}">" /STRING_GROW;/ braces_level--;
501	"}" {
502	bool outer_brace = --braces_level < 0;
503
504	/* As an undocumented Bison extension, append `;' before the last
505	brace in braced code, so that the user code can omit trailing
506	`;'. But do not append `;' if emulating Yacc, since Yacc does
507	not append one.
508
509	FIXME: Bison should warn if a semicolon seems to be necessary
510	here, and should omit the semicolon if it seems unnecessary
511	(e.g., after ';', '{', or '}', each followed by comments or
512	white space). Such a warning shouldn't depend on --yacc; it
513	should depend on a new --pedantic option, which would cause
514	Bison to warn if it detects an extension to POSIX. --pedantic
515	should also diagnose other Bison extensions like %yacc.
516	Perhaps there should also be a GCC-style --pedantic-errors
517	option, so that such warnings are diagnosed as errors. */
518
519	/*
520	if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
521	obstack_1grow (&obstack_for_string, ';');
522
523	obstack_1grow (&obstack_for_string, '}');
524	*/
525
526	if (outer_brace)
527	{
528	/*
529	STRING_FINISH;
530	rule_length++;
531	loc->start = code_start;
532	val->chars = last_string;
533	*/
534	BEGIN INITIAL;
535	return token_type;
536	}
537	}
538
539	/* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
540	(as `<' `<%'). */
541	"<"{splice}"<" /STRING_GROW;/
542	"$"("<"{tag}">")?(-?[0-9]+\|"$") /handle_dollar (token_type, yytext, loc);*/
543	"@"(-?[0-9]+\|"$") /* handle_at (token_type, yytext, loc);/
544
545	<<EOF>> /unexpected_eof (code_start, "}");/ BEGIN INITIAL;
546	}
547
548
549	/*--------------------------------------------------------------.
550	\| Scanning some prologue: from "%{" (already scanned) to "%}". \|
551	`--------------------------------------------------------------*/
552
553	<SC_PROLOGUE>
554	{
555	"%}" {
556	/*
557	STRING_FINISH;
558	loc->start = code_start;
559	val->chars = last_string;
560	*/
561	BEGIN INITIAL;
562	return PROLOGUE;
563	}
564
565	<<EOF>> /unexpected_eof (code_start, "%}");/ BEGIN INITIAL;
566	}
567
568
569	/*---------------------------------------------------------------.
570	\| Scanning the epilogue (everything after the second "%%", which \|
571	\| has already been eaten). \|
572	`---------------------------------------------------------------*/
573
574	<SC_EPILOGUE>
575	{
576	<<EOF>> {
577	/*
578	STRING_FINISH;
579	loc->start = code_start;
580	val->chars = last_string;
581	*/
582	BEGIN INITIAL;
583	return EPILOGUE;
584	}
585	}
586
587
588	/*-----------------------------------------------------.
589	\| By default, grow the string obstack with the input. \|
590	`-----------------------------------------------------*/
591
592	<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>. \|
593	<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n /STRING_GROW;/
594
595
596	%%
597
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
`------------------------------------------------------------------*/

/* The first two characters of UCN are skipped as the escape prefix and
   the remainder is parsed as a hexadecimal number.  The result is a
   value in the range 0..UCHAR_MAX, or -1 when UCN is null, is too
   short to hold the prefix, has no hexadecimal digits after the
   prefix, or denotes a code above UCHAR_MAX.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  char const *digits;
  char *end = 0;
  unsigned long int code;

  /* Without both prefix characters, ucn + 2 would point past the
     terminating null.  */
  if (! ucn || ! ucn[0] || ! ucn[1])
    return -1;

  digits = ucn + 2;
  code = strtoul (digits, &end, 16);

  /* strtoul yields 0 when it converts nothing; do not report that as
     a null character.  */
  if (end == digits)
    return -1;

  /* This also rejects ULONG_MAX, which strtoul returns on overflow.  */
  if (UCHAR_MAX < code)
    return -1;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the entry as an int directly, so that -1 is never routed
       through the unsigned CODE on its way out.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  return (int) code;
#endif
}
652