[deliverable/titan.core.git] / langviz / bison_la.l

/******************************************************************************
 * Copyright (c) 2000-2015 Ericsson Telecom AB
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 ******************************************************************************/
 
/*
 * bison lexical analyzer
 *
 * Written by Matyas Forstner using bison's scan-gram.l
 * 20050908
 */

/* %option debug */
%option nodefault nounput noyywrap never-interactive
%option yylineno
%option prefix="bison_"

%{ /* ****************** C declarations ***************** */

#include "../compiler2/string.hh"
#include "Symbol.hh"
#include "Rule.hh"
#include "bison_p.tab.hh"
#include <limits.h>
#include <errno.h>

#define lval bison_lval 

static int convert_ucn_to_byte (char const *hex_text);

%} /* ***************** definitions ***************** */
 
/* Exclusive start conditions (%x): while one of them is active, only
   rules explicitly tagged with it can match.  */

/* Comments: C comments and line comments inside user code, and
   comments in the grammar part itself.  */
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
/* String and character literals inside user code.  */
%x SC_STRING SC_CHARACTER
/* Just after an identifier, before it is known whether a colon follows.  */
%x SC_AFTER_IDENTIFIER
/* String and character literals of the grammar itself.  */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
/* User code: after a directive that takes code, braced code, the
   prologue and the epilogue.  */
%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

/* Named patterns.  Note that `letter' also admits `.' and `_', so an
   identifier may start with or contain either of them.  A directive is
   `%' followed by an identifier that may additionally contain `-'.  */
letter	  [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id	  {letter}({letter}|[0-9])*
directive %{letter}({letter}|[0-9]|-)*
int	  [0-9]+

/* Contents of a <tag>: a non-empty run of characters other than NUL,
   newline and `>'.  */
tag	 [^\0\n>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  Because of the
   outer `*', this pattern also matches the empty string.  */
splice	 (\\[ \f\t\v]*\n)*

%% /* ***************** rules ************************* */ 

%{
  /* The declarations below come before the first rule, so flex puts
     them at the top of the scanning routine: they are automatic
     variables and are set back to 0 on every call of the scanner.
     NOTE(review): each rule in this file that sets one of them seems
     to have it consumed before the next `return'; keep that in mind
     when adding a rule that returns in between.  */

  /* Nesting level of the current code in braces.  */
  int braces_level=0;

  /* Parent context state, when applicable.  */
  int context_state=0;

  /* Token type to return, when applicable.  */
  int token_type=0;

%}

  /*-----------------------.
  | Scanning white space.  |
  `-----------------------*/

<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
{
  /* Comments and white space.  None of the rules in this group
     returns a token; the matched text is dropped and scanning goes on.
     A stray comma is skipped like white space (the warning is
     commented out).  */
  ","	       /*warn_at (*loc, _("stray `,' treated as white space"));*/
  [ \f\n\t\v]  |
  "//".*       ;
  "/*" {
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  Here they are recognized
     only so that they can be skipped.  */
  ^"#line "{int}" \"".*"\"\n" {
    ;
  }
}

  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/
<INITIAL>
{
  /* Directives.  Most rules return the directive's token at once.
     The directives that take code (%destructor, %initial-action,
     %lex-param, %parse-param, %printer, %union) instead store their
     token in token_type and switch to SC_PRE_CODE; that token is
     returned when the closing brace of the code is reached in
     SC_BRACED_CODE.  Some spellings are aliases: %binary yields
     PERCENT_NONASSOC, %term yields PERCENT_TOKEN and
     %fixed-output-files yields PERCENT_YACC.  */
  "%binary"               return PERCENT_NONASSOC;
  "%debug"                return PERCENT_DEBUG;
  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
  "%define"               return PERCENT_DEFINE;
  "%defines"              return PERCENT_DEFINES;
  "%destructor"		  token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
  "%dprec"		  return PERCENT_DPREC;
  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
  "%expect"               return PERCENT_EXPECT;
  "%expect"[-_]"rr"	  return PERCENT_EXPECT_RR;
  "%file-prefix"          return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
  "%glr-parser"           return PERCENT_GLR_PARSER;
  "%left"                 return PERCENT_LEFT;
  "%lex-param"		  token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
  "%locations"            return PERCENT_LOCATIONS;
  "%merge"		  return PERCENT_MERGE;
  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"	return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"        return PERCENT_NO_LINES;
  "%nonassoc"             return PERCENT_NONASSOC;
  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                return PERCENT_NTERM;
  "%output"               return PERCENT_OUTPUT;
  "%parse-param"	  token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
  "%prec"                 return PERCENT_PREC;
  "%printer"              token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
  "%right"                return PERCENT_RIGHT;
  "%skeleton"             return PERCENT_SKELETON;
  "%start"                return PERCENT_START;
  "%term"                 return PERCENT_TOKEN;
  "%token"                return PERCENT_TOKEN;
  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
  "%type"                 return PERCENT_TYPE;
  "%union"		  token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
  "%verbose"              return PERCENT_VERBOSE;
  "%yacc"                 return PERCENT_YACC;

  /* Any other directive is skipped without a diagnostic and without
     returning a token.  */
  {directive} {
    /*complain_at (*loc, _("invalid directive: %s"), quote (yytext));*/
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  /* Identifier: keep a heap-allocated copy of its text in
     lval.string_val.  No token is returned yet; SC_AFTER_IDENTIFIER
     decides between ID and ID_COLON.  The scanner does not free the
     copy on this path.  */
  {id} {
    //val->symbol = symbol_get (yytext, *loc);
    lval.string_val=new string(yytext);
    BEGIN SC_AFTER_IDENTIFIER;
  }

  /* Integers are returned as INT without a semantic value (the
     conversions are commented out).  */
  {int} {
    //lval.integer = scan_integer (yytext, 10);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    //lval.integer = scan_integer (yytext, 16);
    return INT;
  }

  /* Characters.  We don't check there is only one.  The collected
     text starts with the opening quote.  */
  "'"	      /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;

  /* Strings.  The collected text starts with the opening quote.  */
  "\""	      lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    //STRING_GROW;
    token_type = BRACED_CODE;
    braces_level = 0;
    BEGIN SC_BRACED_CODE;
  }

  /* A type.  Only the token is returned; the tag text is not kept.  */
  "<"{tag}">" {
    /*
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    */
    return TYPE;
  }

  /* Section separator.  After the second one the rest of the input is
     scanned as the epilogue.  NOTE(review): the counter is a function
     static that is never reset in the visible code, so a second
     grammar scanned in the same process would start from a stale
     count -- confirm that only one file is scanned per run.  */
  "%%" {
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  /* Any other character is skipped without a diagnostic.  */
  . {
    //complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    yyterminate ();
  }
}


  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>
{
  /* The identifier text is already stored in lval.string_val.  White
     space and comments before the colon are skipped by the shared
     white-space rules, which also list this start condition.  */
  ":" {
    BEGIN INITIAL;
    return ID_COLON;
  }
  /* Anything else: put the character back (yyless (0)) so that it is
     rescanned in INITIAL, and report a plain identifier.  */
  . {
    yyless (0);
    BEGIN INITIAL;
    return ID;
  }
  <<EOF>> {
    BEGIN INITIAL;
    return ID;
  }
}


  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>
{
  /* The comment text is discarded.  Its end, or the end of input,
     switches back to the start condition saved in context_state when
     the comment was opened.  */
  "*/"     BEGIN context_state;
  .|\n	   ;
  <<EOF>>  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial `/ *' is already eaten.  |
  `------------------------------------------------------------*/

<SC_COMMENT>
{
  /* Only the end of the comment is handled here (the closing star and
     slash may be separated by backslash-newline splices); every other
     character is consumed by the default rules at the end of the
     rules section.  Both cases below go back to the start condition
     saved in context_state.  */
  "*"{splice}"/"  /*STRING_GROW;*/ BEGIN context_state;
  <<EOF>>	  /*unexpected_eof (token_start, "* /");*/ BEGIN context_state;
}


  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial `//' is already eaten.  |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>
{
  /* A plain newline ends the comment and goes back to the start
     condition saved in context_state; a backslash-newline splice is
     consumed without ending it.  Other characters are consumed by the
     default rules at the end of the rules section.  */
  "\n"		 /*STRING_GROW;*/ BEGIN context_state;
  {splice}	 /*STRING_GROW;*/
  <<EOF>>	 BEGIN context_state;
}


  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  /* Closing quote: complete the text collected in lval.string_val
     (the opening quote was stored when the literal started) and
     return it as a STRING token.  */
  "\"" {
    /*
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    rule_length++;
    */
    *lval.string_val+="\"";
    BEGIN INITIAL;
    return STRING;
  }

  /* A newline or the end of input inside the literal abandons it: the
     collected text is freed and scanning resumes in INITIAL without
     returning a token.  NOTE(review): lval.string_val keeps the freed
     address until a later literal or identifier overwrites it.  */
  \n		/*unexpected_newline (token_start, "\"");*/ delete lval.string_val; BEGIN INITIAL;

  <<EOF>>	/*unexpected_eof (token_start, "\"");*/ delete lval.string_val; BEGIN INITIAL;

  /* Any other single character is collected as is; longer matches
     (escape sequences) are handled by the rules shared with
     SC_ESCAPED_CHARACTER.  */
  .     *lval.string_val+=yytext;

}


  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.			      |
  `----------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  /* Closing quote: complete the text collected in lval.string_val
     (the opening quote was stored when the literal started).  Note
     that a character literal is returned as an ID token.  */
  "'" {
    /*
    unsigned char last_string_1;
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->symbol = symbol_get (last_string, *loc);
    symbol_class_set (val->symbol, token_sym, *loc);
    last_string_1 = last_string[1];
    symbol_user_token_number_set (val->symbol, last_string_1, *loc);
    STRING_FREE;
    rule_length++;
    */
    *lval.string_val+="'";
    BEGIN INITIAL;
    return ID;
  }

  /* A newline or the end of input inside the literal abandons it: the
     collected text is freed and scanning resumes in INITIAL without
     returning a token.  NOTE(review): lval.string_val keeps the freed
     address until a later literal or identifier overwrites it.  */
  \n		/*unexpected_newline (token_start, "'");*/ delete lval.string_val; BEGIN INITIAL;

  <<EOF>>	/*unexpected_eof (token_start, "'");*/ delete lval.string_val; BEGIN INITIAL;

  /* Any other single character is collected as is; longer matches
     (escape sequences) are handled by the rules shared with
     SC_ESCAPED_STRING.  */
  .     *lval.string_val+=yytext;
}

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
{
  /* A NUL byte inside a literal is meant to be dropped.
     NOTE(review): both start conditions already have an earlier `.'
     rule of the same length, which presumably also matches NUL, so
     this rule looks unreachable -- confirm with flex's warnings.  */
  \0	    /*complain_at (*loc, _("invalid null character"));*/
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Each recognized escape appends the character it denotes to
     lval.string_val.  Escapes whose value is zero or does not fit in
     an unsigned char are dropped without a diagnostic (the complaints
     are commented out).  The decoded value is appended through
     `+=' with an integer operand; NOTE(review): this relies on the
     operators declared for `string' in ../compiler2/string.hh, which
     is not visible here.  */

  /* Octal escape: one to three octal digits after the backslash.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, 0, 8);
    if (UCHAR_MAX < c)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c) 
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  /* Hexadecimal escape: any number of digits, so errno is cleared
     first and checked afterwards to catch an out-of-range
     conversion.  */
  \\x[0-9abcdefABCDEF]+ {
    unsigned long int c;
    errno=0;
    c = strtoul (yytext + 2, 0, 16);
    if (UCHAR_MAX < c || errno)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }

  \\a	/*obstack_1grow (&obstack_for_string, '\a');*/ *lval.string_val+='\a';
  \\b	/*obstack_1grow (&obstack_for_string, '\b');*/ *lval.string_val+='\b';
  \\f	/*obstack_1grow (&obstack_for_string, '\f');*/ *lval.string_val+='\f';
  \\n	/*obstack_1grow (&obstack_for_string, '\n');*/ *lval.string_val+='\n';
  \\r	/*obstack_1grow (&obstack_for_string, '\r');*/ *lval.string_val+='\r';
  \\t	/*obstack_1grow (&obstack_for_string, '\t');*/ *lval.string_val+='\t';
  \\v	/*obstack_1grow (&obstack_for_string, '\v');*/ *lval.string_val+='\v';

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  /*obstack_1grow (&obstack_for_string, yytext[1]);*/ *lval.string_val+=yytext[1];

  /* Universal character name: `u' with 4 hex digits or `U' with 8.
     convert_ucn_to_byte returns a negative value when the name has
     no single-byte equivalent.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      /*complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext))*/;
    else if (! c)
      /*complain_at (*loc, _("invalid null character: %s"), quote (yytext))*/;
    else
      /*obstack_1grow (&obstack_for_string, c);*/ *lval.string_val+=c;
  }
  /* Any other backslash pair (including backslash-newline) is kept
     verbatim, backslash included.  */
  \\(.|\n)	{
    /*
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
    */
    *lval.string_val+=yytext;
  }
}


  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/

<SC_CHARACTER,SC_STRING>
{
  /* Inside a user-code literal, consume splices and backslash
     escapes so that an escaped quote does not end the literal.  The
     escaped character may be anything but a newline, `$', `@', `['
     or `]'.  The matched text is discarded.  */
  {splice}|\\{splice}[^\n$@\[\]]	/*STRING_GROW;*/
}

<SC_CHARACTER>
{
  /* The closing quote, a newline or the end of input all end the
     user-code character literal and go back to the start condition
     saved in context_state; no diagnostic is issued.  */
  "'"		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "'");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "'");*/ BEGIN context_state;
}

<SC_STRING>
{
  /* The closing quote, a newline or the end of input all end the
     user-code string literal and go back to the start condition
     saved in context_state; no diagnostic is issued.  */
  "\""		/*STRING_GROW;*/ BEGIN context_state;
  \n		/*unexpected_newline (token_start, "\"");*/ BEGIN context_state;
  <<EOF>>	/*unexpected_eof (token_start, "\"");*/ BEGIN context_state;
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* Each rule below opens a literal or a comment inside user code.
     It saves the current start condition in context_state so that the
     matching sub-scanner can return to it, then switches to that
     sub-scanner.  This keeps braces, quotes and `%}' inside literals
     and comments from being interpreted.  */
  "'" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    /*
    STRING_GROW;
    token_start = loc->start;
    */
    context_state = YY_START;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    /*STRING_GROW;*/
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}


  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | For %union only, allow arbitrary C code to appear before the   |
  | following brace, as an extension to POSIX.			   |
  `---------------------------------------------------------------*/

<SC_PRE_CODE>
{
  /* First character that is not skipped by the shared white-space
     rules.  It is pushed back (yyless (0)) and rescanned in
     SC_BRACED_CODE.  braces_level starts at -1 because the opening
     brace has not been consumed yet: it raises the level to 0, and
     the matching closing brace brings it below 0 again.  */
  . {
    /*
    bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
    */
    /* The check above is disabled: `valid' is always true here, so
       the else branch below is never taken.  */
    bool valid=1;
    yyless (0);

    if (valid)
      {
	braces_level = -1;
	//code_start = loc->start;
	BEGIN SC_BRACED_CODE;
      }
    else
      {
        /*
	complain_at (*loc, _("missing `{' in `%s'"),
		     token_name (token_type));
	obstack_sgrow (&obstack_for_string, "{}");
	STRING_FINISH;
	val->chars = last_string;
        */
	BEGIN INITIAL;
	return token_type;
      }
  }

  /* End of input right after the directive: go back to INITIAL
     without returning the pending token.  */
  <<EOF>>  /*unexpected_eof (scanner_cursor, "{}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions). The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/

<SC_BRACED_CODE>
{
  /* Track brace nesting; the digraphs `<%' and `%>' count as braces.
     The code text itself is not kept: only the token stored in
     token_type is returned, once the outermost closing brace is
     reached.  */
  "{"|"<"{splice}"%"  /*STRING_GROW;*/ braces_level++;
  "%"{splice}">"      /*STRING_GROW;*/ braces_level--;
  "}" {
    bool outer_brace = --braces_level < 0;

    /* As an undocumented Bison extension, append `;' before the last
       brace in braced code, so that the user code can omit trailing
       `;'.  But do not append `;' if emulating Yacc, since Yacc does
       not append one.

       FIXME: Bison should warn if a semicolon seems to be necessary
       here, and should omit the semicolon if it seems unnecessary
       (e.g., after ';', '{', or '}', each followed by comments or
       white space).  Such a warning shouldn't depend on --yacc; it
       should depend on a new --pedantic option, which would cause
       Bison to warn if it detects an extension to POSIX.  --pedantic
       should also diagnose other Bison extensions like %yacc.
       Perhaps there should also be a GCC-style --pedantic-errors
       option, so that such warnings are diagnosed as errors.  */

    /*
    if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
      obstack_1grow (&obstack_for_string, ';');

    obstack_1grow (&obstack_for_string, '}');
    */

    if (outer_brace)
      {
        /*
	STRING_FINISH;
	rule_length++;
	loc->start = code_start;
	val->chars = last_string;
        */
	BEGIN INITIAL;
	return token_type;
      }
  }

  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
  "<"{splice}"<"  /*STRING_GROW;*/
  /* References to semantic values and locations are recognized but
     ignored (their handlers are commented out).  */
  "$"("<"{tag}">")?(-?[0-9]+|"$")  /*handle_dollar (token_type, yytext, *loc);*/
  "@"(-?[0-9]+|"$")		/* handle_at (token_type, yytext, *loc);*/

  /* End of input inside the code: go back to INITIAL without
     returning the pending token.  */
  <<EOF>>  /*unexpected_eof (code_start, "}");*/ BEGIN INITIAL;
}


  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  /* End of the prologue: return a PROLOGUE token.  The prologue text
     itself is not kept (the code that stored it is commented out).  */
  "%}" {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return PROLOGUE;
  }

  /* End of input inside the prologue: go back to INITIAL without
     returning a PROLOGUE token.  */
  <<EOF>>  /*unexpected_eof (code_start, "%}");*/ BEGIN INITIAL;
}


  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  /* The epilogue runs to the end of input.  An EPILOGUE token is
     returned there, without its text, and the start condition goes
     back to INITIAL, whose own end-of-input rule then terminates the
     scanner on the next call.  */
  <<EOF>> {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return EPILOGUE;
  }
}


 /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  | (Inherited wording: STRING_GROW is commented out in  |
  | this scanner, so the matched text is just discarded. |
  | These rules exist so that every character is matched |
  | in the listed start conditions, as `nodefault' needs.)|
  `-----------------------------------------------------*/

<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>.	|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	/*STRING_GROW;*/


%%

/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the whole escape as matched by the scanner: a       |
| backslash, `u' or `U', then 4 or 8 hexadecimal digits.            |
| A result of 0 means the name denotes the null character; callers  |
| decide what to do with it.                                        |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the backslash and the `u'/`U'; what remains is hex digits.  */
  unsigned long int code = strtoul (ucn + 2, 0, 16);

  /* The result is kept in an int of its own.  Storing -1 in the
     unsigned `code' and narrowing it back on return would rely on an
     implementation-defined conversion.  */
  int result;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* In range: on an ASCII host the code point is the byte value.  */
  result = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no portable equivalent.  */
    result = code < sizeof table ? table[code] : -1;
  }
#endif

  return result;
}
Commit	Line	Data
970ed795	1	/******************************************************************************
3abe9331	2	* Copyright (c) 2000-2015 Ericsson Telecom AB
970ed795 EL	3	* All rights reserved. This program and the accompanying materials
	4	* are made available under the terms of the Eclipse Public License v1.0
	5	* which accompanies this distribution, and is available at
	6	* http://www.eclipse.org/legal/epl-v10.html
	7	******************************************************************************/
	8
	9	/*
	10	* bison lexical analyzer
	11	*
	12	* Written by Matyas Forstner using bison's scan-gram.l
	13	* 20050908
	14	*/
	15
	16	/* %option debug */
	17	%option nodefault nounput noyywrap never-interactive
	18	%option yylineno
	19	%option prefix="bison_"
	20
	21	%{ /* **************** C declarations *************** */
	22
	23	#include "../compiler2/string.hh"
	24	#include "Symbol.hh"
	25	#include "Rule.hh"
	26	#include "bison_p.tab.hh"
	27	#include <limits.h>
	28	#include <errno.h>
	29
	30	#define lval bison_lval
	31
	32	static int convert_ucn_to_byte (char const *hex_text);
	33
	34	%} /* *************** definitions *************** */
	35
	36	%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
	37	%x SC_STRING SC_CHARACTER
	38	%x SC_AFTER_IDENTIFIER
	39	%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
	40	%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
	41
	42	letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
	43	id {letter}({letter}\|[0-9])*
	44	directive %{letter}({letter}\|[0-9]\|-)*
	45	int [0-9]+
	46
	47	tag [^\0\n>]+
	48
	49	/* Zero or more instances of backslash-newline. Following GCC, allow
	50	white space between the backslash and the newline. */
	51	splice (\\[ \f\t\v]\n)
	52
	53	%% /* *************** rules *********************** */
	54
	55	%{
	56	/* Nesting level of the current code in braces. */
	57	int braces_level=0;
	58
	59	/* Parent context state, when applicable. */
	60	int context_state=0;
	61
	62	/* Token type to return, when applicable. */
	63	int token_type=0;
	64
	65	%}
	66
67	/*-----------------------.
68	\| Scanning white space. \|
69	`-----------------------*/
70
71	<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
72	{
73	/* Comments and white space. */
74	"," /warn_at (loc, _("stray `,' treated as white space"));*/
75	[ \f\n\t\v] \|
76	"//".* ;
77	"/*" {
78	context_state = YY_START;
79	BEGIN SC_YACC_COMMENT;
80	}
81
82	/* #line directives are not documented, and may be withdrawn or
83	modified in future versions of Bison. */
84	^"#line "{int}" \"".*"\"\n" {
85	;
86	}
87	}
88
89	/*----------------------------.
90	\| Scanning Bison directives. \|
91	`----------------------------*/
92	<INITIAL>
93	{
94	"%binary" return PERCENT_NONASSOC;
95	"%debug" return PERCENT_DEBUG;
96	"%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
97	"%define" return PERCENT_DEFINE;
98	"%defines" return PERCENT_DEFINES;
99	"%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
100	"%dprec" return PERCENT_DPREC;
101	"%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
102	"%expect" return PERCENT_EXPECT;
103	"%expect"[-_]"rr" return PERCENT_EXPECT_RR;
104	"%file-prefix" return PERCENT_FILE_PREFIX;
105	"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
106	"%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
107	"%glr-parser" return PERCENT_GLR_PARSER;
108	"%left" return PERCENT_LEFT;
109	"%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
110	"%locations" return PERCENT_LOCATIONS;
111	"%merge" return PERCENT_MERGE;
112	"%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
113	"%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
114	"%no"[-_]"lines" return PERCENT_NO_LINES;
115	"%nonassoc" return PERCENT_NONASSOC;
116	"%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
117	"%nterm" return PERCENT_NTERM;
118	"%output" return PERCENT_OUTPUT;
119	"%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
120	"%prec" return PERCENT_PREC;
121	"%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
122	"%pure"[-_]"parser" return PERCENT_PURE_PARSER;
123	"%right" return PERCENT_RIGHT;
124	"%skeleton" return PERCENT_SKELETON;
125	"%start" return PERCENT_START;
126	"%term" return PERCENT_TOKEN;
127	"%token" return PERCENT_TOKEN;
128	"%token"[-_]"table" return PERCENT_TOKEN_TABLE;
129	"%type" return PERCENT_TYPE;
130	"%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
131	"%verbose" return PERCENT_VERBOSE;
132	"%yacc" return PERCENT_YACC;
133
134	{directive} {
135	/complain_at (loc, _("invalid directive: %s"), quote (yytext));*/
136	}
137
138	"=" return EQUAL;
139	"\|" return PIPE;
140	";" return SEMICOLON;
141
142	{id} {
143	//val->symbol = symbol_get (yytext, *loc);
144	lval.string_val=new string(yytext);
145	BEGIN SC_AFTER_IDENTIFIER;
146	}
147
148	{int} {
149	//lval.integer = scan_integer (yytext, 10);
150	return INT;
151	}
152	0[xX][0-9abcdefABCDEF]+ {
153	//lval.integer = scan_integer (yytext, 16);
154	return INT;
155	}
156
157	/* Characters. We don't check there is only one. */
158	"'" /* +\' */ lval.string_val=new string("'"); BEGIN SC_ESCAPED_CHARACTER;
159
160	/* Strings. */
161	"\"" lval.string_val=new string("\""); BEGIN SC_ESCAPED_STRING;
162
163	/* Prologue. */
164	"%{" BEGIN SC_PROLOGUE;
165
166	/* Code in between braces. */
167	"{" {
168	//STRING_GROW;
169	token_type = BRACED_CODE;
170	braces_level = 0;
171	BEGIN SC_BRACED_CODE;
172	}
173
174	/* A type. */
175	"<"{tag}">" {
176	/*
177	obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
178	STRING_FINISH;
179	val->uniqstr = uniqstr_new (last_string);
180	STRING_FREE;
181	*/
182	return TYPE;
183	}
184
185	"%%" {
186	static int percent_percent_count;
187	if (++percent_percent_count == 2)
188	BEGIN SC_EPILOGUE;
189	return PERCENT_PERCENT;
190	}
191
192	. {
193	//complain_at (*loc, _("invalid character: %s"), quote (yytext));
194	}
195
196	<<EOF>> {
197	yyterminate ();
198	}
199	}
200
201
202	/*-----------------------------------------------------------------.
203	\| Scanning after an identifier, checking whether a colon is next. \|
204	`-----------------------------------------------------------------*/
205
206	<SC_AFTER_IDENTIFIER>
207	{
208	":" {
209	BEGIN INITIAL;
210	return ID_COLON;
211	}
212	. {
213	yyless (0);
214	BEGIN INITIAL;
215	return ID;
216	}
217	<<EOF>> {
218	BEGIN INITIAL;
219	return ID;
220	}
221	}
222
223
224
225	/*---------------------------------------------------------------.
226	\| Scanning a Yacc comment. The initial `/ *' is already eaten. \|
227	`---------------------------------------------------------------*/
228
229	<SC_YACC_COMMENT>
230	{
231	"*/" BEGIN context_state;
232	.\|\n ;
233	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
234	}
235
236
237	/*------------------------------------------------------------.
238	\| Scanning a C comment. The initial `/ *' is already eaten. \|
239	`------------------------------------------------------------*/
240
241	<SC_COMMENT>
242	{
243	""{splice}"/" /STRING_GROW;*/ BEGIN context_state;
244	<<EOF>> /unexpected_eof (token_start, " /");*/ BEGIN context_state;
245	}
246
247
248	/*--------------------------------------------------------------.
249	\| Scanning a line comment. The initial `//' is already eaten. \|
250	`--------------------------------------------------------------*/
251
252	<SC_LINE_COMMENT>
253	{
254	"\n" /STRING_GROW;/ BEGIN context_state;
255	{splice} /STRING_GROW;/
256	<<EOF>> BEGIN context_state;
257	}
258
259
260	/*------------------------------------------------.
261	\| Scanning a Bison string, including its escapes. \|
262	\| The initial quote is already eaten. \|
263	`------------------------------------------------*/
264
265	<SC_ESCAPED_STRING>
266	{
267	"\"" {
268	/*
269	STRING_GROW;
270	STRING_FINISH;
271	loc->start = token_start;
272	val->chars = last_string;
273	rule_length++;
274	*/
275	*lval.string_val+="\"";
276	BEGIN INITIAL;
277	return STRING;
278	}
279
280	\n /unexpected_newline (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
281
282	<<EOF>> /unexpected_eof (token_start, "\"");/ delete lval.string_val; BEGIN INITIAL;
283
284	. *lval.string_val+=yytext;
285
286	}
287
288
289	/*----------------------------------------------------------.
290	\| Scanning a Bison character literal, decoding its escapes. \|
291	\| The initial quote is already eaten. \|
292	`----------------------------------------------------------*/
293
294	<SC_ESCAPED_CHARACTER>
295	{
296	"'" {
297	/*
298	unsigned char last_string_1;
299	STRING_GROW;
300	STRING_FINISH;
301	loc->start = token_start;
302	val->symbol = symbol_get (last_string, *loc);
303	symbol_class_set (val->symbol, token_sym, *loc);
304	last_string_1 = last_string[1];
305	symbol_user_token_number_set (val->symbol, last_string_1, *loc);
306	STRING_FREE;
307	rule_length++;
308	*/
309	*lval.string_val+="'";
310	BEGIN INITIAL;
311	return ID;
312	}
313
314	\n /unexpected_newline (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
315
316	<<EOF>> /unexpected_eof (token_start, "'");/ delete lval.string_val; BEGIN INITIAL;
317
318	. *lval.string_val+=yytext;
319	}
320
321	<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
322	{
323	\0 /complain_at (loc, _("invalid null character"));*/
324	}
325
326
327	/*----------------------------.
328	\| Decode escaped characters. \|
329	`----------------------------*/
330
331	<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
332	{
333	\\[0-7]{1,3} {
334	unsigned long int c = strtoul (yytext + 1, 0, 8);
335	if (UCHAR_MAX < c)
336	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
337	else if (! c)
338	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
339	else
340	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
341	}
342
343	\\x[0-9abcdefABCDEF]+ {
344	unsigned long int c;
345	errno=0;
346	c = strtoul (yytext + 2, 0, 16);
347	if (UCHAR_MAX < c \|\| errno)
348	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
349	else if (! c)
350	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
351	else
352	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
353	}
354
355	\\a /obstack_1grow (&obstack_for_string, '\a');/ *lval.string_val+='\a';
356	\\b /obstack_1grow (&obstack_for_string, '\b');/ *lval.string_val+='\b';
357	\\f /obstack_1grow (&obstack_for_string, '\f');/ *lval.string_val+='\f';
358	\\n /obstack_1grow (&obstack_for_string, '\n');/ *lval.string_val+='\n';
359	\\r /obstack_1grow (&obstack_for_string, '\r');/ *lval.string_val+='\r';
360	\\t /obstack_1grow (&obstack_for_string, '\t');/ *lval.string_val+='\t';
361	\\v /obstack_1grow (&obstack_for_string, '\v');/ *lval.string_val+='\v';
362
363	/* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
364	\\("\""\|"'"\|"?"\|"\\") /obstack_1grow (&obstack_for_string, yytext[1]);/ *lval.string_val+=yytext[1];
365
366	\\(u\|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
367	int c = convert_ucn_to_byte (yytext);
368	if (c < 0)
369	/complain_at (loc, _("invalid escape sequence: %s"), quote (yytext))*/;
370	else if (! c)
371	/complain_at (loc, _("invalid null character: %s"), quote (yytext))*/;
372	else
373	/obstack_1grow (&obstack_for_string, c);/ *lval.string_val+=c;
374	}
375	\\(.\|\n) {
376	/*
377	complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
378	STRING_GROW;
379	*/
380	*lval.string_val+=yytext;
381	}
382	}
383
384
385	/*--------------------------------------------.
386	\| Scanning user-code characters and strings. \|
387	`--------------------------------------------*/
388
389	<SC_CHARACTER,SC_STRING>
390	{
391	{splice}\|\\{splice}[^\n$@\[\]] /STRING_GROW;/
392	}
393
394	<SC_CHARACTER>
395	{
396	"'" /STRING_GROW;/ BEGIN context_state;
397	\n /unexpected_newline (token_start, "'");/ BEGIN context_state;
398	<<EOF>> /unexpected_eof (token_start, "'");/ BEGIN context_state;
399	}
400
401	<SC_STRING>
402	{
403	"\"" /STRING_GROW;/ BEGIN context_state;
404	\n /unexpected_newline (token_start, "\"");/ BEGIN context_state;
405	<<EOF>> /unexpected_eof (token_start, "\"");/ BEGIN context_state;
406	}
407
408
409	/*---------------------------------------------------.
410	\| Strings, comments etc. can be found in user code. \|
411	`---------------------------------------------------*/
412
413	<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
414	{
415	"'" {
416	/*
417	STRING_GROW;
418	token_start = loc->start;
419	*/
420	context_state = YY_START;
421	BEGIN SC_CHARACTER;
422	}
423	"\"" {
424	/*
425	STRING_GROW;
426	token_start = loc->start;
427	*/
428	context_state = YY_START;
429	BEGIN SC_STRING;
430	}
431	"/"{splice}"*" {
432	/*
433	STRING_GROW;
434	token_start = loc->start;
435	*/
436	context_state = YY_START;
437	BEGIN SC_COMMENT;
438	}
439	"/"{splice}"/" {
440	/STRING_GROW;/
441	context_state = YY_START;
442	BEGIN SC_LINE_COMMENT;
443	}
444	}
445
446
  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | For %union only, allow arbitrary C code to appear before the   |
  | following brace, as an extension to POSIX.                     |
  `---------------------------------------------------------------*/
452
<SC_PRE_CODE>
{
  /* First character after the directive.  It is pushed back and the
     scanner switches to SC_BRACED_CODE to read the code block.  */
  . {
    /*
    bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
    */
    /* The upstream validity test above is disabled: any character is
       accepted as the start of the code.  */
    bool valid=1;
    /* Return the character to the input so that it is rescanned.  */
    yyless (0);

    if (valid)
      {
        /* Start one level below zero; a rescanned "{" raises the
           level to 0 in SC_BRACED_CODE.  */
        braces_level = -1;
        //code_start = loc->start;
        BEGIN SC_BRACED_CODE;
      }
    else
      {
        /* Not reachable while `valid' is hard-wired to 1.  */
        /*
        complain_at (*loc, _("missing `{' in `%s'"),
                     token_name (token_type));
        obstack_sgrow (&obstack_for_string, "{}");
        STRING_FINISH;
        val->chars = last_string;
        */
        BEGIN INITIAL;
        return token_type;
      }
  }

  /* End of file before any code: go back to INITIAL; the upstream
     diagnostic is disabled.  */
  <<EOF>>	/*unexpected_eof (scanner_cursor, "{}");*/ BEGIN INITIAL;
}
485
486
  /*---------------------------------------------------------------.
  | Scanning some code in braces (%union and actions).  The initial |
  | "{" is already eaten.                                          |
  `---------------------------------------------------------------*/
491
<SC_BRACED_CODE>
{
  /* Track the nesting level.  "<%" raises and "%>" lowers it just as
     "{" raises it, but only a literal "}" is checked for closing the
     outermost level.  Nothing is stored: STRING_GROW is disabled.  */
  "{"|"<"{splice}"%"	/*STRING_GROW;*/ braces_level++;
  "%"{splice}">"	/*STRING_GROW;*/ braces_level--;
  "}" {
    /* True when this brace takes the level below zero, i.e. it
       matches the brace that opened the code block.  */
    bool outer_brace = --braces_level < 0;

    /* As an undocumented Bison extension, append `;' before the last
       brace in braced code, so that the user code can omit trailing
       `;'.  But do not append `;' if emulating Yacc, since Yacc does
       not append one.

       FIXME: Bison should warn if a semicolon seems to be necessary
       here, and should omit the semicolon if it seems unnecessary
       (e.g., after ';', '{', or '}', each followed by comments or
       white space).  Such a warning shouldn't depend on --yacc; it
       should depend on a new --pedantic option, which would cause
       Bison to warn if it detects an extension to POSIX.  --pedantic
       should also diagnose other Bison extensions like %yacc.
       Perhaps there should also be a GCC-style --pedantic-errors
       option, so that such warnings are diagnosed as errors.  */

    /*
    if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
      obstack_1grow (&obstack_for_string, ';');

    obstack_1grow (&obstack_for_string, '}');
    */

    if (outer_brace)
      {
        /*
        STRING_FINISH;
        rule_length++;
        loc->start = code_start;
        val->chars = last_string;
        */
        /* The whole block is reported as one token of the type that
           was pending when the code started.  */
        BEGIN INITIAL;
        return token_type;
      }
  }

  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
  "<"{splice}"<"	/*STRING_GROW;*/

  /* `$$', `$N', `$<tag>N' and `@$', `@N' references are matched as
     single units and otherwise ignored; the upstream handlers are
     disabled.  */
  "$"("<"{tag}">")?(-?[0-9]+|"$")	/*handle_dollar (token_type, yytext, *loc);*/
  "@"(-?[0-9]+|"$")	/* handle_at (token_type, yytext, *loc);*/

  /* End of file inside the code: go back to INITIAL; the upstream
     diagnostic is disabled.  */
  <<EOF>>	/*unexpected_eof (code_start, "}");*/ BEGIN INITIAL;
}
542
543
  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/
547
<SC_PROLOGUE>
{
  /* The closing "%}" ends the prologue: go back to INITIAL and report
     the whole section as one PROLOGUE token.  The upstream code that
     attached the collected text to the token is disabled.  */
  "%}" {
    /*
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    */
    BEGIN INITIAL;
    return PROLOGUE;
  }

  /* End of file before "%}": go back to INITIAL; the upstream
     diagnostic is disabled.  */
  <<EOF>>	/*unexpected_eof (code_start, "%}");*/ BEGIN INITIAL;
}
562
563
  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/
568
<SC_EPILOGUE>
{
  /* The only rule specific to the epilogue is end of input: switch
     back to the INITIAL start condition and hand the epilogue to the
     parser as a single EPILOGUE token.  The upstream statements that
     attached the collected text and its start location to the token
     remain disabled:
       STRING_FINISH;
       loc->start = code_start;
       val->chars = last_string;  */
  <<EOF>> {
    BEGIN (INITIAL);
    return EPILOGUE;
  }
}
581
582
  /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/
586
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER>.	|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	/*STRING_GROW;*/
589
590
591	%%
592
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the two-character prefix (presumably a backslash followed by
     `u' or `U' -- the caller is not visible here) and parse what
     follows as a hexadecimal number.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* The result is kept in an int from here on, so that the -1
     sentinel is never routed through an unsigned type and narrowed
     back on return.  */
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* In range, so the conversion to int is value-preserving.  */
  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no single-byte form.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
647