[deliverable/binutils-gdb.git] / binutils / rclex.c

/* rclex.c -- lexer for Windows rc files parser  */

/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.

   Written by Kai Tietz, Onevision.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* This is a lexer used by the Windows rc file parser.  It basically
   just recognizes a bunch of keywords.  */

#include "sysdep.h"
#include "bfd.h"
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "windres.h"
#include "rcparse.h"

#include <assert.h>

/* Nonzero while we are in rcdata mode, in which we return the lengths
   of strings along with the strings themselves.  */

static int rcdata_mode;

/* Whether we are suppressing lines from cpp (including windows.h or
   headers from your C sources may bring in externs and typedefs).
   When active, we return IGNORED_TOKEN, which lets us ignore these
   outside of resource constructs.  Thus, it isn't required to protect
   all the non-preprocessor lines in your header files with #ifdef
   RC_INVOKED.  It also means your RC file can't include other RC
   files if they're named "*.h".  Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data;

/* Yield IGNORED_TOKEN in place of token X while cpp data is being
   suppressed, and X itself otherwise.  */
#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))

/* The first filename we detect in the cpp output.  We use this to
   tell included files from the original file.  */

static char *initial_fn;

/* List of allocated strings.  Every buffer handed out by get_string
   is recorded here so that rcparse_discard_strings can release all of
   them in one go.  */

struct alloc_string
{
  /* Next node in the list, or NULL at the end of the list.  */
  struct alloc_string *next;
  /* The string buffer recorded by this node.  */
  char *s;
};

/* Head of the list; new nodes are pushed at the front.  */
static struct alloc_string *strings;

/* One entry of the keyword table: the spelling of a keyword and the
   parser token it maps to.  */
struct rclex_keywords
{
  const char *name;
  int tok;
};

/* K(KEY) makes an entry whose token has the same name as the keyword;
   KRT(KEY) makes an entry whose token is the keyword prefixed with RT_.  */
#define K(KEY)  { #KEY, KEY }
#define KRT(KEY)  { #KEY, RT_##KEY }

/* Table of the recognized keywords, searched linearly by
   rclex_translatekeyword.  BEGIN and VERSION are spelled out because
   their tokens (BEG, VERSIONK) are named differently.  The table is
   terminated by an entry with a NULL name.  */
static const struct rclex_keywords keywds[] =
{
  K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
  K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
  K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
  K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
  K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
  K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
  K(DLGINCLUDE), K(DLGINIT),
  K(EDITTEXT), K(END), K(EXSTYLE),
  K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
  K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
  K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
  K(HEDIT), K(HELP), K(HTML),
  K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
  K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
  K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
  K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
  K(NOINVERT), K(NOT),
  K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
  K(PURE), K(PUSHBOX), K(PUSHBUTTON),
  K(RADIOBUTTON), K(RCDATA), K(RTEXT),
  K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
  K(STRINGTABLE), K(STYLE),
  K(TOOLBAR),
  K(USERBUTTON),
  K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
  K(VIRTKEY), K(VXD),
  { NULL, 0 },
};

/* External input stream from resrc */
extern FILE *cpp_pipe;

/* Lexical scanner helpers.  */
/* One character of pushback set by rclex_peekch and consumed by
   rclex_readch; -1 when nothing is pushed back.  */
static int rclex_lastch = -1;
/* Number of characters rclex_tok can hold, not counting the
   terminating NUL.  */
static size_t rclex_tok_max = 0;
/* Number of characters currently stored in rclex_tok.  */
static size_t rclex_tok_pos = 0;
/* Text of the token being scanned; NULL until the first call to
   rclex_tok_add_char, NUL terminated afterwards.  */
static char *rclex_tok = NULL;

/* Map the identifier KEY to its keyword token.  Only identifiers that
   begin with an upper case letter are looked up in keywds, and the
   comparison is case sensitive.  A NULL KEY, or a KEY that is not in
   the table, yields STRING.  */

static int
rclex_translatekeyword (const char *key)
{
  const struct rclex_keywords *entry;

  if (key == NULL || ! ISUPPER (key[0]))
    return STRING;

  for (entry = keywds; entry->name != NULL; ++entry)
    if (strcmp (entry->name, key) == 0)
      return entry->tok;

  return STRING;
}

/* Handle a C preprocessor line.  The text of the line, starting with
   its '#', is taken from rclex_tok.  A line of the form

     # LINENO "FILENAME"

   sets rc_lineno and rc_filename, remembers the first file name seen
   as the initial file, and turns suppression of cpp data on for any
   other file whose name ends in ".h" (and off otherwise).  A line that
   carries a number but no quoted file name only updates rc_lineno; a
   line that does not start with a number is ignored.  */

static void
cpp_line (void)
{
  const char *s = rclex_tok;
  int line;
  char *send, *fn;
  size_t fn_len;

  /* Skip the '#' and the whitespace after it.  */
  ++s;
  while (ISSPACE (*s))
    ++s;

  line = strtol (s, &send, 0);
  /* SEND == S means that no digits were converted at all (for example
     a lone "#"); do not treat that as line number 0.  */
  if (send == s || (*send != '\0' && ! ISSPACE (*send)))
    return;

  /* Subtract 1 because we are about to count the newline.  */
  rc_lineno = line - 1;

  s = send;
  while (ISSPACE (*s))
    ++s;

  if (*s != '"')
    return;

  ++s;
  send = strchr (s, '"');
  if (send == NULL)
    return;

  /* Copy the text between the quotes.  */
  fn_len = send - s;
  fn = xmalloc (fn_len + 1);
  memcpy (fn, s, fn_len);
  fn[fn_len] = '\0';

  free (rc_filename);
  rc_filename = fn;

  if (! initial_fn)
    {
      initial_fn = xmalloc (fn_len + 1);
      strcpy (initial_fn, fn);
    }

  /* Allow the initial file, regardless of name.  Suppress all other
     files if they end in ".h" (this allows included "*.rc").  A name
     shorter than two characters cannot end in ".h"; testing the length
     first keeps the suffix pointer inside FN.  */
  if (strcmp (initial_fn, fn) == 0
      || fn_len < 2
      || strcmp (fn + fn_len - 2, ".h") != 0)
    suppress_cpp_data = 0;
  else
    suppress_cpp_data = 1;
}

/* Allocate an uninitialized buffer of LEN bytes and record it at the
   front of the strings list, so that rcparse_discard_strings can free
   it later.  Returns the new buffer.  */

static char *
get_string (int len)
{
  struct alloc_string *node = xmalloc (sizeof (struct alloc_string));

  node->s = xmalloc (len);
  node->next = strings;
  strings = node;

  return node->s;
}

/* Convert the quoted string held in rclex_tok into its value.  The
   surrounding quotes are stripped, a pair of quotes inside the string
   becomes a single quote, and backslash escapes are expanded.  The
   result is NUL terminated and comes from get_string; the number of
   characters before the terminator is stored in *LEN.  */

static char *
handle_quotes (rc_uint_type *len)
{
  const char *src = rclex_tok;
  char *buf, *out;
  int value;
  int digits_left;

  /* The value is never longer than the token text.  */
  buf = get_string (strlen (src) + 1);
  out = buf;

  /* Step over the opening quote.  */
  if (*src == '"')
    ++src;

  while (*src != '\0')
    {
      if (*src != '\\' && *src != '"')
	{
	  /* An ordinary character is copied through.  */
	  *out++ = *src++;
	  continue;
	}

      if (*src == '"')
	{
	  /* A quote at the very end of the token closes the string.  */
	  if (src[1] == '\0')
	    break;

	  /* Two quotes in a row stand for one quote character.  */
	  if (src[1] == '"')
	    {
	      *out++ = '"';
	      src += 2;
	      continue;
	    }

	  /* Anything else after a quote draws a warning.  Whitespace is
	     then skipped, counting newlines, and unless the token ends
	     there the quote opening the next string is stepped over.  */
	  rcparse_warning ("unexpected character after '\"'");
	  ++src;
	  assert (ISSPACE (*src));
	  for (; ISSPACE (*src); ++src)
	    if (*src == '\n')
	      ++rc_lineno;
	  if (*src == '\0')
	    break;
	  assert (*src == '"');
	  ++src;
	  continue;
	}

      /* A backslash escape; look at the character after the backslash.  */
      ++src;
      switch (*src)
	{
	case '\0':
	  rcparse_warning ("backslash at end of string");
	  break;

	case '\"':
	  rcparse_warning ("use \"\" to put \" in a string");
	  *out++ = '"';
	  ++src;
	  break;

	case 'a': /* Strange, but true: \a yields the same as \b.  */
	case 'b':
	  *out++ = ESCAPE_B;
	  ++src;
	  break;

	case 'f':
	  *out++ = ESCAPE_F;
	  ++src;
	  break;

	case 'n':
	  *out++ = ESCAPE_N;
	  ++src;
	  break;

	case 'r':
	  *out++ = ESCAPE_R;
	  ++src;
	  break;

	case 't':
	  *out++ = ESCAPE_T;
	  ++src;
	  break;

	case 'v':
	  *out++ = ESCAPE_V;
	  ++src;
	  break;

	case '\\':
	  *out++ = *src++;
	  break;

	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
	  /* One to three octal digits.  */
	  value = *src - '0';
	  ++src;
	  if (*src >= '0' && *src <= '7')
	    {
	      value = (value << 3) | (*src - '0');
	      ++src;
	      if (*src >= '0' && *src <= '7')
		{
		  value = (value << 3) | (*src - '0');
		  ++src;
		}
	    }
	  *out++ = value;
	  break;

	case 'x': case 'X':
	  /* Only single byte characters are handled here, so at most
	     two hex digits are consumed; the rest of a sequence such
	     as "\xB0ABC" is ordinary text.  */
	  ++src;
	  value = 0;
	  for (digits_left = 2; digits_left > 0; --digits_left)
	    {
	      int nibble;

	      if (*src >= '0' && *src <= '9')
		nibble = *src - '0';
	      else if (*src >= 'a' && *src <= 'f')
		nibble = *src - 'a' + 10;
	      else if (*src >= 'A' && *src <= 'F')
		nibble = *src - 'A' + 10;
	      else
		break;
	      value = (value << 4) | nibble;
	      ++src;
	    }
	  *out++ = value;
	  break;

	default:
	  /* Keep an unknown escape as it was written.  */
	  rcparse_warning ("unrecognized escape sequence");
	  *out++ = '\\';
	  *out++ = *src++;
	  break;
	}
    }

  *out = '\0';
  *len = out - buf;

  return buf;
}

/* Allocate room for LEN unichar elements through get_string, so the
   buffer is recorded in the strings list, and return it as a unichar
   pointer.  */

static unichar *
get_unistring (int len)
{
  char *raw = get_string (len * sizeof (unichar));

  return (unichar *) raw;
}

/* Handle a quoted unicode string held in rclex_tok.  An optional
   leading 'L' or 'l' and the surrounding quotes are stripped, a pair
   of quotes inside the string is turned into a single quote, and
   backslash escapes are expanded.  The result is a NUL terminated
   unichar string allocated with get_unistring; the number of unichars
   before the terminator is stored in *LEN.  */

static unichar *
handle_uniquotes (rc_uint_type *len)
{
  const char *input = rclex_tok;
  unichar *ret, *s;
  const char *t;
  int ch;
  int num_xdigits;

  /* The value never has more elements than the token has characters.  */
  ret = get_unistring (strlen (input) + 1);

  s = ret;
  t = input;
  if ((*t == 'L' || *t == 'l') && t[1] == '"')
    t += 2;
  else if (*t == '"')
    ++t;
  while (*t != '\0')
    {
      if (*t == '\\')
	{
	  ++t;
	  switch (*t)
	    {
	    case '\0':
	      rcparse_warning ("backslash at end of string");
	      break;

	    case '\"':
	      rcparse_warning ("use \"\" to put \" in a string");
	      /* Emit the quote and step past it, as handle_quotes does.
		 Leaving T on the quote would make the loop take it for
		 a string delimiter.  */
	      *s++ = '"';
	      ++t;
	      break;

	    case 'a':
	      *s++ = ESCAPE_B; /* Strange, but true...  */
	      ++t;
	      break;

	    case 'b':
	      *s++ = ESCAPE_B;
	      ++t;
	      break;

	    case 'f':
	      *s++ = ESCAPE_F;
	      ++t;
	      break;

	    case 'n':
	      *s++ = ESCAPE_N;
	      ++t;
	      break;

	    case 'r':
	      *s++ = ESCAPE_R;
	      ++t;
	      break;

	    case 't':
	      *s++ = ESCAPE_T;
	      ++t;
	      break;

	    case 'v':
	      *s++ = ESCAPE_V;
	      ++t;
	      break;

	    case '\\':
	      *s++ = (unichar) *t++;
	      break;

	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
	      /* One to three octal digits.  */
	      ch = *t - '0';
	      ++t;
	      if (*t >= '0' && *t <= '7')
		{
		  ch = (ch << 3) | (*t - '0');
		  ++t;
		  if (*t >= '0' && *t <= '7')
		    {
		      ch = (ch << 3) | (*t - '0');
		      ++t;
		    }
		}
	      *s++ = (unichar) ch;
	      break;

	    case 'x': case 'X':
	      ++t;
	      ch = 0;
	      /* We only handle two byte chars here.  Make sure
		 we finish an escape sequence like "\xB0ABCD" after
		 the first four digits.  */
	      num_xdigits = 4;
	      while (num_xdigits--)
		{
		  if (*t >= '0' && *t <= '9')
		    ch = (ch << 4) | (*t - '0');
		  else if (*t >= 'a' && *t <= 'f')
		    ch = (ch << 4) | (*t - 'a' + 10);
		  else if (*t >= 'A' && *t <= 'F')
		    ch = (ch << 4) | (*t - 'A' + 10);
		  else
		    break;
		  ++t;
		}
	      *s++ = (unichar) ch;
	      break;

	    default:
	      rcparse_warning ("unrecognized escape sequence");
	      *s++ = '\\';
	      /* Go through unsigned char so that a byte above 0x7f is
		 not sign extended where plain char is signed.  */
	      *s++ = (unichar) (unsigned char) *t++;
	      break;
	    }
	}
      else if (*t != '"')
	/* Ordinary character; widen it without sign extension.  */
	*s++ = (unichar) (unsigned char) *t++;
      else if (t[1] == '\0')
	/* The quote that closes the string.  */
	break;
      else if (t[1] == '"')
	{
	  /* Two quotes in a row stand for one quote character.  */
	  *s++ = '"';
	  t += 2;
	}
      else
	{
	  /* A quote followed by something else: skip the whitespace
	     after it, counting newlines, and step over the quote that
	     opens the next string unless the token ends first.  */
	  ++t;
	  assert (ISSPACE (*t));
	  while (ISSPACE (*t))
	    {
	      if ((*t) == '\n')
		++rc_lineno;
	      ++t;
	    }
	  if (*t == '\0')
	    break;
	  assert (*t == '"');
	  ++t;
	}
    }

  *s = '\0';

  *len = s - ret;

  return ret;
}

/* Free every string recorded in the strings list, together with the
   list nodes, and leave the list empty.  The parser calls this when
   it no longer needs the strings.  */

void
rcparse_discard_strings (void)
{
  struct alloc_string *node = strings;

  strings = NULL;

  while (node != NULL)
    {
      struct alloc_string *following = node->next;

      free (node->s);
      free (node);
      node = following;
    }
}

/* Switch the lexer into rcdata mode by setting rcdata_mode.  */

void
rcparse_rcdata (void)
{
  rcdata_mode = 1;
}

/* Leave rcdata mode and return to normal mode by clearing
   rcdata_mode.  */

void
rcparse_normal (void)
{
  rcdata_mode = 0;
}

/* Append CH to the token text in rclex_tok, growing the buffer when it
   is full, and keep the text NUL terminated.  A CH of -1 appends
   nothing; it only makes sure that the buffer exists and is terminated
   at the current position.  */

static void
rclex_tok_add_char (int ch)
{
  if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
    {
      /* Double the capacity each time so that scanning a long token
	 copies a linear rather than a quadratic number of bytes.  The
	 buffer always has one byte more than rclex_tok_max for the
	 terminating NUL.  xmalloc does not return NULL, so the result
	 needs no check.  */
      size_t new_max = rclex_tok_max ? rclex_tok_max * 2 : 8;
      char *h = xmalloc (new_max + 1);

      if (rclex_tok)
	{
	  /* Copy the text together with its terminating NUL.  */
	  memcpy (h, rclex_tok, rclex_tok_pos + 1);
	  free (rclex_tok);
	}
      else
	rclex_tok_pos = 0;
      rclex_tok_max = new_max;
      rclex_tok = h;
    }
  if (ch != -1)
    rclex_tok[rclex_tok_pos++] = (char) ch;
  rclex_tok[rclex_tok_pos] = 0;
}

/* Read the next input character, append it to the current token and
   return it.  A character pushed back by rclex_peekch is delivered
   first.  NUL and carriage return characters read from cpp_pipe are
   skipped.  Returns -1 when cpp_pipe is not set, is at end of file, or
   cannot be read.  */

static int
rclex_readch (void)
{
  int r = -1;

  if ((r = rclex_lastch) != -1)
    rclex_lastch = -1;
  else
    {
      unsigned char ch;

      do
        {
	  if (! cpp_pipe || feof (cpp_pipe)
	      || fread (&ch, 1, 1, cpp_pipe) != 1)
	    {
	      /* R may still hold a skipped NUL or '\r' from the previous
		 iteration; do not hand that back as if it were data.  */
	      r = -1;
	      break;
	    }
	  r = ch;
        }
      while (r == 0 || r == '\r');
    }
  rclex_tok_add_char (r);
  return r;
}

/* Return the next input character without consuming it, or -1 if
   there is none.  The character is fetched with rclex_readch, stored
   in rclex_lastch for the next read, and taken off the end of the
   token text again.  */

static int
rclex_peekch (void)
{
  int ahead = rclex_lastch;

  /* A character is already pending.  */
  if (ahead != -1)
    return ahead;

  ahead = rclex_readch ();
  if (ahead == -1)
    return -1;

  rclex_lastch = ahead;
  /* rclex_readch appended the character to the token; undo that.  */
  if (rclex_tok_pos > 0)
    rclex_tok[--rclex_tok_pos] = 0;

  return ahead;
}

/* Read the rest of a quoted string into the token; the opening quote
   has already been consumed by the caller.  Reading stops after the
   closing quote, or before a newline or the end of input.  A backslash
   takes the following character with it, and two quotes in a row do
   not close the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int next = rclex_peekch ();

      /* An unterminated string ends at the newline or end of input,
	 which is left unread.  */
      if (next == -1 || next == '\n')
	return;

      if (next == '\\')
	{
	  /* Consume the backslash, and the escaped character too unless
	     the line or the input ends right after it.  */
	  rclex_readch ();
	  next = rclex_peekch ();
	  if (next == -1 || next == '\n')
	    return;
	  rclex_readch ();
	  continue;
	}

      if (rclex_readch () != '"')
	continue;

      /* A quote closes the string unless another quote follows it.  */
      if (rclex_peekch () != '"')
	return;
      rclex_readch ();
    }
}

/* Read the remainder of a number whose first digit CH has already
   been consumed, and return its value.  A leading '0' selects base 8,
   optionally followed by 'o' or 'O', or base 16 when followed by 'x'
   or 'X'; any other first digit selects base 10.  Decimal digits and
   the letters a-f and A-F are all consumed whatever the base; the
   first one that does not fit the base draws a single warning.  */

static rc_uint_type
read_digit (int ch)
{
  rc_uint_type radix, value, digit;
  int complained = 0;

  if (ch != '0')
    {
      radix = 10;
      value = (rc_uint_type) (ch - '0');
    }
  else
    {
      int prefix = rclex_peekch ();

      radix = 8;
      value = 0;
      if (prefix == 'o' || prefix == 'O')
	rclex_readch ();
      else if (prefix == 'x' || prefix == 'X')
	{
	  rclex_readch ();
	  radix = 16;
	}
    }

  for (;;)
    {
      ch = rclex_peekch ();
      if (ch == -1)
	break;

      if (ISDIGIT (ch))
	digit = (rc_uint_type) (ch - '0');
      else if (ch >= 'a' && ch <= 'f')
	digit = (rc_uint_type) ((ch - 'a') + 10);
      else if (ch >= 'A' && ch <= 'F')
	digit = (rc_uint_type) ((ch - 'A') + 10);
      else
	break;

      /* The character belongs to the number; consume it.  */
      rclex_readch ();
      if (! complained && digit >= radix)
	{
	  complained = 1;
	  rcparse_warning ("digit exceeds base");
	}
      value = value * radix + digit;
    }

  return value;
}

/* yyparser entry method.  Scan the next token from the input, store
   its value (if any) in yylval, and return its token code; returns -1
   at the end of the input.  Tokens that evaluate to IGNORED_TOKEN,
   which includes every preprocessor line and everything scanned while
   cpp data is suppressed, are skipped and never returned.  */

int
yylex (void)
{
  char *s;
  unichar *us;
  rc_uint_type length;
  int ch;

  /* Make sure that rclex_tok is initialized.  */
  if (! rclex_tok)
    rclex_tok_add_char (-1);

  do
    {
      /* Skip characters up to and including the space character
	 (whitespace and control characters), counting newlines.  */
      do
	{
	  /* Clear token.  */
	  rclex_tok_pos = 0;
	  rclex_tok[0] = 0;
	  
	  if ((ch = rclex_readch ()) == -1)
	    return -1;
	  if (ch == '\n')
	    ++rc_lineno;
	}
      while (ch <= 0x20);

      switch (ch)
	{
	case '#':
	  /* Collect the rest of the line, leaving the newline unread,
	     and let cpp_line interpret it.  The line itself never
	     produces a token.  */
	  while ((ch = rclex_peekch ()) != -1 && ch != '\n')
	    rclex_readch ();
	  cpp_line ();
	  ch = IGNORED_TOKEN;
	  break;
	
	case '{':
	  ch = IGNORE_CPP (BEG);
	  break;
	
	case '}':
	  ch = IGNORE_CPP (END);
	  break;
	
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  yylval.i.val = read_digit (ch);
	  yylval.i.dword = 0;
	  /* A trailing 'l' or 'L' is consumed and flags the number as a
	     dword.  */
	  switch (rclex_peekch ())
	    {
	    case 'l': case 'L':
	      rclex_readch ();
	      yylval.i.dword = 1;
	      break;
	    }
	  ch = IGNORE_CPP (NUMBER);
	  break;
	case '"':
	  rclex_string ();
	  ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
	  /* Do not bother converting a string that is being ignored.  */
	  if (ch == IGNORED_TOKEN)
	    break;
	  s = handle_quotes (&length);
	  if (! rcdata_mode)
	    yylval.s = s;
	  else
	    {
	      yylval.ss.length = length;
	      yylval.ss.s = s;
	  }
	  break;
	case 'L': case 'l':
	  /* An 'L' or 'l' directly followed by a quote starts a unicode
	     string; otherwise it is treated like any other character
	     below.  */
	  if (rclex_peekch () == '"')
	    {
	      rclex_readch ();
	      rclex_string ();
	      ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
	      if (ch == IGNORED_TOKEN)
		break;
	      us = handle_uniquotes (&length);
	      if (! rcdata_mode)
		yylval.uni = us;
	      else
	        {
		  yylval.suni.length = length;
		  yylval.suni.s = us;
	      }
	      break;
	    }
	  /* Fall through.  */
	default:
	  if (ISIDST (ch) || ch=='$')
	    {
	      /* An identifier: keep reading while the next character is
		 an identifier character, '$' or '.'.  */
	      while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) || ch == '$' || ch == '.'))
		rclex_readch ();
	      ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
	      if (ch == STRING)
		{
		  /* Not a keyword; hand the parser a copy of the text.  */
		  s = get_string (strlen (rclex_tok) + 1);
		  strcpy (s, rclex_tok);
		  yylval.s = s;
		}
	      else if (ch == BLOCK)
		{
		  const char *hs = NULL;

		  /* Scan the token after BLOCK with a recursive call.
		     If it is a string naming one of the two known
		     blocks, return the specific block token instead;
		     for any other string CH stays BLOCK and yylval.s
		     holds that string.  */
		  switch (yylex ())
		  {
		  case STRING:
		  case QUOTEDSTRING:
		    hs = yylval.s;
		    break;
		  case SIZEDSTRING:
		    hs = yylval.s = yylval.ss.s;
		    break;
		  }
		  if (! hs)
		    {
		      rcparse_warning ("BLOCK expects a string as argument.");
		      ch = IGNORED_TOKEN;
		    }
		  else if (! strcmp (hs, "StringFileInfo"))
		    ch = BLOCKSTRINGFILEINFO;
		  else if (! strcmp (hs, "VarFileInfo"))
		    ch = BLOCKVARFILEINFO;
		}
	      break;
	    }
	  /* Any other character is returned as its own token.  */
	  ch = IGNORE_CPP (ch);
	  break;
	}
    }
  while (ch == IGNORED_TOKEN);

  return ch;
}
Commit	Line	Data
4a594fce NC	1	/* rclex.c -- lexer for Windows rc files parser */
	2
	3	/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
	4	Free Software Foundation, Inc.
	5
	6	Written by Kai Tietz, Onevision.
	7
	8	This file is part of GNU Binutils.
	9
	10	This program is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2 of the License, or
	13	(at your option) any later version.
	14
	15	This program is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with this program; if not, write to the Free Software
	22	Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
	23	02110-1301, USA. */
	24
	25	/* This is a lexer used by the Windows rc file parser. It basically
	26	just recognized a bunch of keywords. */
	27
	28	#include "sysdep.h"
	29	#include "bfd.h"
	30	#include "bucomm.h"
	31	#include "libiberty.h"
	32	#include "safe-ctype.h"
	33	#include "windres.h"
	34	#include "rcparse.h"
	35
	36	#include <assert.h>
	37
	38	/* Whether we are in rcdata mode, in which we returns the lengths of
	39	strings. */
	40
	41	static int rcdata_mode;
	42
	43	/* Whether we are supressing lines from cpp (including windows.h or
	44	headers from your C sources may bring in externs and typedefs).
	45	When active, we return IGNORED_TOKEN, which lets us ignore these
	46	outside of resource constructs. Thus, it isn't required to protect
	47	all the non-preprocessor lines in your header files with #ifdef
	48	RC_INVOKED. It also means your RC file can't include other RC
	49	files if they're named ".h". Sorry. Name them .rch or whatever. */
	50
	51	static int suppress_cpp_data;
	52
	53	#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
	54
	55	/* The first filename we detect in the cpp output. We use this to
	56	tell included files from the original file. */
	57
	58	static char *initial_fn;
	59
	60	/* List of allocated strings. */
	61
	62	struct alloc_string
	63	{
	64	struct alloc_string *next;
65	char *s;
66	};
67
68	static struct alloc_string *strings;
69
70	struct rclex_keywords
71	{
72	const char *name;
73	int tok;
74	};
75
76	#define K(KEY) { #KEY, KEY }
77	#define KRT(KEY) { #KEY, RT_##KEY }
78
79	static const struct rclex_keywords keywds[] =
80	{
81	K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
82	K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
83	K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
84	K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
85	K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
86	K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
87	K(DLGINCLUDE), K(DLGINIT),
88	K(EDITTEXT), K(END), K(EXSTYLE),
89	K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
90	K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
91	K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
92	K(HEDIT), K(HELP), K(HTML),
93	K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
94	K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
95	K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
96	K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
97	K(NOINVERT), K(NOT),
98	K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
99	K(PURE), K(PUSHBOX), K(PUSHBUTTON),
100	K(RADIOBUTTON), K(RCDATA), K(RTEXT),
101	K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
102	K(STRINGTABLE), K(STYLE),
103	K(TOOLBAR),
104	K(USERBUTTON),
105	K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
106	K(VIRTKEY), K(VXD),
107	{ NULL, 0 },
108	};
109
110	/* External input stream from resrc */
111	extern FILE *cpp_pipe;
112
113	/* Lexical scanner helpers. */
114	static int rclex_lastch = -1;
115	static size_t rclex_tok_max = 0;
116	static size_t rclex_tok_pos = 0;
117	static char *rclex_tok = NULL;
118
119	static int
120	rclex_translatekeyword (const char *key)
121	{
122	if (key && ISUPPER (key[0]))
123	{
124	const struct rclex_keywords *kw = &keywds[0];
125
126	do
127	{
128	if (! strcmp (kw->name, key))
129	return kw->tok;
130	++kw;
131	}
132	while (kw->name != NULL);
133	}
134	return STRING;
135	}
136
137	/* Handle a C preprocessor line. */
138
139	static void
140	cpp_line (void)
141	{
142	const char *s = rclex_tok;
143	int line;
144	char send, fn;
145
146	++s;
147	while (ISSPACE (*s))
148	++s;
149
150	line = strtol (s, &send, 0);
151	if (send != '\0' && ! ISSPACE (send))
152	return;
153
154	/* Subtract 1 because we are about to count the newline. */
155	rc_lineno = line - 1;
156
157	s = send;
158	while (ISSPACE (*s))
159	++s;
160
161	if (*s != '"')
162	return;
163
164	++s;
165	send = strchr (s, '"');
166	if (send == NULL)
167	return;
168
169	fn = xmalloc (send - s + 1);
170	strncpy (fn, s, send - s);
171	fn[send - s] = '\0';
172
173	free (rc_filename);
174	rc_filename = fn;
175
176	if (! initial_fn)
177	{
178	initial_fn = xmalloc (strlen (fn) + 1);
179	strcpy (initial_fn, fn);
180	}
181
182	/* Allow the initial file, regardless of name. Suppress all other
183	files if they end in ".h" (this allows included ".rc"). /
184	if (strcmp (initial_fn, fn) == 0
185	\|\| strcmp (fn + strlen (fn) - 2, ".h") != 0)
186	suppress_cpp_data = 0;
187	else
188	suppress_cpp_data = 1;
189	}
190
191	/* Allocate a string of a given length. */
192
193	static char *
194	get_string (int len)
195	{
196	struct alloc_string *as;
197
198	as = xmalloc (sizeof *as);
199	as->s = xmalloc (len);
200
201	as->next = strings;
202	strings = as;
203
204	return as->s;
205	}
206
207	/* Handle a quoted string. The quotes are stripped. A pair of quotes
208	in a string are turned into a single quote. Adjacent strings are
209	merged separated by whitespace are merged, as in C. */
210
211	static char *
212	handle_quotes (rc_uint_type *len)
213	{
214	const char *input = rclex_tok;
215	char ret, s;
216	const char *t;
217	int ch;
218	int num_xdigits;
219
220	ret = get_string (strlen (input) + 1);
221
222	s = ret;
223	t = input;
224	if (*t == '"')
225	++t;
226	while (*t != '\0')
227	{
228	if (*t == '\\')
229	{
230	++t;
231	switch (*t)
232	{
233	case '\0':
234	rcparse_warning ("backslash at end of string");
235	break;
236
237	case '\"':
238	rcparse_warning ("use \"\" to put \" in a string");
239	*s++ = '"';
240	++t;
241	break;
242
243	case 'a':
244	s++ = ESCAPE_B; / Strange, but true... */
245	++t;
246	break;
247
248	case 'b':
249	*s++ = ESCAPE_B;
250	++t;
251	break;
252
253	case 'f':
254	*s++ = ESCAPE_F;
255	++t;
256	break;
257
258	case 'n':
259	*s++ = ESCAPE_N;
260	++t;
261	break;
262
263	case 'r':
264	*s++ = ESCAPE_R;
265	++t;
266	break;
267
268	case 't':
269	*s++ = ESCAPE_T;
270	++t;
271	break;
272
273	case 'v':
274	*s++ = ESCAPE_V;
275	++t;
276	break;
277
278	case '\\':
279	s++ = t++;
280	break;
281
282	case '0': case '1': case '2': case '3':
283	case '4': case '5': case '6': case '7':
284	ch = *t - '0';
285	++t;
286	if (t >= '0' && t <= '7')
287	{
288	ch = (ch << 3) \| (*t - '0');
289	++t;
290	if (t >= '0' && t <= '7')
291	{
292	ch = (ch << 3) \| (*t - '0');
293	++t;
294	}
295	}
296	*s++ = ch;
297	break;
298
299	case 'x': case 'X':
300	++t;
301	ch = 0;
302	/* We only handle single byte chars here. Make sure
303	we finish an escape sequence like "/xB0ABC" after
304	the first two digits. */
305	num_xdigits = 2;
306	while (num_xdigits--)
307	{
308	if (t >= '0' && t <= '9')
309	ch = (ch << 4) \| (*t - '0');
310	else if (t >= 'a' && t <= 'f')
311	ch = (ch << 4) \| (*t - 'a' + 10);
312	else if (t >= 'A' && t <= 'F')
313	ch = (ch << 4) \| (*t - 'A' + 10);
314	else
315	break;
316	++t;
317	}
318	*s++ = ch;
319	break;
320
321	default:
322	rcparse_warning ("unrecognized escape sequence");
323	*s++ = '\\';
324	s++ = t++;
325	break;
326	}
327	}
328	else if (*t != '"')
329	s++ = t++;
330	else if (t[1] == '\0')
331	break;
332	else if (t[1] == '"')
333	{
334	*s++ = '"';
335	t += 2;
336	}
337	else
338	{
339	rcparse_warning ("unexpected character after '\"'");
340	++t;
341	assert (ISSPACE (*t));
342	while (ISSPACE (*t))
343	{
344	if ((*t) == '\n')
345	++rc_lineno;
346	++t;
347	}
348	if (*t == '\0')
349	break;
350	assert (*t == '"');
351	++t;
352	}
353	}
354
355	*s = '\0';
356
357	*len = s - ret;
358
359	return ret;
360	}
361
362	/* Allocate a unicode string of a given length. */
363
364	static unichar *
365	get_unistring (int len)
366	{
367	return (unichar ) get_string (len sizeof (unichar));
368	}
369
370	/* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
371	in a string are turned into a single quote. Adjacent strings are
372	merged separated by whitespace are merged, as in C. */
373
374	static unichar *
375	handle_uniquotes (rc_uint_type *len)
376	{
377	const char *input = rclex_tok;
378	unichar ret, s;
379	const char *t;
380	int ch;
381	int num_xdigits;
382
383	ret = get_unistring (strlen (input) + 1);
384
385	s = ret;
386	t = input;
387	if ((t == 'L' \|\| t == 'l') && t[1] == '"')
388	t += 2;
389	else if (*t == '"')
390	++t;
391	while (*t != '\0')
392	{
393	if (*t == '\\')
394	{
395	++t;
396	switch (*t)
397	{
398	case '\0':
399	rcparse_warning ("backslash at end of string");
400	break;
401
402	case '\"':
403	rcparse_warning ("use \"\" to put \" in a string");
404	break;
405
406	case 'a':
407	s++ = ESCAPE_B; / Strange, but true... */
408	++t;
409	break;
410
411	case 'b':
412	*s++ = ESCAPE_B;
413	++t;
414	break;
415
416	case 'f':
417	*s++ = ESCAPE_F;
418	++t;
419	break;
420
421	case 'n':
422	*s++ = ESCAPE_N;
423	++t;
424	break;
425
426	case 'r':
427	*s++ = ESCAPE_R;
428	++t;
429	break;
430
431	case 't':
432	*s++ = ESCAPE_T;
433	++t;
434	break;
435
436	case 'v':
437	*s++ = ESCAPE_V;
438	++t;
439	break;
440
441	case '\\':
442	s++ = (unichar) t++;
443	break;
444
445	case '0': case '1': case '2': case '3':
446	case '4': case '5': case '6': case '7':
447	ch = *t - '0';
448	++t;
449	if (t >= '0' && t <= '7')
450	{
451	ch = (ch << 3) \| (*t - '0');
452	++t;
453	if (t >= '0' && t <= '7')
454	{
455	ch = (ch << 3) \| (*t - '0');
456	++t;
457	}
458	}
459	*s++ = (unichar) ch;
460	break;
461
462	case 'x': case 'X':
463	++t;
464	ch = 0;
465	/* We only handle two byte chars here. Make sure
466	we finish an escape sequence like "/xB0ABC" after
467	the first two digits. */
468	num_xdigits = 4;
469	while (num_xdigits--)
470	{
471	if (t >= '0' && t <= '9')
472	ch = (ch << 4) \| (*t - '0');
473	else if (t >= 'a' && t <= 'f')
474	ch = (ch << 4) \| (*t - 'a' + 10);
475	else if (t >= 'A' && t <= 'F')
476	ch = (ch << 4) \| (*t - 'A' + 10);
477	else
478	break;
479	++t;
480	}
481	*s++ = (unichar) ch;
482	break;
483
484	default:
485	rcparse_warning ("unrecognized escape sequence");
486	*s++ = '\\';
487	s++ = (unichar) t++;
488	break;
489	}
490	}
491	else if (*t != '"')
492	s++ = (unichar) t++;
493	else if (t[1] == '\0')
494	break;
495	else if (t[1] == '"')
496	{
497	*s++ = '"';
498	t += 2;
499	}
500	else
501	{
502	++t;
503	assert (ISSPACE (*t));
504	while (ISSPACE (*t))
505	{
506	if ((*t) == '\n')
507	++rc_lineno;
508	++t;
509	}
510	if (*t == '\0')
511	break;
512	assert (*t == '"');
513	++t;
514	}
515	}
516
517	*s = '\0';
518
519	*len = s - ret;
520
521	return ret;
522	}
523
524	/* Discard all the strings we have allocated. The parser calls this
525	when it no longer needs them. */
526
527	void
528	rcparse_discard_strings (void)
529	{
530	struct alloc_string *as;
531
532	as = strings;
533	while (as != NULL)
534	{
535	struct alloc_string *n;
536
537	free (as->s);
538	n = as->next;
539	free (as);
540	as = n;
541	}
542
543	strings = NULL;
544	}
545
546	/* Enter rcdata mode. */
547	void
548	rcparse_rcdata (void)
549	{
550	rcdata_mode = 1;
551	}
552
553	/* Go back to normal mode from rcdata mode. */
554	void
555	rcparse_normal (void)
556	{
557	rcdata_mode = 0;
558	}
559
560	static void
561	rclex_tok_add_char (int ch)
562	{
563	if (! rclex_tok \|\| rclex_tok_max <= rclex_tok_pos)
564	{
565	char *h = xmalloc (rclex_tok_max + 9);
566
567	if (! h)
568	abort ();
569	if (rclex_tok)
570	{
571	memcpy (h, rclex_tok, rclex_tok_pos + 1);
572	free (rclex_tok);
573	}
574	else
575	rclex_tok_pos = 0;
576	rclex_tok_max += 8;
577	rclex_tok = h;
578	}
579	if (ch != -1)
580	rclex_tok[rclex_tok_pos++] = (char) ch;
581	rclex_tok[rclex_tok_pos] = 0;
582	}
583
584	static int
585	rclex_readch (void)
586	{
587	int r = -1;
588
589	if ((r = rclex_lastch) != -1)
590	rclex_lastch = -1;
591	else
592	{
593	char ch;
594	do
595	{
596	if (! cpp_pipe \|\| feof (cpp_pipe)
597	\|\| fread (&ch, 1, 1,cpp_pipe) != 1)
598	break;
599	r = ((int) ch) & 0xff;
600	}
601	while (r == 0 \|\| r == '\r');
602	}
603	rclex_tok_add_char (r);
604	return r;
605	}
606
607	static int
608	rclex_peekch (void)
609	{
610	int r;
611
612	if ((r = rclex_lastch) == -1)
613	{
614	if ((r = rclex_readch ()) != -1)
615	{
616	rclex_lastch = r;
617	if (rclex_tok_pos > 0)
618	rclex_tok[--rclex_tok_pos] = 0;
619	}
620	}
621	return r;
622	}
623
624	static void
625	rclex_string (void)
626	{
627	int c;
628
629	while ((c = rclex_peekch ()) != -1)
630	{
631	if (c == '\n')
632	break;
633	if (c == '\\')
634	{
635	rclex_readch ();
636	if ((c = rclex_peekch ()) == -1 \|\| c == '\n')
637	break;
638	rclex_readch ();
639	}
640	else if (rclex_readch () == '"')
641	{
642	if (rclex_peekch () == '"')
643	rclex_readch ();
644	else
645	break;
646	}
647	}
648	}
649
650	static rc_uint_type
651	read_digit (int ch)
652	{
653	rc_uint_type base = 10;
654	rc_uint_type ret, val;
655	int warned = 0;
656
657	ret = 0;
658	if (ch == '0')
659	{
660	base = 8;
661	switch (rclex_peekch ())
662	{
663	case 'o': case 'O':
664	rclex_readch ();
665	base = 8;
666	break;
667
668	case 'x': case 'X':
669	rclex_readch ();
670	base = 16;
671	break;
672	}
673	}
674	else
675	ret = (rc_uint_type) (ch - '0');
676	while ((ch = rclex_peekch ()) != -1)
677	{
678	if (ISDIGIT (ch))
679	val = (rc_uint_type) (ch - '0');
680	else if (ch >= 'a' && ch <= 'f')
681	val = (rc_uint_type) ((ch - 'a') + 10);
682	else if (ch >= 'A' && ch <= 'F')
683	val = (rc_uint_type) ((ch - 'A') + 10);
684	else
685	break;
686	rclex_readch ();
687	if (! warned && val >= base)
688	{
689	warned = 1;
690	rcparse_warning ("digit exceeds base");
691	}
692	ret *= base;
693	ret += val;
694	}
695	return ret;
696	}
697
698	/* yyparser entry method. */
699
700	int
701	yylex (void)
702	{
703	char *s;
704	unichar *us;
705	rc_uint_type length;
706	int ch;
707
708	/* Make sure that rclex_tok is initialized. */
709	if (! rclex_tok)
710	rclex_tok_add_char (-1);
711
712	do
713	{
714	do
715	{
716	/* Clear token. */
717	rclex_tok_pos = 0;
718	rclex_tok[0] = 0;
719
720	if ((ch = rclex_readch ()) == -1)
721	return -1;
722	if (ch == '\n')
723	++rc_lineno;
724	}
725	while (ch <= 0x20);
726
727	switch (ch)
728	{
729	case '#':
730	while ((ch = rclex_peekch ()) != -1 && ch != '\n')
731	rclex_readch ();
732	cpp_line ();
733	ch = IGNORED_TOKEN;
734	break;
735
736	case '{':
737	ch = IGNORE_CPP (BEG);
738	break;
739
740	case '}':
741	ch = IGNORE_CPP (END);
742	break;
743
744	case '0': case '1': case '2': case '3': case '4':
745	case '5': case '6': case '7': case '8': case '9':
746	yylval.i.val = read_digit (ch);
747	yylval.i.dword = 0;
748	switch (rclex_peekch ())
749	{
750	case 'l': case 'L':
751	rclex_readch ();
752	yylval.i.dword = 1;
753	break;
754	}
755	ch = IGNORE_CPP (NUMBER);
756	break;
757	case '"':
758	rclex_string ();
759	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
760	if (ch == IGNORED_TOKEN)
761	break;
762	s = handle_quotes (&length);
763	if (! rcdata_mode)
764	yylval.s = s;
765	else
766	{
767	yylval.ss.length = length;
768	yylval.ss.s = s;
769	}
770	break;
771	case 'L': case 'l':
772	if (rclex_peekch () == '"')
773	{
774	rclex_readch ();
775	rclex_string ();
776	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
777	if (ch == IGNORED_TOKEN)
778	break;
779	us = handle_uniquotes (&length);
780	if (! rcdata_mode)
781	yylval.uni = us;
782	else
783	{
784	yylval.suni.length = length;
785	yylval.suni.s = us;
786	}
787	break;
788	}
789	/* Fall through. */
790	default:
791	if (ISIDST (ch) \|\| ch=='$')
792	{
793	while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) \|\| ch == '$' \|\| ch == '.'))
794	rclex_readch ();
795	ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
796	if (ch == STRING)
797	{
798	s = get_string (strlen (rclex_tok) + 1);
799	strcpy (s, rclex_tok);
800	yylval.s = s;
801	}
802	else if (ch == BLOCK)
803	{
804	const char *hs = NULL;
805
806	switch (yylex ())
807	{
808	case STRING:
809	case QUOTEDSTRING:
810	hs = yylval.s;
811	break;
812	case SIZEDSTRING:
813	hs = yylval.s = yylval.ss.s;
814	break;
815	}
816	if (! hs)
817	{
818	rcparse_warning ("BLOCK expects a string as argument.");
819	ch = IGNORED_TOKEN;
820	}
821	else if (! strcmp (hs, "StringFileInfo"))
822	ch = BLOCKSTRINGFILEINFO;
823	else if (! strcmp (hs, "VarFileInfo"))
824	ch = BLOCKVARFILEINFO;
825	}
826	break;
827	}
828	ch = IGNORE_CPP (ch);
829	break;
830	}
831	}
832	while (ch == IGNORED_TOKEN);
833
834	return ch;
835	}