[deliverable/binutils-gdb.git] / binutils / rclex.c

/* rclex.c -- lexer for Windows rc files parser  */

/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.

   Written by Kai Tietz, Onevision.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */


/* This is a lexer used by the Windows rc file parser.  It basically
   just recognizes a bunch of keywords.  */

#include "sysdep.h"
#include "bfd.h"
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "windres.h"
#include "rcparse.h"

#include <assert.h>

/* Whether we are in rcdata mode, in which we return the lengths of
   strings.  */

static int rcdata_mode;

/* Whether we are suppressing lines from cpp (including windows.h or
   headers from your C sources may bring in externs and typedefs).
   When active, we return IGNORED_TOKEN, which lets us ignore these
   outside of resource constructs.  Thus, it isn't required to protect
   all the non-preprocessor lines in your header files with #ifdef
   RC_INVOKED.  It also means your RC file can't include other RC
   files if they're named "*.h".  Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data;

#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))

/* The first filename we detect in the cpp output.  We use this to
   tell included files from the original file.  */

static char *initial_fn;

/* List of allocated strings.  */

/* One node of the singly linked list of string buffers handed out by
   get_string and released again by rcparse_discard_strings.  */
struct alloc_string
{
  /* Next node in the list, or NULL for the last one.  */
  struct alloc_string *next;
  /* The allocated buffer itself.  */
  char *s;
};

static struct alloc_string *strings;

/* One entry of the keyword table: the spelling of a keyword and the
   parser token that yylex returns for it.  */
struct rclex_keywords
{
  /* Keyword text, compared with strcmp (so the match is case-sensitive).  */
  const char *name;
  /* Token value returned for NAME.  */
  int tok;
};

/* K(KEY) builds a table entry whose name is the spelling of KEY and
   whose token is KEY itself; KRT(KEY) is the same but the token is
   RT_KEY.  */
#define K(KEY)  { #KEY, KEY }
#define KRT(KEY)  { #KEY, RT_##KEY }

/* Keyword table searched linearly by rclex_translatekeyword.  The
   entries written out by hand ("BEGIN", "VERSION") are the ones whose
   token name differs from the keyword spelling.  The table is
   terminated by an entry with a NULL name.  */
static const struct rclex_keywords keywds[] =
{
  K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
  K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
  K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
  K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
  K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
  K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
  K(DLGINCLUDE), K(DLGINIT),
  K(EDITTEXT), K(END), K(EXSTYLE),
  K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
  K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
  K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
  K(HEDIT), K(HELP), K(HTML),
  K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
  K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
  K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
  K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
  K(NOINVERT), K(NOT),
  K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
  K(PURE), K(PUSHBOX), K(PUSHBUTTON),
  K(RADIOBUTTON), K(RCDATA), K(RTEXT),
  K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
  K(STRINGTABLE), K(STYLE),
  K(TOOLBAR),
  K(USERBUTTON),
  K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
  K(VIRTKEY), K(VXD),
  { NULL, 0 },
};

/* External input stream from resrc.  rclex_readch reads the lexer's
   input from it one byte at a time.  */
extern FILE *cpp_pipe;

/* Lexical scanner helpers.  */

/* One character of look-ahead stored by rclex_peekch and consumed by
   the next rclex_readch; -1 when no character is pending.  */
static int rclex_lastch = -1;
/* Number of characters rclex_tok can hold, not counting the
   terminating NUL (the buffer is allocated one byte larger).  */
static size_t rclex_tok_max = 0;
/* Number of characters currently stored in rclex_tok.  */
static size_t rclex_tok_pos = 0;
/* NUL-terminated text of the token being scanned; grown on demand by
   rclex_tok_add_char.  NULL until the first call.  */
static char *rclex_tok = NULL;

/* Map the identifier KEY to its keyword token.  Only identifiers whose
   first character is an upper-case letter are looked up in keywds; a
   NULL KEY, or any word not found in the table, yields STRING.  */

static int
rclex_translatekeyword (const char *key)
{
  const struct rclex_keywords *entry;

  if (key == NULL || ! ISUPPER (key[0]))
    return STRING;

  /* The table is closed by an entry whose name is NULL.  */
  for (entry = keywds; entry->name != NULL; ++entry)
    if (strcmp (entry->name, key) == 0)
      return entry->tok;

  return STRING;
}

/* Handle a C preprocessor line.  The text of the line, starting with
   its '#', is taken from rclex_tok.

   Two forms are acted upon:

     #pragma code_page ( DEFAULT | <nr> )
       Sets wind_current_codepage.  An empty argument or DEFAULT selects
       wind_default_codepage; a number (decimal, or hex with a 0x
       prefix) is used directly, and is a fatal error if it is CP_UTF16
       or is rejected by unicode_is_valid_codepage.

     # <line> "<file>" ...
       Updates rc_lineno and rc_filename, records the first file name
       ever seen in initial_fn, and sets suppress_cpp_data for every
       other file whose name ends in ".h".

   Other pragmas and malformed lines are silently ignored.  */

static void
cpp_line (void)
{
  const char *s = rclex_tok;
  int line;
  char *send, *fn;
  size_t len, mlen;

  /* Skip the leading '#' and any white space that follows it.  */
  ++s;
  while (ISSPACE (*s))
    ++s;

  /* Check for #pragma code_page ( DEFAULT | <nr>).  */
  len = strlen (s);
  mlen = strlen ("pragma");
  if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
    {
      const char *end;

      s += mlen + 1;
      while (ISSPACE (*s))
        ++s;
      len = strlen (s);
      mlen = strlen ("code_page");
      if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
        /* FIXME: We ought to issue a warning message about an unrecognised pragma.  */
        return;
      s += mlen;
      while (ISSPACE (*s))
        ++s;
      if (*s != '(')
        /* FIXME: We ought to issue an error message about a malformed pragma.  */
        return;
      ++s;
      while (ISSPACE (*s))
        ++s;
      if (*s == 0 || (end = strchr (s, ')')) == NULL)
        /* FIXME: We ought to issue an error message about a malformed pragma.  */
        return;

      /* Copy the text between the parentheses and strip trailing
         blanks and control characters from it.  */
      len = (size_t) (end - s);
      fn = xmalloc (len + 1);
      if (len)
        memcpy (fn, s, len);
      fn[len] = 0;
      while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
        fn[--len] = 0;

      if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
        wind_current_codepage = wind_default_codepage;
      else
        {
          rc_uint_type ncp;

          if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
            ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
          else
            ncp = (rc_uint_type) strtol (fn, NULL, 10);
          if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
            fatal (_("invalid value specified for pragma code_page.\n"));
          wind_current_codepage = ncp;
        }
      free (fn);
      return;
    }

  /* Otherwise expect a line marker: a number followed by white space
     or the end of the line.  */
  line = strtol (s, &send, 0);
  if (*send != '\0' && ! ISSPACE (*send))
    return;

  /* Subtract 1 because we are about to count the newline.  */
  rc_lineno = line - 1;

  s = send;
  while (ISSPACE (*s))
    ++s;

  if (*s != '"')
    return;

  ++s;
  send = strchr (s, '"');
  if (send == NULL)
    return;

  /* Extract the quoted file name.  LEN is also strlen (fn), because
     strchr found the closing quote before any NUL.  */
  len = (size_t) (send - s);
  fn = xmalloc (len + 1);
  memcpy (fn, s, len);
  fn[len] = '\0';

  free (rc_filename);
  rc_filename = fn;

  if (! initial_fn)
    {
      initial_fn = xmalloc (len + 1);
      strcpy (initial_fn, fn);
    }

  /* Allow the initial file, regardless of name.  Suppress all other
     files if they end in ".h" (this allows included "*.rc").  A name
     shorter than two characters cannot end in ".h"; checking the
     length first keeps fn + len - 2 inside the buffer.  */
  if (strcmp (initial_fn, fn) == 0
      || len < 2
      || strcmp (fn + len - 2, ".h") != 0)
    suppress_cpp_data = 0;
  else
    suppress_cpp_data = 1;
}

/* Allocate a buffer of LEN bytes and record it on the `strings' list so
   that rcparse_discard_strings can release it later.  This function
   does not initialize the contents of the buffer.  */

static char *
get_string (int len)
{
  struct alloc_string *node = xmalloc (sizeof (struct alloc_string));
  char *buf = xmalloc (len);

  /* Push the new node on the front of the list.  */
  node->s = buf;
  node->next = strings;
  strings = node;

  return buf;
}

/* Decode the quoted string held in rclex_tok.  The surrounding quotes
   are stripped, a doubled quote ("") inside the string becomes a single
   quote, and backslash escapes are translated.  Adjacent strings
   separated only by white space are merged, as in C.

   The result is NUL-terminated and lives in a buffer obtained from
   get_string; its length, not counting the terminator, is stored in
   *LEN.  */

static char *
handle_quotes (rc_uint_type *len)
{
  const char *src = rclex_tok;
  char *buf, *dst;
  int value;
  int digits_left;

  /* The decoded text is never longer than the token itself.  */
  buf = get_string (strlen (src) + 1);
  dst = buf;

  /* Skip the opening quote.  */
  if (*src == '"')
    ++src;

  while (*src != '\0')
    {
      if (*src == '"')
        {
          /* A quote at the very end closes the string.  */
          if (src[1] == '\0')
            break;

          /* A doubled quote stands for one literal quote.  */
          if (src[1] == '"')
            {
              *dst++ = '"';
              src += 2;
              continue;
            }

          /* Otherwise another string is expected after white space.  */
          rcparse_warning ("unexpected character after '\"'");
          ++src;
          assert (ISSPACE (*src));
          for (; ISSPACE (*src); ++src)
            if (*src == '\n')
              ++rc_lineno;
          if (*src == '\0')
            break;
          assert (*src == '"');
          ++src;
          continue;
        }

      if (*src != '\\')
        {
          /* Ordinary character: copy it through.  */
          *dst++ = *src++;
          continue;
        }

      /* Backslash escape; look at the character after the backslash.  */
      ++src;
      switch (*src)
        {
        case '\0':
          /* Leave SRC on the NUL so that the outer loop stops.  */
          rcparse_warning ("backslash at end of string");
          break;

        case '\"':
          rcparse_warning ("use \"\" to put \" in a string");
          *dst++ = '"';
          ++src;
          break;

        case 'a': /* Strange, but true: \a is treated like \b.  */
        case 'b':
          *dst++ = ESCAPE_B;
          ++src;
          break;

        case 'f':
          *dst++ = ESCAPE_F;
          ++src;
          break;

        case 'n':
          *dst++ = ESCAPE_N;
          ++src;
          break;

        case 'r':
          *dst++ = ESCAPE_R;
          ++src;
          break;

        case 't':
          *dst++ = ESCAPE_T;
          ++src;
          break;

        case 'v':
          *dst++ = ESCAPE_V;
          ++src;
          break;

        case '\\':
          *dst++ = *src++;
          break;

        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
          /* Octal escape of one to three digits.  */
          value = *src - '0';
          ++src;
          if (*src >= '0' && *src <= '7')
            {
              value = (value << 3) | (*src - '0');
              ++src;
              if (*src >= '0' && *src <= '7')
                {
                  value = (value << 3) | (*src - '0');
                  ++src;
                }
            }
          *dst++ = value;
          break;

        case 'x': case 'X':
          /* We only handle single byte chars here, so an escape
             sequence like "\xB0ABC" ends after the first two hex
             digits.  */
          ++src;
          value = 0;
          for (digits_left = 2; digits_left > 0; --digits_left)
            {
              int digit;

              if (*src >= '0' && *src <= '9')
                digit = *src - '0';
              else if (*src >= 'a' && *src <= 'f')
                digit = *src - 'a' + 10;
              else if (*src >= 'A' && *src <= 'F')
                digit = *src - 'A' + 10;
              else
                break;
              value = (value << 4) | digit;
              ++src;
            }
          *dst++ = value;
          break;

        default:
          /* Unknown escape: keep both the backslash and the character.  */
          rcparse_warning ("unrecognized escape sequence");
          *dst++ = '\\';
          *dst++ = *src++;
          break;
        }
    }

  *dst = '\0';
  *len = dst - buf;

  return buf;
}

/* Allocate room for LEN unichar elements.  The storage comes from
   get_string and is therefore released together with the other
   lexer strings.  */

static unichar *
get_unistring (int len)
{
  char *raw = get_string (len * sizeof (unichar));

  return (unichar *) raw;
}

/* Decode the quoted unicode string held in rclex_tok (optionally
   introduced by L or l).  The surrounding quotes are stripped, a
   doubled quote ("") inside the string becomes a single quote, and
   backslash escapes are translated.  Adjacent strings separated only
   by white space are merged, as in C.

   The result is terminated by a zero unichar and lives in a buffer
   obtained from get_unistring; its length in unichars, not counting
   the terminator, is stored in *LEN.  */

static unichar *
handle_uniquotes (rc_uint_type *len)
{
  const char *input = rclex_tok;
  unichar *ret, *s;
  const char *t;
  int ch;
  int num_xdigits;

  /* Every output element consumes at least one input character, so
     the token length is an upper bound for the result.  */
  ret = get_unistring (strlen (input) + 1);

  s = ret;
  t = input;
  /* Skip the L prefix, if any, and the opening quote.  */
  if ((*t == 'L' || *t == 'l') && t[1] == '"')
    t += 2;
  else if (*t == '"')
    ++t;
  while (*t != '\0')
    {
      if (*t == '\\')
        {
          ++t;
          switch (*t)
            {
            case '\0':
              /* Leave T on the NUL so that the outer loop stops.  */
              rcparse_warning ("backslash at end of string");
              break;

            case '\"':
              /* Emit the quote and step over it, exactly as
                 handle_quotes does.  Leaving T on the quote would make
                 the code below mistake it for the end of the string.  */
              rcparse_warning ("use \"\" to put \" in a string");
              *s++ = '"';
              ++t;
              break;

            case 'a':
              *s++ = ESCAPE_B; /* Strange, but true...  */
              ++t;
              break;

            case 'b':
              *s++ = ESCAPE_B;
              ++t;
              break;

            case 'f':
              *s++ = ESCAPE_F;
              ++t;
              break;

            case 'n':
              *s++ = ESCAPE_N;
              ++t;
              break;

            case 'r':
              *s++ = ESCAPE_R;
              ++t;
              break;

            case 't':
              *s++ = ESCAPE_T;
              ++t;
              break;

            case 'v':
              *s++ = ESCAPE_V;
              ++t;
              break;

            case '\\':
              *s++ = (unichar) *t++;
              break;

            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
              /* Octal escape of one to three digits.  */
              ch = *t - '0';
              ++t;
              if (*t >= '0' && *t <= '7')
                {
                  ch = (ch << 3) | (*t - '0');
                  ++t;
                  if (*t >= '0' && *t <= '7')
                    {
                      ch = (ch << 3) | (*t - '0');
                      ++t;
                    }
                }
              *s++ = (unichar) ch;
              break;

            case 'x': case 'X':
              ++t;
              ch = 0;
              /* We only handle two byte chars here.  Make sure
                 we finish an escape sequence like "\xB0ABCDE" after
                 the first four hex digits.  */
              num_xdigits = 4;
              while (num_xdigits--)
                {
                  if (*t >= '0' && *t <= '9')
                    ch = (ch << 4) | (*t - '0');
                  else if (*t >= 'a' && *t <= 'f')
                    ch = (ch << 4) | (*t - 'a' + 10);
                  else if (*t >= 'A' && *t <= 'F')
                    ch = (ch << 4) | (*t - 'A' + 10);
                  else
                    break;
                  ++t;
                }
              *s++ = (unichar) ch;
              break;

            default:
              /* Unknown escape: keep both the backslash and the
                 character.  Go through unsigned char so that a byte
                 >= 0x80 is not sign-extended where char is signed.  */
              rcparse_warning ("unrecognized escape sequence");
              *s++ = '\\';
              *s++ = (unichar) (unsigned char) *t++;
              break;
            }
        }
      else if (*t != '"')
        /* Ordinary character.  The unsigned char cast keeps bytes
           >= 0x80 in the range 0x80..0xff instead of 0xff80..0xffff.  */
        *s++ = (unichar) (unsigned char) *t++;
      else if (t[1] == '\0')
        /* A quote at the very end closes the string.  */
        break;
      else if (t[1] == '"')
        {
          /* A doubled quote stands for one literal quote.  */
          *s++ = '"';
          t += 2;
        }
      else
        {
          /* Otherwise another string is expected after white space.  */
          ++t;
          assert (ISSPACE (*t));
          while (ISSPACE (*t))
            {
              if ((*t) == '\n')
                ++rc_lineno;
              ++t;
            }
          if (*t == '\0')
            break;
          assert (*t == '"');
          ++t;
        }
    }

  *s = '\0';

  *len = s - ret;

  return ret;
}

/* Release every buffer handed out by get_string, together with its
   list node, and leave the list empty.  The parser calls this when it
   no longer needs the strings.  */

void
rcparse_discard_strings (void)
{
  struct alloc_string *node;
  struct alloc_string *following;

  for (node = strings; node != NULL; node = following)
    {
      /* Remember the successor before the node goes away.  */
      following = node->next;
      free (node->s);
      free (node);
    }

  strings = NULL;
}

/* Enter rcdata mode.  While rcdata_mode is set, yylex returns quoted
   strings as SIZEDSTRING / SIZEDUNISTRING tokens, which carry the
   string length along with the text.  */
void
rcparse_rcdata (void)
{
  rcdata_mode = 1;
}

/* Go back to normal mode from rcdata mode.  With rcdata_mode clear,
   yylex returns quoted strings as QUOTEDSTRING / QUOTEDUNISTRING
   tokens without a length.  */
void
rcparse_normal (void)
{
  rcdata_mode = 0;
}

/* Append CH to the token buffer rclex_tok and keep it NUL-terminated.
   The buffer is created on first use and enlarged by eight characters
   whenever it is full.  A CH of -1 appends nothing; it only makes sure
   the buffer exists and is terminated.  */

static void
rclex_tok_add_char (int ch)
{
  int must_grow = (rclex_tok == NULL || rclex_tok_pos >= rclex_tok_max);

  if (must_grow)
    {
      /* Room for eight more characters plus the terminating NUL.  */
      char *bigger = xmalloc (rclex_tok_max + 9);

      if (bigger == NULL)
        abort ();
      if (rclex_tok == NULL)
        rclex_tok_pos = 0;
      else
        {
          /* Carry over the current text including its terminator.  */
          memcpy (bigger, rclex_tok, rclex_tok_pos + 1);
          free (rclex_tok);
        }
      rclex_tok = bigger;
      rclex_tok_max += 8;
    }

  if (ch != -1)
    {
      rclex_tok[rclex_tok_pos] = (char) ch;
      ++rclex_tok_pos;
    }
  rclex_tok[rclex_tok_pos] = 0;
}

/* Read the next input character, append it to the token buffer and
   return it.  A character stored by rclex_peekch is delivered first;
   otherwise one byte is read from cpp_pipe, skipping NUL and '\r'
   bytes.  Returns -1 (and appends nothing) when there is no stream or
   no more input.  */

static int
rclex_readch (void)
{
  int r = rclex_lastch;

  if (r != -1)
    rclex_lastch = -1;
  else
    {
      do
        {
          char ch;

          if (! cpp_pipe || feof (cpp_pipe)
              || fread (&ch, 1, 1, cpp_pipe) != 1)
            {
              /* Report end of input even if the previous iteration
                 skipped a NUL or '\r'; that byte must not leak out as
                 the result.  */
              r = -1;
              break;
            }
          r = ((int) ch) & 0xff;
        }
      while (r == 0 || r == '\r');
    }
  rclex_tok_add_char (r);
  return r;
}

/* Return the next input character without consuming it, or -1 at end
   of input.  The character is read with rclex_readch, parked in
   rclex_lastch, and removed again from the token buffer so that the
   token text is unchanged.  */

static int
rclex_peekch (void)
{
  int c;

  /* A character is already pending: just report it.  */
  if (rclex_lastch != -1)
    return rclex_lastch;

  c = rclex_readch ();
  if (c == -1)
    return -1;

  rclex_lastch = c;
  /* Undo the append done by rclex_readch.  */
  if (rclex_tok_pos > 0)
    {
      --rclex_tok_pos;
      rclex_tok[rclex_tok_pos] = 0;
    }
  return c;
}

/* Read the remainder of a quoted string into the token buffer; the
   opening quote has already been consumed by the caller.  Reading stops
   after the closing quote, or in front of a newline or the end of
   input.  A backslash takes the following character with it, and a
   doubled quote ("") does not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int c = rclex_peekch ();

      if (c == -1 || c == '\n')
        return;

      if (c == '\\')
        {
          /* Consume the backslash and whatever it escapes.  */
          rclex_readch ();
          c = rclex_peekch ();
          if (c == -1 || c == '\n')
            return;
          rclex_readch ();
          continue;
        }

      if (rclex_readch () != '"')
        continue;

      /* A quote: only a second quote right after it keeps us going.  */
      if (rclex_peekch () != '"')
        return;
      rclex_readch ();
    }
}

static rc_uint_type
read_digit (int ch)
{
  rc_uint_type base = 10;
  rc_uint_type ret, val;
  int warned = 0;

  ret = 0;
  if (ch == '0')
    {
      base = 8;
      switch (rclex_peekch ())
	{
	case 'o': case 'O':
	  rclex_readch ();
	  base = 8;
	  break;

	case 'x': case 'X':
	  rclex_readch ();
	  base = 16;
	  break;
	}
    }
  else
    ret = (rc_uint_type) (ch - '0');
  while ((ch = rclex_peekch ()) != -1)
    {
      if (ISDIGIT (ch))
	val = (rc_uint_type) (ch - '0');
      else if (ch >= 'a' && ch <= 'f')
	val = (rc_uint_type) ((ch - 'a') + 10);
      else if (ch >= 'A' && ch <= 'F')
	val = (rc_uint_type) ((ch - 'A') + 10);
      else
	break;
      rclex_readch ();
      if (! warned && val >= base)
	{
	  warned = 1;
	  rcparse_warning ("digit exceeds base");
	}
      ret *= base;
      ret += val;
    }
  return ret;
}

/* yyparser entry method.

   Scan the next token from the preprocessed input and return its token
   code, or -1 at end of input.  The semantic value, if any, is stored
   in yylval.  Characters up to and including 0x20 are skipped (newlines
   bump rc_lineno).  Preprocessor lines are passed to cpp_line and never
   returned.  While suppress_cpp_data is set, IGNORE_CPP turns every
   token into IGNORED_TOKEN, and the outer loop keeps scanning until a
   token other than IGNORED_TOKEN is found.  */

int
yylex (void)
{
  char *s;
  unichar *us;
  rc_uint_type length;
  int ch;

  /* Make sure that rclex_tok is initialized.  */
  if (! rclex_tok)
    rclex_tok_add_char (-1);

  do
    {
      do
	{
	  /* Clear token.  */
	  rclex_tok_pos = 0;
	  rclex_tok[0] = 0;
	  
	  if ((ch = rclex_readch ()) == -1)
	    return -1;
	  if (ch == '\n')
	    ++rc_lineno;
	}
      while (ch <= 0x20);

      switch (ch)
	{
	case '#':
	  /* Collect the rest of the line (the newline itself is left
	     unread) and let cpp_line interpret it.  */
	  while ((ch = rclex_peekch ()) != -1 && ch != '\n')
	    rclex_readch ();
	  cpp_line ();
	  ch = IGNORED_TOKEN;
	  break;
	
	case '{':
	  ch = IGNORE_CPP (BEG);
	  break;
	
	case '}':
	  ch = IGNORE_CPP (END);
	  break;
	
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  /* Number; a trailing 'l' or 'L' sets the dword flag.  */
	  yylval.i.val = read_digit (ch);
	  yylval.i.dword = 0;
	  switch (rclex_peekch ())
	    {
	    case 'l': case 'L':
	      rclex_readch ();
	      yylval.i.dword = 1;
	      break;
	    }
	  ch = IGNORE_CPP (NUMBER);
	  break;
	case '"':
	  /* Quoted string.  It is always read to its end, but only
	     decoded when the token is not being suppressed.  */
	  rclex_string ();
	  ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
	  if (ch == IGNORED_TOKEN)
	    break;
	  s = handle_quotes (&length);
	  if (! rcdata_mode)
	    yylval.s = s;
	  else
	    {
	      yylval.ss.length = length;
	      yylval.ss.s = s;
	  }
	  break;
	case 'L': case 'l':
	  /* L"..." is a unicode string; any other word starting with
	     L or l is handled as an identifier below.  */
	  if (rclex_peekch () == '"')
	    {
	      rclex_readch ();
	      rclex_string ();
	      ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
	      if (ch == IGNORED_TOKEN)
		break;
	      us = handle_uniquotes (&length);
	      if (! rcdata_mode)
		yylval.uni = us;
	      else
	        {
		  yylval.suni.length = length;
		  yylval.suni.s = us;
	      }
	      break;
	    }
	  /* Fall through.  */
	default:
	  if (ISIDST (ch) || ch=='$')
	    {
	      /* Identifier, keyword or unquoted file name: besides
		 identifier characters, '$', '.', ':', '\\', '/', '_'
		 and '-' are accepted after the first character.  */
	      while ((ch = rclex_peekch ()) != -1
		     && (ISIDNUM (ch) || ch == '$' || ch == '.'
		         || ch == ':' || ch == '\\' || ch == '/'
		         || ch == '_' || ch == '-')
		    )
		rclex_readch ();
	      ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
	      if (ch == STRING)
		{
		  /* Not a keyword: hand a copy of the text to the parser.  */
		  s = get_string (strlen (rclex_tok) + 1);
		  strcpy (s, rclex_tok);
		  yylval.s = s;
		}
	      else if (ch == BLOCK)
		{
		  /* BLOCK takes a string argument, read here with a
		     recursive call.  The two well-known block names get
		     tokens of their own; for any other string the token
		     stays BLOCK and yylval.s holds the name.  */
		  const char *hs = NULL;

		  switch (yylex ())
		  {
		  case STRING:
		  case QUOTEDSTRING:
		    hs = yylval.s;
		    break;
		  case SIZEDSTRING:
		    hs = yylval.s = yylval.ss.s;
		    break;
		  }
		  if (! hs)
		    {
		      rcparse_warning ("BLOCK expects a string as argument.");
		      ch = IGNORED_TOKEN;
		    }
		  else if (! strcmp (hs, "StringFileInfo"))
		    ch = BLOCKSTRINGFILEINFO;
		  else if (! strcmp (hs, "VarFileInfo"))
		    ch = BLOCKVARFILEINFO;
		}
	      break;
	    }
	  /* Any other character is returned as its own token.  */
	  ch = IGNORE_CPP (ch);
	  break;
	}
    }
  while (ch == IGNORED_TOKEN);

  return ch;
}
Commit	Line	Data
4a594fce NC	1	/* rclex.c -- lexer for Windows rc files parser */
	2
	3	/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
	4	Free Software Foundation, Inc.
	5
	6	Written by Kai Tietz, Onevision.
	7
	8	This file is part of GNU Binutils.
	9
	10	This program is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
32866df7	12	the Free Software Foundation; either version 3 of the License, or
4a594fce NC	13	(at your option) any later version.
	14
	15	This program is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with this program; if not, write to the Free Software
	22	Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
	23	02110-1301, USA. */
	24
32866df7	25
4a594fce NC	26	/* This is a lexer used by the Windows rc file parser. It basically
	27	just recognized a bunch of keywords. */
	28
	29	#include "sysdep.h"
	30	#include "bfd.h"
	31	#include "bucomm.h"
	32	#include "libiberty.h"
	33	#include "safe-ctype.h"
	34	#include "windres.h"
	35	#include "rcparse.h"
	36
	37	#include <assert.h>
	38
	39	/* Whether we are in rcdata mode, in which we returns the lengths of
	40	strings. */
	41
	42	static int rcdata_mode;
	43
	44	/* Whether we are supressing lines from cpp (including windows.h or
	45	headers from your C sources may bring in externs and typedefs).
	46	When active, we return IGNORED_TOKEN, which lets us ignore these
	47	outside of resource constructs. Thus, it isn't required to protect
	48	all the non-preprocessor lines in your header files with #ifdef
	49	RC_INVOKED. It also means your RC file can't include other RC
	50	files if they're named ".h". Sorry. Name them .rch or whatever. */
	51
	52	static int suppress_cpp_data;
	53
	54	#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
	55
	56	/* The first filename we detect in the cpp output. We use this to
	57	tell included files from the original file. */
	58
	59	static char *initial_fn;
	60
	61	/* List of allocated strings. */
	62
	63	struct alloc_string
	64	{
	65	struct alloc_string *next;
	66	char *s;
	67	};
	68
	69	static struct alloc_string *strings;
	70
	71	struct rclex_keywords
	72	{
	73	const char *name;
	74	int tok;
	75	};
	76
	77	#define K(KEY) { #KEY, KEY }
	78	#define KRT(KEY) { #KEY, RT_##KEY }
	79
	80	static const struct rclex_keywords keywds[] =
	81	{
	82	K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
	83	K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
	84	K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
	85	K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
	86	K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
	87	K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
	88	K(DLGINCLUDE), K(DLGINIT),
	89	K(EDITTEXT), K(END), K(EXSTYLE),
90	K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
91	K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
92	K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
93	K(HEDIT), K(HELP), K(HTML),
94	K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
95	K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
96	K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
97	K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
98	K(NOINVERT), K(NOT),
99	K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
100	K(PURE), K(PUSHBOX), K(PUSHBUTTON),
101	K(RADIOBUTTON), K(RCDATA), K(RTEXT),
102	K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
103	K(STRINGTABLE), K(STYLE),
104	K(TOOLBAR),
105	K(USERBUTTON),
106	K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
107	K(VIRTKEY), K(VXD),
108	{ NULL, 0 },
109	};
110
111	/* External input stream from resrc */
112	extern FILE *cpp_pipe;
113
114	/* Lexical scanner helpers. */
115	static int rclex_lastch = -1;
116	static size_t rclex_tok_max = 0;
117	static size_t rclex_tok_pos = 0;
118	static char *rclex_tok = NULL;
119
120	static int
121	rclex_translatekeyword (const char *key)
122	{
123	if (key && ISUPPER (key[0]))
124	{
125	const struct rclex_keywords *kw = &keywds[0];
126
127	do
128	{
129	if (! strcmp (kw->name, key))
130	return kw->tok;
131	++kw;
132	}
133	while (kw->name != NULL);
134	}
135	return STRING;
136	}
137
138	/* Handle a C preprocessor line. */
139
140	static void
141	cpp_line (void)
142	{
143	const char *s = rclex_tok;
144	int line;
145	char send, fn;
d856f2dd	146	size_t len, mlen;
4a594fce NC	147
	148	++s;
	149	while (ISSPACE (*s))
	150	++s;
	151
d856f2dd NC	152	/* Check for #pragma code_page ( DEFAULT \| <nr>). */
	153	len = strlen (s);
	154	mlen = strlen ("pragma");
	155	if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
	156	{
	157	const char *end;
	158
	159	s += mlen + 1;
	160	while (ISSPACE (*s))
	161	++s;
	162	len = strlen (s);
	163	mlen = strlen ("code_page");
	164	if (len <= mlen \|\| memcmp (s, "code_page", mlen) != 0)
	165	/* FIXME: We ought to issue a warning message about an unrecognised pragma. */
	166	return;
	167	s += mlen;
	168	while (ISSPACE (*s))
	169	++s;
	170	if (*s != '(')
	171	/* FIXME: We ought to issue an error message about a malformed pragma. */
	172	return;
	173	++s;
	174	while (ISSPACE (*s))
	175	++s;
	176	if (*s == 0 \|\| (end = strchr (s, ')')) == NULL)
	177	/* FIXME: We ought to issue an error message about a malformed pragma. */
	178	return;
	179	len = (size_t) (end - s);
	180	fn = xmalloc (len + 1);
	181	if (len)
	182	memcpy (fn, s, len);
	183	fn[len] = 0;
	184	while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
	185	fn[--len] = 0;
	186	if (! len \|\| (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
	187	wind_current_codepage = wind_default_codepage;
	188	else if (len > 0)
	189	{
	190	rc_uint_type ncp;
	191
	192	if (fn[0] == '0' && (fn[1] == 'x' \|\| fn[1] == 'X'))
	193	ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
	194	else
	195	ncp = (rc_uint_type) strtol (fn, NULL, 10);
	196	if (ncp == CP_UTF16 \|\| ! unicode_is_valid_codepage (ncp))
	197	fatal (_("invalid value specified for pragma code_page.\n"));
	198	wind_current_codepage = ncp;
	199	}
	200	free (fn);
	201	return;
	202	}
	203
4a594fce NC	204	line = strtol (s, &send, 0);
	205	if (send != '\0' && ! ISSPACE (send))
	206	return;
	207
	208	/* Subtract 1 because we are about to count the newline. */
	209	rc_lineno = line - 1;
	210
	211	s = send;
	212	while (ISSPACE (*s))
	213	++s;
	214
	215	if (*s != '"')
	216	return;
	217
	218	++s;
	219	send = strchr (s, '"');
	220	if (send == NULL)
	221	return;
	222
	223	fn = xmalloc (send - s + 1);
	224	strncpy (fn, s, send - s);
	225	fn[send - s] = '\0';
	226
	227	free (rc_filename);
	228	rc_filename = fn;
	229
	230	if (! initial_fn)
	231	{
	232	initial_fn = xmalloc (strlen (fn) + 1);
	233	strcpy (initial_fn, fn);
	234	}
	235
	236	/* Allow the initial file, regardless of name. Suppress all other
	237	files if they end in ".h" (this allows included ".rc"). /
	238	if (strcmp (initial_fn, fn) == 0
	239	\|\| strcmp (fn + strlen (fn) - 2, ".h") != 0)
	240	suppress_cpp_data = 0;
	241	else
	242	suppress_cpp_data = 1;
	243	}
	244
	245	/* Allocate a string of a given length. */
	246
	247	static char *
	248	get_string (int len)
	249	{
	250	struct alloc_string *as;
	251
	252	as = xmalloc (sizeof *as);
	253	as->s = xmalloc (len);
	254
	255	as->next = strings;
	256	strings = as;
	257
	258	return as->s;
	259	}
	260
	261	/* Handle a quoted string. The quotes are stripped. A pair of quotes
	262	in a string are turned into a single quote. Adjacent strings are
	263	merged separated by whitespace are merged, as in C. */
	264
	265	static char *
	266	handle_quotes (rc_uint_type *len)
	267	{
268	const char *input = rclex_tok;
269	char ret, s;
270	const char *t;
271	int ch;
272	int num_xdigits;
273
274	ret = get_string (strlen (input) + 1);
275
276	s = ret;
277	t = input;
278	if (*t == '"')
279	++t;
280	while (*t != '\0')
281	{
282	if (*t == '\\')
283	{
284	++t;
285	switch (*t)
286	{
287	case '\0':
288	rcparse_warning ("backslash at end of string");
289	break;
290
291	case '\"':
292	rcparse_warning ("use \"\" to put \" in a string");
293	*s++ = '"';
294	++t;
295	break;
296
297	case 'a':
298	s++ = ESCAPE_B; / Strange, but true... */
299	++t;
300	break;
301
302	case 'b':
303	*s++ = ESCAPE_B;
304	++t;
305	break;
306
307	case 'f':
308	*s++ = ESCAPE_F;
309	++t;
310	break;
311
312	case 'n':
313	*s++ = ESCAPE_N;
314	++t;
315	break;
316
317	case 'r':
318	*s++ = ESCAPE_R;
319	++t;
320	break;
321
322	case 't':
323	*s++ = ESCAPE_T;
324	++t;
325	break;
326
327	case 'v':
328	*s++ = ESCAPE_V;
329	++t;
330	break;
331
332	case '\\':
333	s++ = t++;
334	break;
335
336	case '0': case '1': case '2': case '3':
337	case '4': case '5': case '6': case '7':
338	ch = *t - '0';
339	++t;
340	if (t >= '0' && t <= '7')
341	{
342	ch = (ch << 3) \| (*t - '0');
343	++t;
344	if (t >= '0' && t <= '7')
345	{
346	ch = (ch << 3) \| (*t - '0');
347	++t;
348	}
349	}
350	*s++ = ch;
351	break;
352
353	case 'x': case 'X':
354	++t;
355	ch = 0;
356	/* We only handle single byte chars here. Make sure
357	we finish an escape sequence like "/xB0ABC" after
358	the first two digits. */
359	num_xdigits = 2;
360	while (num_xdigits--)
361	{
362	if (t >= '0' && t <= '9')
363	ch = (ch << 4) \| (*t - '0');
364	else if (t >= 'a' && t <= 'f')
365	ch = (ch << 4) \| (*t - 'a' + 10);
366	else if (t >= 'A' && t <= 'F')
367	ch = (ch << 4) \| (*t - 'A' + 10);
368	else
369	break;
370	++t;
371	}
372	*s++ = ch;
373	break;
374
375	default:
376	rcparse_warning ("unrecognized escape sequence");
377	*s++ = '\\';
378	s++ = t++;
379	break;
380	}
381	}
382	else if (*t != '"')
383	s++ = t++;
384	else if (t[1] == '\0')
385	break;
386	else if (t[1] == '"')
387	{
388	*s++ = '"';
389	t += 2;
390	}
391	else
392	{
393	rcparse_warning ("unexpected character after '\"'");
394	++t;
395	assert (ISSPACE (*t));
396	while (ISSPACE (*t))
397	{
398	if ((*t) == '\n')
399	++rc_lineno;
400	++t;
401	}
402	if (*t == '\0')
403	break;
404	assert (*t == '"');
405	++t;
406	}
407	}
408
409	*s = '\0';
410
411	*len = s - ret;
412
413	return ret;
414	}
415
416	/* Allocate a unicode string of a given length. */
417
418	static unichar *
419	get_unistring (int len)
420	{
421	return (unichar ) get_string (len sizeof (unichar));
422	}
423
424	/* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
425	in a string are turned into a single quote. Adjacent strings are
426	merged separated by whitespace are merged, as in C. */
427
428	static unichar *
429	handle_uniquotes (rc_uint_type *len)
430	{
431	const char *input = rclex_tok;
432	unichar ret, s;
433	const char *t;
434	int ch;
435	int num_xdigits;
436
437	ret = get_unistring (strlen (input) + 1);
438
439	s = ret;
440	t = input;
441	if ((t == 'L' \|\| t == 'l') && t[1] == '"')
442	t += 2;
443	else if (*t == '"')
444	++t;
445	while (*t != '\0')
446	{
447	if (*t == '\\')
448	{
449	++t;
450	switch (*t)
451	{
452	case '\0':
453	rcparse_warning ("backslash at end of string");
454	break;
455
456	case '\"':
457	rcparse_warning ("use \"\" to put \" in a string");
458	break;
459
460	case 'a':
461	s++ = ESCAPE_B; / Strange, but true... */
462	++t;
463	break;
464
465	case 'b':
466	*s++ = ESCAPE_B;
467	++t;
468	break;
469
470	case 'f':
471	*s++ = ESCAPE_F;
472	++t;
473	break;
474
475	case 'n':
476	*s++ = ESCAPE_N;
477	++t;
478	break;
479
480	case 'r':
481	*s++ = ESCAPE_R;
482	++t;
483	break;
484
485	case 't':
486	*s++ = ESCAPE_T;
487	++t;
488	break;
489
490	case 'v':
491	*s++ = ESCAPE_V;
492	++t;
493	break;
494
495	case '\\':
496	s++ = (unichar) t++;
497	break;
498
499	case '0': case '1': case '2': case '3':
500	case '4': case '5': case '6': case '7':
501	ch = *t - '0';
502	++t;
503	if (t >= '0' && t <= '7')
504	{
505	ch = (ch << 3) \| (*t - '0');
506	++t;
507	if (t >= '0' && t <= '7')
508	{
509	ch = (ch << 3) \| (*t - '0');
510	++t;
511	}
512	}
513	*s++ = (unichar) ch;
514	break;
515
516	case 'x': case 'X':
517	++t;
518	ch = 0;
519	/* We only handle two byte chars here. Make sure
520	we finish an escape sequence like "/xB0ABC" after
521	the first two digits. */
522	num_xdigits = 4;
523	while (num_xdigits--)
524	{
525	if (t >= '0' && t <= '9')
526	ch = (ch << 4) \| (*t - '0');
527	else if (t >= 'a' && t <= 'f')
528	ch = (ch << 4) \| (*t - 'a' + 10);
529	else if (t >= 'A' && t <= 'F')
530	ch = (ch << 4) \| (*t - 'A' + 10);
531	else
532	break;
533	++t;
534	}
535	*s++ = (unichar) ch;
536	break;
537
538	default:
539	rcparse_warning ("unrecognized escape sequence");
540	*s++ = '\\';
541	s++ = (unichar) t++;
542	break;
543	}
544	}
545	else if (*t != '"')
546	s++ = (unichar) t++;
547	else if (t[1] == '\0')
548	break;
549	else if (t[1] == '"')
550	{
551	*s++ = '"';
552	t += 2;
553	}
554	else
555	{
556	++t;
557	assert (ISSPACE (*t));
558	while (ISSPACE (*t))
559	{
560	if ((*t) == '\n')
561	++rc_lineno;
562	++t;
563	}
564	if (*t == '\0')
565	break;
566	assert (*t == '"');
567	++t;
568	}
569	}
570
571	*s = '\0';
572
573	*len = s - ret;
574
575	return ret;
576	}
577
578	/* Discard all the strings we have allocated. The parser calls this
579	when it no longer needs them. */
580
581	void
582	rcparse_discard_strings (void)
583	{
584	struct alloc_string *as;
585
586	as = strings;
587	while (as != NULL)
588	{
589	struct alloc_string *n;
590
591	free (as->s);
592	n = as->next;
593	free (as);
594	as = n;
595	}
596
597	strings = NULL;
598	}
599
600	/* Enter rcdata mode. */
601	void
602	rcparse_rcdata (void)
603	{
604	rcdata_mode = 1;
605	}
606
607	/* Go back to normal mode from rcdata mode. */
608	void
609	rcparse_normal (void)
610	{
611	rcdata_mode = 0;
612	}
613
614	static void
615	rclex_tok_add_char (int ch)
616	{
617	if (! rclex_tok \|\| rclex_tok_max <= rclex_tok_pos)
618	{
619	char *h = xmalloc (rclex_tok_max + 9);
620
621	if (! h)
622	abort ();
623	if (rclex_tok)
624	{
625	memcpy (h, rclex_tok, rclex_tok_pos + 1);
626	free (rclex_tok);
627	}
628	else
629	rclex_tok_pos = 0;
630	rclex_tok_max += 8;
631	rclex_tok = h;
632	}
633	if (ch != -1)
634	rclex_tok[rclex_tok_pos++] = (char) ch;
635	rclex_tok[rclex_tok_pos] = 0;
636	}
637
638	static int
639	rclex_readch (void)
640	{
641	int r = -1;
642
643	if ((r = rclex_lastch) != -1)
644	rclex_lastch = -1;
645	else
646	{
647	char ch;
648	do
649	{
650	if (! cpp_pipe \|\| feof (cpp_pipe)
651	\|\| fread (&ch, 1, 1,cpp_pipe) != 1)
652	break;
653	r = ((int) ch) & 0xff;
654	}
655	while (r == 0 \|\| r == '\r');
656	}
657	rclex_tok_add_char (r);
658	return r;
659	}
660
661	static int
662	rclex_peekch (void)
663	{
664	int r;
665
666	if ((r = rclex_lastch) == -1)
667	{
668	if ((r = rclex_readch ()) != -1)
669	{
670	rclex_lastch = r;
671	if (rclex_tok_pos > 0)
672	rclex_tok[--rclex_tok_pos] = 0;
673	}
674	}
675	return r;
676	}
677
/* Read the rest of a quoted string into the token text; yylex calls
   this after it has consumed the opening quote.  Reading stops after
   the closing quote, in front of a newline, or at end of input.  A
   backslash takes the character after it along, and two quotes in a
   row do not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int next = rclex_peekch ();

      if (next == -1 || next == '\n')
        return;

      if (next == '\\')
        {
          /* Take the backslash and the character behind it, unless
             the line or the input ends first.  */
          rclex_readch ();
          next = rclex_peekch ();
          if (next == -1 || next == '\n')
            return;
          rclex_readch ();
          continue;
        }

      if (rclex_readch () != '"')
        continue;

      /* A quote was read.  Another quote right behind it is a doubled
         quote inside the string; anything else ends the string.  */
      if (rclex_peekch () != '"')
        return;
      rclex_readch ();
    }
}
703
704	static rc_uint_type
705	read_digit (int ch)
706	{
707	rc_uint_type base = 10;
708	rc_uint_type ret, val;
709	int warned = 0;
710
711	ret = 0;
712	if (ch == '0')
713	{
714	base = 8;
715	switch (rclex_peekch ())
716	{
717	case 'o': case 'O':
718	rclex_readch ();
719	base = 8;
720	break;
721
722	case 'x': case 'X':
723	rclex_readch ();
724	base = 16;
725	break;
726	}
727	}
728	else
729	ret = (rc_uint_type) (ch - '0');
730	while ((ch = rclex_peekch ()) != -1)
731	{
732	if (ISDIGIT (ch))
733	val = (rc_uint_type) (ch - '0');
734	else if (ch >= 'a' && ch <= 'f')
735	val = (rc_uint_type) ((ch - 'a') + 10);
736	else if (ch >= 'A' && ch <= 'F')
737	val = (rc_uint_type) ((ch - 'A') + 10);
738	else
739	break;
740	rclex_readch ();
741	if (! warned && val >= base)
742	{
743	warned = 1;
744	rcparse_warning ("digit exceeds base");
745	}
746	ret *= base;
747	ret += val;
748	}
749	return ret;
750	}
751
752	/* yyparser entry method. */
753
754	int
755	yylex (void)
756	{
757	char *s;
758	unichar *us;
759	rc_uint_type length;
760	int ch;
761
762	/* Make sure that rclex_tok is initialized. */
763	if (! rclex_tok)
764	rclex_tok_add_char (-1);
765
766	do
767	{
768	do
769	{
770	/* Clear token. */
771	rclex_tok_pos = 0;
772	rclex_tok[0] = 0;
773
774	if ((ch = rclex_readch ()) == -1)
775	return -1;
776	if (ch == '\n')
777	++rc_lineno;
778	}
779	while (ch <= 0x20);
780
781	switch (ch)
782	{
783	case '#':
784	while ((ch = rclex_peekch ()) != -1 && ch != '\n')
785	rclex_readch ();
786	cpp_line ();
787	ch = IGNORED_TOKEN;
788	break;
789
790	case '{':
791	ch = IGNORE_CPP (BEG);
792	break;
793
794	case '}':
795	ch = IGNORE_CPP (END);
796	break;
797
798	case '0': case '1': case '2': case '3': case '4':
799	case '5': case '6': case '7': case '8': case '9':
800	yylval.i.val = read_digit (ch);
801	yylval.i.dword = 0;
802	switch (rclex_peekch ())
803	{
804	case 'l': case 'L':
805	rclex_readch ();
806	yylval.i.dword = 1;
807	break;
808	}
809	ch = IGNORE_CPP (NUMBER);
810	break;
811	case '"':
812	rclex_string ();
813	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
814	if (ch == IGNORED_TOKEN)
815	break;
816	s = handle_quotes (&length);
817	if (! rcdata_mode)
818	yylval.s = s;
819	else
820	{
821	yylval.ss.length = length;
822	yylval.ss.s = s;
823	}
824	break;
825	case 'L': case 'l':
826	if (rclex_peekch () == '"')
827	{
828	rclex_readch ();
829	rclex_string ();
830	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
831	if (ch == IGNORED_TOKEN)
832	break;
833	us = handle_uniquotes (&length);
834	if (! rcdata_mode)
835	yylval.uni = us;
836	else
837	{
838	yylval.suni.length = length;
839	yylval.suni.s = us;
840	}
841	break;
842	}
843	/* Fall through. */
844	default:
845	if (ISIDST (ch) \|\| ch=='$')
846	{
aeafac0c KT	847	while ((ch = rclex_peekch ()) != -1
	848	&& (ISIDNUM (ch) \|\| ch == '$' \|\| ch == '.'
	849	\|\| ch == ':' \|\| ch == '\\' \|\| ch == '/'
e36ea2de	850	\|\| ch == '_' \|\| ch == '-')
aeafac0c	851	)
4a594fce NC	852	rclex_readch ();
	853	ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
	854	if (ch == STRING)
	855	{
	856	s = get_string (strlen (rclex_tok) + 1);
	857	strcpy (s, rclex_tok);
	858	yylval.s = s;
	859	}
	860	else if (ch == BLOCK)
	861	{
	862	const char *hs = NULL;
	863
	864	switch (yylex ())
	865	{
	866	case STRING:
	867	case QUOTEDSTRING:
	868	hs = yylval.s;
	869	break;
	870	case SIZEDSTRING:
	871	hs = yylval.s = yylval.ss.s;
	872	break;
	873	}
	874	if (! hs)
	875	{
	876	rcparse_warning ("BLOCK expects a string as argument.");
	877	ch = IGNORED_TOKEN;
	878	}
	879	else if (! strcmp (hs, "StringFileInfo"))
	880	ch = BLOCKSTRINGFILEINFO;
	881	else if (! strcmp (hs, "VarFileInfo"))
	882	ch = BLOCKVARFILEINFO;
	883	}
	884	break;
	885	}
	886	ch = IGNORE_CPP (ch);
	887	break;
	888	}
	889	}
	890	while (ch == IGNORED_TOKEN);
	891
	892	return ch;
	893	}