[deliverable/binutils-gdb.git] / gas / app.c

/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
   pair.  This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

static char lex[256];
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape PARAMS ((int));

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in lex[], the character classification table consulted by the
   scrubber.  Fixed defaults are installed first; the symbol characters
   and the comment_chars, line_comment_chars and line_separator_chars
   tables are applied afterwards, in that order, so when a character
   appears in more than one class the last assignment wins.  */

void
do_scrub_begin ()
{
  const char *cp;

  /* Built-in defaults.  Each of these touches a different slot.  */
  lex['\n'] = LEX_IS_NEWLINE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex[' '] = LEX_IS_WHITESPACE;
  lex[':'] = LEX_IS_COLON;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['"'] = LEX_IS_STRINGQUOTE;

  /* A single quote opens a string when SINGLE_QUOTE_STRINGS is defined;
     otherwise it introduces a one-character constant, except on
     TC_HPPA where it is left unclassified.  */
#ifdef SINGLE_QUOTE_STRINGS
  lex['\''] = LEX_IS_STRINGQUOTE;
#else
#ifndef TC_HPPA
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
#endif
#endif

  /* The tables below override the defaults above: e.g. if ';' is a
     comment char, then it is no longer a line separator.  */

  /* Symbol characters.  */
  cp = symbol_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_SYMBOL_COMPONENT;

  /* Comment characters.  */
  cp = comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_COMMENT_START;

  /* Line comment characters.  */
  cp = line_comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_COMMENT_START;

  /* Line separators.  */
  cp = line_separator_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_SEPARATOR;

  /* Slash-star comments are only recognised through this class when
     '/' has not already been claimed by one of the tables above.  */
  if (!lex['/'])
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;

  /* FIXME-soon.  This is a bad hack but otherwise, we can't do c-style
     comments when '/' is a line comment char.  xoxorich.  */
  if (!lex['*'])
    lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
}				/* do_scrub_begin() */

/* Stream that scrub_from_file reads and scrub_to_file pushes back to.  */
FILE *scrub_file;

/* Fetch the next character from scrub_file.  The value is whatever getc
   yields, so EOF is returned once the stream is exhausted.  */

int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}

/* Push CH back onto scrub_file with ungetc.  The result of ungetc is
   deliberately not examined.  */

void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* Cursor into the in-memory text being scrubbed, and the position at
   which scrub_from_string reports end of input.  */
char *scrub_string;
char *scrub_last_string;

/* Return the next character of the in-memory input and advance
   scrub_string, or return EOF once scrub_string has reached
   scrub_last_string.

   The byte is fetched as an unsigned char so that, like getc, the
   result is always in the range 0..255.  Where plain char is signed, a
   byte with the high bit set would otherwise come back negative: 0xFF
   would be indistinguishable from EOF and the others would index before
   the start of lex[].  */

int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push CH back onto the in-memory input: step scrub_string back one
   position and store CH there, so the next scrub_from_string call
   returns it.

   A request to push back EOF is ignored, which is what ungetc does for
   the file-based counterpart.  Storing it would overwrite a byte of
   the buffer with 0xFF and make it reappear as input.  */

void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;
  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */

/* Current state of do_scrub_next_char; the values are listed in the
   state table at the top of that function.  */
static int state;
/* State to return to when one of the temporary states (-1, -2, 5)
   finishes.  */
static int old_state;
/* Next character of the canned text being emitted while in state -1.  */
static char *out_string;
/* Receives the decimal text that sprintf produces for a one-character
   constant; out_string is pointed at it while it is being emitted.  */
static char out_buf[20];
/* Count of newlines swallowed inside comments and continued strings.
   It is decremented again as newlines are handed to the caller.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* One member per file-level variable copied by app_push and restored by
   app_pop; each member has the same name as the variable it shadows.  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };

/* Copy every piece of scrubber state into a newly xmalloc'd
   struct app_save and return it as an opaque char pointer.  Passing
   that pointer to app_pop restores the state and frees the storage.

   NOTE(review): the live variables are left untouched, so whatever is
   scrubbed next starts in the state the interrupted input was in --
   confirm that callers expect this.  */

char *
app_push ()
{
  struct app_save *snapshot;

  snapshot = (struct app_save *) xmalloc (sizeof (struct app_save));

  /* Where input is coming from.  */
  snapshot->scrub_file = scrub_file;
  snapshot->scrub_string = scrub_string;
  snapshot->scrub_last_string = scrub_last_string;

  /* State machine, pending output and owed newlines.  */
  snapshot->state = state;
  snapshot->old_state = old_state;
  snapshot->add_newlines = add_newlines;
  snapshot->out_string = out_string;
  memcpy (snapshot->out_buf, out_buf, sizeof (snapshot->out_buf));

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *) snapshot;
}

/* Restore the scrubber state from ARG, a pointer previously returned by
   app_push, and then free ARG.  */

void
app_pop (arg)
     char *arg;
{
  struct app_save *snapshot;

  snapshot = (struct app_save *) arg;

  /* There is no do_scrub_end (). */

  /* Where input is coming from.  */
  scrub_file = snapshot->scrub_file;
  scrub_string = snapshot->scrub_string;
  scrub_last_string = snapshot->scrub_last_string;

  /* State machine, pending output and owed newlines.  */
  state = snapshot->state;
  old_state = snapshot->old_state;
  add_newlines = snapshot->add_newlines;
  out_string = snapshot->out_string;
  memcpy (out_buf, snapshot->out_buf, sizeof (out_buf));

  free (arg);
}				/* app_pop() */

/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  Characters that are not in the
   table below are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  /* letters[i] is the escape letter, values[i] what it maps to.  */
  static const char letters[] = "bfnrt'\"";
  static const char values[] = "\b\f\n\r\t'\"";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
/* Return the next character of scrubbed assembler input.

   GET is called with no arguments to fetch the next raw character and
   must return EOF at end of input; UNGET is called with one character
   to push it back.  (The scrub_from_* / scrub_to_* routines in this
   file have the matching shapes.)

   The result is the next output character, or EOF once the input is
   exhausted.  All progress is kept in the file-level variables state,
   old_state, out_string, out_buf and add_newlines, so the function is
   simply called again for each further character.

   EOF is always tested for before a character is used to index lex[],
   and EOF is never handed to UNGET.  */
int 
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
     constructs like ``.loc 1 20''.  This was turning into ``.loc
     120''.  States 9 and 10 ensure that a space is never dropped in
     between characters which could appear in a identifier.  Ian
     Taylor, ian@cygnus.com.  */

  register int ch, ch2 = 0;
  int not_cpp_line = 0;

  switch (state)
    {
    case -1:
      /* Hand out the canned text one character per call; once its
	 terminating NUL is next, switch to the state that was queued
	 in old_state.  */
      ch = *out_string++;
      if (*out_string == 0)
	{
	  state = old_state;
	  old_state = 3;
	}
      return ch;

    case -2:
      /* Inside a slash-star comment that began at the start of a line.
	 Newlines are returned to the caller (the state stays -2), so
	 the output keeps one line per input line.  */
      for (;;)
	{
	  do
	    {
	      ch = (*get) ();
	    }
	  while (ch != EOF && ch != '\n' && ch != '*');
	  if (ch == '\n' || ch == EOF)
	    return ch;

	  /* At this point, ch must be a '*' */
	  while ((ch = (*get) ()) == '*')
	    {
	      ;
	    }
	  if (ch == EOF || ch == '/')
	    break;
	  (*unget) (ch);
	}
      state = old_state;
      return ' ';

    case 4:
      /* After ".appline ": copy the digits, then look for a quoted
	 file name.  */
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
	return ch;
      else
	{
	  while (ch != EOF && IS_WHITESPACE (ch))
	    ch = (*get) ();
	  if (ch == '"')
	    {
	      (*unget) (ch);
	      out_string = "\n.appfile ";
	      old_state = 7;
	      state = -1;
	      return *out_string++;
	    }
	  else
	    {
	      /* No file name: discard the rest of the line.  */
	      while (ch != EOF && ch != '\n')
		ch = (*get) ();
	      state = 0;
	      return ch;
	    }
	}

    case 5:
      ch = (*get) ();
      /* EOF must be recognised before lex[] is consulted: it is
	 negative and is not a valid table index.  */
      if (ch == EOF)
	{
	  as_warn ("End of file in string: inserted '\"'");
	  state = old_state;
	  (*unget) ('\n');
	  return '"';
	}
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
	{
	  state = old_state;
	  return ch;
	}
      else if (ch == '\\')
	{
	  state = 6;
	  return ch;
	}
      else
	{
	  return ch;
	}

    case 6:
      /* The backslash itself was returned by state 5; this is the
	 character after it.  */
      state = 5;
      ch = (*get) ();
      switch (ch)
	{
	  /* Handle strings broken across lines, by turning '\n' into
	     '\\' and 'n'.  */
	case '\n':
	  (*unget) ('n');
	  add_newlines++;
	  return '\\';

	case '"':
	case '\\':
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
#ifdef BACKSLASH_V
	case 'v':
#endif /* BACKSLASH_V */
	case 'x':
	case 'X':
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
	default:
	  as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	  break;
#else /* ONLY_STANDARD_ESCAPES */
	default:
	  /* Accept \x as x for any x */
	  break;
#endif /* ONLY_STANDARD_ESCAPES */

	case EOF:
	  as_warn ("End of file in string: '\"' inserted");
	  return '"';
	}
      return ch;

    case 7:
      /* Return the character that follows ".appfile " (state 4 only
	 queues this state after pushing back a '"') and let state 5
	 copy the rest of the string.  */
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Discard whatever follows the .appfile string on this line.
	 Stop at EOF as well as at newline, otherwise input that ends
	 without a final newline would make this loop spin forever.  */
      do
	ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 or 9 through 10 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
	as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      do
	/* Preserve a single whitespace character at the beginning of
	   a line.  */
	if (state == 0)
	  {
	    state = 1;
	    return ch;
	  }
	else
	  ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	return ch;

      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	{
	  /* cpp never outputs a leading space before the #, so try to
	     avoid being confused.  */
	  not_cpp_line = 1;
	  goto recycle;
	}
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
	 character followed by whitespace.  If the next
	 character is ':', this is whitespace after a label
	 name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
	{
	  state = 0;
	  return ch;
	}

      switch (state)
	{
	case 0:
	  state++;
	  goto recycle;		/* Punted leading sp */
	case 1:
	  /* We can arrive here if we leave a leading whitespace character
	     at the beginning of a line.  */
	  goto recycle;
	case 2:
	  state = 3;
	  (*unget) (ch);
	  return ' ';		/* Sp after opco */
	case 3:
	  goto recycle;		/* Sp in operands */
	case 9:
	case 10:
	  state = 10;		/* Sp after symbol char */
	  goto recycle;
	default:
	  BAD_CASE (state);
	}
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	{
	  /* Swallow the comment, counting the newlines inside it so
	     they can be replayed by the LEX_IS_NEWLINE case.  */
	  for (;;)
	    {
	      do
		{
		  ch2 = (*get) ();
		  if (ch2 != EOF && IS_NEWLINE (ch2))
		    add_newlines++;
		}
	      while (ch2 != EOF &&
		     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

	      while (ch2 != EOF &&
		     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
		{
		  ch2 = (*get) ();
		}

	      if (ch2 == EOF
		  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
		break;

	      /* Push back the character that followed the run of
		 stars -- ch2, not ch (which still holds the opening
		 slash) -- so that if it is a newline the loop above
		 reads it again and counts it.  */
	      (*unget) (ch2);
	    }
	  if (ch2 == EOF)
	    as_warn ("End of file in multiline comment");

	  /* The whole comment is treated as a single blank.  */
	  ch = ' ';
	  goto recycle;
	}
      else
	{
	  if (ch2 != EOF)
	    (*unget) (ch2);
	  if (state == 9 || state == 10)
	    state = 3;
	  return ch;
	}
      break;

    case LEX_IS_STRINGQUOTE:
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      ch = (*get) ();
      if (ch == EOF)
	{
	  as_warn ("End-of-file after a one-character quote; \\000 inserted");
	  ch = 0;
	}
      if (ch == '\\')
	{
	  ch = (*get) ();
	  ch = process_escape (ch);
	}
      /* Replace the quoted character by its decimal value.  */
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.  */
      if ((ch = (*get) ()) != '\'')
	{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	  as_warn ("Missing close quote: (assumed)");
#else
	  /* Not a closing quote: give it back, unless it is EOF, which
	     cannot be pushed back.  */
	  if (ch != EOF)
	    (*unget) (ch);
#endif
	}
      if (strlen (out_buf) == 1)
	{
	  return out_buf[0];
	}
      /* More than one digit: emit the rest through state -1.  */
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state == 9 || state == 10)
	state = 3;
      else if (state != 3)
	state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
	{
	  --add_newlines;
	  (*unget) (ch);
	}
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state == 0)		/* Only comment at start of line.  */
	{
	  /* FIXME-someday: The two character comment stuff was badly
	     thought out.  On i386, we want '/' as line comment start
	     AND we want C style comments.  hence this hack.  The
	     whole lexical process should be reworked.  xoxorich.  */
	  if (ch == '/')
	    {
	      ch2 = (*get) ();
	      if (ch2 == '*')
		{
		  /* NOTE(review): old_state is not set here, so when
		     state -2 finishes it resumes whatever state was
		     last stored in old_state -- confirm that this is
		     intended.  */
		  state = -2;
		  return (do_scrub_next_char (get, unget));
		}
	      else if (ch2 != EOF)
		{
		  (*unget) (ch2);
		}
	    }			/* bad hack */

	  if (ch != '#')
	    not_cpp_line = 1;

	  do
	    ch = (*get) ();
	  while (ch != EOF && IS_WHITESPACE (ch));
	  if (ch == EOF)
	    {
	      as_warn ("EOF in comment:  Newline inserted");
	      return '\n';
	    }
	  if (ch < '0' || ch > '9' || not_cpp_line)
	    {
	      /* Non-numerics:  Eat whole comment line */
	      while (ch != EOF && !IS_NEWLINE (ch))
		ch = (*get) ();
	      if (ch == EOF)
		as_warn ("EOF in Comment: Newline inserted");
	      state = 0;
	      return '\n';
	    }
	  /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
	  (*unget) (ch);
	  old_state = 4;
	  state = -1;
	  out_string = ".appline ";
	  return *out_string++;
	}

      /* We have a line comment character which is not at the start of
	 a line.  If this is also a normal comment character, fall
	 through.  Otherwise treat it as a default character.  */
      if (strchr (comment_chars, ch) == NULL)
	goto de_fault;
      /* Fall through.  */
    case LEX_IS_COMMENT_START:
      /* Drop everything up to the end of the line and return the
	 newline in its place.  */
      do
	ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
	as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    case LEX_IS_SYMBOL_COMPONENT:
      if (state == 10)
	{
	  /* This is a symbol character following another symbol
	     character, with whitespace in between.  We skipped the
	     whitespace earlier, so output it now.  */
	  (*unget) (ch);
	  state = 3;
	  return ' ';
	}
      if (state == 3)
	state = 9;
      /* Fall through.  */
    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
	{
	  state = 2;		/* Now seeing opcode */
	  return ch;
	}
      else if (state == 1)
	{
	  state = 2;		/* Ditto */
	  return ch;
	}
      else if (state == 9)
	{
	  if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	    state = 3;
	  return ch;
	}
      else if (state == 10)
	{
	  state = 3;
	  return ch;
	}
      else
	{
	  return ch;		/* Opcode or operands already */
	}
    }
  return -1;
}

#ifdef TEST

/* Stand-alone test harness: scrub standard input onto standard output.

   The tables below stand in for the ones do_scrub_begin reads.
   NOTE(review): as.h is still included when TEST is defined; confirm
   that these definitions, and the as_warn below, agree with whatever it
   declares.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin also walks this table; no extra separators here.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Build lex[], then read through the file-based get/unget pair.  */
  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal warning routine: print STR and a newline on stderr.  STR is
   printed verbatim; any further arguments are ignored.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/* end of app.c */
Commit	Line	Data
fecd2382	1	/* This is the Assembler Pre-Processor
58d4951d	2	Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d	3
a39116f1	4	This file is part of GAS, the GNU Assembler.
6efd877d	5
a39116f1 RP	6	GAS is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 2, or (at your option)
	9	any later version.
6efd877d	10
a39116f1 RP	11	GAS is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
6efd877d	15
a39116f1 RP	16	You should have received a copy of the GNU General Public License
	17	along with GAS; see the file COPYING. If not, write to
	18	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382	19
58d4951d	20	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
fecd2382 RP	21	/* App, the assembler pre-processor. This pre-processor strips out excess
fecd2382 RP	22	spaces, turns single-quoted characters into a decimal constant, and turns
9a7d824a	23	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
be06bdcd	24	pair. This needs better error-handling.
a39116f1	25	*/
fecd2382 RP	26
fecd2382 RP	27	#include <stdio.h>
6efd877d	28	#include "as.h" /* For BAD_CASE() only */
fecd2382	29
3340f7e5	30	#if (__STDC__ != 1) && !defined(const)
6efd877d	31	#define const /* Nothing */
fecd2382 RP	32	#endif
fecd2382 RP	33
6efd877d	34	static char lex[256];
6d331d71	35	static const char symbol_chars[] =
6efd877d	36	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	37
	38	#define LEX_IS_SYMBOL_COMPONENT 1
	39	#define LEX_IS_WHITESPACE 2
	40	#define LEX_IS_LINE_SEPARATOR 3
	41	#define LEX_IS_COMMENT_START 4
	42	#define LEX_IS_LINE_COMMENT_START 5
	43	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	44	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	45	#define LEX_IS_STRINGQUOTE 8
	46	#define LEX_IS_COLON 9
	47	#define LEX_IS_NEWLINE 10
	48	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	49	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	50	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	51	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	52	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	53	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	54	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	55
385ce433 JL	56	static int process_escape PARAMS ((int));
385ce433 JL	57
a39116f1 RP	58	/* FIXME-soon: The entire lexer/parser thingy should be
	59	built statically at compile time rather than dynamically
	60	each and every time the assembler is run. xoxorich. */
fecd2382	61
6efd877d KR	62	void
	63	do_scrub_begin ()
	64	{
	65	const char *p;
	66
	67	lex[' '] = LEX_IS_WHITESPACE;
	68	lex['\t'] = LEX_IS_WHITESPACE;
	69	lex['\n'] = LEX_IS_NEWLINE;
	70	lex[';'] = LEX_IS_LINE_SEPARATOR;
	71	lex['"'] = LEX_IS_STRINGQUOTE;
58d4951d	72	#ifndef TC_HPPA
6efd877d	73	lex['\''] = LEX_IS_ONECHAR_QUOTE;
58d4951d	74	#endif
6efd877d	75	lex[':'] = LEX_IS_COLON;
7c2d4011	76
be06bdcd SC	77
	78
	79	#ifdef SINGLE_QUOTE_STRINGS
	80	lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011	81	#endif
be06bdcd	82
6efd877d	83	/* Note that these override the previous defaults, e.g. if ';'
be06bdcd	84
fecd2382	85	is a comment char, then it isn't a line separator. */
6efd877d KR	86	for (p = symbol_chars; *p; ++p)
6efd877d KR	87	{
58d4951d	88	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
6efd877d KR	89	} /* declare symbol characters */
6efd877d KR	90
6efd877d KR	91	for (p = comment_chars; *p; p++)
6efd877d KR	92	{
58d4951d	93	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
6efd877d KR	94	} /* declare comment chars */
6efd877d KR	95
9a7d824a ILT	96	for (p = line_comment_chars; *p; p++)
9a7d824a ILT	97	{
58d4951d	98	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
9a7d824a ILT	99	} /* declare line comment chars */
9a7d824a ILT	100
6efd877d KR	101	for (p = line_separator_chars; *p; p++)
6efd877d KR	102	{
58d4951d	103	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
6efd877d KR	104	} /* declare line separators */
	105
	106	/* Only allow slash-star comments if slash is not in use */
	107	if (lex['/'] == 0)
	108	{
	109	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	110	}
	111	/* FIXME-soon. This is a bad hack but otherwise, we
a39116f1 RP	112	can't do c-style comments when '/' is a line
a39116f1 RP	113	comment char. xoxorich. */
6efd877d KR	114	if (lex['*'] == 0)
	115	{
	116	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	117	}
	118	} /* do_scrub_begin() */
fecd2382 RP	119
	120	FILE *scrub_file;
	121
6efd877d KR	122	int
	123	scrub_from_file ()
	124	{
	125	return getc (scrub_file);
fecd2382 RP	126	}
fecd2382 RP	127
6efd877d KR	128	void
	129	scrub_to_file (ch)
	130	int ch;
fecd2382	131	{
6efd877d KR	132	ungetc (ch, scrub_file);
6efd877d KR	133	} /* scrub_to_file() */
fecd2382 RP	134
	135	char *scrub_string;
	136	char *scrub_last_string;
	137
6efd877d KR	138	int
	139	scrub_from_string ()
	140	{
	141	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	142	} /* scrub_from_string() */
fecd2382	143
6efd877d KR	144	void
	145	scrub_to_string (ch)
	146	int ch;
fecd2382	147	{
6efd877d KR	148	*--scrub_string = ch;
6efd877d KR	149	} /* scrub_to_string() */
fecd2382 RP	150
	151	/* Saved state of the scrubber */
	152	static int state;
	153	static int old_state;
	154	static char *out_string;
	155	static char out_buf[20];
	156	static int add_newlines = 0;
	157
	158	/* Data structure for saving the state of app across #include's. Note that
	159	app is called asynchronously to the parsing of the .include's, so our
	160	state at the time .include is interpreted is completely unrelated.
	161	That's why we have to save it all. */
	162
6efd877d KR	163	struct app_save
	164	{
	165	int state;
	166	int old_state;
	167	char *out_string;
	168	char out_buf[sizeof (out_buf)];
	169	int add_newlines;
	170	char *scrub_string;
	171	char *scrub_last_string;
	172	FILE *scrub_file;
	173	};
	174
	175	char *
	176	app_push ()
	177	{
7c2d4011 SC	178	register struct app_save *saved;
7c2d4011 SC	179
6efd877d KR	180	saved = (struct app_save *) xmalloc (sizeof (*saved));
	181	saved->state = state;
	182	saved->old_state = old_state;
	183	saved->out_string = out_string;
58d4951d	184	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
6efd877d KR	185	saved->add_newlines = add_newlines;
6efd877d KR	186	saved->scrub_string = scrub_string;
7c2d4011	187	saved->scrub_last_string = scrub_last_string;
6efd877d	188	saved->scrub_file = scrub_file;
7c2d4011 SC	189
7c2d4011 SC	190	/* do_scrub_begin() is not useful, just wastes time. */
6efd877d	191	return (char *) saved;
fecd2382 RP	192	}
fecd2382 RP	193
6efd877d KR	194	void
	195	app_pop (arg)
	196	char *arg;
fecd2382	197	{
6efd877d KR	198	register struct app_save *saved = (struct app_save *) arg;
	199
	200	/* There is no do_scrub_end (). */
	201	state = saved->state;
	202	old_state = saved->old_state;
	203	out_string = saved->out_string;
58d4951d	204	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d KR	205	add_newlines = saved->add_newlines;
	206	scrub_string = saved->scrub_string;
	207	scrub_last_string = saved->scrub_last_string;
	208	scrub_file = saved->scrub_file;
	209
	210	free (arg);
	211	} /* app_pop() */
	212
6d331d71 KR	213	/* @@ This assumes that \n &c are the same on host and target. This is not
6d331d71 KR	214	necessarily true. */
385ce433	215	static int
6efd877d	216	process_escape (ch)
385ce433	217	int ch;
7c2d4011	218	{
6efd877d KR	219	switch (ch)
	220	{
	221	case 'b':
	222	return '\b';
	223	case 'f':
	224	return '\f';
	225	case 'n':
	226	return '\n';
	227	case 'r':
	228	return '\r';
	229	case 't':
	230	return '\t';
	231	case '\'':
	232	return '\'';
	233	case '"':
6d331d71	234	return '\"';
6efd877d KR	235	default:
	236	return ch;
	237	}
7c2d4011	238	}
6efd877d KR	239	int
	240	do_scrub_next_char (get, unget)
	241	int (*get) ();
	242	void (*unget) ();
fecd2382	243	{
6efd877d	244	/*State 0: beginning of normal line
a39116f1 RP	245	1: After first whitespace on line (flush more white)
	246	2: After first non-white (opcode) on line (keep 1white)
	247	3: after second white on line (into operands) (flush white)
	248	4: after putting out a .line, put out digits
	249	5: parsing a string, then go to old-state
	250	6: putting out \ escape in a "d string.
9a7d824a ILT	251	7: After putting out a .appfile, put out string.
9a7d824a ILT	252	8: After putting out a .appfile string, flush until newline.
f6a91cc0	253	9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a	254	10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1 RP	255	-1: output string in out_string and go to the state in old_state
	256	-2: flush text until a '*' '/' is seen, then go to state old_state
	257	*/
6efd877d	258
9a7d824a ILT	259	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
	260	constructs like ``.loc 1 20''. This was turning into ``.loc
	261	120''. States 9 and 10 ensure that a space is never dropped in
	262	between characters which could appear in a identifier. Ian
	263	Taylor, ian@cygnus.com. */
f6a91cc0	264
6efd877d	265	register int ch, ch2 = 0;
385ce433	266	int not_cpp_line = 0;
6efd877d KR	267
	268	switch (state)
	269	{
	270	case -1:
	271	ch = *out_string++;
	272	if (*out_string == 0)
	273	{
	274	state = old_state;
	275	old_state = 3;
	276	}
	277	return ch;
	278
	279	case -2:
	280	for (;;)
	281	{
	282	do
	283	{
	284	ch = (*get) ();
	285	}
	286	while (ch != EOF && ch != '\n' && ch != '*');
	287	if (ch == '\n' || ch == EOF)
	288	return ch;
	289
	290	/* At this point, ch must be a '*' */
	291	while ((ch = (*get) ()) == '*')
	292	{
	293	;
	294	}
	295	if (ch == EOF || ch == '/')
	296	break;
	297	(*unget) (ch);
	298	}
	299	state = old_state;
	300	return ' ';
	301
	302	case 4:
	303	ch = (*get) ();
	304	if (ch == EOF || (ch >= '0' && ch <= '9'))
	305	return ch;
	306	else
	307	{
	308	while (ch != EOF && IS_WHITESPACE (ch))
	309	ch = (*get) ();
	310	if (ch == '"')
	311	{
	312	(*unget) (ch);
9a7d824a	313	out_string = "\n.appfile ";
6efd877d KR	314	old_state = 7;
	315	state = -1;
	316	return *out_string++;
	317	}
	318	else
	319	{
	320	while (ch != EOF && ch != '\n')
	321	ch = (*get) ();
58d4951d	322	state = 0;
6efd877d KR	323	return ch;
	324	}
	325	}
	326
	327	case 5:
	328	ch = (*get) ();
	329	if (lex[ch] == LEX_IS_STRINGQUOTE)
	330	{
	331	state = old_state;
	332	return ch;
	333	}
	334	else if (ch == '\\')
	335	{
	336	state = 6;
	337	return ch;
	338	}
	339	else if (ch == EOF)
	340	{
	341	as_warn ("End of file in string: inserted '\"'");
	342	state = old_state;
	343	(*unget) ('\n');
	344	return '"';
	345	}
	346	else
	347	{
	348	return ch;
	349	}
	350
	351	case 6:
	352	state = 5;
	353	ch = (*get) ();
	354	switch (ch)
	355	{
6d331d71 KR	356	/* Handle strings broken across lines, by turning '\n' into
6d331d71 KR	357	'\\' and 'n'. */
6efd877d KR	358	case '\n':
	359	(*unget) ('n');
	360	add_newlines++;
	361	return '\\';
	362
	363	case '"':
	364	case '\\':
	365	case 'b':
	366	case 'f':
	367	case 'n':
	368	case 'r':
	369	case 't':
fecd2382	370	#ifdef BACKSLASH_V
6efd877d	371	case 'v':
fecd2382	372	#endif /* BACKSLASH_V */
385ce433 JL	373	case 'x':
385ce433 JL	374	case 'X':
6efd877d KR	375	case '0':
	376	case '1':
	377	case '2':
	378	case '3':
	379	case '4':
	380	case '5':
	381	case '6':
	382	case '7':
	383	break;
7c2d4011	384	#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
6efd877d KR	385	default:
	386	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	387	break;
fecd2382	388	#else /* ONLY_STANDARD_ESCAPES */
6efd877d KR	389	default:
	390	/* Accept \x as x for any x */
	391	break;
fecd2382	392	#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011	393
6efd877d KR	394	case EOF:
	395	as_warn ("End of file in string: '\"' inserted");
	396	return '"';
	397	}
	398	return ch;
	399
	400	case 7:
	401	ch = (*get) ();
	402	state = 5;
	403	old_state = 8;
	404	return ch;
	405
	406	case 8:
	407	do
	408	ch = (*get) ();
	409	while (ch != '\n');
	410	state = 0;
	411	return ch;
	412	}
	413
9a7d824a	414	/* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d KR	415
	416	/* flushchar: */
	417	ch = (*get) ();
	418	recycle:
	419	if (ch == EOF)
	420	{
	421	if (state != 0)
	422	as_warn ("End of file not at end of a line: Newline inserted.");
	423	return ch;
	424	}
	425
	426	switch (lex[ch])
	427	{
	428	case LEX_IS_WHITESPACE:
	429	do
385ce433 JL	430	/* Preserve a single whitespace character at the beginning of
	431	a line. */
	432	if (state == 0)
	433	{
	434	state = 1;
	435	return ch;
	436	}
	437	else
	438	ch = (*get) ();
6efd877d KR	439	while (ch != EOF && IS_WHITESPACE (ch));
	440	if (ch == EOF)
	441	return ch;
	442
	443	if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	444	{
385ce433 JL	445	/* cpp never outputs a leading space before the #, so try to
	446	avoid being confused. */
	447	not_cpp_line = 1;
6efd877d	448	goto recycle;
fecd2382	449	}
7c2d4011	450	#ifdef MRI
6efd877d KR	451	(*unget) (ch); /* Put back */
6efd877d KR	452	return ' '; /* Always return one space at start of line */
7c2d4011	453	#endif
6efd877d KR	454
6efd877d KR	455	/* If we're in state 2, we've seen a non-white
6d331d71 KR	456	character followed by whitespace. If the next
	457	character is ':', this is whitespace after a label
	458	name which we can ignore. */
6efd877d KR	459	if (state == 2 && lex[ch] == LEX_IS_COLON)
	460	{
	461	state = 0;
	462	return ch;
	463	}
	464
	465	switch (state)
	466	{
	467	case 0:
	468	state++;
	469	goto recycle; /* Punted leading sp */
	470	case 1:
385ce433 JL	471	/* We can arrive here if we leave a leading whitespace character
	472	at the beginning of a line. */
	473	goto recycle;
6efd877d	474	case 2:
f6a91cc0	475	state = 3;
6efd877d KR	476	(*unget) (ch);
	477	return ' '; /* Sp after opco */
	478	case 3:
	479	goto recycle; /* Sp in operands */
9a7d824a ILT	480	case 9:
	481	case 10:
	482	state = 10; /* Sp after symbol char */
	483	goto recycle;
6efd877d KR	484	default:
	485	BAD_CASE (state);
	486	}
	487	break;
	488
	489	case LEX_IS_TWOCHAR_COMMENT_1ST:
	490	ch2 = (*get) ();
	491	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	492	{
	493	for (;;)
	494	{
	495	do
	496	{
	497	ch2 = (*get) ();
	498	if (ch2 != EOF && IS_NEWLINE (ch2))
	499	add_newlines++;
fecd2382	500	}
6efd877d KR	501	while (ch2 != EOF &&
	502	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	503
	504	while (ch2 != EOF &&
	505	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	506	{
	507	ch2 = (*get) ();
fecd2382	508	}
6efd877d KR	509
	510	if (ch2 == EOF
	511	|| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382	512	break;
6efd877d KR	513	(*unget) (ch);
	514	}
	515	if (ch2 == EOF)
	516	as_warn ("End of file in multiline comment");
	517
	518	ch = ' ';
	519	goto recycle;
	520	}
	521	else
	522	{
	523	if (ch2 != EOF)
	524	(*unget) (ch2);
9a7d824a ILT	525	if (state == 9 || state == 10)
9a7d824a ILT	526	state = 3;
6efd877d KR	527	return ch;
	528	}
	529	break;
	530
	531	case LEX_IS_STRINGQUOTE:
9a7d824a ILT	532	if (state == 9 || state == 10)
	533	old_state = 3;
	534	else
	535	old_state = state;
6efd877d KR	536	state = 5;
	537	return ch;
	538	#ifndef MRI
a39116f1	539	#ifndef IEEE_STYLE
6efd877d KR	540	case LEX_IS_ONECHAR_QUOTE:
	541	ch = (*get) ();
	542	if (ch == EOF)
	543	{
	544	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	545	ch = 0;
	546	}
	547	if (ch == '\\')
	548	{
	549	ch = (*get) ();
	550	ch = process_escape (ch);
	551	}
	552	sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011	553
6efd877d	554
9a7d824a	555	/* None of these 'x constants for us. We want 'x'. */
6efd877d KR	556	if ((ch = (*get) ()) != '\'')
6efd877d KR	557	{
fecd2382	558	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d	559	as_warn ("Missing close quote: (assumed)");
fecd2382	560	#else
6efd877d	561	(*unget) (ch);
fecd2382	562	#endif
6efd877d KR	563	}
	564	if (strlen (out_buf) == 1)
	565	{
	566	return out_buf[0];
	567	}
9a7d824a ILT	568	if (state == 9 \|\| state == 10)
	569	old_state = 3;
	570	else
	571	old_state = state;
6efd877d KR	572	state = -1;
	573	out_string = out_buf;
	574	return *out_string++;
7c2d4011	575	#endif
a39116f1	576	#endif
6efd877d	577	case LEX_IS_COLON:
9a7d824a ILT	578	if (state == 9 \|\| state == 10)
	579	state = 3;
	580	else if (state != 3)
6efd877d KR	581	state = 0;
	582	return ch;
	583
	584	case LEX_IS_NEWLINE:
	585	/* Roll out a bunch of newlines from inside comments, etc. */
	586	if (add_newlines)
	587	{
	588	--add_newlines;
	589	(*unget) (ch);
	590	}
	591	/* fall thru into... */
	592
	593	case LEX_IS_LINE_SEPARATOR:
	594	state = 0;
	595	return ch;
	596
	597	case LEX_IS_LINE_COMMENT_START:
9a7d824a	598	if (state == 0) /* Only comment at start of line. */
6efd877d	599	{
9a7d824a ILT	600	/* FIXME-someday: The two character comment stuff was badly
	601	thought out. On i386, we want '/' as line comment start
	602	AND we want C style comments. hence this hack. The
	603	whole lexical process should be reworked. xoxorich. */
	604	if (ch == '/')
f6a91cc0	605	{
9a7d824a ILT	606	ch2 = (*get) ();
	607	if (ch2 == '*')
	608	{
	609	state = -2;
	610	return (do_scrub_next_char (get, unget));
	611	}
	612	else
	613	{
	614	(*unget) (ch2);
	615	}
	616	} /* bad hack */
6efd877d	617
385ce433 JL	618	if (ch != '#')
	619	not_cpp_line = 1;
	620
9a7d824a	621	do
6efd877d	622	ch = (*get) ();
9a7d824a	623	while (ch != EOF && IS_WHITESPACE (ch));
6efd877d	624	if (ch == EOF)
9a7d824a ILT	625	{
	626	as_warn ("EOF in comment: Newline inserted");
	627	return '\n';
	628	}
385ce433	629	if (ch < '0' \|\| ch > '9' \|\| not_cpp_line)
9a7d824a ILT	630	{
	631	/* Non-numerics: Eat whole comment line */
	632	while (ch != EOF && !IS_NEWLINE (ch))
	633	ch = (*get) ();
	634	if (ch == EOF)
	635	as_warn ("EOF in Comment: Newline inserted");
	636	state = 0;
	637	return '\n';
	638	}
	639	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	640	(*unget) (ch);
	641	old_state = 4;
	642	state = -1;
	643	out_string = ".appline ";
	644	return *out_string++;
6efd877d	645	}
6efd877d	646
9a7d824a ILT	647	/* We have a line comment character which is not at the start of
	648	a line. If this is also a normal comment character, fall
	649	through. Otherwise treat it as a default character. */
	650	if (strchr (comment_chars, ch) == NULL)
	651	goto de_fault;
	652	/* Fall through. */
6efd877d KR	653	case LEX_IS_COMMENT_START:
	654	do
	655	ch = (*get) ();
	656	while (ch != EOF && !IS_NEWLINE (ch));
	657	if (ch == EOF)
	658	as_warn ("EOF in comment: Newline inserted");
	659	state = 0;
	660	return '\n';
	661
f6a91cc0	662	case LEX_IS_SYMBOL_COMPONENT:
9a7d824a ILT	663	if (state == 10)
	664	{
	665	/* This is a symbol character following another symbol
	666	character, with whitespace in between. We skipped the
	667	whitespace earlier, so output it now. */
	668	(*unget) (ch);
	669	state = 3;
	670	return ' ';
	671	}
f6a91cc0 ILT	672	if (state == 3)
	673	state = 9;
	674	/* Fall through. */
6efd877d KR	675	default:
	676	de_fault:
	677	/* Some relatively `normal' character. */
	678	if (state == 0)
	679	{
	680	state = 2; /* Now seeing opcode */
	681	return ch;
fecd2382	682	}
6efd877d KR	683	else if (state == 1)
	684	{
	685	state = 2; /* Ditto */
	686	return ch;
	687	}
f6a91cc0 ILT	688	else if (state == 9)
	689	{
	690	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	691	state = 3;
	692	return ch;
	693	}
9a7d824a ILT	694	else if (state == 10)
	695	{
	696	state = 3;
	697	return ch;
	698	}
6efd877d KR	699	else
	700	{
	701	return ch; /* Opcode or operands already */
	702	}
	703	}
	704	return -1;
fecd2382 RP	705	}
	706
	707	#ifdef TEST
	708
/* Comment-character tables for the stand-alone TEST build.

   comment_chars is searched with strchr by the scrubber when it meets a
   line-comment character that is not at the start of a line, to decide
   whether that character also starts an ordinary comment.  */
const char comment_chars[] = "|";

/* NOTE(review): presumably the characters that introduce a comment only
   at the start of a line; its use is not visible in this part of the
   file -- confirm against app_begin.  */
const char line_comment_chars[] = "#";
fecd2382	711
/* Input callbacks for the TEST driver.  The scrubber fetches characters
   with (*get) () and pushes them back with (*unget) (ch), so the driver
   must supply a pair of functions rather than a FILE pointer.
   NOTE(review): the callback types are inferred from those call sites;
   confirm against the declaration of do_scrub_next_char.  */

static int
test_get ()
{
  return getc (stdin);
}

static void
test_unget (ch)
     int ch;
{
  /* ungetc ignores EOF, and only one character of pushback is
     guaranteed by the C library.
     NOTE(review): confirm the scrubber never pushes back more than one
     character between reads.  */
  ungetc (ch, stdin);
}

/* Stand-alone test driver (TEST builds only): initialise the scrubber,
   then copy scrubbed standard input to standard output until the
   scrubber returns EOF.  Returns 0.  */

int
main ()
{
  int ch;

  app_begin ();
  while ((ch = do_scrub_next_char (test_get, test_unget)) != EOF)
    putc (ch, stdout);
  return 0;
}
fecd2382 RP	720
/* Warning reporter for the TEST build: the scrubber calls as_warn for
   its diagnostics.  Writes STR to stderr followed by a newline.  */

as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}
6efd877d	727
fecd2382 RP	728	#endif
fecd2382 RP	729
fecd2382	730	/* end of app.c */