[deliverable/binutils-gdb.git] / gas / app.c

/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   */
/* This is the Assembler Pre-Processor
   Copyright (C) 1987 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

/* Character class of every possible input byte, indexed by character value
   and filled in by do_scrub_begin ().  An entry left at 0 has no special
   class and is handled by the default case of the scrubber.  */
static char lex[256];
/* Characters that do_scrub_begin () classifies as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
/* Class tests.  C is used directly as an index into lex[], so it must be
   a value in the range 0..255; in particular it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the character-class table lex[] used by the scrubber.

   The built-in defaults are installed first.  The symbol characters and
   then the comment_chars, line_comment_chars and line_separator_chars
   tables are applied on top, in that order, so when a character appears
   in more than one place the last assignment wins.  '/' and '*' are made
   the two-character comment delimiters only if nothing has claimed them.

   Table characters are converted to unsigned char before being used as
   an index: a plain char with the high bit set would otherwise be a
   negative index on hosts where char is signed.  */
void 
do_scrub_begin ()
{
  const char *p;

  lex[' '] = LEX_IS_WHITESPACE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex['\n'] = LEX_IS_NEWLINE;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['"'] = LEX_IS_STRINGQUOTE;
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
  lex[':'] = LEX_IS_COLON;

#ifdef SINGLE_QUOTE_STRINGS
  lex['\''] = LEX_IS_STRINGQUOTE;
#endif

  /* Note that these override the previous defaults, e.g. if ';'
     is a comment char, then it isn't a line separator.  */
  for (p = symbol_chars; *p; ++p)
    {
      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
    }				/* declare symbol characters */

  for (p = comment_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
    }				/* declare comment chars */

  for (p = line_comment_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
    }				/* declare line comment chars */

  for (p = line_separator_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
    }				/* declare line separators */

  /* Only allow slash-star comments if slash is not in use */
  if (lex['/'] == 0)
    {
      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
    }
  /* FIXME-soon.  This is a bad hack but otherwise, we
     can't do c-style comments when '/' is a line
     comment char. xoxorich. */
  if (lex['*'] == 0)
    {
      lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
    }
}				/* do_scrub_begin() */

/* Stream used by scrub_from_file () and scrub_to_file ().  */
FILE *scrub_file;

/* Read one character from scrub_file and hand back whatever getc
   reported, which is EOF once the stream is exhausted.  */
int 
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}

/* Push CH back onto scrub_file with ungetc; the result of ungetc is
   deliberately not examined.  */
void 
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* In-memory scrubber input: scrub_string points at the next character to
   read and scrub_last_string at the position where reading stops.  */
char *scrub_string;
char *scrub_last_string;

/* Return the next character of the in-memory input, or EOF once
   scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, the way getc
   returns it.  A plain char would be sign-extended on hosts where char
   is signed, so byte 0xff would be indistinguishable from EOF and any
   other high-bit byte would become a negative index into lex[].  */
int 
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push CH back so that the next scrub_from_string () call returns it.
   The character is stored in the byte just before scrub_string.

   A request to push back EOF is ignored, matching ungetc: nothing was
   consumed to reach end of input, so storing it would overwrite a byte
   of the buffer that was never read as EOF.  */
void 
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */
/* Current state of the do_scrub_next_char () state machine.  */
static int state;
/* State to return to once a temporary state (string output, comment
   skipping, quoted string) is finished.  */
static int old_state;
/* Next character of the pending literal text emitted in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];
/* Number of newlines swallowed inside comments or continued strings that
   still have to be emitted again.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Snapshot of the scrubber built by app_push () and consumed by
   app_pop ().  Each member mirrors the file-scope variable of the same
   name.  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];	/* same size as the file-scope out_buf */
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };

/* Save the complete scrubber state in a freshly allocated struct app_save
   and return it as an opaque pointer.  The caller hands that pointer to
   app_pop () to restore the state; app_pop () frees it.

   The contents of out_buf are copied INTO the save area.  memcpy takes
   the destination first; with the arguments the other way round the live
   out_buf would be overwritten with the uninitialised new allocation.  */
char *
app_push ()
{
  register struct app_save *saved;

  saved = (struct app_save *) xmalloc (sizeof (*saved));
  saved->state = state;
  saved->old_state = old_state;
  saved->out_string = out_string;
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
  saved->add_newlines = add_newlines;
  saved->scrub_string = scrub_string;
  saved->scrub_last_string = scrub_last_string;
  saved->scrub_file = scrub_file;

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *) saved;
}

/* Restore the scrubber state from ARG, a pointer previously returned by
   app_push (), and free it.  ARG must not be used afterwards.

   The saved buffer is copied back INTO out_buf (destination first), so
   an out_string that pointed into out_buf when the state was saved sees
   the text it was in the middle of emitting.  */
void 
app_pop (arg)
     char *arg;
{
  register struct app_save *saved = (struct app_save *) arg;

  /* There is no do_scrub_end (). */
  state = saved->state;
  old_state = saved->old_state;
  out_string = saved->out_string;
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
  add_newlines = saved->add_newlines;
  scrub_string = saved->scrub_string;
  scrub_last_string = saved->scrub_last_string;
  scrub_file = saved->scrub_file;

  free (arg);
}				/* app_pop() */

/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
/* Map CH, the character that followed a backslash, to the character the
   escape denotes.  'b', 'f', 'n', 'r' and 't' become the corresponding
   control characters; anything else, the two quote characters included,
   is returned unchanged.  */
int 
process_escape (ch)
     char ch;
{
  int result = ch;

  if (ch == 'b')
    result = '\b';
  else if (ch == 'f')
    result = '\f';
  else if (ch == 'n')
    result = '\n';
  else if (ch == 'r')
    result = '\r';
  else if (ch == 't')
    result = '\t';

  return result;
}
/* Return the next character of scrubbed assembler input, or EOF.

   GET is called with no arguments and returns the next raw input
   character, or EOF at end of input.  UNGET is called with one character
   and pushes it back so that the next GET returns it.

   The scrubber's position is kept in the file-scope variables state,
   old_state, out_string, out_buf and add_newlines, so each call carries
   on from where the previous one stopped.  */
int 
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .appline, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
     constructs like ``.loc 1 20''.  This was turning into ``.loc
     120''.  States 9 and 10 ensure that a space is never dropped in
     between characters which could appear in a identifier.  Ian
     Taylor, ian@cygnus.com.  */

  register int ch, ch2 = 0;

  switch (state)
    {
    case -1:
      /* Emit the pending literal one character per call.  */
      ch = *out_string++;
      if (*out_string == 0)
        {
          state = old_state;
          old_state = 3;
        }
      return ch;

    case -2:
      /* Skip a slash-star comment that began at the start of a line.  A
         newline or EOF is handed back to the caller with the state left
         at -2, so skipping resumes on the next call.  */
      for (;;)
        {
          do
            {
              ch = (*get) ();
            }
          while (ch != EOF && ch != '\n' && ch != '*');
          if (ch == '\n' || ch == EOF)
            return ch;

          /* At this point, ch must be a '*' */
          while ((ch = (*get) ()) == '*')
            {
              ;
            }
          if (ch == EOF || ch == '/')
            break;
          (*unget) (ch);
        }
      state = old_state;
      return ' ';

    case 4:
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
        return ch;
      else
        {
          while (ch != EOF && IS_WHITESPACE (ch))
            ch = (*get) ();
          if (ch == '"')
            {
              (*unget) (ch);
              out_string = "\n.appfile ";
              old_state = 7;
              state = -1;
              return *out_string++;
            }
          else
            {
              while (ch != EOF && ch != '\n')
                ch = (*get) ();
              return ch;
            }
        }

    case 5:
      ch = (*get) ();
      /* EOF has to be tested before ch is used to index lex[]: EOF is
         negative and would read outside the table.  */
      if (ch == EOF)
        {
          as_warn ("End of file in string: inserted '\"'");
          state = old_state;
          (*unget) ('\n');
          return '"';
        }
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
        {
          state = old_state;
          return ch;
        }
      else if (ch == '\\')
        {
          state = 6;
          return ch;
        }
      else
        {
          return ch;
        }

    case 6:
      state = 5;
      ch = (*get) ();
      switch (ch)
        {
          /* Handle strings broken across lines, by turning '\n' into
             '\\' and 'n'.  */
        case '\n':
          (*unget) ('n');
          add_newlines++;
          return '\\';

        case '"':
        case '\\':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
#ifdef BACKSLASH_V
        case 'v':
#endif /* BACKSLASH_V */
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
          break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
        default:
          as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
          break;
#else /* ONLY_STANDARD_ESCAPES */
        default:
          /* Accept \x as x for any x */
          break;
#endif /* ONLY_STANDARD_ESCAPES */

        case EOF:
          as_warn ("End of file in string: '\"' inserted");
          return '"';
        }
      return ch;

    case 7:
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Discard the rest of the line.  Stop at EOF as well as at a
         newline; otherwise input that ends before the newline would
         keep this loop running forever.  */
      do
        ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 or 9 through 10 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
        as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      do
        ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
        return ch;

      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
        {
          goto recycle;
        }
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
         character followed by whitespace.  If the next
         character is ':', this is whitespace after a label
         name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
        {
          state = 0;
          return ch;
        }

      switch (state)
        {
        case 0:
          state++;
          goto recycle;		/* Punted leading sp */
        case 1:
          BAD_CASE (state);	/* We can't get here */
        case 2:
          state = 3;
          (*unget) (ch);
          return ' ';		/* Sp after opco */
        case 3:
          goto recycle;		/* Sp in operands */
        case 9:
        case 10:
          state = 10;		/* Sp after symbol char */
          goto recycle;
        default:
          BAD_CASE (state);
        }
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
        {
          for (;;)
            {
              do
                {
                  ch2 = (*get) ();
                  if (ch2 != EOF && IS_NEWLINE (ch2))
                    add_newlines++;
                }
              while (ch2 != EOF &&
                     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

              while (ch2 != EOF &&
                     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
                {
                  ch2 = (*get) ();
                }

              if (ch2 == EOF
                  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
                break;
              /* Push back the character that followed the run of '*',
                 not the '/' that opened the comment.  The loop above
                 then reads it again, so a newline directly after a '*'
                 is still counted in add_newlines.  */
              (*unget) (ch2);
            }
          if (ch2 == EOF)
            as_warn ("End of file in multiline comment");

          ch = ' ';
          goto recycle;
        }
      else
        {
          if (ch2 != EOF)
            (*unget) (ch2);
          if (state == 9 || state == 10)
            state = 3;
          return ch;
        }
      break;

    case LEX_IS_STRINGQUOTE:
      if (state == 9 || state == 10)
        old_state = 3;
      else
        old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      ch = (*get) ();
      if (ch == EOF)
        {
          as_warn ("End-of-file after a one-character quote; \\000 inserted");
          ch = 0;
        }
      if (ch == '\\')
        {
          ch = (*get) ();
          ch = process_escape (ch);
        }
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.  */
      if ((ch = (*get) ()) != '\'')
        {
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
          as_warn ("Missing close quote: (assumed)");
#else
          (*unget) (ch);
#endif
        }
      /* A single digit is returned directly; longer text is emitted
         through state -1.  */
      if (strlen (out_buf) == 1)
        {
          return out_buf[0];
        }
      if (state == 9 || state == 10)
        old_state = 3;
      else
        old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state == 9 || state == 10)
        state = 3;
      else if (state != 3)
        state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
        {
          --add_newlines;
          (*unget) (ch);
        }
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state == 0)		/* Only comment at start of line.  */
        {
          /* FIXME-someday: The two character comment stuff was badly
             thought out.  On i386, we want '/' as line comment start
             AND we want C style comments.  hence this hack.  The
             whole lexical process should be reworked.  xoxorich.  */
          if (ch == '/')
            {
              ch2 = (*get) ();
              if (ch2 == '*')
                {
                  state = -2;
                  return (do_scrub_next_char (get, unget));
                }
              else
                {
                  (*unget) (ch2);
                }
            }			/* bad hack */

          do
            ch = (*get) ();
          while (ch != EOF && IS_WHITESPACE (ch));
          if (ch == EOF)
            {
              as_warn ("EOF in comment:  Newline inserted");
              return '\n';
            }
          if (ch < '0' || ch > '9')
            {
              /* Non-numerics:  Eat whole comment line */
              while (ch != EOF && !IS_NEWLINE (ch))
                ch = (*get) ();
              if (ch == EOF)
                as_warn ("EOF in Comment: Newline inserted");
              state = 0;
              return '\n';
            }
          /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
          (*unget) (ch);
          old_state = 4;
          state = -1;
          out_string = ".appline ";
          return *out_string++;
        }

      /* We have a line comment character which is not at the start of
         a line.  If this is also a normal comment character, fall
         through.  Otherwise treat it as a default character.  */
      if (strchr (comment_chars, ch) == NULL)
        goto de_fault;
      /* Fall through.  */
    case LEX_IS_COMMENT_START:
      do
        ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
        as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    case LEX_IS_SYMBOL_COMPONENT:
      if (state == 10)
        {
          /* This is a symbol character following another symbol
             character, with whitespace in between.  We skipped the
             whitespace earlier, so output it now.  */
          (*unget) (ch);
          state = 3;
          return ' ';
        }
      if (state == 3)
        state = 9;
      /* Fall through.  */
    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
        {
          state = 2;		/* Now seeing opcode */
          return ch;
        }
      else if (state == 1)
        {
          state = 2;		/* Ditto */
          return ch;
        }
      else if (state == 9)
        {
          if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
            state = 3;
          return ch;
        }
      else if (state == 10)
        {
          state = 3;
          return ch;
        }
      else
        {
          return ch;		/* Opcode or operands already */
        }
    }
  return -1;
}

#ifdef TEST

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";

main ()
{
  int ch;

  app_begin ();
  while ((ch = do_scrub_next_char (stdin)) != EOF)
    putc (ch, stdout);
}

/* as_warn for the TEST build: write STR followed by a newline to
   stderr.  */
as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}

#endif

/* end of app.c */
Commit	Line	Data
3340f7e5	1	/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d	2
a39116f1 RP	3	Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
a39116f1 RP	4	*/
fecd2382 RP	5	/* This is the Assembler Pre-Processor
fecd2382 RP	6	Copyright (C) 1987 Free Software Foundation, Inc.
6efd877d	7
a39116f1	8	This file is part of GAS, the GNU Assembler.
6efd877d	9
a39116f1 RP	10	GAS is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
6efd877d	14
a39116f1 RP	15	GAS is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
6efd877d	19
a39116f1 RP	20	You should have received a copy of the GNU General Public License
	21	along with GAS; see the file COPYING. If not, write to
	22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382 RP	23
	24	/* App, the assembler pre-processor. This pre-processor strips out excess
	25	spaces, turns single-quoted characters into a decimal constant, and turns
9a7d824a	26	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
be06bdcd	27	pair. This needs better error-handling.
a39116f1	28	*/
fecd2382 RP	29
fecd2382 RP	30	#include <stdio.h>
6efd877d	31	#include "as.h" /* For BAD_CASE() only */
fecd2382	32
3340f7e5	33	#if (__STDC__ != 1) && !defined(const)
6efd877d	34	#define const /* Nothing */
fecd2382 RP	35	#endif
fecd2382 RP	36
6efd877d	37	static char lex[256];
6d331d71	38	static const char symbol_chars[] =
6efd877d	39	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	40
	41	#define LEX_IS_SYMBOL_COMPONENT 1
	42	#define LEX_IS_WHITESPACE 2
	43	#define LEX_IS_LINE_SEPARATOR 3
	44	#define LEX_IS_COMMENT_START 4
	45	#define LEX_IS_LINE_COMMENT_START 5
	46	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	47	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	48	#define LEX_IS_STRINGQUOTE 8
	49	#define LEX_IS_COLON 9
	50	#define LEX_IS_NEWLINE 10
	51	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	52	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	53	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	54	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	55	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	56	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	57	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	58
	59	/* FIXME-soon: The entire lexer/parser thingy should be
	60	built statically at compile time rather than dynamically
	61	each and every time the assembler is run. xoxorich. */
fecd2382	62
6efd877d KR	63	void
	64	do_scrub_begin ()
	65	{
	66	const char *p;
	67
	68	lex[' '] = LEX_IS_WHITESPACE;
	69	lex['\t'] = LEX_IS_WHITESPACE;
	70	lex['\n'] = LEX_IS_NEWLINE;
	71	lex[';'] = LEX_IS_LINE_SEPARATOR;
	72	lex['"'] = LEX_IS_STRINGQUOTE;
	73	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	74	lex[':'] = LEX_IS_COLON;
7c2d4011	75
be06bdcd SC	76
	77
	78	#ifdef SINGLE_QUOTE_STRINGS
	79	lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011	80	#endif
be06bdcd	81
6efd877d	82	/* Note that these override the previous defaults, e.g. if ';'
be06bdcd	83
fecd2382	84	is a comment char, then it isn't a line separator. */
6efd877d KR	85	for (p = symbol_chars; *p; ++p)
	86	{
	87	lex[*p] = LEX_IS_SYMBOL_COMPONENT;
	88	} /* declare symbol characters */
	89
6efd877d KR	90	for (p = comment_chars; *p; p++)
	91	{
	92	lex[*p] = LEX_IS_COMMENT_START;
	93	} /* declare comment chars */
	94
9a7d824a ILT	95	for (p = line_comment_chars; *p; p++)
	96	{
	97	lex[*p] = LEX_IS_LINE_COMMENT_START;
	98	} /* declare line comment chars */
	99
6efd877d KR	100	for (p = line_separator_chars; *p; p++)
	101	{
	102	lex[*p] = LEX_IS_LINE_SEPARATOR;
	103	} /* declare line separators */
	104
	105	/* Only allow slash-star comments if slash is not in use */
	106	if (lex['/'] == 0)
	107	{
	108	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	109	}
	110	/* FIXME-soon. This is a bad hack but otherwise, we
a39116f1 RP	111	can't do c-style comments when '/' is a line
a39116f1 RP	112	comment char. xoxorich. */
6efd877d KR	113	if (lex['*'] == 0)
	114	{
	115	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	116	}
	117	} /* do_scrub_begin() */
fecd2382 RP	118
	119	FILE *scrub_file;
	120
6efd877d KR	121	int
	122	scrub_from_file ()
	123	{
	124	return getc (scrub_file);
fecd2382 RP	125	}
fecd2382 RP	126
6efd877d KR	127	void
	128	scrub_to_file (ch)
	129	int ch;
fecd2382	130	{
6efd877d KR	131	ungetc (ch, scrub_file);
6efd877d KR	132	} /* scrub_to_file() */
fecd2382 RP	133
	134	char *scrub_string;
	135	char *scrub_last_string;
	136
6efd877d KR	137	int
	138	scrub_from_string ()
	139	{
	140	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	141	} /* scrub_from_string() */
fecd2382	142
6efd877d KR	143	void
	144	scrub_to_string (ch)
	145	int ch;
fecd2382	146	{
6efd877d KR	147	*--scrub_string = ch;
6efd877d KR	148	} /* scrub_to_string() */
fecd2382 RP	149
	150	/* Saved state of the scrubber */
	151	static int state;
	152	static int old_state;
	153	static char *out_string;
	154	static char out_buf[20];
	155	static int add_newlines = 0;
	156
	157	/* Data structure for saving the state of app across #include's. Note that
	158	app is called asynchronously to the parsing of the .include's, so our
	159	state at the time .include is interpreted is completely unrelated.
	160	That's why we have to save it all. */
	161
6efd877d KR	162	struct app_save
	163	{
	164	int state;
	165	int old_state;
	166	char *out_string;
	167	char out_buf[sizeof (out_buf)];
	168	int add_newlines;
	169	char *scrub_string;
	170	char *scrub_last_string;
	171	FILE *scrub_file;
	172	};
	173
	174	char *
	175	app_push ()
	176	{
7c2d4011 SC	177	register struct app_save *saved;
7c2d4011 SC	178
6efd877d KR	179	saved = (struct app_save ) xmalloc (sizeof (saved));
	180	saved->state = state;
	181	saved->old_state = old_state;
	182	saved->out_string = out_string;
6d331d71	183	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d KR	184	saved->add_newlines = add_newlines;
6efd877d KR	185	saved->scrub_string = scrub_string;
7c2d4011	186	saved->scrub_last_string = scrub_last_string;
6efd877d	187	saved->scrub_file = scrub_file;
7c2d4011 SC	188
7c2d4011 SC	189	/* do_scrub_begin() is not useful, just wastes time. */
6efd877d	190	return (char *) saved;
fecd2382 RP	191	}
fecd2382 RP	192
6efd877d KR	193	void
	194	app_pop (arg)
	195	char *arg;
fecd2382	196	{
6efd877d KR	197	register struct app_save saved = (struct app_save ) arg;
	198
	199	/* There is no do_scrub_end (). */
	200	state = saved->state;
	201	old_state = saved->old_state;
	202	out_string = saved->out_string;
	203	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
	204	add_newlines = saved->add_newlines;
	205	scrub_string = saved->scrub_string;
	206	scrub_last_string = saved->scrub_last_string;
	207	scrub_file = saved->scrub_file;
	208
	209	free (arg);
	210	} /* app_pop() */
	211
6d331d71 KR	212	/* @@ This assumes that \n &c are the same on host and target. This is not
6d331d71 KR	213	necessarily true. */
6efd877d KR	214	int
	215	process_escape (ch)
	216	char ch;
7c2d4011	217	{
6efd877d KR	218	switch (ch)
	219	{
	220	case 'b':
	221	return '\b';
	222	case 'f':
	223	return '\f';
	224	case 'n':
	225	return '\n';
	226	case 'r':
	227	return '\r';
	228	case 't':
	229	return '\t';
	230	case '\'':
	231	return '\'';
	232	case '"':
6d331d71	233	return '\"';
6efd877d KR	234	default:
	235	return ch;
	236	}
7c2d4011	237	}
6efd877d KR	238	int
	239	do_scrub_next_char (get, unget)
	240	int (*get) ();
	241	void (*unget) ();
fecd2382	242	{
6efd877d	243	/*State 0: beginning of normal line
a39116f1 RP	244	1: After first whitespace on line (flush more white)
	245	2: After first non-white (opcode) on line (keep 1white)
	246	3: after second white on line (into operands) (flush white)
	247	4: after putting out a .line, put out digits
	248	5: parsing a string, then go to old-state
	249	6: putting out \ escape in a "d string.
9a7d824a ILT	250	7: After putting out a .appfile, put out string.
9a7d824a ILT	251	8: After putting out a .appfile string, flush until newline.
f6a91cc0	252	9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a	253	10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1 RP	254	-1: output string in out_string and go to the state in old_state
	255	-2: flush text until a '*' '/' is seen, then go to state old_state
	256	*/
6efd877d	257
9a7d824a ILT	258	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
	259	constructs like ``.loc 1 20''. This was turning into ``.loc
	260	120''. States 9 and 10 ensure that a space is never dropped in
	261	between characters which could appear in a identifier. Ian
	262	Taylor, ian@cygnus.com. */
f6a91cc0	263
6efd877d KR	264	register int ch, ch2 = 0;
	265
	266	switch (state)
	267	{
	268	case -1:
	269	ch = *out_string++;
	270	if (*out_string == 0)
	271	{
	272	state = old_state;
	273	old_state = 3;
	274	}
	275	return ch;
	276
	277	case -2:
	278	for (;;)
	279	{
	280	do
	281	{
	282	ch = (*get) ();
	283	}
	284	while (ch != EOF && ch != '\n' && ch != '*');
	285	if (ch == '\n' \|\| ch == EOF)
	286	return ch;
	287
	288	/* At this point, ch must be a '' /
	289	while ((ch = (get) ()) == '')
	290	{
	291	;
	292	}
	293	if (ch == EOF \|\| ch == '/')
	294	break;
	295	(*unget) (ch);
	296	}
	297	state = old_state;
	298	return ' ';
	299
	300	case 4:
	301	ch = (*get) ();
	302	if (ch == EOF \|\| (ch >= '0' && ch <= '9'))
	303	return ch;
	304	else
	305	{
	306	while (ch != EOF && IS_WHITESPACE (ch))
	307	ch = (*get) ();
	308	if (ch == '"')
	309	{
	310	(*unget) (ch);
9a7d824a	311	out_string = "\n.appfile ";
6efd877d KR	312	old_state = 7;
	313	state = -1;
	314	return *out_string++;
	315	}
	316	else
	317	{
	318	while (ch != EOF && ch != '\n')
	319	ch = (*get) ();
	320	return ch;
	321	}
	322	}
	323
	324	case 5:
	325	ch = (*get) ();
	326	if (lex[ch] == LEX_IS_STRINGQUOTE)
	327	{
	328	state = old_state;
	329	return ch;
	330	}
	331	else if (ch == '\\')
	332	{
	333	state = 6;
	334	return ch;
	335	}
	336	else if (ch == EOF)
	337	{
	338	as_warn ("End of file in string: inserted '\"'");
	339	state = old_state;
	340	(*unget) ('\n');
	341	return '"';
	342	}
	343	else
	344	{
	345	return ch;
	346	}
	347
	348	case 6:
	349	state = 5;
	350	ch = (*get) ();
	351	switch (ch)
	352	{
6d331d71 KR	353	/* Handle strings broken across lines, by turning '\n' into
6d331d71 KR	354	'\\' and 'n'. */
6efd877d KR	355	case '\n':
	356	(*unget) ('n');
	357	add_newlines++;
	358	return '\\';
	359
	360	case '"':
	361	case '\\':
	362	case 'b':
	363	case 'f':
	364	case 'n':
	365	case 'r':
	366	case 't':
fecd2382	367	#ifdef BACKSLASH_V
6efd877d	368	case 'v':
fecd2382	369	#endif /* BACKSLASH_V */
6efd877d KR	370	case '0':
	371	case '1':
	372	case '2':
	373	case '3':
	374	case '4':
	375	case '5':
	376	case '6':
	377	case '7':
	378	break;
7c2d4011	379	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
6efd877d KR	380	default:
	381	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	382	break;
fecd2382	383	#else /* ONLY_STANDARD_ESCAPES */
6efd877d KR	384	default:
	385	/* Accept \x as x for any x */
	386	break;
fecd2382	387	#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011	388
6efd877d KR	389	case EOF:
	390	as_warn ("End of file in string: '\"' inserted");
	391	return '"';
	392	}
	393	return ch;
	394
	395	case 7:
	396	ch = (*get) ();
	397	state = 5;
	398	old_state = 8;
	399	return ch;
	400
	401	case 8:
	402	do
	403	ch = (*get) ();
	404	while (ch != '\n');
	405	state = 0;
	406	return ch;
	407	}
	408
9a7d824a	409	/* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d KR	410
	411	/* flushchar: */
	412	ch = (*get) ();
	413	recycle:
	414	if (ch == EOF)
	415	{
	416	if (state != 0)
	417	as_warn ("End of file not at end of a line: Newline inserted.");
	418	return ch;
	419	}
	420
	421	switch (lex[ch])
	422	{
	423	case LEX_IS_WHITESPACE:
	424	do
	425	ch = (*get) ();
	426	while (ch != EOF && IS_WHITESPACE (ch));
	427	if (ch == EOF)
	428	return ch;
	429
	430	if (IS_COMMENT (ch) \|\| (state == 0 && IS_LINE_COMMENT (ch)) \|\| ch == '/' \|\| IS_LINE_SEPARATOR (ch))
	431	{
	432	goto recycle;
fecd2382	433	}
7c2d4011	434	#ifdef MRI
6efd877d KR	435	(unget) (ch); / Put back */
6efd877d KR	436	return ' '; /* Always return one space at start of line */
7c2d4011	437	#endif
6efd877d KR	438
6efd877d KR	439	/* If we're in state 2, we've seen a non-white
6d331d71 KR	440	character followed by whitespace. If the next
	441	character is ':', this is whitespace after a label
	442	name which we can ignore. */
6efd877d KR	443	if (state == 2 && lex[ch] == LEX_IS_COLON)
	444	{
	445	state = 0;
	446	return ch;
	447	}
	448
	449	switch (state)
	450	{
	451	case 0:
	452	state++;
	453	goto recycle; /* Punted leading sp */
	454	case 1:
	455	BAD_CASE (state); /* We can't get here */
	456	case 2:
f6a91cc0	457	state = 3;
6efd877d KR	458	(*unget) (ch);
	459	return ' '; /* Sp after opco */
	460	case 3:
	461	goto recycle; /* Sp in operands */
9a7d824a ILT	462	case 9:
	463	case 10:
	464	state = 10; /* Sp after symbol char */
	465	goto recycle;
6efd877d KR	466	default:
	467	BAD_CASE (state);
	468	}
	469	break;
	470
	471	case LEX_IS_TWOCHAR_COMMENT_1ST:
	472	ch2 = (*get) ();
	473	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	474	{
	475	for (;;)
	476	{
	477	do
	478	{
	479	ch2 = (*get) ();
	480	if (ch2 != EOF && IS_NEWLINE (ch2))
	481	add_newlines++;
fecd2382	482	}
6efd877d KR	483	while (ch2 != EOF &&
	484	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	485
	486	while (ch2 != EOF &&
	487	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	488	{
	489	ch2 = (*get) ();
fecd2382	490	}
6efd877d KR	491
	492	if (ch2 == EOF
	493	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382	494	break;
6efd877d KR	495	(*unget) (ch);
	496	}
	497	if (ch2 == EOF)
	498	as_warn ("End of file in multiline comment");
	499
	500	ch = ' ';
	501	goto recycle;
	502	}
	503	else
	504	{
	505	if (ch2 != EOF)
	506	(*unget) (ch2);
9a7d824a ILT	507	if (state == 9 \|\| state == 10)
9a7d824a ILT	508	state = 3;
6efd877d KR	509	return ch;
	510	}
	511	break;
	512
	513	case LEX_IS_STRINGQUOTE:
9a7d824a ILT	514	if (state == 9 \|\| state == 10)
	515	old_state = 3;
	516	else
	517	old_state = state;
6efd877d KR	518	state = 5;
	519	return ch;
	520	#ifndef MRI
a39116f1	521	#ifndef IEEE_STYLE
6efd877d KR	522	case LEX_IS_ONECHAR_QUOTE:
	523	ch = (*get) ();
	524	if (ch == EOF)
	525	{
	526	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	527	ch = 0;
	528	}
	529	if (ch == '\\')
	530	{
	531	ch = (*get) ();
	532	ch = process_escape (ch);
	533	}
	534	sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011	535
6efd877d	536
9a7d824a	537	/* None of these 'x constants for us. We want 'x'. */
6efd877d KR	538	if ((ch = (*get) ()) != '\'')
6efd877d KR	539	{
fecd2382	540	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d	541	as_warn ("Missing close quote: (assumed)");
fecd2382	542	#else
6efd877d	543	(*unget) (ch);
fecd2382	544	#endif
6efd877d KR	545	}
	546	if (strlen (out_buf) == 1)
	547	{
	548	return out_buf[0];
	549	}
9a7d824a ILT	550	if (state == 9 \|\| state == 10)
	551	old_state = 3;
	552	else
	553	old_state = state;
6efd877d KR	554	state = -1;
	555	out_string = out_buf;
	556	return *out_string++;
7c2d4011	557	#endif
a39116f1	558	#endif
6efd877d	559	case LEX_IS_COLON:
9a7d824a ILT	560	if (state == 9 \|\| state == 10)
	561	state = 3;
	562	else if (state != 3)
6efd877d KR	563	state = 0;
	564	return ch;
	565
	566	case LEX_IS_NEWLINE:
	567	/* Roll out a bunch of newlines from inside comments, etc. */
	568	if (add_newlines)
	569	{
	570	--add_newlines;
	571	(*unget) (ch);
	572	}
	573	/* fall thru into... */
	574
	575	case LEX_IS_LINE_SEPARATOR:
	576	state = 0;
	577	return ch;
	578
	579	case LEX_IS_LINE_COMMENT_START:
9a7d824a	580	if (state == 0) /* Only comment at start of line. */
6efd877d	581	{
9a7d824a ILT	582	/* FIXME-someday: The two character comment stuff was badly
	583	thought out. On i386, we want '/' as line comment start
	584	AND we want C style comments. hence this hack. The
	585	whole lexical process should be reworked. xoxorich. */
	586	if (ch == '/')
f6a91cc0	587	{
9a7d824a ILT	588	ch2 = (*get) ();
	589	if (ch2 == '*')
	590	{
	591	state = -2;
	592	return (do_scrub_next_char (get, unget));
	593	}
	594	else
	595	{
	596	(*unget) (ch2);
	597	}
	598	} /* bad hack */
6efd877d	599
9a7d824a	600	do
6efd877d	601	ch = (*get) ();
9a7d824a	602	while (ch != EOF && IS_WHITESPACE (ch));
6efd877d	603	if (ch == EOF)
9a7d824a ILT	604	{
	605	as_warn ("EOF in comment: Newline inserted");
	606	return '\n';
	607	}
	608	if (ch < '0' \|\| ch > '9')
	609	{
	610	/* Non-numerics: Eat whole comment line */
	611	while (ch != EOF && !IS_NEWLINE (ch))
	612	ch = (*get) ();
	613	if (ch == EOF)
	614	as_warn ("EOF in Comment: Newline inserted");
	615	state = 0;
	616	return '\n';
	617	}
	618	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	619	(*unget) (ch);
	620	old_state = 4;
	621	state = -1;
	622	out_string = ".appline ";
	623	return *out_string++;
6efd877d	624	}
6efd877d	625
9a7d824a ILT	626	/* We have a line comment character which is not at the start of
	627	a line. If this is also a normal comment character, fall
	628	through. Otherwise treat it as a default character. */
	629	if (strchr (comment_chars, ch) == NULL)
	630	goto de_fault;
	631	/* Fall through. */
6efd877d KR	632	case LEX_IS_COMMENT_START:
	633	do
	634	ch = (*get) ();
	635	while (ch != EOF && !IS_NEWLINE (ch));
	636	if (ch == EOF)
	637	as_warn ("EOF in comment: Newline inserted");
	638	state = 0;
	639	return '\n';
	640
f6a91cc0	641	case LEX_IS_SYMBOL_COMPONENT:
9a7d824a ILT	642	if (state == 10)
	643	{
	644	/* This is a symbol character following another symbol
	645	character, with whitespace in between. We skipped the
	646	whitespace earlier, so output it now. */
	647	(*unget) (ch);
	648	state = 3;
	649	return ' ';
	650	}
f6a91cc0 ILT	651	if (state == 3)
	652	state = 9;
	653	/* Fall through. */
6efd877d KR	654	default:
	655	de_fault:
	656	/* Some relatively `normal' character. */
	657	if (state == 0)
	658	{
	659	state = 2; /* Now seeing opcode */
	660	return ch;
fecd2382	661	}
6efd877d KR	662	else if (state == 1)
	663	{
	664	state = 2; /* Ditto */
	665	return ch;
	666	}
f6a91cc0 ILT	667	else if (state == 9)
	668	{
	669	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	670	state = 3;
	671	return ch;
	672	}
9a7d824a ILT	673	else if (state == 10)
	674	{
	675	state = 3;
	676	return ch;
	677	}
6efd877d KR	678	else
	679	{
	680	return ch; /* Opcode or operands already */
	681	}
	682	}
	683	return -1;
fecd2382 RP	684	}
	685
	686	#ifdef TEST
	687
6efd877d KR	688	const char comment_chars[] = "\|";
6efd877d KR	689	const char line_comment_chars[] = "#";
fecd2382	690
6efd877d	691	main ()
fecd2382	692	{
6efd877d KR	693	int ch;
	694
	695	app_begin ();
	696	while ((ch = do_scrub_next_char (stdin)) != EOF)
	697	putc (ch, stdout);
fecd2382 RP	698	}
fecd2382 RP	699
6efd877d KR	700	as_warn (str)
6efd877d KR	701	char *str;
fecd2382	702	{
6efd877d KR	703	fputs (str, stderr);
6efd877d KR	704	putc ('\n', stderr);
fecd2382	705	}
6efd877d	706
fecd2382 RP	707	#endif
fecd2382 RP	708
fecd2382	709	/* end of app.c */