[deliverable/binutils-gdb.git] / gas / app.c

/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   */
/* This is the Assembler Pre-Processor
   Copyright (C) 1987 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
   pair.  This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */
#include "read.h"

#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

/* Character classification table, indexed by character code.  Entries are
   filled in by do_scrub_begin (); an entry of 0 means the character has no
   special meaning to the scrubber.  */
static char lex[256];
/* Characters that do_scrub_begin () marks as LEX_IS_SYMBOL_COMPONENT.  */
static char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  Each character belongs to at most one
   class; later assignments in do_scrub_begin () replace earlier ones.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
/* Class predicates.  The argument is used directly as an index into lex[],
   so it must not be EOF; callers test for EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the lex[] classification table.

   Fixed defaults (blank, tab, newline, ';', '"', '\'', ':') are installed
   first, then the symbol characters, then the externally supplied
   line_comment_chars, comment_chars and line_separator_chars, in that
   order; a later assignment replaces an earlier one.  Finally '/' and '*'
   are given their slash-star comment roles, but only if nothing above has
   claimed them.

   Every table subscript taken from a string goes through unsigned char so
   that a character with the high bit set cannot become a negative index
   on hosts where plain char is signed.  */

void
do_scrub_begin ()
{
  const char *p;

  lex[' '] = LEX_IS_WHITESPACE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex['\n'] = LEX_IS_NEWLINE;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['"'] = LEX_IS_STRINGQUOTE;
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
  lex[':'] = LEX_IS_COLON;

#ifdef SINGLE_QUOTE_STRINGS
  /* Treat 'text' as a string rather than as a character constant.  */
  lex['\''] = LEX_IS_STRINGQUOTE;
#endif

  /* Note that these override the previous defaults, e.g. if ';'
     is a comment char, then it isn't a line separator.  */
  for (p = symbol_chars; *p; ++p)
    {
      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
    }				/* declare symbol characters */

  for (p = line_comment_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
    }				/* declare line comment chars */

  for (p = comment_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
    }				/* declare comment chars */

  for (p = line_separator_chars; *p; p++)
    {
      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
    }				/* declare line separators */

  /* Only allow slash-star comments if slash is not in use */
  if (lex['/'] == 0)
    {
      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
    }
  /* FIXME-soon.  This is a bad hack but otherwise, we
     can't do c-style comments when '/' is a line
     comment char. xoxorich. */
  if (lex['*'] == 0)
    {
      lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
    }
}				/* do_scrub_begin() */

/* Stream used by the file-based character source below.  */
FILE *scrub_file;

/* Character source for do_scrub_next_char (): fetch the next character
   from scrub_file, or EOF when getc () reports it.  */

int
scrub_from_file ()
{
  int next_char;

  next_char = getc (scrub_file);
  return next_char;
}

/* Push-back routine matching scrub_from_file (): hand CH back to
   scrub_file so that the next read sees it again.  The result of
   ungetc () is deliberately not examined.  */

void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* In-memory character source: scrub_string is the next character to be
   read and scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Character source for do_scrub_next_char (): return the next character
   of the in-memory buffer, or EOF once scrub_string has reached
   scrub_last_string.

   The character is returned as an unsigned char value, the same way
   getc () does it, so that a byte of 0xFF is not mistaken for EOF and no
   byte can turn into a negative index into the lex[] table.  */

int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back routine matching scrub_from_string (): step the read position
   back by one and store CH there.

   Pushing back EOF does nothing, as with ungetc (); the scrubber may hand
   back an EOF it has just read, and storing it would put a bogus byte in
   the buffer.  */

void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */
/* Current state of the do_scrub_next_char () state machine; the meaning
   of each value is listed at the top of that function.  */
static int state;
/* State to resume once a temporary state (-1, -2 or 5) has finished.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text that do_scrub_next_char () produces for a
   one-character quote; out_string points here while it is emitted.  */
static char out_buf[20];
/* Number of newlines consumed inside comments and strings that have not
   yet been re-emitted; each later newline is pushed back once per count.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member holds the value of the file-scope variable of the same
   name; app_push () allocates one of these and app_pop () frees it.  */

struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    /* Same size as the file-scope out_buf it shadows.  */
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };

/* Snapshot the scrubber's state.

   Returns an opaque pointer to a freshly allocated struct app_save that
   holds a copy of every scrubber variable.  Hand it to app_pop () to
   reinstate the state; app_pop () also frees it.  The live state is left
   unchanged.  */

char *
app_push ()
{
  register struct app_save *saved;

  saved = (struct app_save *) xmalloc (sizeof (*saved));
  saved->state = state;
  saved->old_state = old_state;
  saved->out_string = out_string;
  /* Destination first: the live buffer is copied INTO the snapshot.  The
     copy used to go the other way, which overwrote out_buf with
     uninitialized heap contents and saved nothing.  */
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
  saved->add_newlines = add_newlines;
  saved->scrub_string = scrub_string;
  saved->scrub_last_string = scrub_last_string;
  saved->scrub_file = scrub_file;

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *) saved;
}

/* Reinstate a scrubber state captured by app_push ().

   ARG is the pointer app_push () returned.  Every scrubber variable is
   overwritten from the snapshot and the snapshot is then freed, so ARG
   must not be used again afterwards.  */

void
app_pop (arg)
     char *arg;
{
  register struct app_save *saved = (struct app_save *) arg;

  /* There is no do_scrub_end (). */
  state = saved->state;
  old_state = saved->old_state;
  out_string = saved->out_string;
  /* Destination first: the snapshot is copied back INTO the live buffer.
     The copy used to go the other way, so out_buf was never restored.  */
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
  add_newlines = saved->add_newlines;
  scrub_string = saved->scrub_string;
  scrub_last_string = saved->scrub_last_string;
  scrub_file = saved->scrub_file;

  free (arg);
}				/* app_pop() */

/* Translate the character that follows a backslash in a character
   constant into the character it stands for.

   CH is the character after the backslash.  The letters b, f, n, r and t
   map to the corresponding control characters; every other character,
   including the two quote characters, stands for itself.  */

int
process_escape (ch)
     char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      /* \" is a double quote; this used to return a single quote.  */
      return '"';
    default:
      return ch;
    }
}
/* Return the next character of scrubbed assembler input, or EOF.

   GET is called with no arguments to fetch the next raw input character
   (EOF at end of input); UNGET is called with one character to push it
   back onto that input.

   The function is a state machine driven by the file-scope variables
   state, old_state, out_string, out_buf and add_newlines, so successive
   calls continue where the previous one stopped.  It squeezes out excess
   whitespace, drops comments, passes strings through, replaces a
   one-character quote by its decimal value, and rewrites a line comment
   that starts with a number into ".line <number>", followed by
   ".app-file <string>" when a string follows the number.  */
int
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .app-file, put out string.
	  8: After putting out a .app-file string, flush until newline.
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  register int ch, ch2 = 0;

  switch (state)
    {
    case -1:
      /* Emit the pending string one character per call.  */
      ch = *out_string++;
      if (*out_string == 0)
	{
	  state = old_state;
	  old_state = 3;
	}
      return ch;

    case -2:
      for (;;)
	{
	  do
	    {
	      ch = (*get) ();
	    }
	  while (ch != EOF && ch != '\n' && ch != '*');
	  /* Newlines inside the comment are passed through; the state
	     stays -2 so the next call keeps flushing.  */
	  if (ch == '\n' || ch == EOF)
	    return ch;

	  /* At this point, ch must be a '*' */
	  while ((ch = (*get) ()) == '*')
	    {
	      ;
	    }
	  if (ch == EOF || ch == '/')
	    break;
	  (*unget) (ch);
	}
      state = old_state;
      return ' ';

    case 4:
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
	return ch;
      else
	{
	  while (ch != EOF && IS_WHITESPACE (ch))
	    ch = (*get) ();
	  if (ch == '"')
	    {
	      (*unget) (ch);
	      out_string = "\n.app-file ";
	      old_state = 7;
	      state = -1;
	      return *out_string++;
	    }
	  else
	    {
	      while (ch != EOF && ch != '\n')
		ch = (*get) ();
	      /* The .line is finished.  Without this the scrubber stayed
		 in state 4 and discarded the whole of the next line.  */
	      state = 0;
	      return ch;
	    }
	}

    case 5:
      ch = (*get) ();
      /* EOF has to be tested before ch is used to index lex[].  */
      if (ch == EOF)
	{
	  as_warn ("End of file in string: inserted '\"'");
	  state = old_state;
	  (*unget) ('\n');
	  return '"';
	}
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
	{
	  state = old_state;
	  return ch;
	}
      else if (ch == '\\')
	{
	  state = 6;
	  return ch;
	}
      else
	{
	  return ch;
	}

    case 6:
      state = 5;
      ch = (*get) ();
      switch (ch)
	{
	  /* This is neat.  Turn "string
	     more string" into "string\n  more string"
	     */
	case '\n':
	  (*unget) ('n');
	  add_newlines++;
	  return '\\';

	case '"':
	case '\\':
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
#ifdef BACKSLASH_V
	case 'v':
#endif /* BACKSLASH_V */
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
	default:
	  as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	  break;
#else /* ONLY_STANDARD_ESCAPES */
	default:
	  /* Accept \x as x for any x */
	  break;
#endif /* ONLY_STANDARD_ESCAPES */

	case EOF:
	  as_warn ("End of file in string: '\"' inserted");
	  return '"';
	}
      return ch;

    case 7:
      /* Pass on the opening quote of the .app-file string, then treat
	 the rest as an ordinary string that ends in state 8.  */
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Stop at EOF as well as at a newline; testing for the newline
	 alone never terminated when the input ended first.  */
      do
	ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
	as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      do
	ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	return ch;

      /* Whitespace directly before a comment or a line separator is
	 dropped altogether.  */
      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	{
	  goto recycle;
	}
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
	 character followed by whitespace.  If the next
	 character is ':', this is whitespace after a label
	 name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
	{
	  state = 0;
	  return ch;
	}

      switch (state)
	{
	case 0:
	  state++;
	  goto recycle;		/* Punted leading sp */
	case 1:
	  BAD_CASE (state);	/* We can't get here */
	case 2:
	  state++;
	  (*unget) (ch);
	  return ' ';		/* Sp after opco */
	case 3:
	  goto recycle;		/* Sp in operands */
	default:
	  BAD_CASE (state);
	}
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	{
	  for (;;)
	    {
	      do
		{
		  ch2 = (*get) ();
		  if (ch2 != EOF && IS_NEWLINE (ch2))
		    add_newlines++;
		}
	      while (ch2 != EOF &&
		     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

	      while (ch2 != EOF &&
		     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
		{
		  ch2 = (*get) ();
		}

	      if (ch2 == EOF
		  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
		break;
	      /* Hand back the character that followed the '*' so the
		 loop above sees it again.  This used to push back ch
		 (the opening '/'), which lost a newline placed right
		 after a '*' and so left add_newlines one short.  */
	      (*unget) (ch2);
	    }
	  if (ch2 == EOF)
	    as_warn ("End of file in multiline comment");

	  /* The whole comment counts as a single blank.  */
	  ch = ' ';
	  goto recycle;
	}
      else
	{
	  if (ch2 != EOF)
	    (*unget) (ch2);
	  return ch;
	}
      break;

    case LEX_IS_STRINGQUOTE:
      old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      ch = (*get) ();
      if (ch == EOF)
	{
	  as_warn ("End-of-file after a one-character quote; \\000 inserted");
	  ch = 0;
	}
      if (ch == '\\')
	{
	  ch = (*get) ();
	  ch = process_escape (ch);
	}
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.
	 */
      if ((ch = (*get) ()) != '\'')
	{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	  as_warn ("Missing close quote: (assumed)");
#else
	  (*unget) (ch);
#endif
	}
      /* A single digit can be returned directly; anything longer is
	 emitted through state -1.  */
      if (strlen (out_buf) == 1)
	{
	  return out_buf[0];
	}
      old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state != 3)
	state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
	{
	  --add_newlines;
	  (*unget) (ch);
	}
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state != 0)		/* Not at start of line, act normal */
	goto de_fault;

      /* FIXME-someday: The two character comment stuff was badly
	 thought out.  On i386, we want '/' as line comment start
	 AND we want C style comments.  hence this hack.  The
	 whole lexical process should be reworked.  xoxorich.  */

      /* Only read ahead, and only push back, when the comment
	 character really is '/'.  Previously any other line comment
	 character reached the push-back with ch2 still 0, which put a
	 NUL into the input and stopped `# <number>' lines from ever
	 being recognized below.  */
      if (ch == '/')
	{
	  ch2 = (*get) ();
	  if (ch2 == '*')
	    {
	      /* NOTE(review): old_state is not set here, so state -2
		 resumes in whatever old_state was left over -- confirm
		 that this is intended.  */
	      state = -2;
	      return (do_scrub_next_char (get, unget));
	    }
	  if (ch2 != EOF)
	    (*unget) (ch2);
	}			/* bad hack */

      do
	ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	{
	  as_warn ("EOF in comment:  Newline inserted");
	  return '\n';
	}
      if (ch < '0' || ch > '9')
	{
	  /* Non-numerics:  Eat whole comment line */
	  while (ch != EOF && !IS_NEWLINE (ch))
	    ch = (*get) ();
	  if (ch == EOF)
	    as_warn ("EOF in Comment: Newline inserted");
	  state = 0;
	  return '\n';
	}
      /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
      (*unget) (ch);
      old_state = 4;
      state = -1;
      out_string = ".line ";
      return *out_string++;

    case LEX_IS_COMMENT_START:
      do
	ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
	as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
	{
	  state = 2;		/* Now seeing opcode */
	  return ch;
	}
      else if (state == 1)
	{
	  state = 2;		/* Ditto */
	  return ch;
	}
      else
	{
	  return ch;		/* Opcode or operands already */
	}
    }
  return -1;
}

#ifdef TEST

/* Stand-alone driver: scrub standard input onto standard output.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also reads line_separator_chars, which nothing in
   this file defines.  NOTE(review): assumed to be declared as an array of
   const char like the two tables above -- confirm against the header.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Read through the file-based source, with stdin as its stream.  */
  scrub_file = stdin;
  do_scrub_begin ();
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: print STR and
   a newline on stderr.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/*
 * Local Variables:
 * comment-column: 0
 * fill-column: 131
 * End:
 */

/* end of app.c */
Commit	Line	Data
3340f7e5	1	/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d	2
a39116f1 RP	3	Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
a39116f1 RP	4	*/
fecd2382 RP	5	/* This is the Assembler Pre-Processor
fecd2382 RP	6	Copyright (C) 1987 Free Software Foundation, Inc.
6efd877d	7
a39116f1	8	This file is part of GAS, the GNU Assembler.
6efd877d	9
a39116f1 RP	10	GAS is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
6efd877d	14
a39116f1 RP	15	GAS is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
6efd877d	19
a39116f1 RP	20	You should have received a copy of the GNU General Public License
	21	along with GAS; see the file COPYING. If not, write to
	22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382 RP	23
	24	/* App, the assembler pre-processor. This pre-processor strips out excess
	25	spaces, turns single-quoted characters into a decimal constant, and turns
be06bdcd SC	26	# <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
be06bdcd SC	27	pair. This needs better error-handling.
a39116f1	28	*/
fecd2382 RP	29
fecd2382 RP	30	#include <stdio.h>
6efd877d KR	31	#include "as.h" /* For BAD_CASE() only */
6efd877d KR	32	#include "read.h"
fecd2382	33
3340f7e5	34	#if (__STDC__ != 1) && !defined(const)
6efd877d	35	#define const /* Nothing */
fecd2382 RP	36	#endif
fecd2382 RP	37
6efd877d KR	38	static char lex[256];
	39	static char symbol_chars[] =
	40	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	41
	42	#define LEX_IS_SYMBOL_COMPONENT 1
	43	#define LEX_IS_WHITESPACE 2
	44	#define LEX_IS_LINE_SEPARATOR 3
	45	#define LEX_IS_COMMENT_START 4
	46	#define LEX_IS_LINE_COMMENT_START 5
	47	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	48	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	49	#define LEX_IS_STRINGQUOTE 8
	50	#define LEX_IS_COLON 9
	51	#define LEX_IS_NEWLINE 10
	52	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	53	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	54	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	55	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	56	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	57	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	58	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	59
	60	/* FIXME-soon: The entire lexer/parser thingy should be
	61	built statically at compile time rather than dynamically
	62	each and every time the assembler is run. xoxorich. */
fecd2382	63
6efd877d KR	64	void
	65	do_scrub_begin ()
	66	{
	67	const char *p;
	68
	69	lex[' '] = LEX_IS_WHITESPACE;
	70	lex['\t'] = LEX_IS_WHITESPACE;
	71	lex['\n'] = LEX_IS_NEWLINE;
	72	lex[';'] = LEX_IS_LINE_SEPARATOR;
	73	lex['"'] = LEX_IS_STRINGQUOTE;
	74	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	75	lex[':'] = LEX_IS_COLON;
7c2d4011	76
be06bdcd SC	77
	78
	79	#ifdef SINGLE_QUOTE_STRINGS
	80	lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011	81	#endif
be06bdcd	82
6efd877d	83	/* Note that these override the previous defaults, e.g. if ';'
be06bdcd	84
fecd2382	85	is a comment char, then it isn't a line separator. */
6efd877d KR	86	for (p = symbol_chars; *p; ++p)
	87	{
	88	lex[*p] = LEX_IS_SYMBOL_COMPONENT;
	89	} /* declare symbol characters */
	90
	91	for (p = line_comment_chars; *p; p++)
	92	{
	93	lex[*p] = LEX_IS_LINE_COMMENT_START;
	94	} /* declare line comment chars */
	95
	96	for (p = comment_chars; *p; p++)
	97	{
	98	lex[*p] = LEX_IS_COMMENT_START;
	99	} /* declare comment chars */
	100
	101	for (p = line_separator_chars; *p; p++)
	102	{
	103	lex[*p] = LEX_IS_LINE_SEPARATOR;
	104	} /* declare line separators */
	105
	106	/* Only allow slash-star comments if slash is not in use */
	107	if (lex['/'] == 0)
	108	{
	109	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	110	}
	111	/* FIXME-soon. This is a bad hack but otherwise, we
a39116f1 RP	112	can't do c-style comments when '/' is a line
a39116f1 RP	113	comment char. xoxorich. */
6efd877d KR	114	if (lex['*'] == 0)
	115	{
	116	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	117	}
	118	} /* do_scrub_begin() */
fecd2382 RP	119
	120	FILE *scrub_file;
	121
6efd877d KR	122	int
	123	scrub_from_file ()
	124	{
	125	return getc (scrub_file);
fecd2382 RP	126	}
fecd2382 RP	127
6efd877d KR	128	void
	129	scrub_to_file (ch)
	130	int ch;
fecd2382	131	{
6efd877d KR	132	ungetc (ch, scrub_file);
6efd877d KR	133	} /* scrub_to_file() */
fecd2382 RP	134
	135	char *scrub_string;
	136	char *scrub_last_string;
	137
6efd877d KR	138	int
	139	scrub_from_string ()
	140	{
	141	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	142	} /* scrub_from_string() */
fecd2382	143
6efd877d KR	144	void
	145	scrub_to_string (ch)
	146	int ch;
fecd2382	147	{
6efd877d KR	148	*--scrub_string = ch;
6efd877d KR	149	} /* scrub_to_string() */
fecd2382 RP	150
	151	/* Saved state of the scrubber */
	152	static int state;
	153	static int old_state;
	154	static char *out_string;
	155	static char out_buf[20];
	156	static int add_newlines = 0;
	157
	158	/* Data structure for saving the state of app across #include's. Note that
	159	app is called asynchronously to the parsing of the .include's, so our
	160	state at the time .include is interpreted is completely unrelated.
	161	That's why we have to save it all. */
	162
6efd877d KR	163	struct app_save
	164	{
	165	int state;
	166	int old_state;
	167	char *out_string;
	168	char out_buf[sizeof (out_buf)];
	169	int add_newlines;
	170	char *scrub_string;
	171	char *scrub_last_string;
	172	FILE *scrub_file;
	173	};
	174
	175	char *
	176	app_push ()
	177	{
7c2d4011 SC	178	register struct app_save *saved;
7c2d4011 SC	179
6efd877d KR	180	saved = (struct app_save ) xmalloc (sizeof (saved));
	181	saved->state = state;
	182	saved->old_state = old_state;
	183	saved->out_string = out_string;
	184	bcopy (saved->out_buf, out_buf, sizeof (out_buf));
	185	saved->add_newlines = add_newlines;
	186	saved->scrub_string = scrub_string;
7c2d4011	187	saved->scrub_last_string = scrub_last_string;
6efd877d	188	saved->scrub_file = scrub_file;
7c2d4011 SC	189
7c2d4011 SC	190	/* do_scrub_begin() is not useful, just wastes time. */
6efd877d	191	return (char *) saved;
fecd2382 RP	192	}
fecd2382 RP	193
6efd877d KR	194	void
	195	app_pop (arg)
	196	char *arg;
fecd2382	197	{
6efd877d KR	198	register struct app_save saved = (struct app_save ) arg;
	199
	200	/* There is no do_scrub_end (). */
	201	state = saved->state;
	202	old_state = saved->old_state;
	203	out_string = saved->out_string;
	204	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
	205	add_newlines = saved->add_newlines;
	206	scrub_string = saved->scrub_string;
	207	scrub_last_string = saved->scrub_last_string;
	208	scrub_file = saved->scrub_file;
	209
	210	free (arg);
	211	} /* app_pop() */
	212
	213	int
	214	process_escape (ch)
	215	char ch;
7c2d4011	216	{
6efd877d KR	217	switch (ch)
	218	{
	219	case 'b':
	220	return '\b';
	221	case 'f':
	222	return '\f';
	223	case 'n':
	224	return '\n';
	225	case 'r':
	226	return '\r';
	227	case 't':
	228	return '\t';
	229	case '\'':
	230	return '\'';
	231	case '"':
	232	return '\'';
	233	default:
	234	return ch;
	235	}
7c2d4011	236	}
6efd877d KR	237	int
	238	do_scrub_next_char (get, unget)
	239	int (*get) ();
	240	void (*unget) ();
fecd2382	241	{
6efd877d	242	/*State 0: beginning of normal line
a39116f1 RP	243	1: After first whitespace on line (flush more white)
	244	2: After first non-white (opcode) on line (keep 1white)
	245	3: after second white on line (into operands) (flush white)
	246	4: after putting out a .line, put out digits
	247	5: parsing a string, then go to old-state
	248	6: putting out \ escape in a "d string.
	249	7: After putting out a .app-file, put out string.
	250	8: After putting out a .app-file string, flush until newline.
	251	-1: output string in out_string and go to the state in old_state
	252	-2: flush text until a '*' '/' is seen, then go to state old_state
	253	*/
6efd877d KR	254
	255	register int ch, ch2 = 0;
	256
	257	switch (state)
	258	{
	259	case -1:
	260	ch = *out_string++;
	261	if (*out_string == 0)
	262	{
	263	state = old_state;
	264	old_state = 3;
	265	}
	266	return ch;
	267
	268	case -2:
	269	for (;;)
	270	{
	271	do
	272	{
	273	ch = (*get) ();
	274	}
	275	while (ch != EOF && ch != '\n' && ch != '*');
	276	if (ch == '\n' \|\| ch == EOF)
	277	return ch;
	278
	279	/* At this point, ch must be a '' /
	280	while ((ch = (get) ()) == '')
	281	{
	282	;
	283	}
	284	if (ch == EOF \|\| ch == '/')
	285	break;
	286	(*unget) (ch);
	287	}
	288	state = old_state;
	289	return ' ';
	290
	291	case 4:
	292	ch = (*get) ();
	293	if (ch == EOF \|\| (ch >= '0' && ch <= '9'))
	294	return ch;
	295	else
	296	{
	297	while (ch != EOF && IS_WHITESPACE (ch))
	298	ch = (*get) ();
	299	if (ch == '"')
	300	{
	301	(*unget) (ch);
	302	out_string = "\n.app-file ";
	303	old_state = 7;
	304	state = -1;
	305	return *out_string++;
	306	}
	307	else
	308	{
	309	while (ch != EOF && ch != '\n')
	310	ch = (*get) ();
	311	return ch;
	312	}
	313	}
	314
	315	case 5:
	316	ch = (*get) ();
	317	if (lex[ch] == LEX_IS_STRINGQUOTE)
318	{
319	state = old_state;
320	return ch;
321	}
322	else if (ch == '\\')
323	{
324	state = 6;
325	return ch;
326	}
327	else if (ch == EOF)
328	{
329	as_warn ("End of file in string: inserted '\"'");
330	state = old_state;
331	(*unget) ('\n');
332	return '"';
333	}
334	else
335	{
336	return ch;
337	}
338
339	case 6:
340	state = 5;
341	ch = (*get) ();
342	switch (ch)
343	{
344	/* This is neet. Turn "string
fecd2382	345	more string" into "string\n more string"
a39116f1	346	*/
6efd877d KR	347	case '\n':
	348	(*unget) ('n');
	349	add_newlines++;
	350	return '\\';
	351
	352	case '"':
	353	case '\\':
	354	case 'b':
	355	case 'f':
	356	case 'n':
	357	case 'r':
	358	case 't':
fecd2382	359	#ifdef BACKSLASH_V
6efd877d	360	case 'v':
fecd2382	361	#endif /* BACKSLASH_V */
6efd877d KR	362	case '0':
	363	case '1':
	364	case '2':
	365	case '3':
	366	case '4':
	367	case '5':
	368	case '6':
	369	case '7':
	370	break;
7c2d4011	371	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
6efd877d KR	372	default:
	373	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	374	break;
fecd2382	375	#else /* ONLY_STANDARD_ESCAPES */
6efd877d KR	376	default:
	377	/* Accept \x as x for any x */
	378	break;
fecd2382	379	#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011	380
6efd877d KR	381	case EOF:
	382	as_warn ("End of file in string: '\"' inserted");
	383	return '"';
	384	}
	385	return ch;
	386
	387	case 7:
	388	ch = (*get) ();
	389	state = 5;
	390	old_state = 8;
	391	return ch;
	392
	393	case 8:
	394	do
	395	ch = (*get) ();
	396	while (ch != '\n');
	397	state = 0;
	398	return ch;
	399	}
	400
	401	/* OK, we are somewhere in states 0 through 4 */
	402
	403	/* flushchar: */
	404	ch = (*get) ();
	405	recycle:
	406	if (ch == EOF)
	407	{
	408	if (state != 0)
	409	as_warn ("End of file not at end of a line: Newline inserted.");
	410	return ch;
	411	}
	412
	413	switch (lex[ch])
	414	{
	415	case LEX_IS_WHITESPACE:
	416	do
	417	ch = (*get) ();
	418	while (ch != EOF && IS_WHITESPACE (ch));
	419	if (ch == EOF)
	420	return ch;
	421
	422	if (IS_COMMENT (ch) \|\| (state == 0 && IS_LINE_COMMENT (ch)) \|\| ch == '/' \|\| IS_LINE_SEPARATOR (ch))
	423	{
	424	goto recycle;
fecd2382	425	}
7c2d4011	426	#ifdef MRI
6efd877d KR	427	(unget) (ch); / Put back */
6efd877d KR	428	return ' '; /* Always return one space at start of line */
7c2d4011	429	#endif
6efd877d KR	430
	431	/* If we're in state 2, we've seen a non-white
	432	character followed by whitespace. If the next
	433	character is ':', this is whitespace after a label
	434	name which we can ignore. */
	435	if (state == 2 && lex[ch] == LEX_IS_COLON)
	436	{
	437	state = 0;
	438	return ch;
	439	}
	440
	441	switch (state)
	442	{
	443	case 0:
	444	state++;
	445	goto recycle; /* Punted leading sp */
	446	case 1:
	447	BAD_CASE (state); /* We can't get here */
	448	case 2:
	449	state++;
	450	(*unget) (ch);
	451	return ' '; /* Sp after opco */
	452	case 3:
	453	goto recycle; /* Sp in operands */
	454	default:
	455	BAD_CASE (state);
	456	}
	457	break;
	458
	459	case LEX_IS_TWOCHAR_COMMENT_1ST:
	460	ch2 = (*get) ();
	461	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	462	{
	463	for (;;)
	464	{
	465	do
	466	{
	467	ch2 = (*get) ();
	468	if (ch2 != EOF && IS_NEWLINE (ch2))
	469	add_newlines++;
fecd2382	470	}
6efd877d KR	471	while (ch2 != EOF &&
	472	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	473
	474	while (ch2 != EOF &&
	475	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	476	{
	477	ch2 = (*get) ();
fecd2382	478	}
6efd877d KR	479
	480	if (ch2 == EOF
	481	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382	482	break;
6efd877d KR	483	(*unget) (ch);
	484	}
	485	if (ch2 == EOF)
	486	as_warn ("End of file in multiline comment");
	487
	488	ch = ' ';
	489	goto recycle;
	490	}
	491	else
	492	{
	493	if (ch2 != EOF)
	494	(*unget) (ch2);
	495	return ch;
	496	}
	497	break;
	498
	499	case LEX_IS_STRINGQUOTE:
	500	old_state = state;
	501	state = 5;
	502	return ch;
	503	#ifndef MRI
a39116f1	504	#ifndef IEEE_STYLE
6efd877d KR	505	case LEX_IS_ONECHAR_QUOTE:
	506	ch = (*get) ();
	507	if (ch == EOF)
	508	{
	509	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	510	ch = 0;
	511	}
	512	if (ch == '\\')
	513	{
	514	ch = (*get) ();
	515	ch = process_escape (ch);
	516	}
	517	sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011	518
6efd877d KR	519
6efd877d KR	520	/* None of these 'x constants for us. We want 'x'.
fecd2382	521	*/
6efd877d KR	522	if ((ch = (*get) ()) != '\'')
6efd877d KR	523	{
fecd2382	524	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d	525	as_warn ("Missing close quote: (assumed)");
fecd2382	526	#else
6efd877d	527	(*unget) (ch);
fecd2382	528	#endif
6efd877d KR	529	}
	530	if (strlen (out_buf) == 1)
	531	{
	532	return out_buf[0];
	533	}
	534	old_state = state;
	535	state = -1;
	536	out_string = out_buf;
	537	return *out_string++;
7c2d4011	538	#endif
a39116f1	539	#endif
6efd877d KR	540	case LEX_IS_COLON:
	541	if (state != 3)
	542	state = 0;
	543	return ch;
	544
	545	case LEX_IS_NEWLINE:
	546	/* Roll out a bunch of newlines from inside comments, etc. */
	547	if (add_newlines)
	548	{
	549	--add_newlines;
	550	(*unget) (ch);
	551	}
	552	/* fall thru into... */
	553
	554	case LEX_IS_LINE_SEPARATOR:
	555	state = 0;
	556	return ch;
	557
	558	case LEX_IS_LINE_COMMENT_START:
	559	if (state != 0) /* Not at start of line, act normal */
	560	goto de_fault;
	561
	562	/* FIXME-someday: The two character comment stuff was badly
7c2d4011 SC	563	thought out. On i386, we want '/' as line comment start
	564	AND we want C style comments. hence this hack. The
	565	whole lexical process should be reworked. xoxorich. */
	566
6efd877d KR	567	if (ch == '/' && (ch2 = (get) ()) == '')
	568	{
	569	state = -2;
	570	return (do_scrub_next_char (get, unget));
	571	}
	572	else
	573	{
	574	(*unget) (ch2);
	575	} /* bad hack */
	576
	577	do
	578	ch = (*get) ();
	579	while (ch != EOF && IS_WHITESPACE (ch));
	580	if (ch == EOF)
	581	{
	582	as_warn ("EOF in comment: Newline inserted");
	583	return '\n';
	584	}
	585	if (ch < '0' \|\| ch > '9')
	586	{
	587	/* Non-numerics: Eat whole comment line */
	588	while (ch != EOF && !IS_NEWLINE (ch))
	589	ch = (*get) ();
	590	if (ch == EOF)
	591	as_warn ("EOF in Comment: Newline inserted");
	592	state = 0;
	593	return '\n';
	594	}
	595	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	596	(*unget) (ch);
	597	old_state = 4;
	598	state = -1;
	599	out_string = ".line ";
	600	return *out_string++;
	601
	602	case LEX_IS_COMMENT_START:
	603	do
	604	ch = (*get) ();
	605	while (ch != EOF && !IS_NEWLINE (ch));
	606	if (ch == EOF)
	607	as_warn ("EOF in comment: Newline inserted");
	608	state = 0;
	609	return '\n';
	610
	611	default:
	612	de_fault:
	613	/* Some relatively `normal' character. */
	614	if (state == 0)
	615	{
	616	state = 2; /* Now seeing opcode */
	617	return ch;
fecd2382	618	}
6efd877d KR	619	else if (state == 1)
	620	{
	621	state = 2; /* Ditto */
	622	return ch;
	623	}
	624	else
	625	{
	626	return ch; /* Opcode or operands already */
	627	}
	628	}
	629	return -1;
fecd2382 RP	630	}
	631
	632	#ifdef TEST
	633
6efd877d KR	634	const char comment_chars[] = "\|";
6efd877d KR	635	const char line_comment_chars[] = "#";
fecd2382	636
6efd877d	637	main ()
fecd2382	638	{
6efd877d KR	639	int ch;
	640
	641	app_begin ();
	642	while ((ch = do_scrub_next_char (stdin)) != EOF)
	643	putc (ch, stdout);
fecd2382 RP	644	}
fecd2382 RP	645
6efd877d KR	646	as_warn (str)
6efd877d KR	647	char *str;
fecd2382	648	{
6efd877d KR	649	fputs (str, stderr);
6efd877d KR	650	putc ('\n', stderr);
fecd2382	651	}
6efd877d	652
fecd2382 RP	653	#endif
	654
	655	/*
	656	* Local Variables:
	657	* comment-column: 0
	658	* fill-column: 131
	659	* End:
	660	*/
	661
	662	/* end of app.c */