-/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
-
- Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
- */
/* This is the Assembler Pre-Processor
- Copyright (C) 1987 Free Software Foundation, Inc.
+ Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
You should have received a copy of the GNU General Public License
along with GAS; see the file COPYING. If not, write to
- the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor. This pre-processor strips out excess
spaces, turns single-quoted characters into a decimal constant, and turns
- # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
- This needs better error-handling.
- */
+ # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
+ pair. This needs better error-handling. */
#include <stdio.h>
#include "as.h" /* For BAD_CASE() only */
-#include "read.h"
-#if (__STDC__ != 1) && !defined(const)
-#define const /* Nothing */
+#if (__STDC__ != 1)
+#ifndef const
+#define const /* empty */
+#endif
#endif
static char lex[256];
-static char symbol_chars[] =
+static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
#define LEX_IS_SYMBOL_COMPONENT 1
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
+static int process_escape PARAMS ((int));
+
/* FIXME-soon: The entire lexer/parser thingy should be
built statically at compile time rather than dynamically
each and every time the assembler is run. xoxorich. */
lex['\t'] = LEX_IS_WHITESPACE;
lex['\n'] = LEX_IS_NEWLINE;
lex[';'] = LEX_IS_LINE_SEPARATOR;
- lex['"'] = LEX_IS_STRINGQUOTE;
- lex['\''] = LEX_IS_ONECHAR_QUOTE;
lex[':'] = LEX_IS_COLON;
-#ifdef MRI
- lex['\''] = LEX_IS_STRINGQUOTE;
+ if (! flag_mri)
+ {
+ lex['"'] = LEX_IS_STRINGQUOTE;
+
+#ifndef TC_HPPA
+ lex['\''] = LEX_IS_ONECHAR_QUOTE;
+#endif
+
+#ifdef SINGLE_QUOTE_STRINGS
+ lex['\''] = LEX_IS_STRINGQUOTE;
#endif
- /* Note that these override the previous defaults, e.g. if ';'
- is a comment char, then it isn't a line separator. */
+ }
+
+ /* Note that these override the previous defaults, e.g. if ';' is a
+ comment char, then it isn't a line separator. */
for (p = symbol_chars; *p; ++p)
{
- lex[*p] = LEX_IS_SYMBOL_COMPONENT;
+ lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
} /* declare symbol characters */
- for (p = line_comment_chars; *p; p++)
- {
- lex[*p] = LEX_IS_LINE_COMMENT_START;
- } /* declare line comment chars */
-
for (p = comment_chars; *p; p++)
{
- lex[*p] = LEX_IS_COMMENT_START;
+ lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
} /* declare comment chars */
+ for (p = line_comment_chars; *p; p++)
+ {
+ lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
+ } /* declare line comment chars */
+
for (p = line_separator_chars; *p; p++)
{
- lex[*p] = LEX_IS_LINE_SEPARATOR;
+ lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
} /* declare line separators */
/* Only allow slash-star comments if slash is not in use */
{
lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
}
- /* FIXME-soon. This is a bad hack but otherwise, we
- can't do c-style comments when '/' is a line
- comment char. xoxorich. */
+ /* FIXME-soon. This is a bad hack but otherwise, we can't do
+ c-style comments when '/' is a line comment char. xoxorich. */
if (lex['*'] == 0)
{
lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
}
+
+ if (flag_mri)
+ {
+ lex['\''] = LEX_IS_STRINGQUOTE;
+ lex[';'] = LEX_IS_COMMENT_START;
+ lex['*'] = LEX_IS_LINE_COMMENT_START;
+ /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
+ then it can't be used in an expression. */
+ lex['!'] = LEX_IS_LINE_COMMENT_START;
+ }
} /* do_scrub_begin() */
FILE *scrub_file;
saved->state = state;
saved->old_state = old_state;
saved->out_string = out_string;
- bcopy (saved->out_buf, out_buf, sizeof (out_buf));
+ memcpy (saved->out_buf, out_buf, sizeof (out_buf));
saved->add_newlines = add_newlines;
saved->scrub_string = scrub_string;
saved->scrub_last_string = scrub_last_string;
state = saved->state;
old_state = saved->old_state;
out_string = saved->out_string;
- memcpy (saved->out_buf, out_buf, sizeof (out_buf));
+ memcpy (out_buf, saved->out_buf, sizeof (out_buf));
add_newlines = saved->add_newlines;
scrub_string = saved->scrub_string;
scrub_last_string = saved->scrub_last_string;
free (arg);
} /* app_pop() */
-int
+/* @@ This assumes that \n &c are the same on host and target. This is not
+ necessarily true. */
+static int
process_escape (ch)
- char ch;
+ int ch;
{
switch (ch)
{
case '\'':
return '\'';
case '"':
- return '\'';
+ return '\"';
default:
return ch;
}
4: after putting out a .line, put out digits
5: parsing a string, then go to old-state
6: putting out \ escape in a "d string.
- 7: After putting out a .app-file, put out string.
- 8: After putting out a .app-file string, flush until newline.
- -1: output string in out_string and go to the state in old_state
- -2: flush text until a '*' '/' is seen, then go to state old_state
+ 7: After putting out a .appfile, put out string.
+ 8: After putting out a .appfile string, flush until newline.
+ 9: After seeing symbol char in state 3 (keep one whitespace after symbol char)
+ 10: After seeing whitespace in state 9 (keep whitespace before symbol char)
+ 11: After seeing a symbol character in state 0 (eg a label definition)
+ -1: output string in out_string and go to the state in old_state
+ -2: flush text until a '*' '/' is seen, then go to state old_state
*/
+ /* I added states 9 and 10 because the MIPS ECOFF assembler uses
+ constructs like ``.loc 1 20''. This was turning into ``.loc
+ 120''. States 9 and 10 ensure that a space is never dropped in
+ between characters which could appear in an identifier. Ian
+ Taylor, ian@cygnus.com.
+
+ I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
+ correctly on the PA (and any other target where colons are optional).
+ Jeff Law, law@cs.utah.edu. */
+
+ /* This is purely an optimization hack, and relies on gcc's inlining
+ capability. */
+#if defined (__GNUC__) && defined (__OPTIMIZE__)
+#define GET() (get == scrub_from_file ? scrub_from_file () : (*get) ())
+#else
+#define GET() ((*get) ())
+#endif
+
register int ch, ch2 = 0;
+ int not_cpp_line = 0;
switch (state)
{
{
do
{
- ch = (*get) ();
+ ch = GET ();
}
while (ch != EOF && ch != '\n' && ch != '*');
if (ch == '\n' || ch == EOF)
return ch;
/* At this point, ch must be a '*' */
- while ((ch = (*get) ()) == '*')
+ while ((ch = GET ()) == '*')
{
;
}
return ' ';
case 4:
- ch = (*get) ();
+ ch = GET ();
if (ch == EOF || (ch >= '0' && ch <= '9'))
return ch;
else
{
while (ch != EOF && IS_WHITESPACE (ch))
- ch = (*get) ();
+ ch = GET ();
if (ch == '"')
{
(*unget) (ch);
- out_string = "\n.app-file ";
+ out_string = "\n\t.appfile ";
old_state = 7;
state = -1;
return *out_string++;
else
{
while (ch != EOF && ch != '\n')
- ch = (*get) ();
+ ch = GET ();
+ state = 0;
return ch;
}
}
case 5:
- ch = (*get) ();
+ ch = GET ();
if (lex[ch] == LEX_IS_STRINGQUOTE)
{
state = old_state;
return ch;
}
+#ifndef NO_STRING_ESCAPES
else if (ch == '\\')
{
state = 6;
return ch;
}
+#endif
else if (ch == EOF)
{
as_warn ("End of file in string: inserted '\"'");
case 6:
state = 5;
- ch = (*get) ();
+ ch = GET ();
switch (ch)
{
- /* This is neet. Turn "string
- more string" into "string\n more string"
- */
+ /* Handle strings broken across lines, by turning '\n' into
+ '\\' and 'n'. */
case '\n':
(*unget) ('n');
add_newlines++;
case 'n':
case 'r':
case 't':
-#ifdef BACKSLASH_V
case 'v':
-#endif /* BACKSLASH_V */
+ case 'x':
+ case 'X':
case '0':
case '1':
case '2':
return ch;
case 7:
- ch = (*get) ();
+ ch = GET ();
state = 5;
old_state = 8;
return ch;
case 8:
do
- ch = (*get) ();
+ ch = GET ();
while (ch != '\n');
state = 0;
return ch;
}
- /* OK, we are somewhere in states 0 through 4 */
+ /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
/* flushchar: */
- ch = (*get) ();
+ ch = GET ();
recycle:
if (ch == EOF)
{
if (state != 0)
- as_warn ("End of file not at end of a line: Newline inserted.");
+ {
+ as_warn ("End of file not at end of a line: Newline inserted.");
+ state = 0;
+ return '\n';
+ }
return ch;
}
{
case LEX_IS_WHITESPACE:
do
- ch = (*get) ();
+ /* Preserve a single whitespace character at the beginning of
+ a line. */
+ if (state == 0)
+ {
+ state = 1;
+ return ch;
+ }
+ else
+ ch = GET ();
while (ch != EOF && IS_WHITESPACE (ch));
if (ch == EOF)
return ch;
- if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
+ if (IS_COMMENT (ch)
+ || (state == 0 && IS_LINE_COMMENT (ch))
+ || ch == '/'
+ || IS_LINE_SEPARATOR (ch))
{
+ /* cpp never outputs a leading space before the #, so try to
+ avoid being confused. */
+ not_cpp_line = 1;
goto recycle;
}
-#ifdef MRI
- (*unget) (ch); /* Put back */
- return ' '; /* Always return one space at start of line */
-#endif
- /* If we're in state 2, we've seen a non-white
- character followed by whitespace. If the next
- character is ':', this is whitespace after a label
- name which we can ignore. */
- if (state == 2 && lex[ch] == LEX_IS_COLON)
+ /* If we're in state 2 or 11, we've seen a non-white character
+ followed by whitespace. If the next character is ':', this
+ is whitespace after a label name which we normally must
+ ignore. In MRI mode, though, spaces are not permitted
+ between the label and the colon. */
+ if ((state == 2 || state == 11)
+ && lex[ch] == LEX_IS_COLON
+ && ! flag_mri)
{
- state = 0;
+ state = 1;
return ch;
}
state++;
goto recycle; /* Punted leading sp */
case 1:
- BAD_CASE (state); /* We can't get here */
+ /* We can arrive here if we leave a leading whitespace character
+ at the beginning of a line. */
+ goto recycle;
case 2:
- state++;
+ state = 3;
(*unget) (ch);
return ' '; /* Sp after opco */
case 3:
goto recycle; /* Sp in operands */
+ case 9:
+ case 10:
+ state = 10; /* Sp after symbol char */
+ goto recycle;
+ case 11:
+ state = 1;
+ (*unget) (ch);
+ return ' '; /* Sp after label definition. */
default:
BAD_CASE (state);
}
break;
case LEX_IS_TWOCHAR_COMMENT_1ST:
- ch2 = (*get) ();
+ ch2 = GET ();
if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
{
for (;;)
{
do
{
- ch2 = (*get) ();
+ ch2 = GET ();
if (ch2 != EOF && IS_NEWLINE (ch2))
add_newlines++;
}
while (ch2 != EOF &&
(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
{
- ch2 = (*get) ();
+ ch2 = GET ();
}
if (ch2 == EOF
{
if (ch2 != EOF)
(*unget) (ch2);
+ if (state == 9 || state == 10)
+ state = 3;
return ch;
}
break;
case LEX_IS_STRINGQUOTE:
- old_state = state;
+ if (state == 10)
+ {
+ /* Preserve the whitespace in foo "bar" */
+ (*unget) (ch);
+ state = 3;
+ return ' ';
+ }
+ else if (state == 9)
+ old_state = 3;
+ else
+ old_state = state;
state = 5;
return ch;
-#ifndef MRI
#ifndef IEEE_STYLE
case LEX_IS_ONECHAR_QUOTE:
- ch = (*get) ();
+ if (state == 10)
+ {
+ /* Preserve the whitespace in foo 'b' */
+ (*unget) (ch);
+ state = 3;
+ return ' ';
+ }
+ ch = GET ();
if (ch == EOF)
{
as_warn ("End-of-file after a one-character quote; \\000 inserted");
}
if (ch == '\\')
{
- ch = (*get) ();
+ ch = GET ();
ch = process_escape (ch);
}
sprintf (out_buf, "%d", (int) (unsigned char) ch);
- /* None of these 'x constants for us. We want 'x'.
- */
- if ((ch = (*get) ()) != '\'')
+ /* None of these 'x constants for us. We want 'x'. */
+ if ((ch = GET ()) != '\'')
{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
as_warn ("Missing close quote: (assumed)");
{
return out_buf[0];
}
- old_state = state;
+ if (state == 9)
+ old_state = 3;
+ else
+ old_state = state;
state = -1;
out_string = out_buf;
return *out_string++;
-#endif
#endif
case LEX_IS_COLON:
- if (state != 3)
- state = 0;
+ if (state == 9 || state == 10)
+ state = 3;
+ else if (state != 3)
+ state = 1;
return ch;
case LEX_IS_NEWLINE:
return ch;
case LEX_IS_LINE_COMMENT_START:
- if (state != 0) /* Not at start of line, act normal */
- goto de_fault;
-
- /* FIXME-someday: The two character comment stuff was badly
- thought out. On i386, we want '/' as line comment start
- AND we want C style comments. hence this hack. The
- whole lexical process should be reworked. xoxorich. */
-
- if (ch == '/' && (ch2 = (*get) ()) == '*')
- {
- state = -2;
- return (do_scrub_next_char (get, unget));
- }
- else
+ if (state == 0) /* Only comment at start of line. */
{
- (*unget) (ch2);
- } /* bad hack */
+ /* FIXME-someday: The two character comment stuff was badly
+ thought out. On i386, we want '/' as line comment start
+ AND we want C style comments. hence this hack. The
+ whole lexical process should be reworked. xoxorich. */
+ if (ch == '/')
+ {
+ ch2 = GET ();
+ if (ch2 == '*')
+ {
+ state = -2;
+ return (do_scrub_next_char (get, unget));
+ }
+ else
+ {
+ (*unget) (ch2);
+ }
+ } /* bad hack */
- do
- ch = (*get) ();
- while (ch != EOF && IS_WHITESPACE (ch));
- if (ch == EOF)
- {
- as_warn ("EOF in comment: Newline inserted");
- return '\n';
- }
- if (ch < '0' || ch > '9')
- {
- /* Non-numerics: Eat whole comment line */
- while (ch != EOF && !IS_NEWLINE (ch))
- ch = (*get) ();
+ if (ch != '#')
+ not_cpp_line = 1;
+
+ do
+ ch = GET ();
+ while (ch != EOF && IS_WHITESPACE (ch));
if (ch == EOF)
- as_warn ("EOF in Comment: Newline inserted");
- state = 0;
- return '\n';
+ {
+ as_warn ("EOF in comment: Newline inserted");
+ return '\n';
+ }
+ if (ch < '0' || ch > '9' || not_cpp_line)
+ {
+ /* Non-numerics: Eat whole comment line */
+ while (ch != EOF && !IS_NEWLINE (ch))
+ ch = GET ();
+ if (ch == EOF)
+ as_warn ("EOF in Comment: Newline inserted");
+ state = 0;
+ return '\n';
+ }
+ /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
+ (*unget) (ch);
+ old_state = 4;
+ state = -1;
+ out_string = "\t.appline ";
+ return *out_string++;
}
- /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
- (*unget) (ch);
- old_state = 4;
- state = -1;
- out_string = ".line ";
- return *out_string++;
+ /* We have a line comment character which is not at the start of
+ a line. If this is also a normal comment character, fall
+ through. Otherwise treat it as a default character. */
+ if ((flag_mri && (ch == '!' || ch == '*'))
+ || strchr (comment_chars, ch) == NULL)
+ goto de_fault;
+ /* Fall through. */
case LEX_IS_COMMENT_START:
do
- ch = (*get) ();
+ ch = GET ();
while (ch != EOF && !IS_NEWLINE (ch));
if (ch == EOF)
as_warn ("EOF in comment: Newline inserted");
state = 0;
return '\n';
+ case LEX_IS_SYMBOL_COMPONENT:
+ if (state == 10)
+ {
+ /* This is a symbol character following another symbol
+ character, with whitespace in between. We skipped the
+ whitespace earlier, so output it now. */
+ (*unget) (ch);
+ state = 3;
+ return ' ';
+ }
+ if (state == 3)
+ state = 9;
+ /* Fall through. */
default:
de_fault:
/* Some relatively `normal' character. */
if (state == 0)
{
- state = 2; /* Now seeing opcode */
+ state = 11; /* Now seeing label definition */
return ch;
}
else if (state == 1)
state = 2; /* Ditto */
return ch;
}
+ else if (state == 9)
+ {
+ if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
+ state = 3;
+ return ch;
+ }
+ else if (state == 10)
+ {
+ state = 3;
+ return ch;
+ }
else
{
return ch; /* Opcode or operands already */
}
}
return -1;
+
+#undef GET
}
#ifdef TEST
#endif
-/*
- * Local Variables:
- * comment-column: 0
- * fill-column: 131
- * End:
- */
-
/* end of app.c */