* coffswap.h (coff_swap_scnhdr_in): Don't always add IMAGE_BASE.

[deliverable/binutils-gdb.git] / gas / app.c
diff --git a/gas/app.c b/gas/app.c

index 393ab2bd2770b237595d05c07bd975679c434021..09492b426b4e94a59b3e3b13e0aa30f429e9a890 100644 (file)
--- a/gas/app.c
+++ b/gas/app.c
@@ -1,9 +1,5 @@
-/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
-
-   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
-   */
  /* This is the Assembler Pre-Processor
-   Copyright (C) 1987 Free Software Foundation, Inc.
+   Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
  
@@ -19,24 +15,25 @@
  
     You should have received a copy of the GNU General Public License
     along with GAS; see the file COPYING.  If not, write to
-   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  
+/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  /* App, the assembler pre-processor.  This pre-processor strips out excess
     spaces, turns single-quoted characters into a decimal constant, and turns
-   # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
-   This needs better error-handling.
-   */
+   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
+   pair.  This needs better error-handling.  */
  
  #include <stdio.h>
  #include "as.h"                        /* For BAD_CASE() only */
-#include "read.h"
  
-#if (__STDC__ != 1) && !defined(const)
-#define const                  /* Nothing */
+#if (__STDC__ != 1)
+#ifndef const
+#define const  /* empty */
+#endif
  #endif
  
  static char lex[256];
-static char symbol_chars[] =
+static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  
  #define LEX_IS_SYMBOL_COMPONENT                1
@@ -57,6 +54,8 @@ static char symbol_chars[] =
  #define IS_LINE_COMMENT(c)             (lex[c] == LEX_IS_LINE_COMMENT_START)
  #define        IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  
+static int process_escape PARAMS ((int));
+
  /* FIXME-soon: The entire lexer/parser thingy should be
     built statically at compile time rather than dynamically
     each and every time the assembler is run.  xoxorich. */
@@ -70,33 +69,41 @@ do_scrub_begin ()
    lex['\t'] = LEX_IS_WHITESPACE;
    lex['\n'] = LEX_IS_NEWLINE;
    lex[';'] = LEX_IS_LINE_SEPARATOR;
-  lex['"'] = LEX_IS_STRINGQUOTE;
-  lex['\''] = LEX_IS_ONECHAR_QUOTE;
    lex[':'] = LEX_IS_COLON;
  
-#ifdef MRI
-  lex['\''] = LEX_IS_STRINGQUOTE;
+  if (! flag_mri)
+    {
+      lex['"'] = LEX_IS_STRINGQUOTE;
+
+#ifndef TC_HPPA
+      lex['\''] = LEX_IS_ONECHAR_QUOTE;
+#endif
+
+#ifdef SINGLE_QUOTE_STRINGS
+      lex['\''] = LEX_IS_STRINGQUOTE;
  #endif
-  /* Note that these override the previous defaults, e.g. if ';'
-          is a comment char, then it isn't a line separator.  */
+    }
+
+  /* Note that these override the previous defaults, e.g. if ';' is a
+     comment char, then it isn't a line separator.  */
    for (p = symbol_chars; *p; ++p)
      {
-      lex[*p] = LEX_IS_SYMBOL_COMPONENT;
+      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
      }                          /* declare symbol characters */
  
-  for (p = line_comment_chars; *p; p++)
-    {
-      lex[*p] = LEX_IS_LINE_COMMENT_START;
-    }                          /* declare line comment chars */
-
    for (p = comment_chars; *p; p++)
      {
-      lex[*p] = LEX_IS_COMMENT_START;
+      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
      }                          /* declare comment chars */
  
+  for (p = line_comment_chars; *p; p++)
+    {
+      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
+    }                          /* declare line comment chars */
+
    for (p = line_separator_chars; *p; p++)
      {
-      lex[*p] = LEX_IS_LINE_SEPARATOR;
+      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
      }                          /* declare line separators */
  
    /* Only allow slash-star comments if slash is not in use */
@@ -104,13 +111,22 @@ do_scrub_begin ()
      {
        lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
      }
-  /* FIXME-soon.  This is a bad hack but otherwise, we
-          can't do c-style comments when '/' is a line
-          comment char. xoxorich. */
+  /* FIXME-soon.  This is a bad hack but otherwise, we can't do
+     c-style comments when '/' is a line comment char. xoxorich. */
    if (lex['*'] == 0)
      {
        lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
      }
+
+  if (flag_mri)
+    {
+      lex['\''] = LEX_IS_STRINGQUOTE;
+      lex[';'] = LEX_IS_COMMENT_START;
+      lex['*'] = LEX_IS_LINE_COMMENT_START;
+      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
+         then it can't be used in an expression.  */
+      lex['!'] = LEX_IS_LINE_COMMENT_START;
+    }
  }                              /* do_scrub_begin() */
  
  FILE *scrub_file;
@@ -177,7 +193,7 @@ app_push ()
    saved->state = state;
    saved->old_state = old_state;
    saved->out_string = out_string;
-  bcopy (saved->out_buf, out_buf, sizeof (out_buf));
+  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
    saved->add_newlines = add_newlines;
    saved->scrub_string = scrub_string;
    saved->scrub_last_string = scrub_last_string;
@@ -197,7 +213,7 @@ app_pop (arg)
    state = saved->state;
    old_state = saved->old_state;
    out_string = saved->out_string;
-  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
+  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
    add_newlines = saved->add_newlines;
    scrub_string = saved->scrub_string;
    scrub_last_string = saved->scrub_last_string;
@@ -206,9 +222,11 @@ app_pop (arg)
    free (arg);
  }                              /* app_pop() */
  
-int 
+/* @@ This assumes that \n &c are the same on host and target.  This is not
+   necessarily true.  */
+static int 
  process_escape (ch)
-     char ch;
+     int ch;
  {
    switch (ch)
      {
@@ -225,7 +243,7 @@ process_escape (ch)
      case '\'':
        return '\'';
      case '"':
-      return '\'';
+      return '\"';
      default:
        return ch;
      }
@@ -242,13 +260,35 @@ do_scrub_next_char (get, unget)
           4: after putting out a .line, put out digits
           5: parsing a string, then go to old-state
           6: putting out \ escape in a "d string.
-         7: After putting out a .app-file, put out string.
-         8: After putting out a .app-file string, flush until newline.
-         -1: output string in out_string and go to the state in old_state
-         -2: flush text until a '*' '/' is seen, then go to state old_state
+         7: After putting out a .appfile, put out string.
+         8: After putting out a .appfile string, flush until newline.
+         9: After seeing symbol char in state 3 (keep 1white after symchar)
+        10: After seeing whitespace in state 9 (keep white before symchar)
+        11: After seeing a symbol character in state 0 (eg a label definition)
+        -1: output string in out_string and go to the state in old_state
+        -2: flush text until a '*' '/' is seen, then go to state old_state
           */
  
+  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
+     constructs like ``.loc 1 20''.  This was turning into ``.loc
+     120''.  States 9 and 10 ensure that a space is never dropped in
+     between characters which could appear in an identifier.  Ian
+     Taylor, ian@cygnus.com.
+
+     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
+     correctly on the PA (and any other target where colons are optional).
+     Jeff Law, law@cs.utah.edu.  */
+
+  /* This is purely an optimization hack, and relies on gcc's inlining
+     capability.  */
+#if defined (__GNUC__) && defined (__OPTIMIZE__)
+#define GET()  (get == scrub_from_file ? scrub_from_file () : (*get) ())
+#else
+#define GET()  ((*get) ())
+#endif
+
    register int ch, ch2 = 0;
+  int not_cpp_line = 0;
  
    switch (state)
      {
@@ -266,14 +306,14 @@ do_scrub_next_char (get, unget)
         {
           do
             {
-             ch = (*get) ();
+             ch = GET ();
             }
           while (ch != EOF && ch != '\n' && ch != '*');
           if (ch == '\n' || ch == EOF)
             return ch;
  
           /* At this point, ch must be a '*' */
-         while ((ch = (*get) ()) == '*')
+         while ((ch = GET ()) == '*')
             {
               ;
             }
@@ -285,17 +325,17 @@ do_scrub_next_char (get, unget)
        return ' ';
  
      case 4:
-      ch = (*get) ();
+      ch = GET ();
        if (ch == EOF || (ch >= '0' && ch <= '9'))
         return ch;
        else
         {
           while (ch != EOF && IS_WHITESPACE (ch))
-           ch = (*get) ();
+           ch = GET ();
           if (ch == '"')
             {
               (*unget) (ch);
-             out_string = "\n.app-file ";
+             out_string = "\n\t.appfile ";
               old_state = 7;
               state = -1;
               return *out_string++;
@@ -303,23 +343,26 @@ do_scrub_next_char (get, unget)
           else
             {
               while (ch != EOF && ch != '\n')
-               ch = (*get) ();
+               ch = GET ();
+             state = 0;
               return ch;
             }
         }
  
      case 5:
-      ch = (*get) ();
+      ch = GET ();
        if (lex[ch] == LEX_IS_STRINGQUOTE)
         {
           state = old_state;
           return ch;
         }
+#ifndef NO_STRING_ESCAPES
        else if (ch == '\\')
         {
           state = 6;
           return ch;
         }
+#endif
        else if (ch == EOF)
         {
           as_warn ("End of file in string: inserted '\"'");
@@ -334,12 +377,11 @@ do_scrub_next_char (get, unget)
  
      case 6:
        state = 5;
-      ch = (*get) ();
+      ch = GET ();
        switch (ch)
         {
-         /* This is neet.  Turn "string
-                          more string" into "string\n  more string"
-                          */
+         /* Handle strings broken across lines, by turning '\n' into
+            '\\' and 'n'.  */
         case '\n':
           (*unget) ('n');
           add_newlines++;
@@ -352,9 +394,9 @@ do_scrub_next_char (get, unget)
         case 'n':
         case 'r':
         case 't':
-#ifdef BACKSLASH_V
         case 'v':
-#endif /* BACKSLASH_V */
+       case 'x':
+       case 'X':
         case '0':
         case '1':
         case '2':
@@ -381,28 +423,32 @@ do_scrub_next_char (get, unget)
        return ch;
  
      case 7:
-      ch = (*get) ();
+      ch = GET ();
        state = 5;
        old_state = 8;
        return ch;
  
      case 8:
        do
-       ch = (*get) ();
+       ch = GET ();
        while (ch != '\n');
        state = 0;
        return ch;
      }
  
-  /* OK, we are somewhere in states 0 through 4 */
+  /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
  
    /* flushchar: */
-  ch = (*get) ();
+  ch = GET ();
  recycle:
    if (ch == EOF)
      {
        if (state != 0)
-       as_warn ("End of file not at end of a line: Newline inserted.");
+       {
+         as_warn ("End of file not at end of a line: Newline inserted.");
+         state = 0;
+         return '\n';
+       }
        return ch;
      }
  
@@ -410,27 +456,40 @@ recycle:
      {
      case LEX_IS_WHITESPACE:
        do
-       ch = (*get) ();
+       /* Preserve a single whitespace character at the beginning of
+          a line.  */
+       if (state == 0)
+         {
+           state = 1;
+           return ch;
+         }
+       else
+         ch = GET ();
        while (ch != EOF && IS_WHITESPACE (ch));
        if (ch == EOF)
         return ch;
  
-      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
+      if (IS_COMMENT (ch)
+         || (state == 0 && IS_LINE_COMMENT (ch))
+         || ch == '/'
+         || IS_LINE_SEPARATOR (ch))
         {
+         /* cpp never outputs a leading space before the #, so try to
+            avoid being confused.  */
+         not_cpp_line = 1;
           goto recycle;
         }
-#ifdef MRI
-      (*unget) (ch);           /* Put back */
-      return ' ';              /* Always return one space at start of line */
-#endif
  
-      /* If we're in state 2, we've seen a non-white
-                  character followed by whitespace.  If the next
-                  character is ':', this is whitespace after a label
-                  name which we can ignore.  */
-      if (state == 2 && lex[ch] == LEX_IS_COLON)
+      /* If we're in state 2 or 11, we've seen a non-white character
+        followed by whitespace.  If the next character is ':', this
+        is whitespace after a label name which we normally must
+        ignore.  In MRI mode, though, spaces are not permitted
+        between the label and the colon.  */
+      if ((state == 2 || state == 11)
+         && lex[ch] == LEX_IS_COLON
+         && ! flag_mri)
         {
-         state = 0;
+         state = 1;
           return ch;
         }
  
@@ -440,27 +499,37 @@ recycle:
           state++;
           goto recycle;         /* Punted leading sp */
         case 1:
-         BAD_CASE (state);     /* We can't get here */
+         /* We can arrive here if we leave a leading whitespace character
+            at the beginning of a line.  */
+         goto recycle;
         case 2:
-         state++;
+         state = 3;
           (*unget) (ch);
           return ' ';           /* Sp after opco */
         case 3:
           goto recycle;         /* Sp in operands */
+       case 9:
+       case 10:
+         state = 10;           /* Sp after symbol char */
+         goto recycle;
+       case 11:
+         state = 1;
+         (*unget) (ch);
+         return ' ';           /* Sp after label definition.  */
         default:
           BAD_CASE (state);
         }
        break;
  
      case LEX_IS_TWOCHAR_COMMENT_1ST:
-      ch2 = (*get) ();
+      ch2 = GET ();
        if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
         {
           for (;;)
             {
               do
                 {
-                 ch2 = (*get) ();
+                 ch2 = GET ();
                   if (ch2 != EOF && IS_NEWLINE (ch2))
                     add_newlines++;
                 }
@@ -470,7 +539,7 @@ recycle:
               while (ch2 != EOF &&
                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
                 {
-                 ch2 = (*get) ();
+                 ch2 = GET ();
                 }
  
               if (ch2 == EOF
@@ -488,18 +557,36 @@ recycle:
         {
           if (ch2 != EOF)
             (*unget) (ch2);
+         if (state == 9 || state == 10)
+           state = 3;
           return ch;
         }
        break;
  
      case LEX_IS_STRINGQUOTE:
-      old_state = state;
+      if (state == 10)
+       {
+         /* Preserve the whitespace in foo "bar" */
+         (*unget) (ch);
+         state = 3;
+         return ' ';
+       }
+      else if (state == 9)
+       old_state = 3;
+      else
+       old_state = state;
        state = 5;
        return ch;
-#ifndef MRI
  #ifndef IEEE_STYLE
      case LEX_IS_ONECHAR_QUOTE:
-      ch = (*get) ();
+      if (state == 10)
+       {
+         /* Preserve the whitespace in foo 'b' */
+         (*unget) (ch);
+         state = 3;
+         return ' ';
+       }
+      ch = GET ();
        if (ch == EOF)
         {
           as_warn ("End-of-file after a one-character quote; \\000 inserted");
@@ -507,15 +594,14 @@ recycle:
         }
        if (ch == '\\')
         {
-         ch = (*get) ();
+         ch = GET ();
           ch = process_escape (ch);
         }
        sprintf (out_buf, "%d", (int) (unsigned char) ch);
  
  
-      /* None of these 'x constants for us.  We want 'x'.
-                */
-      if ((ch = (*get) ()) != '\'')
+      /* None of these 'x constants for us.  We want 'x'.  */
+      if ((ch = GET ()) != '\'')
         {
  #ifdef REQUIRE_CHAR_CLOSE_QUOTE
           as_warn ("Missing close quote: (assumed)");
@@ -527,15 +613,19 @@ recycle:
         {
           return out_buf[0];
         }
-      old_state = state;
+      if (state == 9)
+       old_state = 3;
+      else
+       old_state = state;
        state = -1;
        out_string = out_buf;
        return *out_string++;
-#endif
  #endif
      case LEX_IS_COLON:
-      if (state != 3)
-       state = 0;
+      if (state == 9 || state == 10)
+       state = 3;
+      else if (state != 3)
+       state = 1;
        return ch;
  
      case LEX_IS_NEWLINE:
@@ -552,64 +642,90 @@ recycle:
        return ch;
  
      case LEX_IS_LINE_COMMENT_START:
-      if (state != 0)          /* Not at start of line, act normal */
-       goto de_fault;
-
-      /* FIXME-someday: The two character comment stuff was badly
-    thought out.  On i386, we want '/' as line comment start
-    AND we want C style comments.  hence this hack.  The
-    whole lexical process should be reworked.  xoxorich.  */
-
-      if (ch == '/' && (ch2 = (*get) ()) == '*')
-       {
-         state = -2;
-         return (do_scrub_next_char (get, unget));
-       }
-      else
+      if (state == 0)          /* Only comment at start of line.  */
         {
-         (*unget) (ch2);
-       }                       /* bad hack */
+         /* FIXME-someday: The two character comment stuff was badly
+            thought out.  On i386, we want '/' as line comment start
+            AND we want C style comments.  hence this hack.  The
+            whole lexical process should be reworked.  xoxorich.  */
+         if (ch == '/')
+           {
+             ch2 = GET ();
+             if (ch2 == '*')
+               {
+                 state = -2;
+                 return (do_scrub_next_char (get, unget));
+               }
+             else
+               {
+                 (*unget) (ch2);
+               }
+           }                   /* bad hack */
  
-      do
-       ch = (*get) ();
-      while (ch != EOF && IS_WHITESPACE (ch));
-      if (ch == EOF)
-       {
-         as_warn ("EOF in comment:  Newline inserted");
-         return '\n';
-       }
-      if (ch < '0' || ch > '9')
-       {
-         /* Non-numerics:  Eat whole comment line */
-         while (ch != EOF && !IS_NEWLINE (ch))
-           ch = (*get) ();
+         if (ch != '#')
+           not_cpp_line = 1;
+
+         do
+           ch = GET ();
+         while (ch != EOF && IS_WHITESPACE (ch));
           if (ch == EOF)
-           as_warn ("EOF in Comment: Newline inserted");
-         state = 0;
-         return '\n';
+           {
+             as_warn ("EOF in comment:  Newline inserted");
+             return '\n';
+           }
+         if (ch < '0' || ch > '9' || not_cpp_line)
+           {
+             /* Non-numerics:  Eat whole comment line */
+             while (ch != EOF && !IS_NEWLINE (ch))
+               ch = GET ();
+             if (ch == EOF)
+               as_warn ("EOF in Comment: Newline inserted");
+             state = 0;
+             return '\n';
+           }
+         /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
+         (*unget) (ch);
+         old_state = 4;
+         state = -1;
+         out_string = "\t.appline ";
+         return *out_string++;
         }
-      /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
-      (*unget) (ch);
-      old_state = 4;
-      state = -1;
-      out_string = ".line ";
-      return *out_string++;
  
+      /* We have a line comment character which is not at the start of
+        a line.  If this is also a normal comment character, fall
+        through.  Otherwise treat it as a default character.  */
+      if ((flag_mri && (ch == '!' || ch == '*'))
+         || strchr (comment_chars, ch) == NULL)
+       goto de_fault;
+      /* Fall through.  */
      case LEX_IS_COMMENT_START:
        do
-       ch = (*get) ();
+       ch = GET ();
        while (ch != EOF && !IS_NEWLINE (ch));
        if (ch == EOF)
         as_warn ("EOF in comment:  Newline inserted");
        state = 0;
        return '\n';
  
+    case LEX_IS_SYMBOL_COMPONENT:
+      if (state == 10)
+       {
+         /* This is a symbol character following another symbol
+            character, with whitespace in between.  We skipped the
+            whitespace earlier, so output it now.  */
+         (*unget) (ch);
+         state = 3;
+         return ' ';
+       }
+      if (state == 3)
+       state = 9;
+      /* Fall through.  */
      default:
      de_fault:
        /* Some relatively `normal' character.  */
        if (state == 0)
         {
-         state = 2;            /* Now seeing opcode */
+         state = 11;           /* Now seeing label definition */
           return ch;
         }
        else if (state == 1)
@@ -617,12 +733,25 @@ recycle:
           state = 2;            /* Ditto */
           return ch;
         }
+      else if (state == 9)
+       {
+         if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
+           state = 3;
+         return ch;
+       }
+      else if (state == 10)
+       {
+         state = 3;
+         return ch;
+       }
        else
         {
           return ch;            /* Opcode or operands already */
         }
      }
    return -1;
+
+#undef GET
  }
  
  #ifdef TEST
@@ -648,11 +777,4 @@ as_warn (str)
  
  #endif
  
-/*
- * Local Variables:
- * comment-column: 0
- * fill-column: 131
- * End:
- */
-
  /* end of app.c */