GDB/MI: Add new "--language LANG" command option.

[deliverable/binutils-gdb.git] / gas / app.c
diff --git a/gas/app.c b/gas/app.c

index 46273c2823b61861c96951ec5ca3a5d6395182d9..ec3a35ee865ffdb521ab784e70e38f38d0c6cf6c 100644 (file)
--- a/gas/app.c
+++ b/gas/app.c
@@ -1,32 +1,32 @@
  /* This is the Assembler Pre-Processor
  /* This is the Assembler Pre-Processor
-   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000
+   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+   1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012
     Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
  
     GAS is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
  
     GAS is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
+   the Free Software Foundation; either version 3, or (at your option)
     any later version.
  
     any later version.
  
-   GAS is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   GAS is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
  
     You should have received a copy of the GNU General Public License
     along with GAS; see the file COPYING.  If not, write to the Free
  
     You should have received a copy of the GNU General Public License
     along with GAS; see the file COPYING.  If not, write to the Free
-   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.  */
+   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
  
  
-/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
-/* App, the assembler pre-processor.  This pre-processor strips out excess
-   spaces, turns single-quoted characters into a decimal constant, and turns
-   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
-   pair.  This needs better error-handling.  */
+/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
+/* App, the assembler pre-processor.  This pre-processor strips out
+   excess spaces, turns single-quoted characters into a decimal
+   constant, and turns the # in # <number> <filename> <garbage> into a
+   .linefile.  This needs better error-handling.  */
  
  
-#include <stdio.h>
-#include "as.h"                        /* For BAD_CASE() only */
+#include "as.h"
  
  #if (__STDC__ != 1)
  #ifndef const
  
  #if (__STDC__ != 1)
  #ifndef const
@@ -34,18 +34,22 @@
  #endif
  #endif
  
  #endif
  #endif
  
+#ifdef H_TICK_HEX
+int enable_h_tick_hex = 0;
+#endif
+
  #ifdef TC_M68K
  /* Whether we are scrubbing in m68k MRI mode.  This is different from
     flag_m68k_mri, because the two flags will be affected by the .mri
     pseudo-op at different times.  */
  static int scrub_m68k_mri;
  #ifdef TC_M68K
  /* Whether we are scrubbing in m68k MRI mode.  This is different from
     flag_m68k_mri, because the two flags will be affected by the .mri
     pseudo-op at different times.  */
  static int scrub_m68k_mri;
-#else
-#define scrub_m68k_mri 0
-#endif
  
  /* The pseudo-op which switches in and out of MRI mode.  See the
     comment in do_scrub_chars.  */
  static const char mri_pseudo[] = ".mri 0";
  
  /* The pseudo-op which switches in and out of MRI mode.  See the
     comment in do_scrub_chars.  */
  static const char mri_pseudo[] = ".mri 0";
+#else
+#define scrub_m68k_mri 0
+#endif
  
  #if defined TC_ARM && defined OBJ_ELF
  /* The pseudo-op for which we need to special-case `@' characters.
  
  #if defined TC_ARM && defined OBJ_ELF
  /* The pseudo-op for which we need to special-case `@' characters.
@@ -77,22 +81,26 @@ static const char symbol_chars[] =
  #ifdef DOUBLEBAR_PARALLEL
  #define LEX_IS_DOUBLEBAR_1ST           13
  #endif
  #ifdef DOUBLEBAR_PARALLEL
  #define LEX_IS_DOUBLEBAR_1ST           13
  #endif
+#define LEX_IS_PARALLEL_SEPARATOR      14
+#ifdef H_TICK_HEX
+#define LEX_IS_H                       15
+#endif
  #define IS_SYMBOL_COMPONENT(c)         (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  #define IS_WHITESPACE(c)               (lex[c] == LEX_IS_WHITESPACE)
  #define IS_LINE_SEPARATOR(c)           (lex[c] == LEX_IS_LINE_SEPARATOR)
  #define IS_SYMBOL_COMPONENT(c)         (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  #define IS_WHITESPACE(c)               (lex[c] == LEX_IS_WHITESPACE)
  #define IS_LINE_SEPARATOR(c)           (lex[c] == LEX_IS_LINE_SEPARATOR)
+#define IS_PARALLEL_SEPARATOR(c)       (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
  #define IS_COMMENT(c)                  (lex[c] == LEX_IS_COMMENT_START)
  #define IS_LINE_COMMENT(c)             (lex[c] == LEX_IS_LINE_COMMENT_START)
  #define        IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  
  #define IS_COMMENT(c)                  (lex[c] == LEX_IS_COMMENT_START)
  #define IS_LINE_COMMENT(c)             (lex[c] == LEX_IS_LINE_COMMENT_START)
  #define        IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  
-static int process_escape PARAMS ((int));
+static int process_escape (int);
  
  /* FIXME-soon: The entire lexer/parser thingy should be
     built statically at compile time rather than dynamically
     each and every time the assembler is run.  xoxorich.  */
  
  void
  
  /* FIXME-soon: The entire lexer/parser thingy should be
     built statically at compile time rather than dynamically
     each and every time the assembler is run.  xoxorich.  */
  
  void
-do_scrub_begin (m68k_mri)
-     int m68k_mri ATTRIBUTE_UNUSED;
+do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
  {
    const char *p;
    int c;
  {
    const char *p;
    int c;
@@ -112,7 +120,7 @@ do_scrub_begin (m68k_mri)
        lex['"'] = LEX_IS_STRINGQUOTE;
  
  #if ! defined (TC_HPPA) && ! defined (TC_I370)
        lex['"'] = LEX_IS_STRINGQUOTE;
  
  #if ! defined (TC_HPPA) && ! defined (TC_I370)
-      /* I370 uses single-quotes to delimit integer, float constants */
+      /* I370 uses single-quotes to delimit integer, float constants.  */
        lex['\''] = LEX_IS_ONECHAR_QUOTE;
  #endif
  
        lex['\''] = LEX_IS_ONECHAR_QUOTE;
  #endif
  
@@ -127,9 +135,7 @@ do_scrub_begin (m68k_mri)
    /* Note that these override the previous defaults, e.g. if ';' is a
       comment char, then it isn't a line separator.  */
    for (p = symbol_chars; *p; ++p)
    /* Note that these override the previous defaults, e.g. if ';' is a
       comment char, then it isn't a line separator.  */
    for (p = symbol_chars; *p; ++p)
-    {
-      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
-    }                          /* declare symbol characters */
+    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
  
    for (c = 128; c < 256; ++c)
      lex[c] = LEX_IS_SYMBOL_COMPONENT;
  
    for (c = 128; c < 256; ++c)
      lex[c] = LEX_IS_SYMBOL_COMPONENT;
@@ -149,26 +155,25 @@ do_scrub_begin (m68k_mri)
  #define tc_comment_chars comment_chars
  #endif
    for (p = tc_comment_chars; *p; p++)
  #define tc_comment_chars comment_chars
  #endif
    for (p = tc_comment_chars; *p; p++)
-    {
-      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
-    }                          /* declare comment chars */
+    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
  
    for (p = line_comment_chars; *p; p++)
  
    for (p = line_comment_chars; *p; p++)
-    {
-      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
-    }                          /* declare line comment chars */
+    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
  
    for (p = line_separator_chars; *p; p++)
  
    for (p = line_separator_chars; *p; p++)
-    {
-      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
-    }                          /* declare line separators */
+    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
+
+#ifdef tc_parallel_separator_chars
+  /* This macro permits the processor to specify all characters which
+     separate parallel insns on the same line.  */
+  for (p = tc_parallel_separator_chars; *p; p++)
+    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
+#endif
  
    /* Only allow slash-star comments if slash is not in use.
       FIXME: This isn't right.  We should always permit them.  */
    if (lex['/'] == 0)
  
    /* Only allow slash-star comments if slash is not in use.
       FIXME: This isn't right.  We should always permit them.  */
    if (lex['/'] == 0)
-    {
-      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
-    }
+    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
  
  #ifdef TC_M68K
    if (m68k_mri)
  
  #ifdef TC_M68K
    if (m68k_mri)
@@ -177,7 +182,7 @@ do_scrub_begin (m68k_mri)
        lex[';'] = LEX_IS_COMMENT_START;
        lex['*'] = LEX_IS_LINE_COMMENT_START;
        /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
        lex[';'] = LEX_IS_COMMENT_START;
        lex['*'] = LEX_IS_LINE_COMMENT_START;
        /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
-         then it can't be used in an expression.  */
+        then it can't be used in an expression.  */
        lex['!'] = LEX_IS_LINE_COMMENT_START;
      }
  #endif
        lex['!'] = LEX_IS_LINE_COMMENT_START;
      }
  #endif
@@ -189,19 +194,27 @@ do_scrub_begin (m68k_mri)
    lex['|'] = LEX_IS_DOUBLEBAR_1ST;
  #endif
  #ifdef TC_D30V
    lex['|'] = LEX_IS_DOUBLEBAR_1ST;
  #endif
  #ifdef TC_D30V
-  /* must do this is we want VLIW instruction with "->" or "<-" */
+  /* Must do this if we want VLIW instructions with "->" or "<-".  */
    lex['-'] = LEX_IS_SYMBOL_COMPONENT;
  #endif
    lex['-'] = LEX_IS_SYMBOL_COMPONENT;
  #endif
-}                              /* do_scrub_begin() */
  
  
-/* Saved state of the scrubber */
+#ifdef H_TICK_HEX
+  if (enable_h_tick_hex)
+    {
+      lex['h'] = LEX_IS_H;
+      lex['H'] = LEX_IS_H;
+    }
+#endif
+}
+
+/* Saved state of the scrubber.  */
  static int state;
  static int old_state;
  static char *out_string;
  static char out_buf[20];
  static int add_newlines;
  static char *saved_input;
  static int state;
  static int old_state;
  static char *out_string;
  static char out_buf[20];
  static int add_newlines;
  static char *saved_input;
-static int saved_input_len;
+static size_t saved_input_len;
  static char input_buffer[32 * 1024];
  static const char *mri_state;
  static char mri_last_ch;
  static char input_buffer[32 * 1024];
  static const char *mri_state;
  static char mri_last_ch;
@@ -211,14 +224,15 @@ static char mri_last_ch;
     state at the time .include is interpreted is completely unrelated.
     That's why we have to save it all.  */
  
     state at the time .include is interpreted is completely unrelated.
     That's why we have to save it all.  */
  
-struct app_save {
+struct app_save
+{
    int          state;
    int          old_state;
    char *       out_string;
    char         out_buf[sizeof (out_buf)];
    int          add_newlines;
    char *       saved_input;
    int          state;
    int          old_state;
    char *       out_string;
    char         out_buf[sizeof (out_buf)];
    int          add_newlines;
    char *       saved_input;
-  int          saved_input_len;
+  size_t       saved_input_len;
  #ifdef TC_M68K
    int          scrub_m68k_mri;
  #endif
  #ifdef TC_M68K
    int          scrub_m68k_mri;
  #endif
@@ -230,7 +244,7 @@ struct app_save {
  };
  
  char *
  };
  
  char *
-app_push ()
+app_push (void)
  {
    register struct app_save *saved;
  
  {
    register struct app_save *saved;
  
@@ -244,7 +258,7 @@ app_push ()
      saved->saved_input = NULL;
    else
      {
      saved->saved_input = NULL;
    else
      {
-      saved->saved_input = xmalloc (saved_input_len);
+      saved->saved_input = (char *) xmalloc (saved_input_len);
        memcpy (saved->saved_input, saved_input, saved_input_len);
        saved->saved_input_len = saved_input_len;
      }
        memcpy (saved->saved_input, saved_input, saved_input_len);
        saved->saved_input_len = saved_input_len;
      }
@@ -261,13 +275,13 @@ app_push ()
  
    state = 0;
    saved_input = NULL;
  
    state = 0;
    saved_input = NULL;
+  add_newlines = 0;
  
    return (char *) saved;
  }
  
  void
  
    return (char *) saved;
  }
  
  void
-app_pop (arg)
-     char *arg;
+app_pop (char *arg)
  {
    register struct app_save *saved = (struct app_save *) arg;
  
  {
    register struct app_save *saved = (struct app_save *) arg;
  
@@ -281,7 +295,7 @@ app_pop (arg)
      saved_input = NULL;
    else
      {
      saved_input = NULL;
    else
      {
-      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
+      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
        memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
        saved_input = input_buffer;
        saved_input_len = saved->saved_input_len;
        memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
        saved_input = input_buffer;
        saved_input_len = saved->saved_input_len;
@@ -297,13 +311,13 @@ app_pop (arg)
  #endif
  
    free (arg);
  #endif
  
    free (arg);
-}                              /* app_pop() */
+}
  
  /* @@ This assumes that \n &c are the same on host and target.  This is not
     necessarily true.  */
  
  /* @@ This assumes that \n &c are the same on host and target.  This is not
     necessarily true.  */
+
  static int
  static int
-process_escape (ch)
-     int ch;
+process_escape (int ch)
  {
    switch (ch)
      {
  {
    switch (ch)
      {
@@ -337,45 +351,56 @@ process_escape (ch)
     machine, and saves its state so that it may return at any point.
     This is the way the old code used to work.  */
  
     machine, and saves its state so that it may return at any point.
     This is the way the old code used to work.  */
  
-int
-do_scrub_chars (get, tostart, tolen)
-     int (*get) PARAMS ((char *, int));
-     char *tostart;
-     int tolen;
+size_t
+do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
  {
    char *to = tostart;
    char *toend = tostart + tolen;
    char *from;
    char *fromend;
  {
    char *to = tostart;
    char *toend = tostart + tolen;
    char *from;
    char *fromend;
-  int fromlen;
+  size_t fromlen;
    register int ch, ch2 = 0;
    register int ch, ch2 = 0;
+  /* Character that started the string we're working on.  */
+  static char quotechar;
  
    /*State 0: beginning of normal line
           1: After first whitespace on line (flush more white)
           2: After first non-white (opcode) on line (keep 1white)
           3: after second white on line (into operands) (flush white)
  
    /*State 0: beginning of normal line
           1: After first whitespace on line (flush more white)
           2: After first non-white (opcode) on line (keep 1white)
           3: after second white on line (into operands) (flush white)
-         4: after putting out a .line, put out digits
+         4: after putting out a .linefile, put out digits
           5: parsing a string, then go to old-state
           6: putting out \ escape in a "d string.
           5: parsing a string, then go to old-state
           6: putting out \ escape in a "d string.
-         7: After putting out a .appfile, put out string.
-         8: After putting out a .appfile string, flush until newline.
+         7: no longer used
+         8: no longer used
           9: After seeing symbol char in state 3 (keep 1white after symchar)
          10: After seeing whitespace in state 9 (keep white before symchar)
          11: After seeing a symbol character in state 0 (eg a label definition)
          -1: output string in out_string and go to the state in old_state
          -2: flush text until a '*' '/' is seen, then go to state old_state
  #ifdef TC_V850
           9: After seeing symbol char in state 3 (keep 1white after symchar)
          10: After seeing whitespace in state 9 (keep white before symchar)
          11: After seeing a symbol character in state 0 (eg a label definition)
          -1: output string in out_string and go to the state in old_state
          -2: flush text until a '*' '/' is seen, then go to state old_state
  #ifdef TC_V850
-         12: After seeing a dash, looking for a second dash as a start of comment.
+        12: After seeing a dash, looking for a second dash as a start
+            of comment.
  #endif
  #ifdef DOUBLEBAR_PARALLEL
  #endif
  #ifdef DOUBLEBAR_PARALLEL
-        13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
+        13: After seeing a vertical bar, looking for a second
+            vertical bar as a parallel expression separator.
+#endif
+#ifdef TC_PREDICATE_START_CHAR
+        14: After seeing a predicate start character at state 0, looking
+            for a predicate end character as predicate.
+        15: After seeing a predicate start character at state 1, looking
+            for a predicate end character as predicate.
+#endif
+#ifdef TC_Z80
+        16: After seeing an 'a' or an 'A' at the start of a symbol
+        17: After seeing an 'f' or an 'F' in state 16
  #endif
           */
  
    /* I added states 9 and 10 because the MIPS ECOFF assembler uses
       constructs like ``.loc 1 20''.  This was turning into ``.loc
       120''.  States 9 and 10 ensure that a space is never dropped in
  #endif
           */
  
    /* I added states 9 and 10 because the MIPS ECOFF assembler uses
       constructs like ``.loc 1 20''.  This was turning into ``.loc
       120''.  States 9 and 10 ensure that a space is never dropped in
-     between characters which could appear in a identifier.  Ian
+     between characters which could appear in an identifier.  Ian
       Taylor, ian@cygnus.com.
  
       I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
       Taylor, ian@cygnus.com.
  
       I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
@@ -412,13 +437,13 @@ do_scrub_chars (get, tostart, tolen)
       I don't want to make such a significant change to the assembler's
       memory usage.  */
  
       I don't want to make such a significant change to the assembler's
       memory usage.  */
  
-#define PUT(pch)                       \
-  do                                   \
-    {                                  \
-      *to++ = (pch);                   \
-      if (to >= toend)                 \
-        goto tofull;                   \
-    }                                  \
+#define PUT(pch)                               \
+  do                                           \
+    {                                          \
+      *to++ = (pch);                           \
+      if (to >= toend)                         \
+       goto tofull;                            \
+    }                                          \
    while (0)
  
    if (saved_input != NULL)
    while (0)
  
    if (saved_input != NULL)
@@ -438,8 +463,8 @@ do_scrub_chars (get, tostart, tolen)
    while (1)
      {
        /* The cases in this switch end with continue, in order to
    while (1)
      {
        /* The cases in this switch end with continue, in order to
-         branch back to the top of this while loop and generate the
-         next output character in the appropriate state.  */
+        branch back to the top of this while loop and generate the
+        next output character in the appropriate state.  */
        switch (state)
         {
         case -1:
        switch (state)
         {
         case -1:
@@ -501,14 +526,10 @@ do_scrub_chars (get, tostart, tolen)
                 ch = GET ();
               if (ch == '"')
                 {
                 ch = GET ();
               if (ch == '"')
                 {
-                 UNGET (ch);
-                 if (scrub_m68k_mri)
-                   out_string = "\n\tappfile ";
-                 else
-                   out_string = "\n\t.appfile ";
-                 old_state = 7;
-                 state = -1;
-                 PUT (*out_string++);
+                 quotechar = ch;
+                 state = 5;
+                 old_state = 3;
+                 PUT (ch);
                 }
               else
                 {
                 }
               else
                 {
@@ -522,21 +543,18 @@ do_scrub_chars (get, tostart, tolen)
  
         case 5:
           /* We are going to copy everything up to a quote character,
  
         case 5:
           /* We are going to copy everything up to a quote character,
-             with special handling for a backslash.  We try to
-             optimize the copying in the simple case without using the
-             GET and PUT macros.  */
+            with special handling for a backslash.  We try to
+            optimize the copying in the simple case without using the
+            GET and PUT macros.  */
           {
             char *s;
           {
             char *s;
-           int len;
+           ptrdiff_t len;
  
             for (s = from; s < fromend; s++)
               {
                 ch = *s;
  
             for (s = from; s < fromend; s++)
               {
                 ch = *s;
-               /* This condition must be changed if the type of any
-                   other character can be LEX_IS_STRINGQUOTE.  */
                 if (ch == '\\'
                 if (ch == '\\'
-                   || ch == '"'
-                   || ch == '\''
+                   || ch == quotechar
                     || ch == '\n')
                   break;
               }
                     || ch == '\n')
                   break;
               }
@@ -548,18 +566,26 @@ do_scrub_chars (get, tostart, tolen)
                 memcpy (to, from, len);
                 to += len;
                 from += len;
                 memcpy (to, from, len);
                 to += len;
                 from += len;
+               if (to >= toend)
+                 goto tofull;
               }
           }
  
           ch = GET ();
           if (ch == EOF)
             {
               }
           }
  
           ch = GET ();
           if (ch == EOF)
             {
-             as_warn (_("end of file in string: inserted '\"'"));
+             /* This buffer is here specifically so
+                that the UNGET below will work.  */
+             static char one_char_buf[1];
+
+             as_warn (_("end of file in string; '%c' inserted"), quotechar);
               state = old_state;
               state = old_state;
+             from = fromend = one_char_buf + 1;
+             fromlen = 1;
               UNGET ('\n');
               UNGET ('\n');
-             PUT ('"');
+             PUT (quotechar);
             }
             }
-         else if (lex[ch] == LEX_IS_STRINGQUOTE)
+         else if (ch == quotechar)
             {
               state = old_state;
               PUT (ch);
             {
               state = old_state;
               PUT (ch);
@@ -574,8 +600,7 @@ do_scrub_chars (get, tostart, tolen)
           else if (scrub_m68k_mri && ch == '\n')
             {
               /* Just quietly terminate the string.  This permits lines like
           else if (scrub_m68k_mri && ch == '\n')
             {
               /* Just quietly terminate the string.  This permits lines like
-                  bne  label   loop if we haven't reach end yet
-                */
+                  bne  label   loop if we haven't reached the end yet.  */
               state = old_state;
               UNGET (ch);
               PUT ('\'');
               state = old_state;
               UNGET (ch);
               PUT ('\'');
@@ -599,6 +624,11 @@ do_scrub_chars (get, tostart, tolen)
               PUT ('\\');
               continue;
  
               PUT ('\\');
               continue;
  
+           case EOF:
+             as_warn (_("end of file in string; '%c' inserted"), quotechar);
+             PUT (quotechar);
+             continue;
+
             case '"':
             case '\\':
             case 'b':
             case '"':
             case '\\':
             case 'b':
@@ -618,49 +648,94 @@ do_scrub_chars (get, tostart, tolen)
             case '6':
             case '7':
               break;
             case '6':
             case '7':
               break;
-#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
-           default:
-             as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
-             break;
-#else  /* ONLY_STANDARD_ESCAPES */
+
             default:
             default:
-             /* Accept \x as x for any x */
+#ifdef ONLY_STANDARD_ESCAPES
+             as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
+#endif
               break;
               break;
-#endif /* ONLY_STANDARD_ESCAPES */
-
-           case EOF:
-             as_warn (_("End of file in string: '\"' inserted"));
-             PUT ('"');
-             continue;
             }
           PUT (ch);
           continue;
  
             }
           PUT (ch);
           continue;
  
-       case 7:
+#ifdef DOUBLEBAR_PARALLEL
+       case 13:
           ch = GET ();
           ch = GET ();
-         state = 5;
-         old_state = 8;
-         if (ch == EOF)
-           goto fromeof;
-         PUT (ch);
-         continue;
+         if (ch != '|')
+           abort ();
  
  
-       case 8:
-         do
-           ch = GET ();
-         while (ch != '\n' && ch != EOF);
+         /* Reset back to state 1 and pretend that we are parsing a
+            line from just after the first white space.  */
+         state = 1;
+         PUT ('|');
+#ifdef TC_TIC6X
+         /* "||^" is used for SPMASKed instructions.  */
+         ch = GET ();
           if (ch == EOF)
             goto fromeof;
           if (ch == EOF)
             goto fromeof;
-         state = 0;
-         PUT (ch);
+         else if (ch == '^')
+           PUT ('^');
+         else
+           UNGET (ch);
+#endif
           continue;
           continue;
+#endif
+#ifdef TC_Z80
+       case 16:
+         /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
+         ch = GET ();
+         if (ch == 'f' || ch == 'F')
+           {
+             state = 17;
+             PUT (ch);
+           }
+         else
+           {
+             state = 9;
+             break;
+           }
+       case 17:
+         /* We have seen "af" at the start of a symbol,
+            a ' here is a part of that symbol.  */
+         ch = GET ();
+         state = 9;
+         if (ch == '\'')
+           /* Change to avoid warning about unclosed string.  */
+           PUT ('`');
+         else if (ch != EOF)
+           UNGET (ch);
+         break;
+#endif
         }
  
         }
  
-      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
+      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
  
        /* flushchar: */
        ch = GET ();
  
  
        /* flushchar: */
        ch = GET ();
  
+#ifdef TC_PREDICATE_START_CHAR
+      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
+       {
+         state += 14;
+         PUT (ch);
+         continue;
+       }
+      else if (state == 14 || state == 15)
+       {
+         if (ch == TC_PREDICATE_END_CHAR)
+           {
+             state -= 14;
+             PUT (ch);
+             ch = GET ();
+           }
+         else
+           {
+             PUT (ch);
+             continue;
+           }
+       }
+#endif
+
      recycle:
  
  #if defined TC_ARM && defined OBJ_ELF
      recycle:
  
  #if defined TC_ARM && defined OBJ_ELF
@@ -694,9 +769,9 @@ do_scrub_chars (get, tostart, tolen)
  
  #ifdef TC_M68K
        /* We want to have pseudo-ops which control whether we are in
  
  #ifdef TC_M68K
        /* We want to have pseudo-ops which control whether we are in
-         MRI mode or not.  Unfortunately, since m68k MRI mode affects
-         the scrubber, that means that we need a special purpose
-         recognizer here.  */
+        MRI mode or not.  Unfortunately, since m68k MRI mode affects
+        the scrubber, that means that we need a special purpose
+        recognizer here.  */
        if (mri_state == NULL)
         {
           if ((state == 0 || state == 1)
        if (mri_state == NULL)
         {
           if ((state == 0 || state == 1)
@@ -733,14 +808,14 @@ do_scrub_chars (get, tostart, tolen)
           else
             {
              /* We've read the entire pseudo-op.  mri_last_ch is
           else
             {
              /* We've read the entire pseudo-op.  mri_last_ch is
-                 either '0' or '1' indicating whether to enter or
-                 leave MRI mode.  */
+                either '0' or '1' indicating whether to enter or
+                leave MRI mode.  */
               do_scrub_begin (mri_last_ch == '1');
               mri_state = NULL;
  
               /* We continue handling the character as usual.  The
               do_scrub_begin (mri_last_ch == '1');
               mri_state = NULL;
  
               /* We continue handling the character as usual.  The
-                 main gas reader must also handle the .mri pseudo-op
-                 to control expression parsing and the like.  */
+                main gas reader must also handle the .mri pseudo-op
+                to control expression parsing and the like.  */
             }
         }
  #endif
             }
         }
  #endif
@@ -781,9 +856,10 @@ do_scrub_chars (get, tostart, tolen)
           if (lex[ch] == LEX_IS_COLON)
             {
               /* Only keep this white if there's no white *after* the
           if (lex[ch] == LEX_IS_COLON)
             {
               /* Only keep this white if there's no white *after* the
-                 colon.  */
+                colon.  */
               ch2 = GET ();
               ch2 = GET ();
-             UNGET (ch2);
+             if (ch2 != EOF)
+               UNGET (ch2);
               if (!IS_WHITESPACE (ch2))
                 {
                   state = 9;
               if (!IS_WHITESPACE (ch2))
                 {
                   state = 9;
@@ -795,7 +871,8 @@ do_scrub_chars (get, tostart, tolen)
  #endif
           if (IS_COMMENT (ch)
               || ch == '/'
  #endif
           if (IS_COMMENT (ch)
               || ch == '/'
-             || IS_LINE_SEPARATOR (ch))
+             || IS_LINE_SEPARATOR (ch)
+             || IS_PARALLEL_SEPARATOR (ch))
             {
               if (scrub_m68k_mri)
                 {
             {
               if (scrub_m68k_mri)
                 {
@@ -823,9 +900,6 @@ do_scrub_chars (get, tostart, tolen)
  
           switch (state)
             {
  
           switch (state)
             {
-           case 0:
-             state++;
-             goto recycle;     /* Punted leading sp */
             case 1:
               /* We can arrive here if we leave a leading whitespace
                  character at the beginning of a line.  */
             case 1:
               /* We can arrive here if we leave a leading whitespace
                  character at the beginning of a line.  */
@@ -842,7 +916,11 @@ do_scrub_chars (get, tostart, tolen)
               PUT (' ');
               break;
             case 3:
               PUT (' ');
               break;
             case 3:
+#ifndef TC_KEEP_OPERAND_SPACES
+             /* For TI C6X, we keep these spaces as they may separate
+                functional unit specifiers from operands.  */
               if (scrub_m68k_mri)
               if (scrub_m68k_mri)
+#endif
                 {
                   /* In MRI mode, we keep these spaces.  */
                   UNGET (ch);
                 {
                   /* In MRI mode, we keep these spaces.  */
                   UNGET (ch);
@@ -852,7 +930,9 @@ do_scrub_chars (get, tostart, tolen)
               goto recycle;     /* Sp in operands */
             case 9:
             case 10:
               goto recycle;     /* Sp in operands */
             case 9:
             case 10:
+#ifndef TC_KEEP_OPERAND_SPACES
               if (scrub_m68k_mri)
               if (scrub_m68k_mri)
+#endif
                 {
                   /* In MRI mode, we keep these spaces.  */
                   state = 3;
                 {
                   /* In MRI mode, we keep these spaces.  */
                   state = 3;
@@ -868,9 +948,9 @@ do_scrub_chars (get, tostart, tolen)
               else
                 {
                   /* We know that ch is not ':', since we tested that
               else
                 {
                   /* We know that ch is not ':', since we tested that
-                     case above.  Therefore this is not a label, so it
-                     must be the opcode, and we've just seen the
-                     whitespace after it.  */
+                    case above.  Therefore this is not a label, so it
+                    must be the opcode, and we've just seen the
+                    whitespace after it.  */
                   state = 3;
                 }
               UNGET (ch);
                   state = 3;
                 }
               UNGET (ch);
@@ -902,7 +982,7 @@ do_scrub_chars (get, tostart, tolen)
                     break;
  
                   /* This UNGET will ensure that we count newlines
                     break;
  
                   /* This UNGET will ensure that we count newlines
-                     correctly.  */
+                    correctly.  */
                   UNGET (ch2);
                 }
  
                   UNGET (ch2);
                 }
  
@@ -938,15 +1018,16 @@ do_scrub_chars (get, tostart, tolen)
           break;
  
         case LEX_IS_STRINGQUOTE:
           break;
  
         case LEX_IS_STRINGQUOTE:
+         quotechar = ch;
           if (state == 10)
             {
           if (state == 10)
             {
-             /* Preserve the whitespace in foo "bar" */
+             /* Preserve the whitespace in foo "bar".  */
               UNGET (ch);
               state = 3;
               PUT (' ');
  
               /* PUT didn't jump out.  We could just break, but we
               UNGET (ch);
               state = 3;
               PUT (' ');
  
               /* PUT didn't jump out.  We could just break, but we
-                 know what will happen, so optimize a bit.  */
+                know what will happen, so optimize a bit.  */
               ch = GET ();
               old_state = 3;
             }
               ch = GET ();
               old_state = 3;
             }
@@ -960,9 +1041,19 @@ do_scrub_chars (get, tostart, tolen)
  
  #ifndef IEEE_STYLE
         case LEX_IS_ONECHAR_QUOTE:
  
  #ifndef IEEE_STYLE
         case LEX_IS_ONECHAR_QUOTE:
+#ifdef H_TICK_HEX
+         if (state == 9 && enable_h_tick_hex)
+           {
+             char c;
+
+             c = GET ();
+             as_warn ("'%c found after symbol", c);
+             UNGET (c);
+           }
+#endif
           if (state == 10)
             {
           if (state == 10)
             {
-             /* Preserve the whitespace in foo 'b' */
+             /* Preserve the whitespace in foo 'b'.  */
               UNGET (ch);
               state = 3;
               PUT (' ');
               UNGET (ch);
               state = 3;
               PUT (' ');
@@ -991,7 +1082,7 @@ do_scrub_chars (get, tostart, tolen)
           if ((ch = GET ()) != '\'')
             {
  #ifdef REQUIRE_CHAR_CLOSE_QUOTE
           if ((ch = GET ()) != '\'')
             {
  #ifdef REQUIRE_CHAR_CLOSE_QUOTE
-             as_warn (_("Missing close quote: (assumed)"));
+             as_warn (_("missing close quote; (assumed)"));
  #else
               if (ch != EOF)
                 UNGET (ch);
  #else
               if (ch != EOF)
                 UNGET (ch);
@@ -1038,12 +1129,18 @@ do_scrub_chars (get, tostart, tolen)
           PUT (ch);
           break;
  
           PUT (ch);
           break;
  
+       case LEX_IS_PARALLEL_SEPARATOR:
+         state = 1;
+         PUT (ch);
+         break;
+
  #ifdef TC_V850
         case LEX_IS_DOUBLEDASH_1ST:
           ch2 = GET ();
           if (ch2 != '-')
             {
  #ifdef TC_V850
         case LEX_IS_DOUBLEDASH_1ST:
           ch2 = GET ();
           if (ch2 != '-')
             {
-             UNGET (ch2);
+             if (ch2 != EOF)
+               UNGET (ch2);
               goto de_fault;
             }
           /* Read and skip to end of line.  */
               goto de_fault;
             }
           /* Read and skip to end of line.  */
@@ -1052,10 +1149,10 @@ do_scrub_chars (get, tostart, tolen)
               ch = GET ();
             }
           while (ch != EOF && ch != '\n');
               ch = GET ();
             }
           while (ch != EOF && ch != '\n');
+
           if (ch == EOF)
           if (ch == EOF)
-           {
-             as_warn (_("end of file in comment; newline inserted"));
-           }
+           as_warn (_("end of file in comment; newline inserted"));
+
           state = 0;
           PUT ('\n');
           break;
           state = 0;
           PUT ('\n');
           break;
@@ -1063,15 +1160,15 @@ do_scrub_chars (get, tostart, tolen)
  #ifdef DOUBLEBAR_PARALLEL
         case LEX_IS_DOUBLEBAR_1ST:
           ch2 = GET ();
  #ifdef DOUBLEBAR_PARALLEL
         case LEX_IS_DOUBLEBAR_1ST:
           ch2 = GET ();
+         if (ch2 != EOF)
+           UNGET (ch2);
           if (ch2 != '|')
           if (ch2 != '|')
-           {
-             UNGET (ch2);
-             goto de_fault;
-           }
-         /* Reset back to state 1 and pretend that we are parsing a line from
-            just after the first white space.  */
-         state = 1;
-         PUT ('|');
+           goto de_fault;
+
+         /* Handle '||' in two states as invoking PUT twice might
+            result in the first one jumping out of this loop.  We'd
+            then lose track of the state and one '|' char.  */
+         state = 13;
           PUT ('|');
           break;
  #endif
           PUT ('|');
           break;
  #endif
@@ -1093,7 +1190,7 @@ do_scrub_chars (get, tostart, tolen)
                 {
                   UNGET (ch2);
                 }
                 {
                   UNGET (ch2);
                 }
-           } /* bad hack */
+           }
  
           if (state == 0 || state == 1) /* Only comment at start of line.  */
             {
  
           if (state == 0 || state == 1) /* Only comment at start of line.  */
             {
@@ -1106,19 +1203,21 @@ do_scrub_chars (get, tostart, tolen)
                   ch = GET ();
                 }
               while (ch != EOF && IS_WHITESPACE (ch));
                   ch = GET ();
                 }
               while (ch != EOF && IS_WHITESPACE (ch));
+
               if (ch == EOF)
                 {
                   as_warn (_("end of file in comment; newline inserted"));
                   PUT ('\n');
                   break;
                 }
               if (ch == EOF)
                 {
                   as_warn (_("end of file in comment; newline inserted"));
                   PUT ('\n');
                   break;
                 }
+
               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
                 {
                   /* Not a cpp line.  */
                   while (ch != EOF && !IS_NEWLINE (ch))
                     ch = GET ();
                   if (ch == EOF)
               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
                 {
                   /* Not a cpp line.  */
                   while (ch != EOF && !IS_NEWLINE (ch))
                     ch = GET ();
                   if (ch == EOF)
-                   as_warn (_("EOF in Comment: Newline inserted"));
+                   as_warn (_("end of file in comment; newline inserted"));
                   state = 0;
                   PUT ('\n');
                   break;
                   state = 0;
                   PUT ('\n');
                   break;
@@ -1128,9 +1227,9 @@ do_scrub_chars (get, tostart, tolen)
               old_state = 4;
               state = -1;
               if (scrub_m68k_mri)
               old_state = 4;
               state = -1;
               if (scrub_m68k_mri)
-               out_string = "\tappline ";
+               out_string = "\tlinefile ";
               else
               else
-               out_string = "\t.appline ";
+               out_string = "\t.linefile ";
               PUT (*out_string++);
               break;
             }
               PUT (*out_string++);
               break;
             }
@@ -1173,6 +1272,15 @@ do_scrub_chars (get, tostart, tolen)
           if ((symver_state != NULL) && (*symver_state == 0))
             goto de_fault;
  #endif
           if ((symver_state != NULL) && (*symver_state == 0))
             goto de_fault;
  #endif
+
+#ifdef TC_ARM
+         /* For the ARM, care is needed not to damage occurrences of \@
+            by stripping the @ onwards.  Yuck.  */
+         if (to > tostart && *(to - 1) == '\\')
+           /* Do not treat the @ as a start-of-comment.  */
+           goto de_fault;
+#endif
+
  #ifdef WARN_COMMENTS
           if (!found_comment)
             as_where (&found_comment_file, &found_comment);
  #ifdef WARN_COMMENTS
           if (!found_comment)
             as_where (&found_comment_file, &found_comment);
@@ -1188,6 +1296,26 @@ do_scrub_chars (get, tostart, tolen)
           PUT ('\n');
           break;
  
           PUT ('\n');
           break;
  
+#ifdef H_TICK_HEX
+       case LEX_IS_H:
+         /* Look for strings like H'[0-9A-Fa-f] and if found, replace
+            the H' with 0x to make them gas-style hex characters.  */
+         if (enable_h_tick_hex)
+           {
+             char quot;
+
+             quot = GET ();
+             if (quot == '\'')
+               {
+                 UNGET ('x');
+                 ch = '0';
+               }
+             else
+               UNGET (quot);
+           }
+         /* FALL THROUGH */
+#endif
+
         case LEX_IS_SYMBOL_COMPONENT:
           if (state == 10)
             {
         case LEX_IS_SYMBOL_COMPONENT:
           if (state == 10)
             {
@@ -1200,11 +1328,36 @@ do_scrub_chars (get, tostart, tolen)
               break;
             }
  
               break;
             }
  
+#ifdef TC_Z80
+         /* "af'" is a symbol containing '\''.  */
+         if (state == 3 && (ch == 'a' || ch == 'A'))
+           {
+             state = 16;
+             PUT (ch);
+             ch = GET ();
+             if (ch == 'f' || ch == 'F')
+               {
+                 state = 17;
+                 PUT (ch);
+                 break;
+               }
+             else
+               {
+                 state = 9;
+                 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
+                   {
+                     if (ch != EOF)
+                       UNGET (ch);
+                     break;
+                   }
+               }
+           }
+#endif
           if (state == 3)
             state = 9;
  
           /* This is a common case.  Quickly copy CH and all the
           if (state == 3)
             state = 9;
  
           /* This is a common case.  Quickly copy CH and all the
-             following symbol component or normal characters.  */
+            following symbol component or normal characters.  */
           if (to + 1 < toend
               && mri_state == NULL
  #if defined TC_ARM && defined OBJ_ELF
           if (to + 1 < toend
               && mri_state == NULL
  #if defined TC_ARM && defined OBJ_ELF
@@ -1213,7 +1366,7 @@ do_scrub_chars (get, tostart, tolen)
               )
             {
               char *s;
               )
             {
               char *s;
-             int len;
+             ptrdiff_t len;
  
               for (s = from; s < fromend; s++)
                 {
  
               for (s = from; s < fromend; s++)
                 {
@@ -1225,38 +1378,25 @@ do_scrub_chars (get, tostart, tolen)
                       && type != LEX_IS_SYMBOL_COMPONENT)
                     break;
                 }
                       && type != LEX_IS_SYMBOL_COMPONENT)
                     break;
                 }
+
               if (s > from)
               if (s > from)
-               {
-                 /* Handle the last character normally, for
-                     simplicity.  */
-                 --s;
-               }
+               /* Handle the last character normally, for
+                  simplicity.  */
+               --s;
+
               len = s - from;
               len = s - from;
+
               if (len > (toend - to) - 1)
                 len = (toend - to) - 1;
               if (len > (toend - to) - 1)
                 len = (toend - to) - 1;
+
               if (len > 0)
                 {
                   PUT (ch);
               if (len > 0)
                 {
                   PUT (ch);
-                 if (len > 8)
-                   {
-                     memcpy (to, from, len);
-                     to += len;
-                     from += len;
-                   }
-                 else
-                   {
-                     switch (len)
-                       {
-                       case 8: *to++ = *from++;
-                       case 7: *to++ = *from++;
-                       case 6: *to++ = *from++;
-                       case 5: *to++ = *from++;
-                       case 4: *to++ = *from++;
-                       case 3: *to++ = *from++;
-                       case 2: *to++ = *from++;
-                       case 1: *to++ = *from++;
-                       }
-                   }
+                 memcpy (to, from, len);
+                 to += len;
+                 from += len;
+                 if (to >= toend)
+                   goto tofull;
                   ch = GET ();
                 }
             }
                   ch = GET ();
                 }
             }
@@ -1267,15 +1407,15 @@ do_scrub_chars (get, tostart, tolen)
           /* Some relatively `normal' character.  */
           if (state == 0)
             {
           /* Some relatively `normal' character.  */
           if (state == 0)
             {
-             state = 11;       /* Now seeing label definition */
+             state = 11;       /* Now seeing label definition.  */
             }
           else if (state == 1)
             {
             }
           else if (state == 1)
             {
-             state = 2;        /* Ditto */
+             state = 2;        /* Ditto.  */
             }
           else if (state == 9)
             {
             }
           else if (state == 9)
             {
-             if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
+             if (!IS_SYMBOL_COMPONENT (ch))
                 state = 3;
             }
           else if (state == 10)
                 state = 3;
             }
           else if (state == 10)
@@ -1294,7 +1434,15 @@ do_scrub_chars (get, tostart, tolen)
                      the space.  We don't have enough information to
                      make the right choice, so here we are making the
                      choice which is more likely to be correct.  */
                      the space.  We don't have enough information to
                      make the right choice, so here we are making the
                      choice which is more likely to be correct.  */
-                 PUT (' ');
+                 if (to + 1 >= toend)
+                   {
+                     /* If we're near the end of the buffer, save the
+                        character for the next time round.  Otherwise
+                        we'll lose our state.  */
+                     UNGET (ch);
+                     goto tofull;
+                   }
+                 *to++ = ' ';
                 }
  
               state = 3;
                 }
  
               state = 3;
@@ -1323,5 +1471,3 @@ do_scrub_chars (get, tostart, tolen)
  
    return to - tostart;
  }
  
    return to - tostart;
  }
-
-/* end of app.c */