* config/obj-evax.h (S_SET_OTHER, S_SET_TYPE, S_SET_DESC): Don't define.

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index e176adaafeccc2908d54993520ac874b625f4046..81378687b2aec21f5c75faa61e17ee391ce8d08d 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,6 +1,6 @@
  /* tc-arm.c -- Assemble for the ARM
     Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2006, 2007, 2008, 2009
+   2004, 2005, 2006, 2007, 2008, 2009, 2010
     Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
@@ -97,13 +97,12 @@ enum arm_float_abi
  
  /* Types of processor to assemble for. */
  #ifndef CPU_DEFAULT
-#if defined __XSCALE__
-#define CPU_DEFAULT    ARM_ARCH_XSCALE
-#else
-#if defined __thumb__
-#define CPU_DEFAULT    ARM_ARCH_V5T
-#endif
-#endif
+/* The code that was here used to select a default CPU depending on compiler
+   pre-defines which were only present when doing native builds, thus 
+   changing gas' default behaviour depending upon the build host.
+
+   If you have a target that requires a default CPU option then you
+   should define CPU_DEFAULT here.  */
  #endif
  
  #ifndef FPU_DEFAULT
@@ -188,12 +187,14 @@ static const arm_feature_set arm_ext_v6k = ARM_FEATURE (ARM_EXT_V6K, 0);
  static const arm_feature_set arm_ext_v6z = ARM_FEATURE (ARM_EXT_V6Z, 0);
  static const arm_feature_set arm_ext_v6t2 = ARM_FEATURE (ARM_EXT_V6T2, 0);
  static const arm_feature_set arm_ext_v6_notm = ARM_FEATURE (ARM_EXT_V6_NOTM, 0);
+static const arm_feature_set arm_ext_v6_dsp = ARM_FEATURE (ARM_EXT_V6_DSP, 0);
  static const arm_feature_set arm_ext_barrier = ARM_FEATURE (ARM_EXT_BARRIER, 0);
  static const arm_feature_set arm_ext_msr = ARM_FEATURE (ARM_EXT_THUMB_MSR, 0);
  static const arm_feature_set arm_ext_div = ARM_FEATURE (ARM_EXT_DIV, 0);
  static const arm_feature_set arm_ext_v7 = ARM_FEATURE (ARM_EXT_V7, 0);
  static const arm_feature_set arm_ext_v7a = ARM_FEATURE (ARM_EXT_V7A, 0);
  static const arm_feature_set arm_ext_v7r = ARM_FEATURE (ARM_EXT_V7R, 0);
+static const arm_feature_set arm_ext_v7m = ARM_FEATURE (ARM_EXT_V7M, 0);
  static const arm_feature_set arm_ext_m =
    ARM_FEATURE (ARM_EXT_V6M | ARM_EXT_V7M, 0);
  
@@ -216,13 +217,16 @@ static const arm_feature_set fpu_vfp_ext_v1xd =
    ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
  static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
  static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3xd = ARM_FEATURE (0, FPU_VFP_EXT_V3xD);
  static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
  static const arm_feature_set fpu_vfp_ext_d32 =
    ARM_FEATURE (0, FPU_VFP_EXT_D32);
  static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
    ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
-static const arm_feature_set fpu_neon_fp16 = ARM_FEATURE (0, FPU_NEON_FP16);
+static const arm_feature_set fpu_vfp_fp16 = ARM_FEATURE (0, FPU_VFP_EXT_FP16);
+static const arm_feature_set fpu_neon_ext_fma = ARM_FEATURE (0, FPU_NEON_EXT_FMA);
+static const arm_feature_set fpu_vfp_ext_fma = ARM_FEATURE (0, FPU_VFP_EXT_FMA);
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
@@ -344,6 +348,9 @@ struct arm_it
       appropriate.  */
    int          uncond_value;
    struct neon_type vectype;
+  /* This does not indicate an actual NEON instruction, only that
+     the mnemonic accepts neon-style type suffixes.  */
+  int          is_neon;
    /* Set to the opcode if the instruction needs relaxation.
       Zero if the instruction is not relaxed.  */
    unsigned long        relax;
@@ -551,7 +558,7 @@ struct asm_opcode
    const char * template_name;
  
    /* Parameters to instruction.         */
-  unsigned char operands[8];
+  unsigned int operands[8];
  
    /* Conditional tag - see opcode_lookup.  */
    unsigned int tag : 4;
@@ -697,6 +704,10 @@ struct asm_opcode
  #define BAD_IT_COND    _("incorrect condition in IT block")
  #define BAD_IT_IT      _("IT falling in the range of a previous IT block")
  #define MISSING_FNSTART        _("missing .fnstart before unwinding directive")
+#define BAD_PC_ADDRESSING \
+       _("cannot use register index with PC-relative addressing")
+#define BAD_PC_WRITEBACK \
+       _("cannot use writeback with PC-relative addressing")
  
  static struct hash_control * arm_ops_hsh;
  static struct hash_control * arm_cond_hsh;
@@ -931,6 +942,8 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
        input_line_pointer = save_in;
        return 1;
      }
+#else
+  (void) seg;
  #endif
  
    /* Get rid of any bignums now, so that we don't generate an error for which
@@ -1047,10 +1060,10 @@ md_atof (int type, char * litP, int * sizeP)
  /* We handle all bad expressions here, so that we can report the faulty
     instruction in the error message.  */
  void
-md_operand (expressionS * expr)
+md_operand (expressionS * exp)
  {
    if (in_my_get_expression)
-    expr->X_op = O_illegal;
+    exp->X_op = O_illegal;
  }
  
  /* Immediate values.  */
@@ -1578,23 +1591,23 @@ parse_reg_list (char ** strp)
         }
        else
         {
-         expressionS expr;
+         expressionS exp;
  
-         if (my_get_expression (&expr, &str, GE_NO_PREFIX))
+         if (my_get_expression (&exp, &str, GE_NO_PREFIX))
             return FAIL;
  
-         if (expr.X_op == O_constant)
+         if (exp.X_op == O_constant)
             {
-             if (expr.X_add_number
-                 != (expr.X_add_number & 0x0000ffff))
+             if (exp.X_add_number
+                 != (exp.X_add_number & 0x0000ffff))
                 {
                   inst.error = _("invalid register mask");
                   return FAIL;
                 }
  
-             if ((range & expr.X_add_number) != 0)
+             if ((range & exp.X_add_number) != 0)
                 {
-                 int regno = range & expr.X_add_number;
+                 int regno = range & exp.X_add_number;
  
                   regno &= -regno;
                   regno = (1 << regno) - 1;
@@ -1603,7 +1616,7 @@ parse_reg_list (char ** strp)
                      regno);
                 }
  
-             range |= expr.X_add_number;
+             range |= exp.X_add_number;
             }
           else
             {
@@ -1613,7 +1626,7 @@ parse_reg_list (char ** strp)
                   return FAIL;
                 }
  
-             memcpy (&inst.reloc.exp, &expr, sizeof (expressionS));
+             memcpy (&inst.reloc.exp, &exp, sizeof (expressionS));
               inst.reloc.type = BFD_RELOC_ARM_MULTI;
               inst.reloc.pc_rel = 0;
             }
@@ -1870,7 +1883,6 @@ parse_neon_el_struct_list (char **str, unsigned *pbase,
    int lane = -1;
    int leading_brace = 0;
    enum arm_reg_type rtype = REG_TYPE_NDQ;
-  int addregs = 1;
    const char *const incr_error = _("register stride must be 1 or 2");
    const char *const type_error = _("mismatched element/structure types in list");
    struct neon_typed_alias firsttype;
@@ -1895,7 +1907,6 @@ parse_neon_el_struct_list (char **str, unsigned *pbase,
            if (rtype == REG_TYPE_NQ)
              {
                reg_incr = 1;
-              addregs = 2;
              }
            firsttype = atype;
          }
@@ -2482,14 +2493,27 @@ make_mapping_symbol (enum mstate state, valueT value, fragS *frag)
    /* Save the mapping symbols for future reference.  Also check that
       we do not place two mapping symbols at the same offset within a
       frag.  We'll handle overlap between frags in
-     check_mapping_symbols.  */
+     check_mapping_symbols.
+
+     If a .fill or other data-filling directive generates zero-sized data,
+     the mapping symbol for the following code will have the same value
+     as the one generated for the data-filling directive.  In this case,
+     we replace the old symbol with the new one at the same address.  */
    if (value == 0)
      {
-      know (frag->tc_frag_data.first_map == NULL);
+      if (frag->tc_frag_data.first_map != NULL)
+       {
+         know (S_GET_VALUE (frag->tc_frag_data.first_map) == 0);
+         symbol_remove (frag->tc_frag_data.first_map, &symbol_rootP, &symbol_lastP);
+       }
        frag->tc_frag_data.first_map = symbolP;
      }
    if (frag->tc_frag_data.last_map != NULL)
-    know (S_GET_VALUE (frag->tc_frag_data.last_map) < S_GET_VALUE (symbolP));
+    {
+      know (S_GET_VALUE (frag->tc_frag_data.last_map) <= S_GET_VALUE (symbolP));
+      if (S_GET_VALUE (frag->tc_frag_data.last_map) == S_GET_VALUE (symbolP))
+       symbol_remove (frag->tc_frag_data.last_map, &symbol_rootP, &symbol_lastP);
+    }
    frag->tc_frag_data.last_map = symbolP;
  }
  
@@ -4706,7 +4730,7 @@ static int
  parse_shifter_operand (char **str, int i)
  {
    int value;
-  expressionS expr;
+  expressionS exp;
  
    if ((value = arm_reg_parse (str, REG_TYPE_RN)) != FAIL)
      {
@@ -4730,16 +4754,16 @@ parse_shifter_operand (char **str, int i)
    if (skip_past_comma (str) == SUCCESS)
      {
        /* #x, y -- ie explicit rotation by Y.  */
-      if (my_get_expression (&expr, str, GE_NO_PREFIX))
+      if (my_get_expression (&exp, str, GE_NO_PREFIX))
         return FAIL;
  
-      if (expr.X_op != O_constant || inst.reloc.exp.X_op != O_constant)
+      if (exp.X_op != O_constant || inst.reloc.exp.X_op != O_constant)
         {
           inst.error = _("constant expression expected");
           return FAIL;
         }
  
-      value = expr.X_add_number;
+      value = exp.X_add_number;
        if (value < 0 || value > 30 || value % 2 != 0)
         {
           inst.error = _("invalid rotation");
@@ -4924,6 +4948,33 @@ parse_shifter_operand_group_reloc (char **str, int i)
    /* Never reached.  */
  }
  
+/* Parse a Neon alignment expression.  Information is written to
+   inst.operands[i].  We assume the initial ':' has been skipped.
+   
+   align       .imm = align << 8, .immisalign=1, .preind=0  */
+static parse_operand_result
+parse_neon_alignment (char **str, int i)
+{
+  char *p = *str;
+  expressionS exp;
+
+  my_get_expression (&exp, &p, GE_NO_PREFIX);
+
+  if (exp.X_op != O_constant)
+    {
+      inst.error = _("alignment must be constant");
+      return PARSE_OPERAND_FAIL;
+    }
+
+  inst.operands[i].imm = exp.X_add_number << 8;
+  inst.operands[i].immisalign = 1;
+  /* Alignments are not pre-indexes.  */
+  inst.operands[i].preind = 0;
+
+  *str = p;
+  return PARSE_OPERAND_SUCCESS;
+}
+
  /* Parse all forms of an ARM address expression.  Information is written
     to inst.operands[i] and/or inst.reloc.
  
@@ -4967,13 +5018,13 @@ parse_address_main (char **str, int i, int group_relocations,
      {
        if (skip_past_char (&p, '=') == FAIL)
         {
-         /* bare address - translate to PC-relative offset */
+         /* Bare address - translate to PC-relative offset.  */
           inst.reloc.pc_rel = 1;
           inst.operands[i].reg = REG_PC;
           inst.operands[i].isreg = 1;
           inst.operands[i].preind = 1;
         }
-      /* else a load-constant pseudo op, no special treatment needed here */
+      /* Otherwise a load-constant pseudo op, no special treatment needed here.  */
  
        if (my_get_expression (&inst.reloc.exp, &p, GE_NO_PREFIX))
         return PARSE_OPERAND_FAIL;
@@ -5007,22 +5058,15 @@ parse_address_main (char **str, int i, int group_relocations,
               return PARSE_OPERAND_FAIL;
         }
        else if (skip_past_char (&p, ':') == SUCCESS)
-        {
-          /* FIXME: '@' should be used here, but it's filtered out by generic
-             code before we get to see it here. This may be subject to
-             change.  */
-          expressionS exp;
-          my_get_expression (&exp, &p, GE_NO_PREFIX);
-          if (exp.X_op != O_constant)
-            {
-              inst.error = _("alignment must be constant");
-              return PARSE_OPERAND_FAIL;
-            }
-          inst.operands[i].imm = exp.X_add_number << 8;
-          inst.operands[i].immisalign = 1;
-          /* Alignments are not pre-indexes.  */
-          inst.operands[i].preind = 0;
-        }
+       {
+         /* FIXME: '@' should be used here, but it's filtered out by generic
+            code before we get to see it here. This may be subject to
+            change.  */
+         parse_operand_result result = parse_neon_alignment (&p, i);
+         
+         if (result != PARSE_OPERAND_SUCCESS)
+           return result;
+       }
        else
         {
           if (inst.operands[i].negative)
@@ -5086,6 +5130,15 @@ parse_address_main (char **str, int i, int group_relocations,
               return PARSE_OPERAND_FAIL;
         }
      }
+  else if (skip_past_char (&p, ':') == SUCCESS)
+    {
+      /* FIXME: '@' should be used here, but it's filtered out by generic code
+        before we get to see it here. This may be subject to change.  */
+      parse_operand_result result = parse_neon_alignment (&p, i);
+      
+      if (result != PARSE_OPERAND_SUCCESS)
+       return result;
+    }
  
    if (skip_past_char (&p, ']') == FAIL)
      {
@@ -5721,6 +5774,11 @@ parse_neon_mov (char **str, int *which_operand)
    return FAIL;
  }
  
+/* Use this macro when the operand constraints are different
+   for ARM and THUMB (e.g. ldrd).  */
+#define MIX_ARM_THUMB_OPERANDS(arm_operand, thumb_operand) \
+       ((arm_operand) | ((thumb_operand) << 16))
+
  /* Matcher codes for parse_operands.  */
  enum operand_parse_code
  {
@@ -5728,7 +5786,10 @@ enum operand_parse_code
  
    OP_RR,       /* ARM register */
    OP_RRnpc,    /* ARM register, not r15 */
+  OP_RRnpcsp,  /* ARM register, neither r15 nor r13 (a.k.a. 'BadReg') */
    OP_RRnpcb,   /* ARM register, not r15, in square brackets */
+  OP_RRnpctw,  /* ARM register, not r15 in Thumb-state or with writeback, 
+                  optional trailing ! */
    OP_RRw,      /* ARM register, not r15, optional trailing ! */
    OP_RCP,      /* Coprocessor number */
    OP_RCN,      /* Coprocessor register */
@@ -5760,7 +5821,6 @@ enum operand_parse_code
    OP_NRDLST,    /* Neon double-precision register list (d0-d31, qN aliases) */
    OP_NSTRLST,   /* Neon element/structure list */
  
-  OP_NILO,      /* Neon immediate/logic operands 2 or 2+3. (VBIC, VORR...)  */
    OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
    OP_RVSD_I0,  /* VFP S or D reg, or immediate zero.  */
    OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
@@ -5768,7 +5828,7 @@ enum operand_parse_code
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
    OP_RND_RNSC,  /* Neon D reg, or Neon scalar.  */
    OP_VMOV,      /* Neon VMOV operands.  */
-  OP_RNDQ_IMVNb,/* Neon D or Q reg, or immediate good for VMVN.  */
+  OP_RNDQ_Ibig,        /* Neon D or Q reg, or big immediate for logic and VMVN.  */
    OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift.  */
    OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2.  */
  
@@ -5828,6 +5888,7 @@ enum operand_parse_code
  
    OP_oRR,       /* ARM register */
    OP_oRRnpc,    /* ARM register, not the PC */
+  OP_oRRnpcsp,  /* ARM register, neither the PC nor the SP (a.k.a. BadReg) */
    OP_oRRw,      /* ARM register, not r15, optional trailing ! */
    OP_oRND,       /* Optional Neon double precision register */
    OP_oRNQ,       /* Optional Neon quad precision register */
@@ -5839,6 +5900,11 @@ enum operand_parse_code
    OP_oROR,      /* ROR 0/8/16/24 */
    OP_oBARRIER,  /* Option argument for a barrier instruction.  */
  
+  /* Some pre-defined mixed (ARM/THUMB) operands.  */
+  OP_RR_npcsp          = MIX_ARM_THUMB_OPERANDS (OP_RR, OP_RRnpcsp),
+  OP_RRnpc_npcsp       = MIX_ARM_THUMB_OPERANDS (OP_RRnpc, OP_RRnpcsp),
+  OP_oRRnpc_npcsp      = MIX_ARM_THUMB_OPERANDS (OP_oRRnpc, OP_oRRnpcsp),
+
    OP_FIRST_OPTIONAL = OP_oI7b
  };
  
@@ -5847,14 +5913,15 @@ enum operand_parse_code
     structure.  Returns SUCCESS or FAIL depending on whether the
     specified grammar matched.  */
  static int
-parse_operands (char *str, const unsigned char *pattern)
+parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  {
-  unsigned const char *upat = pattern;
+  unsigned const int *upat = pattern;
    char *backtrack_pos = 0;
    const char *backtrack_error = 0;
    int i, val, backtrack_index = 0;
    enum arm_reg_type rtype;
    parse_operand_result result;
+  unsigned int op_parse_code;
  
  #define po_char_or_fail(chr)                   \
    do                                           \
@@ -5945,7 +6012,12 @@ parse_operands (char *str, const unsigned char *pattern)
  
    for (i = 0; upat[i] != OP_stop; i++)
      {
-      if (upat[i] >= OP_FIRST_OPTIONAL)
+      op_parse_code = upat[i];
+      if (op_parse_code >= 1<<16)
+       op_parse_code = thumb ? (op_parse_code >> 16)
+                               : (op_parse_code & ((1<<16)-1));
+
+      if (op_parse_code >= OP_FIRST_OPTIONAL)
         {
           /* Remember where we are in case we need to backtrack.  */
           gas_assert (!backtrack_pos);
@@ -5957,11 +6029,13 @@ parse_operands (char *str, const unsigned char *pattern)
        if (i > 0 && (i > 1 || inst.operands[0].present))
         po_char_or_fail (',');
  
-      switch (upat[i])
+      switch (op_parse_code)
         {
           /* Registers */
         case OP_oRRnpc:
+       case OP_oRRnpcsp:
         case OP_RRnpc:
+       case OP_RRnpcsp:
         case OP_oRR:
         case OP_RR:    po_reg_or_fail (REG_TYPE_RN);      break;
         case OP_RCP:   po_reg_or_fail (REG_TYPE_CP);      break;
@@ -6000,36 +6074,6 @@ parse_operands (char *str, const unsigned char *pattern)
             scalars are accepted here, so deal with those in later code.  */
          case OP_RNSC:  po_scalar_or_goto (8, failure);    break;
  
-        /* WARNING: We can expand to two operands here. This has the potential
-           to totally confuse the backtracking mechanism! It will be OK at
-           least as long as we don't try to use optional args as well,
-           though.  */
-        case OP_NILO:
-          {
-            po_reg_or_goto (REG_TYPE_NDQ, try_imm);
-           inst.operands[i].present = 1;
-            i++;
-            skip_past_comma (&str);
-            po_reg_or_goto (REG_TYPE_NDQ, one_reg_only);
-            break;
-            one_reg_only:
-            /* Optional register operand was omitted. Unfortunately, it's in
-               operands[i-1] and we need it to be in inst.operands[i]. Fix that
-               here (this is a bit grotty).  */
-            inst.operands[i] = inst.operands[i-1];
-            inst.operands[i-1].present = 0;
-            break;
-            try_imm:
-           /* There's a possibility of getting a 64-bit immediate here, so
-              we need special handling.  */
-           if (parse_big_immediate (&str, i) == FAIL)
-             {
-               inst.error = _("immediate value is out of range");
-               goto failure;
-             }
-          }
-          break;
-
          case OP_RNDQ_I0:
            {
              po_reg_or_goto (REG_TYPE_NDQ, try_imm0);
@@ -6085,11 +6129,11 @@ parse_operands (char *str, const unsigned char *pattern)
            po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL);
            break;
  
-        case OP_RNDQ_IMVNb:
+        case OP_RNDQ_Ibig:
            {
-            po_reg_or_goto (REG_TYPE_NDQ, try_mvnimm);
+            po_reg_or_goto (REG_TYPE_NDQ, try_immbig);
              break;
-            try_mvnimm:
+            try_immbig:
              /* There's a possibility of getting a 64-bit immediate here, so
                 we need special handling.  */
              if (parse_big_immediate (&str, i) == FAIL)
@@ -6115,6 +6159,7 @@ parse_operands (char *str, const unsigned char *pattern)
           po_char_or_fail (']');
           break;
  
+       case OP_RRnpctw:
         case OP_RRw:
         case OP_oRRw:
           po_reg_or_fail (REG_TYPE_RN);
@@ -6300,6 +6345,8 @@ parse_operands (char *str, const unsigned char *pattern)
                if (found != 15)
                  goto failure;
                inst.operands[i].isvec = 1;
+             /* APSR_nzcv is encoded in instructions as if it were the REG_PC.  */
+             inst.operands[i].reg = REG_PC;
              }
            else
              goto failure;
@@ -6392,13 +6439,13 @@ parse_operands (char *str, const unsigned char *pattern)
           break;
  
         default:
-         as_fatal (_("unhandled operand code %d"), upat[i]);
+         as_fatal (_("unhandled operand code %d"), op_parse_code);
         }
  
        /* Various value-based sanity checks and shared operations.  We
          do not signal immediate failures for the register constraints;
          this allows a syntax error to take precedence.  */
-      switch (upat[i])
+      switch (op_parse_code)
         {
         case OP_oRRnpc:
         case OP_RRnpc:
@@ -6410,6 +6457,24 @@ parse_operands (char *str, const unsigned char *pattern)
             inst.error = BAD_PC;
           break;
  
+       case OP_oRRnpcsp:
+       case OP_RRnpcsp:
+         if (inst.operands[i].isreg)
+           {
+             if (inst.operands[i].reg == REG_PC)
+               inst.error = BAD_PC;
+             else if (inst.operands[i].reg == REG_SP)
+               inst.error = BAD_SP;
+           }
+         break;
+
+       case OP_RRnpctw:
+         if (inst.operands[i].isreg 
+             && inst.operands[i].reg == REG_PC 
+             && (inst.operands[i].writeback || thumb))
+           inst.error = BAD_PC;
+         break;
+
         case OP_CPSF:
         case OP_ENDI:
         case OP_oROR:
@@ -6695,10 +6760,15 @@ encode_arm_addr_mode_common (int i, bfd_boolean is_t)
  static void
  encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
  {
+  const bfd_boolean is_pc = (inst.operands[i].reg == REG_PC);
+
    encode_arm_addr_mode_common (i, is_t);
  
    if (inst.operands[i].immisreg)
      {
+      constraint ((inst.operands[i].imm == REG_PC
+                  || (is_pc && inst.operands[i].writeback)),
+                 BAD_PC_ADDRESSING);
        inst.instruction |= INST_IMMEDIATE;  /* yes, this is backwards */
        inst.instruction |= inst.operands[i].imm;
        if (!inst.operands[i].negative)
@@ -6716,6 +6786,16 @@ encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
      }
    else /* immediate offset in inst.reloc */
      {
+      if (is_pc && !inst.reloc.pc_rel)
+       {
+         const bfd_boolean is_load = ((inst.instruction & LOAD_BIT) != 0);
+         /* A PC base is only accepted here for a non-T load, so the
+            BAD_PC_ADDRESSING condition is "is_load => is_t",
+            which becomes !is_load || is_t.  */
+         constraint ((!is_load || is_t),
+                     BAD_PC_ADDRESSING);
+       }
+
        if (inst.reloc.type == BFD_RELOC_UNUSED)
         inst.reloc.type = BFD_RELOC_ARM_OFFSET_IMM;
      }
@@ -6739,12 +6819,18 @@ encode_arm_addr_mode_3 (int i, bfd_boolean is_t)
  
    if (inst.operands[i].immisreg)
      {
+      constraint ((inst.operands[i].imm == REG_PC
+                  || inst.operands[i].reg == REG_PC),
+                 BAD_PC_ADDRESSING);
        inst.instruction |= inst.operands[i].imm;
        if (!inst.operands[i].negative)
         inst.instruction |= INDEX_UP;
      }
    else /* immediate offset in inst.reloc */
      {
+      constraint ((inst.operands[i].reg == REG_PC && !inst.reloc.pc_rel
+                  && inst.operands[i].writeback),
+                 BAD_PC_WRITEBACK);
        inst.instruction |= HWOFFSET_IMM;
        if (inst.reloc.type == BFD_RELOC_UNUSED)
         inst.reloc.type = BFD_RELOC_ARM_OFFSET_IMM8;
@@ -6936,8 +7022,16 @@ do_rd_rm_rn (void)
    unsigned Rn = inst.operands[2].reg;
    /* Enforce restrictions on SWP instruction.  */
    if ((inst.instruction & 0x0fbfffff) == 0x01000090)
-    constraint (Rn == inst.operands[0].reg || Rn == inst.operands[1].reg,
-               _("Rn must not overlap other operands"));
+    {
+      constraint (Rn == inst.operands[0].reg || Rn == inst.operands[1].reg,
+                 _("Rn must not overlap other operands"));
+
+      /* SWP{b} is deprecated for ARMv6* and ARMv7.  */
+      if (warn_on_deprecated
+         && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6))
+       as_warn (_("swp{b} use is deprecated for this architecture"));
+
+    }
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg;
    inst.instruction |= Rn << 16;
@@ -6954,6 +7048,11 @@ do_rd_rn_rm (void)
  static void
  do_rm_rd_rn (void)
  {
+  constraint ((inst.operands[2].reg == REG_PC), BAD_PC);
+  constraint (((inst.reloc.exp.X_op != O_constant
+               && inst.reloc.exp.X_op != O_illegal)
+              || inst.reloc.exp.X_add_number != 0),
+             BAD_ADDR_MODE);
    inst.instruction |= inst.operands[0].reg;
    inst.instruction |= inst.operands[1].reg << 12;
    inst.instruction |= inst.operands[2].reg << 16;
@@ -7444,6 +7543,8 @@ do_ldrex (void)
               || inst.reloc.exp.X_add_number != 0,
               _("offset must be zero in ARM encoding"));
  
+  constraint ((inst.operands[1].reg == REG_PC), BAD_PC);
+
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg << 16;
    inst.reloc.type = BFD_RELOC_UNUSED;
@@ -7499,6 +7600,7 @@ do_ldstt (void)
  static void
  do_ldstv4 (void)
  {
+  constraint (inst.operands[0].reg == REG_PC, BAD_PC);
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
      if (move_or_literal_pool (0, /*thumb_p=*/FALSE, /*mode_3=*/TRUE))
@@ -7611,6 +7713,49 @@ do_vfp_nsyn_msr (void)
    return SUCCESS;
  }
  
+static void
+do_vmrs (void)
+{
+  unsigned Rt = inst.operands[0].reg;
+  
+  if (thumb_mode && inst.operands[0].reg == REG_SP)
+    {
+      inst.error = BAD_SP;
+      return;
+    }
+
+  /* APSR_ sets isvec. All other refs to PC are illegal.  */
+  if (!inst.operands[0].isvec && inst.operands[0].reg == REG_PC)
+    {
+      inst.error = BAD_PC;
+      return;
+    }
+
+  if (inst.operands[1].reg != 1)
+    first_error (_("operand 1 must be FPSCR"));
+
+  inst.instruction |= (Rt << 12);
+}
+
+static void
+do_vmsr (void)
+{
+  unsigned Rt = inst.operands[1].reg;
+  
+  if (thumb_mode)
+    reject_bad_reg (Rt);
+  else if (Rt == REG_PC)
+    {
+      inst.error = BAD_PC;
+      return;
+    }
+
+  if (inst.operands[0].reg != 1)
+    first_error (_("operand 0 must be FPSCR"));
+
+  inst.instruction |= (Rt << 12);
+}
+
  static void
  do_mrs (void)
  {
@@ -7621,6 +7766,7 @@ do_mrs (void)
    constraint ((inst.operands[1].imm & (PSR_c|PSR_x|PSR_s|PSR_f))
               != (PSR_c|PSR_f),
               _("'CPSR' or 'SPSR' expected"));
+  constraint (inst.operands[0].reg == REG_PC, BAD_PC);
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= (inst.operands[1].imm & SPSR_BIT);
  }
@@ -7649,6 +7795,8 @@ do_msr (void)
  static void
  do_mul (void)
  {
+  constraint (inst.operands[2].reg == REG_PC, BAD_PC);
+
    if (!inst.operands[2].present)
      inst.operands[2].reg = inst.operands[0].reg;
    inst.instruction |= inst.operands[0].reg << 16;
@@ -8646,12 +8794,13 @@ encode_thumb32_shifted_operand (int i)
     Thumb32 format load or store instruction.  Reject forms that cannot
     be used with such instructions.  If is_t is true, reject forms that
     cannot be used with a T instruction; if is_d is true, reject forms
-   that cannot be used with a D instruction.  */
+   that cannot be used with a D instruction.  If it is a store insn,
+   reject PC in Rn.  */
  
  static void
  encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
  {
-  bfd_boolean is_pc = (inst.operands[i].reg == REG_PC);
+  const bfd_boolean is_pc = (inst.operands[i].reg == REG_PC);
  
    constraint (!inst.operands[i].isreg,
               _("Instruction does not support =N addresses"));
@@ -8659,7 +8808,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    inst.instruction |= inst.operands[i].reg << 16;
    if (inst.operands[i].immisreg)
      {
-      constraint (is_pc, _("cannot use register index with PC-relative addressing"));
+      constraint (is_pc, BAD_PC_ADDRESSING);
        constraint (is_t || is_d, _("cannot use register index with this instruction"));
        constraint (inst.operands[i].negative,
                   _("Thumb does not support negative register indexing"));
@@ -8684,10 +8833,11 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
      }
    else if (inst.operands[i].preind)
      {
-      constraint (is_pc && inst.operands[i].writeback,
-                 _("cannot use writeback with PC-relative addressing"));
+      constraint (is_pc && inst.operands[i].writeback, BAD_PC_WRITEBACK);
        constraint (is_t && inst.operands[i].writeback,
                   _("cannot use writeback with this instruction"));
+      constraint (is_pc && ((inst.instruction & THUMB2_LOAD_BIT) == 0)
+                 && !inst.reloc.pc_rel, BAD_PC_ADDRESSING);
  
        if (is_d)
         {
@@ -9649,6 +9799,12 @@ encode_thumb2_ldmstm (int base, unsigned mask, bfd_boolean writeback)
  
    if (mask & (1 << 13))
      inst.error =  _("SP not allowed in register list");
+
+  if ((mask & (1 << base)) != 0
+      && writeback)
+    inst.error = _("having the base register in the register list when "
+                  "using write back is UNPREDICTABLE");
+
    if (load)
      {
        if (mask & (1 << 15))
@@ -9658,19 +9814,11 @@ encode_thumb2_ldmstm (int base, unsigned mask, bfd_boolean writeback)
            else
              set_it_insn_type_last ();
          }
-
-      if ((mask & (1 << base)) != 0
-         && writeback)
-       as_warn (_("base register should not be in register list "
-                  "when written back"));
      }
    else
      {
        if (mask & (1 << 15))
         inst.error = _("PC not allowed in register list");
-
-      if (mask & (1 << base))
-       as_warn (_("value stored for r%d is UNPREDICTABLE"), base);
      }
  
    if ((mask & (mask - 1)) == 0)
@@ -9736,7 +9884,7 @@ do_t_ldmstm (void)
               if (inst.instruction == T_MNEM_stmia
                   && (inst.operands[1].imm & mask)
                   && (inst.operands[1].imm & (mask - 1)))
-               as_warn (_("value stored for r%d is UNPREDICTABLE"),
+               as_warn (_("value stored for r%d is UNKNOWN"),
                          inst.operands[0].reg);
  
               inst.instruction = THUMB_OP16 (inst.instruction);
@@ -9776,7 +9924,7 @@ do_t_ldmstm (void)
             as_warn (_("this instruction will write back the base register"));
           if ((inst.operands[1].imm & (1 << inst.operands[0].reg))
               && (inst.operands[1].imm & ((1 << inst.operands[0].reg) - 1)))
-           as_warn (_("value stored for r%d is UNPREDICTABLE"),
+           as_warn (_("value stored for r%d is UNKNOWN"),
                      inst.operands[0].reg);
         }
        else
@@ -9804,6 +9952,8 @@ do_t_ldrex (void)
               || inst.operands[1].negative,
               BAD_ADDR_MODE);
  
+  constraint ((inst.operands[1].reg == REG_PC), BAD_PC);
+
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg << 16;
    inst.reloc.type = BFD_RELOC_ARM_T32_OFFSET_U8;
@@ -9863,6 +10013,8 @@ do_t_ldst (void)
               /* [Rn, Rik] */
               if (Rn <= 7 && inst.operands[1].imm <= 7)
                 goto op16;
+             else if (opcode != T_MNEM_ldr && opcode != T_MNEM_str)
+               reject_bad_reg (inst.operands[1].imm);
             }
           else if ((Rn <= 7 && opcode != T_MNEM_ldrsh
                     && opcode != T_MNEM_ldrsb)
@@ -9902,6 +10054,12 @@ do_t_ldst (void)
             }
         }
        /* Definitely a 32-bit variant.  */
+
+      /* Do some validations regarding addressing modes.  */
+      if (inst.operands[1].immisreg && opcode != T_MNEM_ldr
+         && opcode != T_MNEM_str)
+       reject_bad_reg (inst.operands[1].imm);
+
        inst.instruction = THUMB_OP32 (opcode);
        inst.instruction |= inst.operands[0].reg << 12;
        encode_thumb32_addr_mode (1, /*is_t=*/FALSE, /*is_d=*/FALSE);
@@ -10250,8 +10408,8 @@ do_t_mov_cmp (void)
  
           case T_MNEM_movs:
             /* We know we have low registers at this point.
-              Generate ADD Rd, Rs, #0.  */
-           inst.instruction = T_OPCODE_ADD_I3;
+              Generate LSLS Rd, Rs, #0.  */
+           inst.instruction = T_OPCODE_LSL_I;
             inst.instruction |= Rn;
             inst.instruction |= Rm << 3;
             break;
@@ -10597,7 +10755,7 @@ do_t_nop (void)
         {
           /* PR9722: Check for Thumb2 availability before
              generating a thumb2 nop instruction.  */
-         if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2))
+         if (ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6t2))
             {
               inst.instruction = THUMB_OP16 (inst.instruction);
               inst.instruction |= inst.operands[0].imm << 4;
@@ -11052,6 +11210,24 @@ do_t_simd (void)
    inst.instruction |= Rm;
  }
  
+static void
+do_t_simd2 (void)
+{
+  unsigned Rd, Rn, Rm;
+  /* Three-register encoder: operand 1 supplies Rm and operand 2 Rn.  */
+  Rd = inst.operands[0].reg;
+  Rm = inst.operands[1].reg;
+  Rn = inst.operands[2].reg;
+
+  reject_bad_reg (Rd);
+  reject_bad_reg (Rn);
+  reject_bad_reg (Rm);
+  /* Rd is placed at bit 8, Rn at bit 16 and Rm in the low bits.  */
+  inst.instruction |= Rd << 8;
+  inst.instruction |= Rn << 16;
+  inst.instruction |= Rm;
+}
+
  static void
  do_t_smc (void)
  {
@@ -11133,6 +11309,8 @@ do_t_strex (void)
               || inst.operands[2].negative,
               BAD_ADDR_MODE);
  
+  constraint (inst.operands[2].reg == REG_PC, BAD_PC);
+
    inst.instruction |= inst.operands[0].reg << 8;
    inst.instruction |= inst.operands[1].reg << 12;
    inst.instruction |= inst.operands[2].reg << 16;
@@ -11147,8 +11325,7 @@ do_t_strexd (void)
  
    constraint (inst.operands[0].reg == inst.operands[1].reg
               || inst.operands[0].reg == inst.operands[2].reg
-             || inst.operands[0].reg == inst.operands[3].reg
-             || inst.operands[1].reg == inst.operands[2].reg,
+             || inst.operands[0].reg == inst.operands[3].reg,
               BAD_OVERLAP);
  
    inst.instruction |= inst.operands[0].reg;
@@ -11293,6 +11470,8 @@ struct neon_tab_entry
       vcge / vcgt with the operands reversed.  */       \
    X(vclt,      0x0000300, 0x1200e00, 0x1b10200),       \
    X(vcle,      0x0000310, 0x1000e00, 0x1b10180),       \
+  X(vfma,      N_INV, 0x0000c10, N_INV),               \
+  X(vfms,      N_INV, 0x0200c10, N_INV),               \
    X(vmla,      0x0000900, 0x0000d10, 0x0800040),       \
    X(vmls,      0x1000900, 0x0200d10, 0x0800440),       \
    X(vmul,      0x0000910, 0x1000d10, 0x0800840),       \
@@ -11328,8 +11507,10 @@ struct neon_tab_entry
    X(vqmovn,    0x1b20200, N_INV,     N_INV),           \
    X(vqmovun,   0x1b20240, N_INV,     N_INV),           \
    X(vnmul,      0xe200a40, 0xe200b40, N_INV),          \
-  X(vnmla,      0xe000a40, 0xe000b40, N_INV),          \
-  X(vnmls,      0xe100a40, 0xe100b40, N_INV),          \
+  X(vnmla,      0xe100a40, 0xe100b40, N_INV),          \
+  X(vnmls,      0xe100a00, 0xe100b00, N_INV),          \
+  X(vfnma,      0xe900a40, 0xe900b40, N_INV),          \
+  X(vfnms,      0xe900a00, 0xe900b00, N_INV),          \
    X(vcmp,      0xeb40a40, 0xeb40b40, N_INV),           \
    X(vcmpz,     0xeb50a40, 0xeb50b40, N_INV),           \
    X(vcmpe,     0xeb40ac0, 0xeb40bc0, N_INV),           \
@@ -11349,20 +11530,40 @@ NEON_ENC_TAB
  #undef X
  };
  
-#define NEON_ENC_INTEGER(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
-#define NEON_ENC_ARMREG(X)  (neon_enc_tab[(X) & 0x0fffffff].integer)
-#define NEON_ENC_POLY(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
-#define NEON_ENC_FLOAT(X)   (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
-#define NEON_ENC_SCALAR(X)  (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
-#define NEON_ENC_IMMED(X)   (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
-#define NEON_ENC_INTERLV(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
-#define NEON_ENC_LANE(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
-#define NEON_ENC_DUP(X)     (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
-#define NEON_ENC_SINGLE(X) \
+/* Do not use these macros; instead, use NEON_ENCODE defined below.  */
+#define NEON_ENC_INTEGER_(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_ARMREG_(X)  (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_POLY_(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_FLOAT_(X)   (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_SCALAR_(X)  (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_IMMED_(X)   (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_INTERLV_(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_LANE_(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_DUP_(X)     (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_SINGLE_(X) \
    ((neon_enc_tab[(X) & 0x0fffffff].integer) | ((X) & 0xf0000000))
-#define NEON_ENC_DOUBLE(X) \
+#define NEON_ENC_DOUBLE_(X) \
    ((neon_enc_tab[(X) & 0x0fffffff].float_or_poly) | ((X) & 0xf0000000))
  
+#define NEON_ENCODE(type, inst)                                        \
+  do  /* Rewrite inst.instruction through NEON_ENC_<type>_.  */        \
+    {                                                          \
+      inst.instruction = NEON_ENC_##type##_ (inst.instruction);        \
+      inst.is_neon = 1;  /* Tested by check_neon_suffixes.  */         \
+    }                                                          \
+  while (0)
+
+#define check_neon_suffixes                                            \
+  do  /* Diagnose a type suffix left on a non-Neon insn.  */           \
+    {                                                                  \
+      if (!inst.error && inst.vectype.elems > 0 && !inst.is_neon)      \
+       {                                                               \
+         as_bad (_("invalid neon suffix for non neon instruction"));   \
+         return;  /* Leaves the enclosing (void) function.  */         \
+       }                                                               \
+    }                                                                  \
+  while (0)
+
  /* Define shapes for instruction operands. The following mnemonic characters
     are used in this table:
  
@@ -11638,6 +11839,8 @@ neon_select_shape (enum neon_shape shape, ...)
              case SE_L:
                break;
              }
+         if (!matches)
+           break;
          }
        if (matches)
          break;
@@ -11975,8 +12178,17 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
              {
                if ((thisarg & N_VFP) != 0)
                  {
-                  enum neon_shape_el regshape = neon_shape_tab[ns].el[i];
-                  unsigned regwidth = neon_shape_el_size[regshape], match;
+                  enum neon_shape_el regshape;
+                  unsigned regwidth, match;
+
+                 /* PR 11136: Catch the case where we are passed a shape of NS_NULL.  */
+                 if (ns == NS_NULL)
+                   {
+                     first_error (_("invalid instruction shape"));
+                     return badtype;
+                   }
+                  regshape = neon_shape_tab[ns].el[i];
+                  regwidth = neon_shape_el_size[regshape];
  
                    /* In VFP mode, operands must match register widths. If we
                       have a key operand, use its width, else use the width of
@@ -12028,6 +12240,8 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
  static void
  do_vfp_cond_or_thumb (void)
  {
+  inst.is_neon = 1;
+
    if (thumb_mode)
      inst.instruction |= 0xe0000000;
    else
@@ -12056,6 +12270,8 @@ do_vfp_nsyn_opcode (const char *opname)
                  thumb_mode ? *opcode->tvariant : *opcode->avariant),
                _(BAD_FPU));
  
+  inst.is_neon = 1;
+
    if (thumb_mode)
      {
        inst.instruction = opcode->tvalue;
@@ -12121,9 +12337,8 @@ try_vfp_nsyn (int args, void (*pfn) (enum neon_shape))
        pfn (rs);
        return SUCCESS;
      }
-  else
-    inst.error = NULL;
  
+  inst.error = NULL;
    return FAIL;
  }
  
@@ -12137,14 +12352,35 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
        if (is_mla)
          do_vfp_nsyn_opcode ("fmacs");
        else
-        do_vfp_nsyn_opcode ("fmscs");
+        do_vfp_nsyn_opcode ("fnmacs");
      }
    else
      {
        if (is_mla)
          do_vfp_nsyn_opcode ("fmacd");
        else
-        do_vfp_nsyn_opcode ("fmscd");
+        do_vfp_nsyn_opcode ("fnmacd");
+    }
+}
+
+static void
+do_vfp_nsyn_fma_fms (enum neon_shape rs)
+{
+  int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
+  /* NS_FFF selects the "s" opcode names, any other shape the "d" ones.  */
+  if (rs == NS_FFF)
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmas");
+      else
+        do_vfp_nsyn_opcode ("ffnmas");
+    }
+  else
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmad");
+      else
+        do_vfp_nsyn_opcode ("ffnmad");
      }
  }
  
@@ -12236,12 +12472,12 @@ do_vfp_nsyn_nmul (void)
  
    if (rs == NS_FFF)
      {
-      inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+      NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
      }
    else
      {
-      inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+      NEON_ENCODE (DOUBLE, inst);
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
@@ -12257,12 +12493,12 @@ do_vfp_nsyn_cmp (void)
  
        if (rs == NS_FF)
          {
-          inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+          NEON_ENCODE (SINGLE, inst);
            do_vfp_sp_monadic ();
          }
        else
          {
-          inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+          NEON_ENCODE (DOUBLE, inst);
            do_vfp_dp_rd_rm ();
          }
      }
@@ -12285,12 +12521,12 @@ do_vfp_nsyn_cmp (void)
  
        if (rs == NS_FI)
          {
-          inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+          NEON_ENCODE (SINGLE, inst);
            do_vfp_sp_compare_z ();
          }
        else
          {
-          inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+          NEON_ENCODE (DOUBLE, inst);
            do_vfp_dp_rd ();
          }
      }
@@ -12331,9 +12567,12 @@ do_vfp_nsyn_pop (void)
  /* Fix up Neon data-processing instructions, ORing in the correct bits for
     ARM mode or Thumb mode and moving the encoded bit 24 to bit 28.  */
  
-static unsigned
-neon_dp_fixup (unsigned i)
+static void
+neon_dp_fixup (struct arm_it* insn)
  {
+  unsigned int i = insn->instruction;
+  insn->is_neon = 1;
+
    if (thumb_mode)
      {
        /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode.  */
@@ -12347,7 +12586,7 @@ neon_dp_fixup (unsigned i)
    else
      i |= 0xf2000000;
  
-  return i;
+  insn->instruction = i;
  }
  
  /* Turn a size (8, 16, 32, 64) into the respective bit number minus 3
@@ -12384,7 +12623,7 @@ neon_three_same (int isquad, int ubit, int size)
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 20;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  /* Encode instructions of the form:
@@ -12407,7 +12646,7 @@ neon_two_same (int qbit, int ubit, int size)
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 18;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  /* Neon instruction encoders, in approximate order of appearance.  */
@@ -12446,7 +12685,7 @@ neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et,
    if (write_ubit)
      inst.instruction |= (uval != 0) << 24;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -12456,7 +12695,7 @@ do_neon_shl_imm (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      NEON_ENCODE (IMMED, inst);
        neon_imm_shift (FALSE, 0, neon_quad (rs), et, inst.operands[2].imm);
      }
    else
@@ -12476,7 +12715,7 @@ do_neon_shl_imm (void)
        tmp = inst.operands[2].reg;
        inst.operands[2].reg = inst.operands[1].reg;
        inst.operands[1].reg = tmp;
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
      }
  }
@@ -12489,7 +12728,7 @@ do_neon_qshl_imm (void)
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
  
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      NEON_ENCODE (IMMED, inst);
        neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
                        inst.operands[2].imm);
      }
@@ -12504,7 +12743,7 @@ do_neon_qshl_imm (void)
        tmp = inst.operands[2].reg;
        inst.operands[2].reg = inst.operands[1].reg;
        inst.operands[1].reg = tmp;
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
      }
  }
@@ -12778,12 +13017,17 @@ do_neon_logic (void)
        enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
        neon_check_type (3, rs, N_IGNORE_TYPE);
        /* U bit and size field were set as part of the bitmask.  */
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        neon_three_same (neon_quad (rs), 0, -1);
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL);
+      const int three_ops_form = (inst.operands[2].present
+                                 && !inst.operands[2].isreg);
+      const int immoperand = (three_ops_form ? 2 : 1);
+      enum neon_shape rs = (three_ops_form
+                           ? neon_select_shape (NS_DDI, NS_QQI, NS_NULL)
+                           : neon_select_shape (NS_DI, NS_QI, NS_NULL));
        struct neon_type_el et = neon_check_type (2, rs,
          N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK);
        enum neon_opc opcode = (enum neon_opc) inst.instruction & 0x0fffffff;
@@ -12793,15 +13037,19 @@ do_neon_logic (void)
        if (et.type == NT_invtype)
          return;
  
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      if (three_ops_form)
+       constraint (inst.operands[0].reg != inst.operands[1].reg,
+                   _("first and second operands shall be the same register"));
+
+      NEON_ENCODE (IMMED, inst);
  
-      immbits = inst.operands[1].imm;
+      immbits = inst.operands[immoperand].imm;
        if (et.size == 64)
         {
           /* .i64 is a pseudo-op, so the immediate must be a repeating
              pattern.  */
-         if (immbits != (inst.operands[1].regisimm ?
-                         inst.operands[1].reg : 0))
+         if (immbits != (inst.operands[immoperand].regisimm ?
+                         inst.operands[immoperand].reg : 0))
             {
               /* Set immbits to an invalid constant.  */
               immbits = 0xdeadbeef;
@@ -12843,7 +13091,7 @@ do_neon_logic (void)
        inst.instruction |= cmode << 8;
        neon_write_immbits (immbits);
  
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
      }
  }
  
@@ -12864,12 +13112,12 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
                                              types | N_KEY);
    if (et.type == NT_float)
      {
-      inst.instruction = NEON_ENC_FLOAT (inst.instruction);
+      NEON_ENCODE (FLOAT, inst);
        neon_three_same (neon_quad (rs), 0, -1);
      }
    else
      {
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
      }
  }
@@ -13002,7 +13250,7 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert)
        struct neon_type_el et = neon_check_type (2, rs,
          N_EQK | N_SIZ, immtypes | N_KEY);
  
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      NEON_ENCODE (IMMED, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
@@ -13011,7 +13259,7 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert)
        inst.instruction |= (et.type == NT_float) << 10;
        inst.instruction |= neon_logbits (et.size) << 18;
  
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
      }
  }
  
@@ -13087,7 +13335,7 @@ neon_mul_mac (struct neon_type_el et, int ubit)
    inst.instruction |= neon_logbits (et.size) << 20;
    inst.instruction |= (ubit != 0) << 24;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -13104,7 +13352,7 @@ do_neon_mac_maybe_scalar (void)
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
          N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY);
-      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      NEON_ENCODE (SCALAR, inst);
        neon_mul_mac (et, neon_quad (rs));
      }
    else
@@ -13115,6 +13363,18 @@ do_neon_mac_maybe_scalar (void)
      }
  }
  
+static void
+do_neon_fmac (void)
+{
+  if (try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
+    return;
+  /* try_vfp_nsyn did not succeed: continue with the Neon encoding.  */
+  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+    return;
+
+  neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+}
+
  static void
  do_neon_tst (void)
  {
@@ -13151,7 +13411,7 @@ do_neon_qdmulh (void)
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
          N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
-      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      NEON_ENCODE (SCALAR, inst);
        neon_mul_mac (et, neon_quad (rs));
      }
    else
@@ -13159,7 +13419,7 @@ do_neon_qdmulh (void)
        enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
          N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        /* The U bit (rounding) comes from bit mask.  */
        neon_three_same (neon_quad (rs), 0, et.size);
      }
@@ -13212,7 +13472,7 @@ do_neon_abs_neg (void)
    inst.instruction |= (et.type == NT_float) << 10;
    inst.instruction |= neon_logbits (et.size) << 18;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -13263,7 +13523,7 @@ do_neon_qmovn (void)
      N_EQK | N_HLF, N_SU_16_64 | N_KEY);
    /* Saturating move where operands can be signed or unsigned, and the
       destination has the same signedness.  */
-  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  NEON_ENCODE (INTEGER, inst);
    if (et.type == NT_unsigned)
      inst.instruction |= 0xc0;
    else
@@ -13277,7 +13537,7 @@ do_neon_qmovun (void)
    struct neon_type_el et = neon_check_type (2, NS_DQ,
      N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
    /* Saturating move with unsigned results. Operands must be signed.  */
-  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  NEON_ENCODE (INTEGER, inst);
    neon_two_same (0, 1, et.size / 2);
  }
  
@@ -13343,7 +13603,7 @@ do_neon_movn (void)
  {
    struct neon_type_el et = neon_check_type (2, NS_DQ,
      N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
-  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  NEON_ENCODE (INTEGER, inst);
    neon_two_same (0, 1, et.size / 2);
  }
  
@@ -13383,21 +13643,21 @@ do_neon_shll (void)
    if (imm == et.size)
      {
        /* Maximum shift variant.  */
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
        inst.instruction |= HI1 (inst.operands[1].reg) << 5;
        inst.instruction |= neon_logbits (et.size) << 18;
  
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
      }
    else
      {
        /* A more-specific type check for non-max versions.  */
        et = neon_check_type (2, NS_QDI,
          N_EQK | N_DBL, N_SU_32 | N_KEY);
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      NEON_ENCODE (IMMED, inst);
        neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm);
      }
  }
@@ -13549,12 +13809,22 @@ do_vfp_nsyn_cvtz (void)
  }
  
  static void
-do_neon_cvt (void)
+do_neon_cvt_1 (bfd_boolean round_to_zero ATTRIBUTE_UNUSED)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
      NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL);
    int flavour = neon_cvt_flavour (rs);
  
+  /* PR11109: Handle round-to-zero for VCVT conversions.  */
+  if (round_to_zero
+      && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2)
+      && (flavour == 0 || flavour == 1 || flavour == 8 || flavour == 9)
+      && (rs == NS_FD || rs == NS_FF))
+    {
+      do_vfp_nsyn_cvtz ();
+      return;
+    }
+
    /* VFP rather than Neon conversions.  */
    if (flavour >= 6)
      {
@@ -13578,7 +13848,7 @@ do_neon_cvt (void)
          if (inst.operands[2].present && inst.operands[2].imm == 0)
            goto int_encode;
         immbits = 32 - inst.operands[2].imm;
-        inst.instruction = NEON_ENC_IMMED (inst.instruction);
+        NEON_ENCODE (IMMED, inst);
          if (flavour != -1)
            inst.instruction |= enctab[flavour];
          inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -13589,7 +13859,7 @@ do_neon_cvt (void)
          inst.instruction |= 1 << 21;
          inst.instruction |= immbits << 16;
  
-        inst.instruction = neon_dp_fixup (inst.instruction);
+        neon_dp_fixup (&inst);
        }
        break;
  
@@ -13599,7 +13869,7 @@ do_neon_cvt (void)
        {
          unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
  
-        inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+        NEON_ENCODE (INTEGER, inst);
  
          if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
            return;
@@ -13614,7 +13884,7 @@ do_neon_cvt (void)
          inst.instruction |= neon_quad (rs) << 6;
          inst.instruction |= 2 << 18;
  
-        inst.instruction = neon_dp_fixup (inst.instruction);
+        neon_dp_fixup (&inst);
        }
      break;
  
@@ -13645,7 +13915,7 @@ do_neon_cvt (void)
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
        inst.instruction |= HI1 (inst.operands[1].reg) << 5;
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
        break;
  
      default:
@@ -13654,6 +13924,18 @@ do_neon_cvt (void)
      }
  }
  
+static void
+do_neon_cvtr (void)
+{
+  do_neon_cvt_1 (FALSE);  /* round_to_zero is off for this entry point.  */
+}
+
+static void
+do_neon_cvt (void)
+{
+  do_neon_cvt_1 (TRUE);  /* Enables the PR11109 round-to-zero path.  */
+}
+
  static void
  do_neon_cvtb (void)
  {
@@ -13741,7 +14023,7 @@ do_neon_mvn (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
  
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
@@ -13750,11 +14032,11 @@ do_neon_mvn (void)
      }
    else
      {
-      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      NEON_ENCODE (IMMED, inst);
        neon_move_immediate ();
      }
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  /* Encode instructions of form:
@@ -13774,7 +14056,7 @@ neon_mixed_length (struct neon_type_el et, unsigned size)
    inst.instruction |= (et.type == NT_unsigned) << 24;
    inst.instruction |= neon_logbits (size) << 20;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -13801,14 +14083,14 @@ neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes)
      {
        struct neon_type_el et = neon_check_type (3, NS_QDS,
          N_EQK | N_DBL, N_EQK, regtypes | N_KEY);
-      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      NEON_ENCODE (SCALAR, inst);
        neon_mul_mac (et, et.type == NT_unsigned);
      }
    else
      {
        struct neon_type_el et = neon_check_type (3, NS_QDD,
          N_EQK | N_DBL, N_EQK, scalartypes | N_KEY);
-      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      NEON_ENCODE (INTEGER, inst);
        neon_mixed_length (et, et.size);
      }
  }
@@ -13854,9 +14136,9 @@ do_neon_vmull (void)
        struct neon_type_el et = neon_check_type (3, NS_QDD,
          N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_KEY);
        if (et.type == NT_poly)
-        inst.instruction = NEON_ENC_POLY (inst.instruction);
+        NEON_ENCODE (POLY, inst);
        else
-        inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+        NEON_ENCODE (INTEGER, inst);
        /* For polynomial encoding, size field must be 0b00 and the U bit must be
           zero. Should be OK as-is.  */
        neon_mixed_length (et, et.size);
@@ -13882,7 +14164,7 @@ do_neon_ext (void)
    inst.instruction |= neon_quad (rs) << 6;
    inst.instruction |= imm << 8;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -13918,7 +14200,7 @@ do_neon_dup (void)
        if (vfp_or_neon_is_neon (NEON_CHECK_CC) == FAIL)
          return;
  
-      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      NEON_ENCODE (SCALAR, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (dm);
@@ -13927,7 +14209,7 @@ do_neon_dup (void)
        inst.instruction |= x << 17;
        inst.instruction |= sizebits << 16;
  
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
      }
    else
      {
@@ -13935,7 +14217,7 @@ do_neon_dup (void)
        struct neon_type_el et = neon_check_type (2, rs,
          N_8 | N_16 | N_32 | N_KEY, N_EQK);
        /* Duplicate ARM register to lanes of vector.  */
-      inst.instruction = NEON_ENC_ARMREG (inst.instruction);
+      NEON_ENCODE (ARMREG, inst);
        switch (et.size)
          {
          case 8:  inst.instruction |= 0x400000; break;
@@ -14032,7 +14314,7 @@ do_neon_mov (void)
          inst.instruction |= HI1 (inst.operands[1].reg) << 7;
          inst.instruction |= neon_quad (rs) << 6;
  
-        inst.instruction = neon_dp_fixup (inst.instruction);
+        neon_dp_fixup (&inst);
        }
        break;
  
@@ -14052,18 +14334,19 @@ do_neon_mov (void)
          return;
        inst.instruction = 0x0800010;
        neon_move_immediate ();
-      inst.instruction = neon_dp_fixup (inst.instruction);
+      neon_dp_fixup (&inst);
        break;
  
      case NS_SR:  /* case 4.  */
        {
          unsigned bcdebits = 0;
-        struct neon_type_el et = neon_check_type (2, NS_NULL,
-          N_8 | N_16 | N_32 | N_KEY, N_EQK);
-        int logsize = neon_logbits (et.size);
+        int logsize;
          unsigned dn = NEON_SCALAR_REG (inst.operands[0].reg);
          unsigned x = NEON_SCALAR_INDEX (inst.operands[0].reg);
  
+        et = neon_check_type (2, NS_NULL, N_8 | N_16 | N_32 | N_KEY, N_EQK);
+        logsize = neon_logbits (et.size);
+
          constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
                      _(BAD_FPU));
          constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
@@ -14105,13 +14388,15 @@ do_neon_mov (void)
  
      case NS_RS:  /* case 6.  */
        {
-        struct neon_type_el et = neon_check_type (2, NS_NULL,
-          N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY);
-        unsigned logsize = neon_logbits (et.size);
+        unsigned logsize;
          unsigned dn = NEON_SCALAR_REG (inst.operands[1].reg);
          unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg);
          unsigned abcdebits = 0;
  
+       et = neon_check_type (2, NS_NULL,
+                             N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY);
+        logsize = neon_logbits (et.size);
+
          constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
                      _(BAD_FPU));
          constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
@@ -14238,7 +14523,7 @@ do_neon_trn (void)
    enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
    struct neon_type_el et = neon_check_type (2, rs,
      N_EQK, N_8 | N_16 | N_32 | N_KEY);
-  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  NEON_ENCODE (INTEGER, inst);
    neon_two_same (neon_quad (rs), 1, et.size);
  }
  
@@ -14342,7 +14627,7 @@ do_neon_tbl_tbx (void)
    inst.instruction |= HI1 (inst.operands[2].reg) << 5;
    inst.instruction |= listlenbits << 8;
  
-  inst.instruction = neon_dp_fixup (inst.instruction);
+  neon_dp_fixup (&inst);
  }
  
  static void
@@ -14428,12 +14713,13 @@ do_neon_ld_st_interleave (void)
        {
        case 64: alignbits = 1; break;
        case 128:
-        if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+        if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2
+           && NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4)
            goto bad_alignment;
          alignbits = 2;
          break;
        case 256:
-        if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+        if (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4)
            goto bad_alignment;
          alignbits = 3;
          break;
@@ -14668,20 +14954,23 @@ do_neon_ld_dup (void)
  static void
  do_neon_ldx_stx (void)
  {
+  if (inst.operands[1].isreg)
+    constraint (inst.operands[1].reg == REG_PC, BAD_PC);
+
    switch (NEON_LANE (inst.operands[0].imm))
      {
      case NEON_INTERLEAVE_LANES:
-      inst.instruction = NEON_ENC_INTERLV (inst.instruction);
+      NEON_ENCODE (INTERLV, inst);
        do_neon_ld_st_interleave ();
        break;
  
      case NEON_ALL_LANES:
-      inst.instruction = NEON_ENC_DUP (inst.instruction);
+      NEON_ENCODE (DUP, inst);
        do_neon_ld_dup ();
        break;
  
      default:
-      inst.instruction = NEON_ENC_LANE (inst.instruction);
+      NEON_ENCODE (LANE, inst);
        do_neon_ld_st_lane ();
      }
  
@@ -15467,7 +15756,7 @@ md_assemble (char *str)
           || (thumb_mode == 1
               && !ARM_CPU_HAS_FEATURE (variant, *opcode->tvariant)))
         {
-         as_bad (_("selected processor does not support `%s'"), str);
+         as_bad (_("selected processor does not support Thumb mode `%s'"), str);
           return;
         }
        if (inst.cond != COND_ALWAYS && !unified_syntax
@@ -15492,7 +15781,7 @@ md_assemble (char *str)
                 inst.size_req = 2;
               else if (inst.size_req == 4)
                 {
-                 as_bad (_("selected processor does not support `%s'"), str);
+                 as_bad (_("selected processor does not support Thumb-2 mode `%s'"), str);
                   return;
                 }
             }
@@ -15500,7 +15789,7 @@ md_assemble (char *str)
  
        inst.instruction = opcode->tvalue;
  
-      if (!parse_operands (p, opcode->operands))
+      if (!parse_operands (p, opcode->operands, /*thumb=*/TRUE))
          {
            /* Prepare the it_insn_type for those encodings that don't set
               it.  */
@@ -15539,6 +15828,8 @@ md_assemble (char *str)
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                                 arm_ext_v6t2);
  
+      check_neon_suffixes;
+
        if (!inst.error)
         {
           mapping_state (MAP_THUMB);
@@ -15556,7 +15847,7 @@ md_assemble (char *str)
           && !(opcode->avariant &&
                ARM_CPU_HAS_FEATURE (cpu_variant, *opcode->avariant)))
         {
-         as_bad (_("selected processor does not support `%s'"), str);
+         as_bad (_("selected processor does not support ARM mode `%s'"), str);
           return;
         }
        if (inst.size_req)
@@ -15571,7 +15862,7 @@ md_assemble (char *str)
        else
         inst.instruction |= inst.cond << 28;
        inst.size = INSN_SIZE;
-      if (!parse_operands (p, opcode->operands))
+      if (!parse_operands (p, opcode->operands, /*thumb=*/FALSE))
          {
            it_fsm_pre_encode ();
            opcode->aencode ();
@@ -15584,6 +15875,9 @@ md_assemble (char *str)
        else
         ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used,
                                 *opcode->avariant);
+
+      check_neon_suffixes;
+
        if (!inst.error)
         {
           mapping_state (MAP_ARM);
@@ -15947,7 +16241,8 @@ static struct reloc_entry reloc_names[] =
    { "tlsldm",  BFD_RELOC_ARM_TLS_LDM32}, { "TLSLDM",  BFD_RELOC_ARM_TLS_LDM32},
    { "tlsldo",  BFD_RELOC_ARM_TLS_LDO32}, { "TLSLDO",  BFD_RELOC_ARM_TLS_LDO32},
    { "gottpoff",BFD_RELOC_ARM_TLS_IE32},  { "GOTTPOFF",BFD_RELOC_ARM_TLS_IE32},
-  { "tpoff",   BFD_RELOC_ARM_TLS_LE32},  { "TPOFF",   BFD_RELOC_ARM_TLS_LE32}
+  { "tpoff",   BFD_RELOC_ARM_TLS_LE32},  { "TPOFF",   BFD_RELOC_ARM_TLS_LE32},
+  { "got_prel", BFD_RELOC_ARM_GOT_PREL}, { "GOT_PREL", BFD_RELOC_ARM_GOT_PREL}
  };
  #endif
  
@@ -15992,6 +16287,18 @@ static struct asm_barrier_opt barrier_opt_names[] =
  #define OPS5(a,b,c,d,e)          { OP_##a,OP_##b,OP_##c,OP_##d,OP_##e, }
  #define OPS6(a,b,c,d,e,f) { OP_##a,OP_##b,OP_##c,OP_##d,OP_##e,OP_##f, }
  
+/* These macros are similar to the OPSn, but do not prepend the OP_ prefix.
+   This is useful when mixing operands for ARM and THUMB, i.e. using the
+   MIX_ARM_THUMB_OPERANDS macro.
+   In order to use these macros, prefix the number of operands with _
+   e.g. _3, and write each operand with its OP_ prefix spelled out.  */
+#define OPS_1(a)          { a, }
+#define OPS_2(a,b)        { a,b, }
+#define OPS_3(a,b,c)      { a,b,c, }
+#define OPS_4(a,b,c,d)    { a,b,c,d, }
+#define OPS_5(a,b,c,d,e)   { a,b,c,d,e, }
+#define OPS_6(a,b,c,d,e,f) { a,b,c,d,e,f, }
+
  /* These macros abstract out the exact format of the mnemonic table and
     save some repeated characters.  */
  
@@ -16169,9 +16476,6 @@ static struct asm_barrier_opt barrier_opt_names[] =
  
  #define do_0 0
  
-/* Thumb-only, unconditional.  */
-#define UT(mnem,  op, nops, ops, te) TUE (mnem,  0, op, nops, ops, 0, te)
-
  static const struct asm_opcode insns[] =
  {
  #define ARM_VARIANT &arm_ext_v1 /* Core ARM Instructions.  */
@@ -16212,9 +16516,11 @@ static const struct asm_opcode insns[] =
   tC3("mvns",   1f00000, _mvns,    2, (RR, SH),      mov,  t_mvn_tst),
  
   tCE("ldr",    4100000, _ldr,     2, (RR, ADDRGLDR),ldst, t_ldst),
- tC3("ldrb",   4500000, _ldrb,    2, (RR, ADDRGLDR),ldst, t_ldst),
- tCE("str",    4000000, _str,     2, (RR, ADDRGLDR),ldst, t_ldst),
- tC3("strb",   4400000, _strb,    2, (RR, ADDRGLDR),ldst, t_ldst),
+ tC3("ldrb",   4500000, _ldrb,    2, (RRnpc_npcsp, ADDRGLDR),ldst, t_ldst),
+ tCE("str",    4000000, _str,     _2, (MIX_ARM_THUMB_OPERANDS (OP_RR,
+                                                               OP_RRnpc),
+                                       OP_ADDRGLDR),ldst, t_ldst),
+ tC3("strb",   4400000, _strb,    2, (RRnpc_npcsp, ADDRGLDR),ldst, t_ldst),
  
   tCE("stm",    8800000, _stmia,    2, (RRw, REGLST), ldmstm, t_ldmstm),
   tC3("stmia",  8800000, _stmia,    2, (RRw, REGLST), ldmstm, t_ldmstm),
@@ -16264,10 +16570,10 @@ static const struct asm_opcode insns[] =
   TC3w("teqs",  1300000, ea900f00, 2, (RR, SH),      cmp,  t_mvn_tst),
    CL("teqp",   130f000,           2, (RR, SH),      cmp),
  
- TC3("ldrt",   4300000, f8500e00, 2, (RR, ADDR),    ldstt, t_ldstt),
- TC3("ldrbt",  4700000, f8100e00, 2, (RR, ADDR),    ldstt, t_ldstt),
- TC3("strt",   4200000, f8400e00, 2, (RR, ADDR),    ldstt, t_ldstt),
- TC3("strbt",  4600000, f8000e00, 2, (RR, ADDR),    ldstt, t_ldstt),
+ TC3("ldrt",   4300000, f8500e00, 2, (RRnpc_npcsp, ADDR),ldstt, t_ldstt),
+ TC3("ldrbt",  4700000, f8100e00, 2, (RRnpc_npcsp, ADDR),ldstt, t_ldstt),
+ TC3("strt",   4200000, f8400e00, 2, (RR_npcsp, ADDR),   ldstt, t_ldstt),
+ TC3("strbt",  4600000, f8000e00, 2, (RRnpc_npcsp, ADDR),ldstt, t_ldstt),
  
   TC3("stmdb",  9000000, e9000000, 2, (RRw, REGLST), ldmstm, t_ldmstm),
   TC3("stmfd",     9000000, e9000000, 2, (RRw, REGLST), ldmstm, t_ldmstm),
@@ -16344,12 +16650,12 @@ static const struct asm_opcode insns[] =
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v4t
  
- tC3("ldrh",   01000b0, _ldrh,     2, (RR, ADDRGLDRS), ldstv4, t_ldst),
- tC3("strh",   00000b0, _strh,     2, (RR, ADDRGLDRS), ldstv4, t_ldst),
- tC3("ldrsh",  01000f0, _ldrsh,    2, (RR, ADDRGLDRS), ldstv4, t_ldst),
- tC3("ldrsb",  01000d0, _ldrsb,    2, (RR, ADDRGLDRS), ldstv4, t_ldst),
- tCM("ld","sh",        01000f0, _ldrsh,    2, (RR, ADDRGLDRS), ldstv4, t_ldst),
- tCM("ld","sb",        01000d0, _ldrsb,    2, (RR, ADDRGLDRS), ldstv4, t_ldst),
+ tC3("ldrh",   01000b0, _ldrh,     2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
+ tC3("strh",   00000b0, _strh,     2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
+ tC3("ldrsh",  01000f0, _ldrsh,    2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
+ tC3("ldrsb",  01000d0, _ldrsb,    2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
+ tCM("ld","sh",        01000f0, _ldrsh,    2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
+ tCM("ld","sb",        01000d0, _ldrsb,    2, (RRnpc_npcsp, ADDRGLDRS), ldstv4, t_ldst),
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v4t_5
@@ -16383,6 +16689,8 @@ static const struct asm_opcode insns[] =
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v5exp /*  ARM Architecture 5TExP.  */
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_v5exp
  
   TCE("smlabb", 1000080, fb100000, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
   TCE("smlatb", 10000a0, fb100020, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
@@ -16405,17 +16713,21 @@ static const struct asm_opcode insns[] =
   TCE("smulwb", 12000a0, fb30f000, 3, (RRnpc, RRnpc, RRnpc),        smul, t_simd),
   TCE("smulwt", 12000e0, fb30f010, 3, (RRnpc, RRnpc, RRnpc),        smul, t_simd),
  
- TCE("qadd",   1000050, fa80f080, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd),
- TCE("qdadd",  1400050, fa80f090, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd),
- TCE("qsub",   1200050, fa80f0a0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd),
- TCE("qdsub",  1600050, fa80f0b0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd),
+ TCE("qadd",   1000050, fa80f080, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
+ TCE("qdadd",  1400050, fa80f090, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
+ TCE("qsub",   1200050, fa80f0a0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
+ TCE("qdsub",  1600050, fa80f0b0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v5e /*  ARM Architecture 5TE.  */
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &arm_ext_v6t2
  
   TUF("pld",    450f000, f810f000, 1, (ADDR),                pld,  t_pld),
- TC3("ldrd",   00000d0, e8500000, 3, (RRnpc, oRRnpc, ADDRGLDRS), ldrd, t_ldstd),
- TC3("strd",   00000f0, e8400000, 3, (RRnpc, oRRnpc, ADDRGLDRS), ldrd, t_ldstd),
+ TC3("ldrd",   00000d0, e8500000, 3, (RRnpc_npcsp, oRRnpc_npcsp, ADDRGLDRS),
+     ldrd, t_ldstd),
+ TC3("strd",   00000f0, e8400000, 3, (RRnpc_npcsp, oRRnpc_npcsp,
+                                      ADDRGLDRS), ldrd, t_ldstd),
  
   TCE("mcrr",   c400000, ec400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TCE("mrrc",   c500000, ec500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
@@ -16444,18 +16756,34 @@ static const struct asm_opcode insns[] =
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
  
- TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc, ADDR),              ldrex, t_ldrex),
- TCE("strex",  1800f90, e8400000, 3, (RRnpc, RRnpc, ADDR),        strex,  t_strex),
+ TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc_npcsp, ADDR),        ldrex, t_ldrex),
+ TCE("strex",  1800f90, e8400000, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
+                                     strex,  t_strex),
   TUF("mcrr2",  c400000, fc400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TUF("mrrc2",  c500000, fc500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
  
   TCE("ssat",   6a00010, f3000000, 4, (RRnpc, I32, RRnpc, oSHllar),ssat,   t_ssat),
   TCE("usat",   6e00010, f3800000, 4, (RRnpc, I31, RRnpc, oSHllar),usat,   t_usat),
  
-/*  ARM V6 not included in V7M (eg. integer SIMD).  */
+/*  ARM V6 not included in V7M.  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_notm
+ TUF("rfeia",  8900a00, e990c000, 1, (RRw),                       rfe, rfe),
+  UF(rfeib,    9900a00,           1, (RRw),                       rfe),
+  UF(rfeda,    8100a00,           1, (RRw),                       rfe),
+ TUF("rfedb",  9100a00, e810c000, 1, (RRw),                       rfe, rfe),
+ TUF("rfefd",  8900a00, e990c000, 1, (RRw),                       rfe, rfe),
+  UF(rfefa,    9900a00,           1, (RRw),                       rfe),
+  UF(rfeea,    8100a00,           1, (RRw),                       rfe),
+ TUF("rfeed",  9100a00, e810c000, 1, (RRw),                       rfe, rfe),
+ TUF("srsia",  8c00500, e980c000, 2, (oRRw, I31w),                srs,  srs),
+  UF(srsib,    9c00500,           2, (oRRw, I31w),                srs),
+  UF(srsda,    8400500,           2, (oRRw, I31w),                srs),
+ TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
  
+/*  ARM V6 not included in V7M (eg. integer SIMD).  */
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6_dsp
   TUF("cps",    1020000, f3af8100, 1, (I31b),                     imm0, t_cps),
   TCE("pkhbt",  6800010, eac00000, 4, (RRnpc, RRnpc, RRnpc, oSHll),   pkhbt, t_pkhbt),
   TCE("pkhtb",  6800050, eac00020, 4, (RRnpc, RRnpc, RRnpc, oSHar),   pkhtb, t_pkhtb),
@@ -16519,14 +16847,6 @@ static const struct asm_opcode insns[] =
   /* Old name for USAX.  */
   TCE("usubaddx",       6500f50, fae0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("usub8",  6500ff0, fac0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
- TUF("rfeia",  8900a00, e990c000, 1, (RRw),                       rfe, rfe),
-  UF(rfeib,    9900a00,           1, (RRw),                       rfe),
-  UF(rfeda,    8100a00,           1, (RRw),                       rfe),
- TUF("rfedb",  9100a00, e810c000, 1, (RRw),                       rfe, rfe),
- TUF("rfefd",  8900a00, e990c000, 1, (RRw),                       rfe, rfe),
-  UF(rfefa,    9900a00,           1, (RRw),                       rfe),
-  UF(rfeea,    8100a00,           1, (RRw),                       rfe),
- TUF("rfeed",  9100a00, e810c000, 1, (RRw),                       rfe, rfe),
   TCE("sxtah",  6b00070, fa00f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
   TCE("sxtab16",        6800070, fa20f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
   TCE("sxtab",  6a00070, fa40f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
@@ -16554,10 +16874,6 @@ static const struct asm_opcode insns[] =
   TCE("smuadx", 700f030, fb20f010, 3, (RRnpc, RRnpc, RRnpc),       smul, t_simd),
   TCE("smusd",  700f050, fb40f000, 3, (RRnpc, RRnpc, RRnpc),       smul, t_simd),
   TCE("smusdx", 700f070, fb40f010, 3, (RRnpc, RRnpc, RRnpc),       smul, t_simd),
- TUF("srsia",  8c00500, e980c000, 2, (oRRw, I31w),                srs,  srs),
-  UF(srsib,    9c00500,           2, (oRRw, I31w),                srs),
-  UF(srsda,    8400500,           2, (oRRw, I31w),                srs),
- TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
   TCE("ssat16", 6a00f30, f3200000, 3, (RRnpc, I16, RRnpc),         ssat16, t_ssat16),
   TCE("umaal",  0400090, fbe00060, 4, (RRnpc, RRnpc, RRnpc, RRnpc),smlal,  t_mlal),
   TCE("usad8",  780f010, fb70f000, 3, (RRnpc, RRnpc, RRnpc),       smul,   t_simd),
@@ -16576,17 +16892,21 @@ static const struct asm_opcode insns[] =
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_notm
-
- TCE("ldrexd", 1b00f9f, e8d0007f, 3, (RRnpc, oRRnpc, RRnpcb),        ldrexd, t_ldrexd),
- TCE("strexd", 1a00f90, e8c00070, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb), strexd, t_strexd),
+ TCE("ldrexd", 1b00f9f, e8d0007f, 3, (RRnpc_npcsp, oRRnpc_npcsp, RRnpcb),
+                                     ldrexd, t_ldrexd),
+ TCE("strexd", 1a00f90, e8c00070, 4, (RRnpc_npcsp, RRnpc_npcsp, oRRnpc_npcsp,
+                                      RRnpcb), strexd, t_strexd),
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
-
- TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc, RRnpcb),                rd_rn,  rd_rn),
- TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc, RRnpcb),                rd_rn,  rd_rn),
- TCE("strexb", 1c00f90, e8c00f40, 3, (RRnpc, RRnpc, ADDR),           strex,  rm_rd_rn),
- TCE("strexh", 1e00f90, e8c00f50, 3, (RRnpc, RRnpc, ADDR),           strex,  rm_rd_rn),
+ TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc_npcsp,RRnpcb),
+     rd_rn,  rd_rn),
+ TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc_npcsp, RRnpcb),
+     rd_rn,  rd_rn),
+ TCE("strexb", 1c00f90, e8c00f40, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
+     strex, rm_rd_rn),
+ TCE("strexh", 1e00f90, e8c00f50, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
+     strex, rm_rd_rn), 
   TUF("clrex",  57ff01f, f3bf8f2f, 0, (),                             noargs, noargs),
  
  #undef  ARM_VARIANT
@@ -16607,13 +16927,16 @@ static const struct asm_opcode insns[] =
   TCE("movt",   3400000, f2c00000, 2, (RRnpc, HALF),                mov16, t_mov16),
   TCE("rbit",   6ff0f30, fa90f0a0, 2, (RR, RR),                     rd_rm, t_rbit),
  
- TC3("ldrht",  03000b0, f8300e00, 2, (RR, ADDR), ldsttv4, t_ldstt),
- TC3("ldrsht", 03000f0, f9300e00, 2, (RR, ADDR), ldsttv4, t_ldstt),
- TC3("ldrsbt", 03000d0, f9100e00, 2, (RR, ADDR), ldsttv4, t_ldstt),
- TC3("strht",  02000b0, f8200e00, 2, (RR, ADDR), ldsttv4, t_ldstt),
+ TC3("ldrht",  03000b0, f8300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
+ TC3("ldrsht", 03000f0, f9300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
+ TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
+ TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
-  UT("cbnz",      b900,    2, (RR, EXP), t_cbz),
-  UT("cbz",       b100,    2, (RR, EXP), t_cbz),
+ /* Thumb-only instructions.  */
+#undef ARM_VARIANT
+#define ARM_VARIANT NULL
+  TUE("cbnz",     0,           b900,     2, (RR, EXP), 0, t_cbz),
+  TUE("cbz",      0,           b100,     2, (RR, EXP), 0, t_cbz),
  
   /* ARM does not really have an IT instruction, so always allow it.
      The opcode is copied from Thumb in order to allow warnings in
@@ -17128,6 +17451,8 @@ static const struct asm_opcode insns[] =
   cCE("fmrs",   e100a10, 2, (RR, RVS),        vfp_reg_from_sp),
   cCE("fmsr",   e000a10, 2, (RVS, RR),        vfp_sp_from_reg),
   cCE("fmstat", ef1fa10, 0, (),               noargs),
+ cCE("vmrs",   ef10a10, 2, (APSR_RR, RVC),   vmrs),
+ cCE("vmsr",   ee10a10, 2, (RVC, RR),        vmsr),
   cCE("fsitos", eb80ac0, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("fuitos", eb80a40, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("ftosis", ebd0a40, 2, (RVS, RVS),       vfp_sp_monadic),
@@ -17140,22 +17465,22 @@ static const struct asm_opcode insns[] =
    /* Memory operations.         */
   cCE("flds",   d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
   cCE("fsts",   d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
- cCE("fldmias",        c900a00, 2, (RRw, VRSLST),    vfp_sp_ldstmia),
- cCE("fldmfds",        c900a00, 2, (RRw, VRSLST),    vfp_sp_ldstmia),
- cCE("fldmdbs",        d300a00, 2, (RRw, VRSLST),    vfp_sp_ldstmdb),
- cCE("fldmeas",        d300a00, 2, (RRw, VRSLST),    vfp_sp_ldstmdb),
- cCE("fldmiax",        c900b00, 2, (RRw, VRDLST),    vfp_xp_ldstmia),
- cCE("fldmfdx",        c900b00, 2, (RRw, VRDLST),    vfp_xp_ldstmia),
- cCE("fldmdbx",        d300b00, 2, (RRw, VRDLST),    vfp_xp_ldstmdb),
- cCE("fldmeax",        d300b00, 2, (RRw, VRDLST),    vfp_xp_ldstmdb),
- cCE("fstmias",        c800a00, 2, (RRw, VRSLST),    vfp_sp_ldstmia),
- cCE("fstmeas",        c800a00, 2, (RRw, VRSLST),    vfp_sp_ldstmia),
- cCE("fstmdbs",        d200a00, 2, (RRw, VRSLST),    vfp_sp_ldstmdb),
- cCE("fstmfds",        d200a00, 2, (RRw, VRSLST),    vfp_sp_ldstmdb),
- cCE("fstmiax",        c800b00, 2, (RRw, VRDLST),    vfp_xp_ldstmia),
- cCE("fstmeax",        c800b00, 2, (RRw, VRDLST),    vfp_xp_ldstmia),
- cCE("fstmdbx",        d200b00, 2, (RRw, VRDLST),    vfp_xp_ldstmdb),
- cCE("fstmfdx",        d200b00, 2, (RRw, VRDLST),    vfp_xp_ldstmdb),
+ cCE("fldmias",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ cCE("fldmfds",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ cCE("fldmdbs",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
+ cCE("fldmeas",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
+ cCE("fldmiax",        c900b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
+ cCE("fldmfdx",        c900b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
+ cCE("fldmdbx",        d300b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
+ cCE("fldmeax",        d300b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
+ cCE("fstmias",        c800a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ cCE("fstmeas",        c800a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
+ cCE("fstmdbs",        d200a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
+ cCE("fstmfds",        d200a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
+ cCE("fstmiax",        c800b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
+ cCE("fstmeax",        c800b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmia),
+ cCE("fstmdbx",        d200b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
+ cCE("fstmfdx",        d200b00, 2, (RRnpctw, VRDLST),    vfp_xp_ldstmdb),
  
    /* Monadic operations.  */
   cCE("fabss",  eb00ac0, 2, (RVS, RVS),       vfp_sp_monadic),
@@ -17179,6 +17504,19 @@ static const struct asm_opcode insns[] =
   cCE("fcmpes", eb40ac0, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("fcmpezs",        eb50ac0, 1, (RVS),            vfp_sp_compare_z),
  
+ /* Double precision load/store are still present on single precision
+    implementations.  */
+ cCE("fldd",   d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fstd",   d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fldmiad",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmfdd",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmdbd",        d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fldmead",        d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmiad",        c800b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmead",        c800b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmdbd",        d200b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmfdd",        d200b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
+
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_vfp_ext_v1 /* VFP V1 (Double precision).  */
  
@@ -17197,18 +17535,6 @@ static const struct asm_opcode insns[] =
   cCE("ftouid", ebc0b40, 2, (RVS, RVD),       vfp_sp_dp_cvt),
   cCE("ftouizd",        ebc0bc0, 2, (RVS, RVD),       vfp_sp_dp_cvt),
  
-  /* Memory operations.         */
- cCE("fldd",   d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd",   d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fldmiad",        c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmfdd",        c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmdbd",        d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fldmead",        d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmiad",        c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmead",        c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmdbd",        d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmfdd",        d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
-
    /* Monadic operations.  */
   cCE("fabsd",  eb00bc0, 2, (RVD, RVD),       vfp_dp_rd_rm),
   cCE("fnegd",  eb10b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
@@ -17269,16 +17595,17 @@ static const struct asm_opcode insns[] =
   NCEF(vabs,     1b10300, 2, (RNSDQ, RNSDQ), neon_abs_neg),
   NCEF(vneg,     1b10380, 2, (RNSDQ, RNSDQ), neon_abs_neg),
  
- NCE(vldm,      c900b00, 2, (RRw, VRSDLST), neon_ldm_stm),
- NCE(vldmia,    c900b00, 2, (RRw, VRSDLST), neon_ldm_stm),
- NCE(vldmdb,    d100b00, 2, (RRw, VRSDLST), neon_ldm_stm),
- NCE(vstm,      c800b00, 2, (RRw, VRSDLST), neon_ldm_stm),
- NCE(vstmia,    c800b00, 2, (RRw, VRSDLST), neon_ldm_stm),
- NCE(vstmdb,    d000b00, 2, (RRw, VRSDLST), neon_ldm_stm),
+ NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
   NCE(vldr,      d100b00, 2, (RVSD, ADDRGLDC), neon_ldr_str),
   NCE(vstr,      d000b00, 2, (RVSD, ADDRGLDC), neon_ldr_str),
  
- nCEF(vcvt,     _vcvt,    3, (RNSDQ, RNSDQ, oI32b), neon_cvt),
+ nCEF(vcvt,     _vcvt,   3, (RNSDQ, RNSDQ, oI32b), neon_cvt),
+ nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
   nCEF(vcvtb,   _vcvt,   2, (RVS, RVS), neon_cvtb),
   nCEF(vcvtt,   _vcvt,   2, (RVS, RVS), neon_cvtt),
  
@@ -17319,16 +17646,16 @@ static const struct asm_opcode insns[] =
   nUF(vqshl,     _vqshl,   3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm),
   nUF(vqshlq,    _vqshl,   3, (RNQ,  oRNQ,  RNDQ_I63b), neon_qshl_imm),
    /* Logic ops, types optional & ignored.  */
- nUF(vand,      _vand,    2, (RNDQ, NILO),        neon_logic),
- nUF(vandq,     _vand,    2, (RNQ,  NILO),        neon_logic),
- nUF(vbic,      _vbic,    2, (RNDQ, NILO),        neon_logic),
- nUF(vbicq,     _vbic,    2, (RNQ,  NILO),        neon_logic),
- nUF(vorr,      _vorr,    2, (RNDQ, NILO),        neon_logic),
- nUF(vorrq,     _vorr,    2, (RNQ,  NILO),        neon_logic),
- nUF(vorn,      _vorn,    2, (RNDQ, NILO),        neon_logic),
- nUF(vornq,     _vorn,    2, (RNQ,  NILO),        neon_logic),
- nUF(veor,      _veor,    3, (RNDQ, oRNDQ, RNDQ), neon_logic),
- nUF(veorq,     _veor,    3, (RNQ,  oRNQ,  RNQ),  neon_logic),
+ nUF(vand,      _vand,    3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
+ nUF(vandq,     _vand,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
+ nUF(vbic,      _vbic,    3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
+ nUF(vbicq,     _vbic,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
+ nUF(vorr,      _vorr,    3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
+ nUF(vorrq,     _vorr,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
+ nUF(vorn,      _vorn,    3, (RNDQ, oRNDQ, RNDQ_Ibig), neon_logic),
+ nUF(vornq,     _vorn,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
+ nUF(veor,      _veor,    3, (RNDQ, oRNDQ, RNDQ),      neon_logic),
+ nUF(veorq,     _veor,    3, (RNQ,  oRNQ,  RNQ),       neon_logic),
    /* Bitfield ops, untyped.  */
   NUF(vbsl,      1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
   NUF(vbslq,     1100110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
@@ -17427,8 +17754,8 @@ static const struct asm_opcode insns[] =
    /* CVT with optional immediate for fixed-point variant.  */
   nUF(vcvtq,     _vcvt,    3, (RNQ, RNQ, oI32b), neon_cvt),
  
- nUF(vmvn,      _vmvn,    2, (RNDQ, RNDQ_IMVNb), neon_mvn),
- nUF(vmvnq,     _vmvn,    2, (RNQ,  RNDQ_IMVNb), neon_mvn),
+ nUF(vmvn,      _vmvn,    2, (RNDQ, RNDQ_Ibig), neon_mvn),
+ nUF(vmvnq,     _vmvn,    2, (RNQ,  RNDQ_Ibig), neon_mvn),
  
    /* Data processing, three registers of different lengths.  */
    /* Dyadic, long insns. Types S8 S16 S32 U8 U16 U32.  */
@@ -17535,29 +17862,52 @@ static const struct asm_opcode insns[] =
   nUF(vst4,      _vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
  
  #undef  THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_v3xd
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_v3xd
+ cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
+ cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+
+#undef THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_v3
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v3
  
- cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
   cCE("fconstd",   eb00b00, 2, (RVD, I255),      vfp_dp_const),
- cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("fshtod",    eba0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
   cCE("fsltod",    eba0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("fuhtod",    ebb0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
   cCE("fultod",    ebb0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("ftoshd",    ebe0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
   cCE("ftosld",    ebe0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("ftouhd",    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
   cCE("ftould",    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
  
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_fma
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_fma
+ /* Mnemonics shared by Neon and VFP.  These are included in the
+    VFP FMA variant; NEON and VFP FMA together always include the
+    NEON FMA instructions.  */
+ nCEF(vfma,     _vfma,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ nCEF(vfms,     _vfms,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ /* ffmas/ffnmas/ffmad/ffnmad are dummy mnemonics to satisfy gas;
+    the v form should always be used.  */
+ cCE("ffmas",  ea00a00, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffnmas", ea00a40, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffmad",  ea00b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE("ffnmad", ea00b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ nCE(vfnma,     _vfnma,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+ nCE(vfnms,     _vfnms,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+
  #undef THUMB_VARIANT
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_cext_xscale /* Intel XScale extensions.  */
@@ -18215,8 +18565,10 @@ relax_adr (fragS *fragp, asection *sec, long stretch)
    offsetT val;
  
    /* Assume worst case for symbols not known to be in the same section.  */
-  if (!S_IS_DEFINED (fragp->fr_symbol)
-      || sec != S_GET_SEGMENT (fragp->fr_symbol))
+  if (fragp->fr_symbol == NULL
+      || !S_IS_DEFINED (fragp->fr_symbol)
+      || sec != S_GET_SEGMENT (fragp->fr_symbol)
+      || S_IS_WEAK (fragp->fr_symbol))
      return 4;
  
    val = relaxed_symbol_addr (fragp, stretch);
@@ -18259,7 +18611,8 @@ relax_branch (fragS *fragp, asection *sec, int bits, long stretch)
  
    /* Assume worst case for symbols not known to be in the same section.  */
    if (!S_IS_DEFINED (fragp->fr_symbol)
-      || sec != S_GET_SEGMENT (fragp->fr_symbol))
+      || sec != S_GET_SEGMENT (fragp->fr_symbol)
+      || S_IS_WEAK (fragp->fr_symbol))
      return 4;
  
  #ifdef OBJ_ELF
@@ -18435,9 +18788,7 @@ arm_handle_align (fragS * fragP)
    if (bytes > MAX_MEM_FOR_RS_ALIGN_CODE)
      bytes &= MAX_MEM_FOR_RS_ALIGN_CODE;
  
-#ifdef OBJ_ELF
    gas_assert ((fragP->tc_frag_data.thumb_mode & MODE_RECORDED) != 0);
-#endif
  
    if (fragP->tc_frag_data.thumb_mode & (~ MODE_RECORDED))
      {
@@ -18543,7 +18894,7 @@ void
  arm_init_frag (fragS * fragP, int max_chars ATTRIBUTE_UNUSED)
  {
    /* Record whether this frag is in an ARM or a THUMB area.  */
-  fragP->tc_frag_data.thumb_mode = thumb_mode;
+  fragP->tc_frag_data.thumb_mode = thumb_mode | MODE_RECORDED;
  }
  
  #else /* OBJ_ELF is defined.  */
@@ -18987,12 +19338,12 @@ tc_arm_regname_to_dw2regnum (char *regname)
  void
  tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
  {
-  expressionS expr;
+  expressionS exp;
  
-  expr.X_op = O_secrel;
-  expr.X_add_symbol = symbol;
-  expr.X_add_number = 0;
-  emit_expr (&expr, size);
+  exp.X_op = O_secrel;
+  exp.X_add_symbol = symbol;
+  exp.X_add_number = 0;
+  emit_expr (&exp, size);
  }
  #endif
  
@@ -19049,7 +19400,9 @@ md_pcrel_from_section (fixS * fixP, segT seg)
        return base + 4;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH23:
-       if (fixP->fx_addsy
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && (!S_IS_EXTERNAL (fixP->fx_addsy))
           && ARM_IS_FUNC (fixP->fx_addsy)
           && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t))
         base = fixP->fx_where + fixP->fx_frag->fr_address;
@@ -19057,8 +19410,10 @@ md_pcrel_from_section (fixS * fixP, segT seg)
  
        /* BLX is like branches above, but forces the low two bits of PC to
          zero.  */
-     case BFD_RELOC_THUMB_PCREL_BLX:
-       if (fixP->fx_addsy
+    case BFD_RELOC_THUMB_PCREL_BLX:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && (!S_IS_EXTERNAL (fixP->fx_addsy))
           && THUMB_IS_FUNC (fixP->fx_addsy)
           && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t))
         base = fixP->fx_where + fixP->fx_frag->fr_address;
@@ -19067,18 +19422,22 @@ md_pcrel_from_section (fixS * fixP, segT seg)
        /* ARM mode branches are offset by +8.  However, the Windows CE
          loader expects the relocation not to take this into account.  */
      case BFD_RELOC_ARM_PCREL_BLX:
-       if (fixP->fx_addsy
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && (!S_IS_EXTERNAL (fixP->fx_addsy))
           && ARM_IS_FUNC (fixP->fx_addsy)
           && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t))
         base = fixP->fx_where + fixP->fx_frag->fr_address;
-       return base + 8;
+      return base + 8;
  
-      case BFD_RELOC_ARM_PCREL_CALL:
-       if (fixP->fx_addsy
+    case BFD_RELOC_ARM_PCREL_CALL:
+      if (fixP->fx_addsy
+         && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
+         && (!S_IS_EXTERNAL (fixP->fx_addsy))
           && THUMB_IS_FUNC (fixP->fx_addsy)
           && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t))
         base = fixP->fx_where + fixP->fx_frag->fr_address;
-       return base + 8;
+      return base + 8;
  
      case BFD_RELOC_ARM_PCREL_BRANCH:
      case BFD_RELOC_ARM_PCREL_JUMP:
@@ -19392,6 +19751,31 @@ arm_optimize_expr (expressionS *l, operatorT op, expressionS *r)
    return FALSE;
  }
  
+/* Encode the offset VALUE of a Thumb2 unconditional branch or call into
+   the two halfwords at BUF; both use the same immediate field layout.  */
+
+static void
+encode_thumb2_b_bl_offset (char * buf, offsetT value)
+{
+#define T2I1I2MASK  ((1 << 13) | (1 << 11)) /* I1/I2 slots, halfword 2.  */
+  offsetT newval;
+  offsetT newval2;
+  addressT S, I1, I2, lo, hi;
+
+  S = (value >> 24) & 0x01;    /* Bit 24 of VALUE.  */
+  I1 = (value >> 23) & 0x01;   /* Bit 23 of VALUE.  */
+  I2 = (value >> 22) & 0x01;   /* Bit 22 of VALUE.  */
+  hi = (value >> 12) & 0x3ff;  /* Bits 21..12 of VALUE (10 bits).  */
+  lo = (value >> 1) & 0x7ff;   /* Bits 11..1 of VALUE; bit 0 is dropped.  */
+  newval   = md_chars_to_number (buf, THUMB_SIZE); /* First halfword.  */
+  newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE); /* Second.  */
+  newval  |= (S << 10) | hi;   /* OR in S at bit 10, hi at bits 9..0.  */
+  newval2 &=  ~T2I1I2MASK; /* Clear bits 13/11, then set ~(I1^S), ~(I2^S).  */
+  newval2 |= (((I1 ^ S) << 13) | ((I2 ^ S) << 11) | lo) ^ T2I1I2MASK;
+  md_number_to_chars (buf, newval, THUMB_SIZE); /* Write both halves back.  */
+  md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
+}
+
  void
  md_apply_fix (fixS *   fixP,
                valueT * valP,
@@ -19439,22 +19823,23 @@ md_apply_fix (fixS *  fixP,
          not have a reloc for it, so tc_gen_reloc will reject it.  */
        fixP->fx_done = 1;
  
-      if (fixP->fx_addsy
-         && ! S_IS_DEFINED (fixP->fx_addsy))
+      if (fixP->fx_addsy)
         {
-         as_bad_where (fixP->fx_file, fixP->fx_line,
-                       _("undefined symbol %s used as an immediate value"),
-                       S_GET_NAME (fixP->fx_addsy));
-         break;
-       }
+         const char *msg = 0;
  
-      if (fixP->fx_addsy
-         && S_GET_SEGMENT (fixP->fx_addsy) != seg)
-       {
-         as_bad_where (fixP->fx_file, fixP->fx_line,
-                       _("symbol %s is in a different section"),
-                       S_GET_NAME (fixP->fx_addsy));
-         break;
+         if (! S_IS_DEFINED (fixP->fx_addsy))
+           msg = _("undefined symbol %s used as an immediate value");
+         else if (S_GET_SEGMENT (fixP->fx_addsy) != seg)
+           msg = _("symbol %s is in a different section");
+         else if (S_IS_WEAK (fixP->fx_addsy))
+           msg = _("symbol %s is weak and may be overridden later");
+
+         if (msg)
+           {
+             as_bad_where (fixP->fx_file, fixP->fx_line,
+                           msg, S_GET_NAME (fixP->fx_addsy));
+             break;
+           }
         }
  
        newimm = encode_arm_immediate (value);
@@ -19480,24 +19865,25 @@ md_apply_fix (fixS *  fixP,
         unsigned int highpart = 0;
         unsigned int newinsn  = 0xe1a00000; /* nop.  */
  
-       if (fixP->fx_addsy
-           && ! S_IS_DEFINED (fixP->fx_addsy))
+       if (fixP->fx_addsy)
           {
-           as_bad_where (fixP->fx_file, fixP->fx_line,
-                         _("undefined symbol %s used as an immediate value"),
-                         S_GET_NAME (fixP->fx_addsy));
-           break;
-         }
+           const char *msg = 0;
  
-       if (fixP->fx_addsy
-           && S_GET_SEGMENT (fixP->fx_addsy) != seg)
-         {
-           as_bad_where (fixP->fx_file, fixP->fx_line,
-                         _("symbol %s is in a different section"),
-                         S_GET_NAME (fixP->fx_addsy));
-           break;
-         }
+           if (! S_IS_DEFINED (fixP->fx_addsy))
+             msg = _("undefined symbol %s used as an immediate value");
+           else if (S_GET_SEGMENT (fixP->fx_addsy) != seg)
+             msg = _("symbol %s is in a different section");
+           else if (S_IS_WEAK (fixP->fx_addsy))
+             msg = _("symbol %s is weak and may be overridden later");
  
+           if (msg)
+             {
+               as_bad_where (fixP->fx_file, fixP->fx_line,
+                             msg, S_GET_NAME (fixP->fx_addsy));
+               break;
+             }
+         }
+       
         newimm = encode_arm_immediate (value);
         temp = md_chars_to_number (buf, INSN_SIZE);
  
@@ -20084,10 +20470,6 @@ md_apply_fix (fixS *   fixP,
          fixP->fx_r_type = BFD_RELOC_THUMB_PCREL_BRANCH23;
  #endif
  
-      if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
-       as_bad_where (fixP->fx_file, fixP->fx_line,
-                     _("branch out of range"));
-
        if (fixP->fx_r_type == BFD_RELOC_THUMB_PCREL_BLX)
         /* For a BLX instruction, make sure that the relocation is rounded up
            to a word boundary.  This follows the semantics of the instruction
@@ -20095,17 +20477,25 @@ md_apply_fix (fixS *  fixP,
            1 of the base address.  */
         value = (value + 1) & ~ 1;
  
-      if (fixP->fx_done || !seg->use_rela_p)
-       {
-         offsetT newval2;
  
-         newval   = md_chars_to_number (buf, THUMB_SIZE);
-         newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
-         newval  |= (value & 0x7fffff) >> 12;
-         newval2 |= (value & 0xfff) >> 1;
-         md_number_to_chars (buf, newval, THUMB_SIZE);
-         md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
+       if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
+       {
+         if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2)))
+           {
+             as_bad_where (fixP->fx_file, fixP->fx_line,
+                           _("branch out of range"));
+           }
+         else if ((value & ~0x1ffffff)
+                  && ((value & ~0x1ffffff) != ~0x1ffffff))
+             {
+               as_bad_where (fixP->fx_file, fixP->fx_line,
+                           _("Thumb2 branch out of range"));
+             }
         }
+
+      if (fixP->fx_done || !seg->use_rela_p)
+       encode_thumb2_b_bl_offset (buf, value);
+
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH25:
@@ -20114,26 +20504,8 @@ md_apply_fix (fixS *   fixP,
                       _("branch out of range"));
  
        if (fixP->fx_done || !seg->use_rela_p)
-       {
-         offsetT newval2;
-         addressT S, I1, I2, lo, hi;
-
-         S  = (value & 0x01000000) >> 24;
-         I1 = (value & 0x00800000) >> 23;
-         I2 = (value & 0x00400000) >> 22;
-         hi = (value & 0x003ff000) >> 12;
-         lo = (value & 0x00000ffe) >> 1;
+         encode_thumb2_b_bl_offset (buf, value);
  
-         I1 = !(I1 ^ S);
-         I2 = !(I2 ^ S);
-
-         newval   = md_chars_to_number (buf, THUMB_SIZE);
-         newval2  = md_chars_to_number (buf + THUMB_SIZE, THUMB_SIZE);
-         newval  |= (S << 10) | hi;
-         newval2 |= (I1 << 13) | (I2 << 11) | lo;
-         md_number_to_chars (buf, newval, THUMB_SIZE);
-         md_number_to_chars (buf + THUMB_SIZE, newval2, THUMB_SIZE);
-       }
        break;
  
      case BFD_RELOC_8:
@@ -20160,7 +20532,12 @@ md_apply_fix (fixS *   fixP,
        if (fixP->fx_done || !seg->use_rela_p)
         md_number_to_chars (buf, 0, 4);
        break;
-      
+
+    case BFD_RELOC_ARM_GOT_PREL:
+      if (fixP->fx_done || !seg->use_rela_p)
+        md_number_to_chars (buf, value, 4);
+      break;
+
      case BFD_RELOC_ARM_TARGET2:
        /* TARGET2 is not partial-inplace, so we need to write the
           addend here for REL targets, because it won't be written out
@@ -20745,6 +21122,7 @@ tc_gen_reloc (asection *section, fixS *fixp)
  #ifdef OBJ_ELF
      case BFD_RELOC_ARM_GOT32:
      case BFD_RELOC_ARM_GOTOFF:
+    case BFD_RELOC_ARM_GOT_PREL:
      case BFD_RELOC_ARM_PLT32:
      case BFD_RELOC_ARM_TARGET1:
      case BFD_RELOC_ARM_ROSEGREL32:
@@ -21862,6 +22240,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                                                            NULL},
    {"cortex-r4",                ARM_ARCH_V7R,    FPU_NONE,        NULL},
    {"cortex-r4f",       ARM_ARCH_V7R,    FPU_ARCH_VFP_V3D16,      NULL},
+  {"cortex-m4",                ARM_ARCH_V7EM,   FPU_NONE,        NULL},
    {"cortex-m3",                ARM_ARCH_V7M,    FPU_NONE,        NULL},
    {"cortex-m1",                ARM_ARCH_V6M,    FPU_NONE,        NULL},
    {"cortex-m0",                ARM_ARCH_V6M,    FPU_NONE,        NULL},
@@ -21923,6 +22302,7 @@ static const struct arm_arch_option_table arm_archs[] =
    {"armv7-a",          ARM_ARCH_V7A,    FPU_ARCH_VFP},
    {"armv7-r",          ARM_ARCH_V7R,    FPU_ARCH_VFP},
    {"armv7-m",          ARM_ARCH_V7M,    FPU_ARCH_VFP},
+  {"armv7e-m",         ARM_ARCH_V7EM,   FPU_ARCH_VFP},
    {"xscale",           ARM_ARCH_XSCALE, FPU_ARCH_VFP},
    {"iwmmxt",           ARM_ARCH_IWMMXT, FPU_ARCH_VFP},
    {"iwmmxt2",          ARM_ARCH_IWMMXT2,FPU_ARCH_VFP},
@@ -21967,7 +22347,11 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
    {"vfpxd",            FPU_ARCH_VFP_V1xD},
    {"vfpv2",            FPU_ARCH_VFP_V2},
    {"vfpv3",            FPU_ARCH_VFP_V3},
+  {"vfpv3-fp16",       FPU_ARCH_VFP_V3_FP16},
    {"vfpv3-d16",                FPU_ARCH_VFP_V3D16},
+  {"vfpv3-d16-fp16",   FPU_ARCH_VFP_V3D16_FP16},
+  {"vfpv3xd",          FPU_ARCH_VFP_V3xD},
+  {"vfpv3xd-fp16",     FPU_ARCH_VFP_V3xD_FP16},
    {"arm1020t",         FPU_ARCH_VFP_V1},
    {"arm1020e",         FPU_ARCH_VFP_V2},
    {"arm1136jfs",       FPU_ARCH_VFP_V2},
@@ -21975,6 +22359,10 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
    {"maverick",         FPU_ARCH_MAVERICK},
    {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
    {"neon-fp16",                FPU_ARCH_NEON_FP16},
+  {"vfpv4",            FPU_ARCH_VFP_V4},
+  {"vfpv4-d16",                FPU_ARCH_VFP_V4D16},
+  {"fpv4-sp-d16",      FPU_ARCH_VFP_V4_SP_D16},
+  {"neon-vfpv4",       FPU_ARCH_NEON_VFP_V4},
    {NULL,               ARM_ARCH_NONE}
  };
  
@@ -22437,24 +22825,40 @@ aeabi_set_public_attributes (void)
         }
      }
  
+  /* The table lookup above finds the last architecture to contribute
+     a new feature.  Unfortunately, Tag13 is a subset of the union of
+     v6T2 and v7-M, so it is never seen as contributing a new feature.
+     We can not search for the last entry which is entirely used,
+     because if no CPU is specified we build up only those flags
+     actually used.  Perhaps we should separate out the specified
+     and implicit cases.  Avoid taking this path for -march=all by
+     checking for contradictory v7-A / v7-M features.  */
+  if (arch == 10
+      && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
+      && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
+      && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
+    arch = 13;
+
    /* Tag_CPU_name.  */
    if (selected_cpu_name[0])
      {
-      char *p;
+      char *q;
  
-      p = selected_cpu_name;
-      if (strncmp (p, "armv", 4) == 0)
+      q = selected_cpu_name;
+      if (strncmp (q, "armv", 4) == 0)
         {
           int i;
  
-         p += 4;
-         for (i = 0; p[i]; i++)
-           p[i] = TOUPPER (p[i]);
+         q += 4;
+         for (i = 0; q[i]; i++)
+           q[i] = TOUPPER (q[i]);
         }
-      aeabi_set_attribute_string (Tag_CPU_name, p);
+      aeabi_set_attribute_string (Tag_CPU_name, q);
      }
+
    /* Tag_CPU_arch.  */
    aeabi_set_attribute_int (Tag_CPU_arch, arch);
+
    /* Tag_CPU_arch_profile.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a))
      aeabi_set_attribute_int (Tag_CPU_arch_profile, 'A');
@@ -22462,36 +22866,62 @@ aeabi_set_public_attributes (void)
      aeabi_set_attribute_int (Tag_CPU_arch_profile, 'R');
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_m))
      aeabi_set_attribute_int (Tag_CPU_arch_profile, 'M');
+
    /* Tag_ARM_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v1)
        || arch == 0)
      aeabi_set_attribute_int (Tag_ARM_ISA_use, 1);
+
    /* Tag_THUMB_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
        || arch == 0)
      aeabi_set_attribute_int (Tag_THUMB_ISA_use,
         ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
+
    /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
+    aeabi_set_attribute_int (Tag_VFP_arch,
+                            ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
+                            ? 5 : 6);
+  else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
      aeabi_set_attribute_int (Tag_VFP_arch, 3);
-  else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v3))
+  else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v3xd))
      aeabi_set_attribute_int (Tag_VFP_arch, 4);
    else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v2))
      aeabi_set_attribute_int (Tag_VFP_arch, 2);
    else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1)
             || ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1xd))
      aeabi_set_attribute_int (Tag_VFP_arch, 1);
+
+  /* Tag_ABI_HardFP_use.  */
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1xd)
+      && !ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1))
+    aeabi_set_attribute_int (Tag_ABI_HardFP_use, 1);
+
    /* Tag_WMMX_arch.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt2))
      aeabi_set_attribute_int (Tag_WMMX_arch, 2);
    else if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt))
      aeabi_set_attribute_int (Tag_WMMX_arch, 1);
+
    /* Tag_Advanced_SIMD_arch (formerly Tag_NEON_arch).  */
    if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v1))
-    aeabi_set_attribute_int (Tag_Advanced_SIMD_arch, 1);
+    aeabi_set_attribute_int
+      (Tag_Advanced_SIMD_arch, (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_fma)
+                               ? 2 : 1));
+  
    /* Tag_VFP_HP_extension (formerly Tag_NEON_FP16_arch).  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_fp16))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_fp16))
      aeabi_set_attribute_int (Tag_VFP_HP_extension, 1);
+
+  /* Tag_DIV_use.  */
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_div))
+    aeabi_set_attribute_int (Tag_DIV_use, 0);
+  /* Fill this in when gas supports v7a sdiv/udiv.
+    else if (... v7a with div extension used ...)
+      aeabi_set_attribute_int (Tag_DIV_use, 2);  */
+  else
+    aeabi_set_attribute_int (Tag_DIV_use, 1);
  }
  
  /* Add the default contents for the .ARM.attributes section.  */
@@ -22672,6 +23102,7 @@ arm_convert_symbolic_attribute (const char *name)
        T (Tag_CPU_arch_profile),
        T (Tag_ARM_ISA_use),
        T (Tag_THUMB_ISA_use),
+      T (Tag_FP_arch),
        T (Tag_VFP_arch),
        T (Tag_WMMX_arch),
        T (Tag_Advanced_SIMD_arch),
@@ -22686,7 +23117,9 @@ arm_convert_symbolic_attribute (const char *name)
        T (Tag_ABI_FP_exceptions),
        T (Tag_ABI_FP_user_exceptions),
        T (Tag_ABI_FP_number_model),
+      T (Tag_ABI_align_needed),
        T (Tag_ABI_align8_needed),
+      T (Tag_ABI_align_preserved),
        T (Tag_ABI_align8_preserved),
        T (Tag_ABI_enum_size),
        T (Tag_ABI_HardFP_use),
@@ -22696,14 +23129,17 @@ arm_convert_symbolic_attribute (const char *name)
        T (Tag_ABI_FP_optimization_goals),
        T (Tag_compatibility),
        T (Tag_CPU_unaligned_access),
+      T (Tag_FP_HP_extension),
        T (Tag_VFP_HP_extension),
        T (Tag_ABI_FP_16bit_format),
+      T (Tag_MPextension_use),
+      T (Tag_DIV_use),
        T (Tag_nodefaults),
        T (Tag_also_compatible_with),
        T (Tag_conformance),
        T (Tag_T2EE_use),
        T (Tag_Virtualization_use),
-      T (Tag_MPextension_use)
+      /* We deliberately do not include Tag_MPextension_use_legacy.  */
  #undef T
      };
    unsigned int i;