[ARM] Add feature check for ARMv8.1 AdvSIMD instructions.

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index 78b1ae5a4326cbad4413b9004f0402c0251a61af..e725f41abb4404cc37f0fbc186f6690510891c64 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
  /* tc-arm.c -- Assemble for the ARM
-   Copyright (C) 1994-2015 Free Software Foundation, Inc.
+   Copyright (C) 1994-2016 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -201,12 +201,24 @@ static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R);
  static const arm_feature_set arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M);
  static const arm_feature_set arm_ext_v8 = ARM_FEATURE_CORE_LOW (ARM_EXT_V8);
  static const arm_feature_set arm_ext_m =
  static const arm_feature_set arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M);
  static const arm_feature_set arm_ext_v8 = ARM_FEATURE_CORE_LOW (ARM_EXT_V8);
  static const arm_feature_set arm_ext_m =
-  ARM_FEATURE_CORE_LOW (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M);
+  ARM_FEATURE_CORE (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M, ARM_EXT2_V8M);
  static const arm_feature_set arm_ext_mp = ARM_FEATURE_CORE_LOW (ARM_EXT_MP);
  static const arm_feature_set arm_ext_sec = ARM_FEATURE_CORE_LOW (ARM_EXT_SEC);
  static const arm_feature_set arm_ext_os = ARM_FEATURE_CORE_LOW (ARM_EXT_OS);
  static const arm_feature_set arm_ext_adiv = ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV);
  static const arm_feature_set arm_ext_virt = ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT);
  static const arm_feature_set arm_ext_mp = ARM_FEATURE_CORE_LOW (ARM_EXT_MP);
  static const arm_feature_set arm_ext_sec = ARM_FEATURE_CORE_LOW (ARM_EXT_SEC);
  static const arm_feature_set arm_ext_os = ARM_FEATURE_CORE_LOW (ARM_EXT_OS);
  static const arm_feature_set arm_ext_adiv = ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV);
  static const arm_feature_set arm_ext_virt = ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT);
+static const arm_feature_set arm_ext_pan = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN);
+static const arm_feature_set arm_ext_v8m = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M);
+static const arm_feature_set arm_ext_v6t2_v8m =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_V6T2_V8M);
+/* Instructions shared between ARMv8-A and ARMv8-M.  */
+static const arm_feature_set arm_ext_atomics =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_ATOMICS);
+static const arm_feature_set arm_ext_v8_2 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_2A);
+/* FP16 instructions.  */
+static const arm_feature_set arm_ext_fp16 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
  static const arm_feature_set arm_arch_full = ARM_FEATURE (-1, -1, -1);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
  static const arm_feature_set arm_arch_full = ARM_FEATURE (-1, -1, -1);
@@ -258,12 +270,14 @@ static const arm_feature_set fpu_crypto_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
  static const arm_feature_set crc_ext_armv8 =
    ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
    ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
  static const arm_feature_set crc_ext_armv8 =
    ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
+static const arm_feature_set fpu_neon_ext_v8_1 =
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
  static arm_feature_set selected_cpu = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
  static arm_feature_set selected_cpu = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
-static char selected_cpu_name[16];
+static char selected_cpu_name[20];
  
  extern FLONUM_TYPE generic_floating_point_number;
  
  
  extern FLONUM_TYPE generic_floating_point_number;
  
@@ -502,7 +516,7 @@ struct asm_barrier_opt
  
  struct reloc_entry
  {
  
  struct reloc_entry
  {
-  char *                    name;
+  const char *                    name;
    bfd_reloc_code_real_type  reloc;
  };
  
    bfd_reloc_code_real_type  reloc;
  };
  
@@ -770,7 +784,8 @@ struct asm_opcode
         _("cannot use register index with PC-relative addressing")
  #define BAD_PC_WRITEBACK \
         _("cannot use writeback with PC-relative addressing")
         _("cannot use register index with PC-relative addressing")
  #define BAD_PC_WRITEBACK \
         _("cannot use writeback with PC-relative addressing")
-#define BAD_RANGE     _("branch out of range")
+#define BAD_RANGE      _("branch out of range")
+#define BAD_FP16       _("selected processor does not support fp16 instruction")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  
  static struct hash_control * arm_ops_hsh;
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  
  static struct hash_control * arm_ops_hsh;
@@ -2865,10 +2880,9 @@ s_thumb_set (int equiv)
    /* Especial apologies for the random logic:
       This just grew, and could be parsed much more simply!
       Dean - in haste.  */
    /* Especial apologies for the random logic:
       This just grew, and could be parsed much more simply!
       Dean - in haste.  */
-  name     = input_line_pointer;
-  delim            = get_symbol_end ();
+  delim            = get_symbol_name (& name);
    end_name  = input_line_pointer;
    end_name  = input_line_pointer;
-  *end_name = delim;
+  (void) restore_line_pointer (delim);
  
    if (*input_line_pointer != ',')
      {
  
    if (*input_line_pointer != ',')
      {
@@ -2948,8 +2962,7 @@ s_syntax (int unused ATTRIBUTE_UNUSED)
  {
    char *name, delim;
  
  {
    char *name, delim;
  
-  name = input_line_pointer;
-  delim = get_symbol_end ();
+  delim = get_symbol_name (& name);
  
    if (!strcasecmp (name, "unified"))
      unified_syntax = TRUE;
  
    if (!strcasecmp (name, "unified"))
      unified_syntax = TRUE;
@@ -2960,59 +2973,12 @@ s_syntax (int unused ATTRIBUTE_UNUSED)
        as_bad (_("unrecognized syntax mode \"%s\""), name);
        return;
      }
        as_bad (_("unrecognized syntax mode \"%s\""), name);
        return;
      }
-  *input_line_pointer = delim;
+  (void) restore_line_pointer (delim);
    demand_empty_rest_of_line ();
  }
  
  /* Directives: sectioning and alignment.  */
  
    demand_empty_rest_of_line ();
  }
  
  /* Directives: sectioning and alignment.  */
  
-/* Same as s_align_ptwo but align 0 => align 2.         */
-
-static void
-s_align (int unused ATTRIBUTE_UNUSED)
-{
-  int temp;
-  bfd_boolean fill_p;
-  long temp_fill;
-  long max_alignment = 15;
-
-  temp = get_absolute_expression ();
-  if (temp > max_alignment)
-    as_bad (_("alignment too large: %d assumed"), temp = max_alignment);
-  else if (temp < 0)
-    {
-      as_bad (_("alignment negative. 0 assumed."));
-      temp = 0;
-    }
-
-  if (*input_line_pointer == ',')
-    {
-      input_line_pointer++;
-      temp_fill = get_absolute_expression ();
-      fill_p = TRUE;
-    }
-  else
-    {
-      fill_p = FALSE;
-      temp_fill = 0;
-    }
-
-  if (!temp)
-    temp = 2;
-
-  /* Only make a frag if we HAVE to.  */
-  if (temp && !need_pass_2)
-    {
-      if (!fill_p && subseg_text_p (now_seg))
-       frag_align_code (temp, 0);
-      else
-       frag_align (temp, (int) temp_fill, 0);
-    }
-  demand_empty_rest_of_line ();
-
-  record_alignment (now_seg, temp);
-}
-
  static void
  s_bss (int ignore ATTRIBUTE_UNUSED)
  {
  static void
  s_bss (int ignore ATTRIBUTE_UNUSED)
  {
@@ -3357,13 +3323,13 @@ add_to_lit_pool (unsigned int nbytes)
  }
  
  bfd_boolean
  }
  
  bfd_boolean
-tc_start_label_without_colon (char unused1 ATTRIBUTE_UNUSED, const char * rest)
+tc_start_label_without_colon (void)
  {
    bfd_boolean ret = TRUE;
  
    if (codecomposer_syntax && asmfunc_state == WAITING_ASMFUNC_NAME)
      {
  {
    bfd_boolean ret = TRUE;
  
    if (codecomposer_syntax && asmfunc_state == WAITING_ASMFUNC_NAME)
      {
-      const char *label = rest;
+      const char *label = input_line_pointer;
  
        while (!is_end_of_line[(int) label[-1]])
         --label;
  
        while (!is_end_of_line[(int) label[-1]])
         --label;
@@ -3924,9 +3890,10 @@ s_arm_unwind_personality (int ignored ATTRIBUTE_UNUSED)
    if (unwind.personality_routine || unwind.personality_index != -1)
      as_bad (_("duplicate .personality directive"));
  
    if (unwind.personality_routine || unwind.personality_index != -1)
      as_bad (_("duplicate .personality directive"));
  
-  name = input_line_pointer;
-  c = get_symbol_end ();
+  c = get_symbol_name (& name);
    p = input_line_pointer;
    p = input_line_pointer;
+  if (c == '"')
+    ++ input_line_pointer;
    unwind.personality_routine = symbol_find_or_make (name);
    *p = c;
    demand_empty_rest_of_line ();
    unwind.personality_routine = symbol_find_or_make (name);
    *p = c;
    demand_empty_rest_of_line ();
@@ -4688,7 +4655,7 @@ const pseudo_typeS md_pseudo_table[] =
    { "qn",          s_qn,          0 },
    { "unreq",      s_unreq,       0 },
    { "bss",        s_bss,         0 },
    { "qn",          s_qn,          0 },
    { "unreq",      s_unreq,       0 },
    { "bss",        s_bss,         0 },
-  { "align",      s_align,       0 },
+  { "align",      s_align_ptwo,  2 },
    { "arm",        s_arm,         0 },
    { "thumb",      s_thumb,       0 },
    { "code",       s_code,        0 },
    { "arm",        s_arm,         0 },
    { "thumb",      s_thumb,       0 },
    { "code",       s_code,        0 },
@@ -4921,7 +4888,9 @@ parse_fpa_immediate (char ** str)
      {
        /* FIXME: 5 = X_PRECISION, should be #define'd where we can use it.
          Ditto for 15.  */
      {
        /* FIXME: 5 = X_PRECISION, should be #define'd where we can use it.
          Ditto for 15.  */
-      if (gen_to_words (words, 5, (long) 15) == 0)
+#define X_PRECISION 5
+#define E_PRECISION 15L
+      if (gen_to_words (words, X_PRECISION, E_PRECISION) == 0)
         {
           for (i = 0; i < NUM_FLOAT_VALS; i++)
             {
         {
           for (i = 0; i < NUM_FLOAT_VALS; i++)
             {
@@ -5316,7 +5285,28 @@ static struct group_reloc_table_entry group_reloc_table[] =
        BFD_RELOC_ARM_ALU_SB_G2,         /* ALU */
        BFD_RELOC_ARM_LDR_SB_G2,         /* LDR */
        BFD_RELOC_ARM_LDRS_SB_G2,                /* LDRS */
        BFD_RELOC_ARM_ALU_SB_G2,         /* ALU */
        BFD_RELOC_ARM_LDR_SB_G2,         /* LDR */
        BFD_RELOC_ARM_LDRS_SB_G2,                /* LDRS */
-      BFD_RELOC_ARM_LDC_SB_G2 }        };      /* LDC */
+      BFD_RELOC_ARM_LDC_SB_G2 },       /* LDC */
+    /* Absolute thumb alu relocations.  */
+    { "lower0_7",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "lower8_15",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "upper0_7",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "upper8_15",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 } };                           /* LDC.  */
  
  /* Given the address of a pointer pointing to the textual name of a group
     relocation as may appear in assembler source, attempt to find its details
  
  /* Given the address of a pointer pointing to the textual name of a group
     relocation as may appear in assembler source, attempt to find its details
@@ -6095,6 +6085,16 @@ parse_cond (char **str)
    return c->value;
  }
  
    return c->value;
  }
  
+/* Record a use of the given feature.  FEATURE is merged into
+   thumb_arch_used when thumb_mode is set, and into arm_arch_used
+   otherwise.  No check is made here that the selected CPU provides
+   FEATURE.  */
+static void
+record_feature_use (const arm_feature_set *feature)
+{
+  if (thumb_mode)
+    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
+  else
+    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+}
+
  /* If the given feature available in the selected CPU, mark it as used.
     Returns TRUE iff feature is available.  */
  static bfd_boolean
  /* If the given feature available in the selected CPU, mark it as used.
     Returns TRUE iff feature is available.  */
  static bfd_boolean
@@ -6106,10 +6106,7 @@ mark_feature_used (const arm_feature_set *feature)
  
    /* Add the appropriate architecture feature for the barrier option used.
       */
  
    /* Add the appropriate architecture feature for the barrier option used.
       */
-  if (thumb_mode)
-    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
-  else
-    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+  record_feature_use (feature);
  
    return TRUE;
  }
  
    return TRUE;
  }
@@ -7274,6 +7271,26 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
  #define rotate_left(v, n) (v << (n & 31) | v >> ((32 - n) & 31))
  
  
  #define rotate_left(v, n) (v << (n & 31) | v >> ((32 - n) & 31))
  
+/* If the current inst is a scalar ARMv8.2 fp16 instruction, do special
+   encoding.
+
+   The only binary encoding difference is the Coprocessor number.  Coprocessor
+   9 is used for half-precision calculations or conversions.  The format of the
+   instruction is the same as the equivalent Coprocessor 10 instruction that
+   exists for Single-Precision operation.  */
+
+static void
+do_scalar_fp16_v82_encode (void)
+{
+  /* A conditional form is only warned about, not rejected.  */
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional,"
+              " the behaviour is UNPREDICTABLE"));
+  /* BAD_FP16 already wraps its text in _(), so it must not be wrapped
+     a second time here.  */
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+             BAD_FP16);
+
+  /* Replace bits 8-11 (the coprocessor number) with 9.  */
+  inst.instruction = (inst.instruction & 0xfffff0ff) | 0x900;
+  mark_feature_used (&arm_ext_fp16);
+}
+
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
  
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
  
@@ -7282,7 +7299,10 @@ encode_arm_immediate (unsigned int val)
  {
    unsigned int a, i;
  
  {
    unsigned int a, i;
  
-  for (i = 0; i < 32; i += 2)
+  if (val <= 0xff)
+    return val;
+
+  for (i = 2; i < 32; i += 2)
      if ((a = rotate_left (val, i)) <= 0xff)
        return a | (i << 7); /* 12-bit pack: [shift-cnt,const].  */
  
      if ((a = rotate_left (val, i)) <= 0xff)
        return a | (i << 7); /* 12-bit pack: [shift-cnt,const].  */
  
@@ -7747,6 +7767,54 @@ neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
    return FAIL;
  }
  
    return FAIL;
  }
  
+#if defined BFD_HOST_64_BIT
+/* Returns TRUE if double precision value V may be cast
+   to single precision without loss of accuracy.
+
+   V is examined as a raw bit pattern: bits 52-62 are taken as the
+   biased exponent and bits 0-51 as the mantissa.  The test is
+   conservative: values that would need a denormalized single are
+   rejected.  */
+
+static bfd_boolean
+is_double_a_single (bfd_int64_t v)
+{
+  int exp = (int) ((v >> 52) & 0x7FF);
+  bfd_int64_t mantissa = v & (bfd_int64_t) 0xFFFFFFFFFFFFFULL;
+
+  /* The low 29 mantissa bits are dropped by the conversion, so they
+     must all be clear.  */
+  if ((mantissa & 0x1FFFFFFF) != 0)
+    return FALSE;
+
+  /* A zero exponent is only exact for +/-0.  A double denormal is far
+     below the single precision range and double_to_single flushes it
+     to zero.  */
+  if (exp == 0)
+    return mantissa == 0;
+
+  /* All-ones exponent (infinity / NaN), or an exponent that fits the
+     normalized single precision range after rebiasing.  */
+  return exp == 0x7FF || (exp >= 1023 - 126 && exp <= 1023 + 127);
+}
+
+/* Returns a double precision value casted to single precision
+   (ignoring the least significant bits in exponent and mantissa).
+
+   V is the raw double precision bit pattern; the result is the raw
+   single precision bit pattern.  An exponent too large for single
+   precision yields infinity and one too small yields zero; the sign
+   bit is always carried over.  */
+
+static int
+double_to_single (bfd_int64_t v)
+{
+  /* Kept unsigned so that moving it to bit 31 is well defined.  */
+  unsigned int sign = (unsigned int) ((v >> 63) & 1);
+  int exp = (int) ((v >> 52) & 0x7FF);
+  bfd_int64_t mantissa = v & (bfd_int64_t) 0xFFFFFFFFFFFFFULL;
+
+  if (exp == 0x7FF)
+    /* Infinity or NaN: keep an all-ones exponent.  */
+    exp = 0xFF;
+  else
+    {
+      /* Rebias from double (1023) to single (127).  */
+      exp = exp - 1023 + 127;
+      if (exp >= 0xFF)
+       {
+         /* Infinity: all-ones exponent, zero mantissa.  */
+         exp = 0xFF;
+         mantissa = 0;
+       }
+      else if (exp < 0)
+       {
+         /* No denormalized numbers.  */
+         exp = 0;
+         mantissa = 0;
+       }
+    }
+  /* Keep the top 23 of the 52 mantissa bits.  */
+  mantissa >>= 29;
+  return (int) ((sign << 31)
+               | ((unsigned int) exp << 23)
+               | (unsigned int) mantissa);
+}
+#endif /* BFD_HOST_64_BIT */
+
  enum lit_type
  {
    CONST_THUMB,
  enum lit_type
  {
    CONST_THUMB,
@@ -7754,6 +7822,8 @@ enum lit_type
    CONST_VEC
  };
  
    CONST_VEC
  };
  
+static void do_vfp_nsyn_opcode (const char *);
+
  /* inst.reloc.exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
     it can, convert inst.instruction to that move instruction and
  /* inst.reloc.exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
     it can, convert inst.instruction to that move instruction and
@@ -7769,7 +7839,6 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
    unsigned long tbit;
    bfd_boolean thumb_p = (t == CONST_THUMB);
    bfd_boolean arm_p   = (t == CONST_ARM);
    unsigned long tbit;
    bfd_boolean thumb_p = (t == CONST_THUMB);
    bfd_boolean arm_p   = (t == CONST_ARM);
-  bfd_boolean vec64_p = (t == CONST_VEC) && !inst.operands[i].issingle;
  
    if (thumb_p)
      tbit = (inst.instruction > 0xffff) ? THUMB2_LOAD_BIT : THUMB_LOAD_BIT;
  
    if (thumb_p)
      tbit = (inst.instruction > 0xffff) ? THUMB2_LOAD_BIT : THUMB_LOAD_BIT;
@@ -7781,6 +7850,7 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
        inst.error = _("invalid pseudo operation");
        return TRUE;
      }
        inst.error = _("invalid pseudo operation");
        return TRUE;
      }
+
    if (inst.reloc.exp.X_op != O_constant
        && inst.reloc.exp.X_op != O_symbol
        && inst.reloc.exp.X_op != O_big)
    if (inst.reloc.exp.X_op != O_constant
        && inst.reloc.exp.X_op != O_symbol
        && inst.reloc.exp.X_op != O_big)
@@ -7788,77 +7858,202 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
        inst.error = _("constant expression expected");
        return TRUE;
      }
        inst.error = _("constant expression expected");
        return TRUE;
      }
-  if ((inst.reloc.exp.X_op == O_constant
-       || inst.reloc.exp.X_op == O_big)
-      && !inst.operands[i].issingle)
+
+  if (inst.reloc.exp.X_op == O_constant
+      || inst.reloc.exp.X_op == O_big)
      {
      {
-      if (thumb_p && inst.reloc.exp.X_op == O_constant)
+#if defined BFD_HOST_64_BIT
+      bfd_int64_t v;
+#else
+      offsetT v;
+#endif
+      if (inst.reloc.exp.X_op == O_big)
         {
         {
-         if (!unified_syntax && (inst.reloc.exp.X_add_number & ~0xFF) == 0)
+         LITTLENUM_TYPE w[X_PRECISION];
+         LITTLENUM_TYPE * l;
+
+         if (inst.reloc.exp.X_add_number == -1)
             {
             {
-             /* This can be done with a mov(1) instruction.  */
-             inst.instruction  = T_OPCODE_MOV_I8 | (inst.operands[i].reg << 8);
-             inst.instruction |= inst.reloc.exp.X_add_number;
-             return TRUE;
+             gen_to_words (w, X_PRECISION, E_PRECISION);
+             l = w;
+             /* FIXME: Should we check words w[2..5] ?  */
             }
             }
+         else
+           l = generic_bignum;
+
+#if defined BFD_HOST_64_BIT
+         v =
+           ((((((((bfd_int64_t) l[3] & LITTLENUM_MASK)
+                 << LITTLENUM_NUMBER_OF_BITS)
+                | ((bfd_int64_t) l[2] & LITTLENUM_MASK))
+               << LITTLENUM_NUMBER_OF_BITS)
+              | ((bfd_int64_t) l[1] & LITTLENUM_MASK))
+             << LITTLENUM_NUMBER_OF_BITS)
+            | ((bfd_int64_t) l[0] & LITTLENUM_MASK));
+#else
+         v = ((l[1] & LITTLENUM_MASK) << LITTLENUM_NUMBER_OF_BITS)
+           |  (l[0] & LITTLENUM_MASK);
+#endif
         }
         }
-      else if (arm_p && inst.reloc.exp.X_op == O_constant)
+      else
+       v = inst.reloc.exp.X_add_number;
+
+      if (!inst.operands[i].issingle)
         {
         {
-         int value = encode_arm_immediate (inst.reloc.exp.X_add_number);
-         if (value != FAIL)
+         if (thumb_p)
             {
             {
-             /* This can be done with a mov instruction.  */
-             inst.instruction &= LITERAL_MASK;
-             inst.instruction |= INST_IMMEDIATE | (OPCODE_MOV << DATA_OP_SHIFT);
-             inst.instruction |= value & 0xfff;
-             return TRUE;
+             /* This can be encoded only for a low register.  */
+             if ((v & ~0xFF) == 0 && (inst.operands[i].reg < 8))
+               {
+                 /* This can be done with a mov(1) instruction.  */
+                 inst.instruction = T_OPCODE_MOV_I8 | (inst.operands[i].reg << 8);
+                 inst.instruction |= v;
+                 return TRUE;
+               }
+
+             if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)
+                 || ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2_v8m))
+               {
+                 /* Check if on thumb2 it can be done with a mov.w, mvn or
+                    movw instruction.  */
+                 unsigned int newimm;
+                 bfd_boolean isNegated;
+
+                 newimm = encode_thumb32_immediate (v);
+                 if (newimm != (unsigned int) FAIL)
+                   isNegated = FALSE;
+                 else
+                   {
+                     newimm = encode_thumb32_immediate (~v);
+                     if (newimm != (unsigned int) FAIL)
+                       isNegated = TRUE;
+                   }
+
+                 /* The number can be loaded with a mov.w or mvn
+                    instruction.  */
+                 if (newimm != (unsigned int) FAIL
+                     && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
+                   {
+                     inst.instruction = (0xf04f0000  /*  MOV.W.  */
+                                         | (inst.operands[i].reg << 8));
+                     /* Change to MVN.  */
+                     inst.instruction |= (isNegated ? 0x200000 : 0);
+                     inst.instruction |= (newimm & 0x800) << 15;
+                     inst.instruction |= (newimm & 0x700) << 4;
+                     inst.instruction |= (newimm & 0x0ff);
+                     return TRUE;
+                   }
+                 /* The number can be loaded with a movw instruction.  */
+                 else if ((v & ~0xFFFF) == 0
+                          && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2_v8m))
+                   {
+                     int imm = v & 0xFFFF;
+
+                     inst.instruction = 0xf2400000;  /* MOVW.  */
+                     inst.instruction |= (inst.operands[i].reg << 8);
+                     inst.instruction |= (imm & 0xf000) << 4;
+                     inst.instruction |= (imm & 0x0800) << 15;
+                     inst.instruction |= (imm & 0x0700) << 4;
+                     inst.instruction |= (imm & 0x00ff);
+                     return TRUE;
+                   }
+               }
             }
             }
+         else if (arm_p)
+           {
+             int value = encode_arm_immediate (v);
  
  
-         value = encode_arm_immediate (~inst.reloc.exp.X_add_number);
-         if (value != FAIL)
+             if (value != FAIL)
+               {
+                 /* This can be done with a mov instruction.  */
+                 inst.instruction &= LITERAL_MASK;
+                 inst.instruction |= INST_IMMEDIATE | (OPCODE_MOV << DATA_OP_SHIFT);
+                 inst.instruction |= value & 0xfff;
+                 return TRUE;
+               }
+
+             value = encode_arm_immediate (~ v);
+             if (value != FAIL)
+               {
+                 /* This can be done with a mvn instruction.  */
+                 inst.instruction &= LITERAL_MASK;
+                 inst.instruction |= INST_IMMEDIATE | (OPCODE_MVN << DATA_OP_SHIFT);
+                 inst.instruction |= value & 0xfff;
+                 return TRUE;
+               }
+           }
+         else if (t == CONST_VEC)
             {
             {
-             /* This can be done with a mvn instruction.  */
-             inst.instruction &= LITERAL_MASK;
-             inst.instruction |= INST_IMMEDIATE | (OPCODE_MVN << DATA_OP_SHIFT);
-             inst.instruction |= value & 0xfff;
-             return TRUE;
+             int op = 0;
+             unsigned immbits = 0;
+             unsigned immlo = inst.operands[1].imm;
+             unsigned immhi = inst.operands[1].regisimm
+               ? inst.operands[1].reg
+               : inst.reloc.exp.X_unsigned
+               ? 0
+               : ((bfd_int64_t)((int) immlo)) >> 32;
+             int cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                                  &op, 64, NT_invtype);
+
+             if (cmode == FAIL)
+               {
+                 neon_invert_size (&immlo, &immhi, 64);
+                 op = !op;
+                 cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                                  &op, 64, NT_invtype);
+               }
+
+             if (cmode != FAIL)
+               {
+                 inst.instruction = (inst.instruction & VLDR_VMOV_SAME)
+                   | (1 << 23)
+                   | (cmode << 8)
+                   | (op << 5)
+                   | (1 << 4);
+
+                 /* Fill other bits in vmov encoding for both thumb and arm.  */
+                 if (thumb_mode)
+                   inst.instruction |= (0x7U << 29) | (0xF << 24);
+                 else
+                   inst.instruction |= (0xFU << 28) | (0x1 << 25);
+                 neon_write_immbits (immbits);
+                 return TRUE;
+               }
             }
         }
             }
         }
-      else if (vec64_p)
-       {
-         int op = 0;
-         unsigned immbits = 0;
-         unsigned immlo = inst.operands[1].imm;
-         unsigned immhi = inst.operands[1].regisimm
-                          ? inst.operands[1].reg
-                          : inst.reloc.exp.X_unsigned
-                            ? 0
-                            : ((bfd_int64_t)((int) immlo)) >> 32;
-         int cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
-                                              &op, 64, NT_invtype);
  
  
-         if (cmode == FAIL)
+      if (t == CONST_VEC)
+       {
+         /* Check if vldr Rx, =constant could be optimized to vmov Rx, #constant.  */
+         if (inst.operands[i].issingle
+             && is_quarter_float (inst.operands[1].imm)
+             && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3xd))
             {
             {
-             neon_invert_size (&immlo, &immhi, 64);
-             op = !op;
-             cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
-                                              &op, 64, NT_invtype);
+             inst.operands[1].imm =
+               neon_qfloat_bits (v);
+             do_vfp_nsyn_opcode ("fconsts");
+             return TRUE;
             }
             }
-         if (cmode != FAIL)
+
+         /* If our host does not support a 64-bit type then we cannot perform
+            the following optimization.  This means that there will be a
+            discrepancy between the output produced by an assembler built for
+            a 32-bit-only host and the output produced from a 64-bit host, but
+            this cannot be helped.  */
+#if defined BFD_HOST_64_BIT
+         else if (!inst.operands[1].issingle
+                  && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
             {
             {
-             inst.instruction = (inst.instruction & VLDR_VMOV_SAME)
-                                 | (1 << 23)
-                                 | (cmode << 8)
-                                 | (op << 5)
-                                 | (1 << 4);
-             /* Fill other bits in vmov encoding for both thumb and arm.  */
-             if (thumb_mode)
-               inst.instruction |= (0x7 << 29) | (0xF << 24);
-             else
-               inst.instruction |= (0xF << 28) | (0x1 << 25);
-             neon_write_immbits (immbits);
-             return TRUE;
+             if (is_double_a_single (v)
+                 && is_quarter_float (double_to_single (v)))
+               {
+                 inst.operands[1].imm =
+                   neon_qfloat_bits (double_to_single (v));
+                 do_vfp_nsyn_opcode ("fconstd");
+                 return TRUE;
+               }
             }
             }
+#endif
         }
      }
  
         }
      }
  
@@ -7891,7 +8086,12 @@ encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
  {
    if (!inst.operands[i].isreg)
      {
  {
    if (!inst.operands[i].isreg)
      {
-      gas_assert (inst.operands[0].isvec);
+      /* PR 18256 */
+      if (! inst.operands[0].isvec)
+       {
+         inst.error = _("invalid co-processor operand");
+         return FAIL;
+       }
        if (move_or_literal_pool (0, CONST_VEC, /*mode_3=*/FALSE))
         return SUCCESS;
      }
        if (move_or_literal_pool (0, CONST_VEC, /*mode_3=*/FALSE))
         return SUCCESS;
      }
@@ -7993,6 +8193,13 @@ do_rn_rd (void)
    inst.instruction |= inst.operands[1].reg << 12;
  }
  
    inst.instruction |= inst.operands[1].reg << 12;
  }
  
+/* Encode two register operands: operand 0 is placed at bit 8 and
+   operand 1 at bit 16 of the instruction word.  */
+static void
+do_tt (void)
+{
+  inst.instruction |= ((inst.operands[0].reg << 8)
+                      | (inst.operands[1].reg << 16));
+}
+
  static bfd_boolean
  check_obsolete (const arm_feature_set *feature, const char *msg)
  {
  static bfd_boolean
  check_obsolete (const arm_feature_set *feature, const char *msg)
  {
@@ -8791,8 +8998,6 @@ do_mov16 (void)
      }
  }
  
      }
  }
  
-static void do_vfp_nsyn_opcode (const char *);
-
  static int
  do_vfp_nsyn_mrs (void)
  {
  static int
  do_vfp_nsyn_mrs (void)
  {
@@ -9176,6 +9381,24 @@ do_swi (void)
    inst.reloc.pc_rel = 0;
  }
  
    inst.reloc.pc_rel = 0;
  }
  
+/* Encode SETPAN for ARM state.  The instruction is rejected unless the
+   selected CPU has the PAN extension; bit 0 of the immediate operand
+   is placed at bit 9 of the instruction.  */
+static void
+do_setpan (void)
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_pan),
+             _("selected processor does not support SETPAN instruction"));
+
+  inst.instruction |= ((inst.operands[0].imm & 1) << 9);
+}
+
+/* Encode SETPAN for Thumb state.  Same feature check as do_setpan;
+   bit 0 of the immediate operand is placed at bit 3.  The operand is
+   masked to one bit, as in do_setpan, so that a larger immediate
+   cannot spill into neighbouring opcode bits.  */
+static void
+do_t_setpan (void)
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_pan),
+             _("selected processor does not support SETPAN instruction"));
+
+  inst.instruction |= ((inst.operands[0].imm & 1) << 3);
+}
+
  /* ARM V5E (El Segundo) signed-multiply-accumulate (argument parse)
     SMLAxy{cond} Rd,Rm,Rs,Rn
     SMLAWy{cond} Rd,Rm,Rs,Rn
  /* ARM V5E (El Segundo) signed-multiply-accumulate (argument parse)
     SMLAxy{cond} Rd,Rm,Rs,Rn
     SMLAWy{cond} Rd,Rm,Rs,Rn
@@ -9771,7 +9994,7 @@ do_iwmmxt_wldstd (void)
        && inst.operands[1].immisreg)
      {
        inst.instruction &= ~0x1a000ff;
        && inst.operands[1].immisreg)
      {
        inst.instruction &= ~0x1a000ff;
-      inst.instruction |= (0xf << 28);
+      inst.instruction |= (0xfU << 28);
        if (inst.operands[1].preind)
         inst.instruction |= PRE_INDEX;
        if (!inst.operands[1].negative)
        if (inst.operands[1].preind)
         inst.instruction |= PRE_INDEX;
        if (!inst.operands[1].negative)
@@ -9850,7 +10073,7 @@ do_iwmmxt_wrwrwr_or_imm5 (void)
        }
      /* Map 32 -> 0, etc.  */
      inst.operands[2].imm &= 0x1f;
        }
      /* Map 32 -> 0, etc.  */
      inst.operands[2].imm &= 0x1f;
-    inst.instruction |= (0xf << 28) | ((inst.operands[2].imm & 0x10) << 4) | (inst.operands[2].imm & 0xf);
+    inst.instruction |= (0xfU << 28) | ((inst.operands[2].imm & 0x10) << 4) | (inst.operands[2].imm & 0xf);
    }
  }
  \f
    }
  }
  \f
@@ -10247,7 +10470,9 @@ do_t_add_sub (void)
                 {
                   inst.instruction = THUMB_OP16(opcode);
                   inst.instruction |= (Rd << 4) | Rs;
                 {
                   inst.instruction = THUMB_OP16(opcode);
                   inst.instruction |= (Rd << 4) | Rs;
-                 inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
+                 if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                     || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+                   inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
                   if (inst.size_req != 2)
                     inst.relax = opcode;
                 }
                   if (inst.size_req != 2)
                     inst.relax = opcode;
                 }
@@ -10755,6 +10980,10 @@ do_t_branch (void)
         reloc = BFD_RELOC_THUMB_PCREL_BRANCH25;
        else
         {
         reloc = BFD_RELOC_THUMB_PCREL_BRANCH25;
        else
         {
+         constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2),
+                     _("selected architecture does not support "
+                       "wide conditional branch instruction"));
+
           gas_assert (cond != 0xF);
           inst.instruction |= cond << 22;
           reloc = BFD_RELOC_THUMB_PCREL_BRANCH20;
           gas_assert (cond != 0xF);
           inst.instruction |= cond << 22;
           reloc = BFD_RELOC_THUMB_PCREL_BRANCH20;
@@ -11598,9 +11827,13 @@ do_t_mov_cmp (void)
               inst.instruction = THUMB_OP16 (opcode);
               inst.instruction |= Rn << 8;
               if (inst.size_req == 2)
               inst.instruction = THUMB_OP16 (opcode);
               inst.instruction |= Rn << 8;
               if (inst.size_req == 2)
-               inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+               {
+                 if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                     || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+                   inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+               }
               else
               else
-               inst.relax = opcode;
+                 inst.relax = opcode;
             }
           else
             {
             }
           else
             {
@@ -12892,6 +13125,8 @@ struct neon_tab_entry
    X(vqdmull,   0x0800d00, N_INV,     0x0800b40),       \
    X(vqdmulh,    0x0000b00, N_INV,     0x0800c40),      \
    X(vqrdmulh,   0x1000b00, N_INV,     0x0800d40),      \
    X(vqdmull,   0x0800d00, N_INV,     0x0800b40),       \
    X(vqdmulh,    0x0000b00, N_INV,     0x0800c40),      \
    X(vqrdmulh,   0x1000b00, N_INV,     0x0800d40),      \
+  X(vqrdmlah,   0x3000b10, N_INV,     0x0800e40),      \
+  X(vqrdmlsh,   0x3000c10, N_INV,     0x0800f40),      \
    X(vshl,      0x0000400, N_INV,     0x0800510),       \
    X(vqshl,     0x0000410, N_INV,     0x0800710),       \
    X(vand,      0x0000110, N_INV,     0x0800030),       \
    X(vshl,      0x0000400, N_INV,     0x0800510),       \
    X(vqshl,     0x0000410, N_INV,     0x0800710),       \
    X(vand,      0x0000110, N_INV,     0x0800030),       \
@@ -13045,7 +13280,19 @@ NEON_ENC_TAB
    X(2, (S, R), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
    X(2, (S, R), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
-  X(2, (R, F), SINGLE)
+  X(2, (R, F), SINGLE),                        \
+/* Half float shape supported so far.  */\
+  X (2, (H, D), MIXED),                        \
+  X (2, (D, H), MIXED),                        \
+  X (2, (H, F), MIXED),                        \
+  X (2, (F, H), MIXED),                        \
+  X (2, (H, H), HALF),                 \
+  X (2, (H, R), HALF),                 \
+  X (2, (R, H), HALF),                 \
+  X (2, (H, I), HALF),                 \
+  X (3, (H, H, H), HALF),              \
+  X (3, (H, F, I), MIXED),             \
+  X (3, (F, H, I), MIXED)
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
@@ -13066,6 +13313,7 @@ enum neon_shape
  
  enum neon_shape_class
  {
  
  enum neon_shape_class
  {
+  SC_HALF,
    SC_SINGLE,
    SC_DOUBLE,
    SC_QUAD,
    SC_SINGLE,
    SC_DOUBLE,
    SC_QUAD,
@@ -13083,6 +13331,7 @@ static enum neon_shape_class neon_shape_class[] =
  
  enum neon_shape_el
  {
  
  enum neon_shape_el
  {
+  SE_H,
    SE_F,
    SE_D,
    SE_Q,
    SE_F,
    SE_D,
    SE_Q,
@@ -13095,6 +13344,7 @@ enum neon_shape_el
  /* Register widths of above.  */
  static unsigned neon_shape_el_size[] =
  {
  /* Register widths of above.  */
  static unsigned neon_shape_el_size[] =
  {
+  16,
    32,
    64,
    128,
    32,
    64,
    128,
@@ -13179,6 +13429,7 @@ enum neon_type_mask
  #define N_SUF_32   (N_SU_32 | N_F32)
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
  #define N_SUF_32   (N_SU_32 | N_F32)
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_F_ALL    (N_F16 | N_F32 | N_F64)
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
@@ -13220,11 +13471,56 @@ neon_select_shape (enum neon_shape shape, ...)
  
           switch (neon_shape_tab[shape].el[j])
             {
  
           switch (neon_shape_tab[shape].el[j])
             {
+             /* If a  .f16,  .16,  .u16,  .s16 type specifier is given over
+                a VFP single precision register operand, it essentially
+                means only half of the register is used.
+
+                If the type specifier is given after the mnemonics, the
+                information is stored in inst.vectype.  If the type specifier
+                is given after register operand, the information is stored
+                in inst.operands[].vectype.
+
+                When there is only one type specifier, and all the register
+                operands are the same type of hardware register, the type
+                specifier applies to all register operands.
+
+                If no type specifier is given, the shape is inferred from
+                operand information.
+
+                for example:
+                vadd.f16 s0, s1, s2:           NS_HHH
+                vabs.f16 s0, s1:               NS_HH
+                vmov.f16 s0, r1:               NS_HR
+                vmov.f16 r0, s1:               NS_RH
+                vcvt.f16 r0, s1:               NS_RH
+                vcvt.f16.s32   s2, s2, #29:    NS_HFI
+                vcvt.f16.s32   s2, s2:         NS_HF
+             */
+           case SE_H:
+             if (!(inst.operands[j].isreg
+                   && inst.operands[j].isvec
+                   && inst.operands[j].issingle
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1
+                        && inst.vectype.el[0].size == 16)
+                       || (inst.vectype.elems > 1
+                           && inst.vectype.el[j].size == 16)
+                       || (inst.vectype.elems == 0
+                           && inst.operands[j].vectype.type != NT_invtype
+                           && inst.operands[j].vectype.size == 16))))
+               matches = 0;
+             break;
+
             case SE_F:
               if (!(inst.operands[j].isreg
                     && inst.operands[j].isvec
                     && inst.operands[j].issingle
             case SE_F:
               if (!(inst.operands[j].isreg
                     && inst.operands[j].isvec
                     && inst.operands[j].issingle
-                   && !inst.operands[j].isquad))
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1 && inst.vectype.el[0].size == 32)
+                       || (inst.vectype.elems > 1 && inst.vectype.el[j].size == 32)
+                       || (inst.vectype.elems == 0
+                           && (inst.operands[j].vectype.size == 32
+                               || inst.operands[j].vectype.type == NT_invtype)))))
                 matches = 0;
               break;
  
                 matches = 0;
               break;
  
@@ -13440,7 +13736,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
      *type = NT_untyped;
    else if ((mask & (N_P8 | N_P16 | N_P64)) != 0)
      *type = NT_poly;
      *type = NT_untyped;
    else if ((mask & (N_P8 | N_P16 | N_P64)) != 0)
      *type = NT_poly;
-  else if ((mask & (N_F16 | N_F32 | N_F64)) != 0)
+  else if ((mask & (N_F_ALL)) != 0)
      *type = NT_float;
    else
      return FAIL;
      *type = NT_float;
    else
      return FAIL;
@@ -13628,6 +13924,18 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
                   else
                     match = g_size;
  
                   else
                     match = g_size;
  
+                 /* FP16 will use a single precision register.  */
+                 if (regwidth == 32 && match == 16)
+                   {
+                     if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+                       match = regwidth;
+                     else
+                       {
+                         inst.error = _(BAD_FP16);
+                         return badtype;
+                       }
+                   }
+
                   if (regwidth != match)
                     {
                       first_error (_("operand size must match register width"));
                   if (regwidth != match)
                     {
                       first_error (_("operand size must match register width"));
@@ -13719,12 +14027,16 @@ do_vfp_nsyn_add_sub (enum neon_shape rs)
  {
    int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
  
  {
    int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_add)
         do_vfp_nsyn_opcode ("fadds");
        else
         do_vfp_nsyn_opcode ("fsubs");
      {
        if (is_add)
         do_vfp_nsyn_opcode ("fadds");
        else
         do_vfp_nsyn_opcode ("fsubs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13747,15 +14059,14 @@ try_vfp_nsyn (int args, void (*pfn) (enum neon_shape))
    switch (args)
      {
      case 2:
    switch (args)
      {
      case 2:
-      rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      et = neon_check_type (2, rs,
-       N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
        break;
  
      case 3:
        break;
  
      case 3:
-      rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
-      et = neon_check_type (3, rs,
-       N_EQK | N_VFP, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
+                           N_F_ALL | N_KEY | N_VFP);
        break;
  
      default:
        break;
  
      default:
@@ -13777,12 +14088,16 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
  {
    int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
  
  {
    int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_mla)
         do_vfp_nsyn_opcode ("fmacs");
        else
         do_vfp_nsyn_opcode ("fnmacs");
      {
        if (is_mla)
         do_vfp_nsyn_opcode ("fmacs");
        else
         do_vfp_nsyn_opcode ("fnmacs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13798,12 +14113,16 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  {
    int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
  
  {
    int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_fma)
         do_vfp_nsyn_opcode ("ffmas");
        else
         do_vfp_nsyn_opcode ("ffnmas");
      {
        if (is_fma)
         do_vfp_nsyn_opcode ("ffmas");
        else
         do_vfp_nsyn_opcode ("ffnmas");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13817,8 +14136,14 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  static void
  do_vfp_nsyn_mul (enum neon_shape rs)
  {
  static void
  do_vfp_nsyn_mul (enum neon_shape rs)
  {
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fmuls");
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fmuls");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fmuld");
  }
    else
      do_vfp_nsyn_opcode ("fmuld");
  }
@@ -13827,14 +14152,18 @@ static void
  do_vfp_nsyn_abs_neg (enum neon_shape rs)
  {
    int is_neg = (inst.instruction & 0x80) != 0;
  do_vfp_nsyn_abs_neg (enum neon_shape rs)
  {
    int is_neg = (inst.instruction & 0x80) != 0;
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_VFP | N_KEY);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_VFP | N_KEY);
  
  
-  if (rs == NS_FF)
+  if (rs == NS_FF || rs == NS_HH)
      {
        if (is_neg)
         do_vfp_nsyn_opcode ("fnegs");
        else
         do_vfp_nsyn_opcode ("fabss");
      {
        if (is_neg)
         do_vfp_nsyn_opcode ("fnegs");
        else
         do_vfp_nsyn_opcode ("fabss");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13871,11 +14200,17 @@ do_vfp_nsyn_ldm_stm (int is_dbmode)
  static void
  do_vfp_nsyn_sqrt (void)
  {
  static void
  do_vfp_nsyn_sqrt (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
+
+  if (rs == NS_FF || rs == NS_HH)
+    {
+      do_vfp_nsyn_opcode ("fsqrts");
  
  
-  if (rs == NS_FF)
-    do_vfp_nsyn_opcode ("fsqrts");
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fsqrtd");
  }
    else
      do_vfp_nsyn_opcode ("fsqrtd");
  }
@@ -13883,12 +14218,18 @@ do_vfp_nsyn_sqrt (void)
  static void
  do_vfp_nsyn_div (void)
  {
  static void
  do_vfp_nsyn_div (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
+
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fdivs");
  
  
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fdivs");
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fdivd");
  }
    else
      do_vfp_nsyn_opcode ("fdivd");
  }
@@ -13896,14 +14237,18 @@ do_vfp_nsyn_div (void)
  static void
  do_vfp_nsyn_nmul (void)
  {
  static void
  do_vfp_nsyn_nmul (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
  
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
      {
        NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13911,17 +14256,19 @@ do_vfp_nsyn_nmul (void)
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
+
  }
  
  static void
  do_vfp_nsyn_cmp (void)
  {
  }
  
  static void
  do_vfp_nsyn_cmp (void)
  {
+  enum neon_shape rs;
    if (inst.operands[1].isreg)
      {
    if (inst.operands[1].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
  
  
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_monadic ();
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_monadic ();
@@ -13934,8 +14281,8 @@ do_vfp_nsyn_cmp (void)
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_FI, NS_DI, NS_NULL);
-      neon_check_type (2, rs, N_F32 | N_F64 | N_KEY | N_VFP, N_EQK);
+      rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL);
+      neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK);
  
        switch (inst.instruction & 0x0fffffff)
         {
  
        switch (inst.instruction & 0x0fffffff)
         {
@@ -13949,7 +14296,7 @@ do_vfp_nsyn_cmp (void)
           abort ();
         }
  
           abort ();
         }
  
-      if (rs == NS_FI)
+      if (rs == NS_FI || rs == NS_HI)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_compare_z ();
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_compare_z ();
@@ -13961,6 +14308,10 @@ do_vfp_nsyn_cmp (void)
         }
      }
    do_vfp_cond_or_thumb ();
         }
      }
    do_vfp_cond_or_thumb ();
+
+  /* ARMv8.2 fp16 instruction.  */
+  if (rs == NS_HI || rs == NS_HH)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
  }
  
  static void
@@ -14678,6 +15029,38 @@ do_neon_qdmulh (void)
      }
  }
  
      }
  }
  
+/* Encode an instruction handled by the vqrdmlah/vqrdmlsh entry point.
+   fpu_neon_ext_armv8 must be available, otherwise inst.error is set.
+   If fpu_neon_ext_v8_1 is not, a warning is issued and the feature is
+   recorded as used.  The operands are then encoded either in the
+   by-scalar form (operand 2 is a scalar) or in the three-same-register
+   form, accepting S16 and S32 element types.  */
+static void
+do_neon_qrdmlah (void)
+{
+  enum neon_shape rs;
+  struct neon_type_el et;
+
+  /* Check we're on the correct architecture.  */
+  if (!mark_feature_used (&fpu_neon_ext_armv8))
+    inst.error =
+      _("instruction form not available on this architecture.");
+  else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+    {
+      as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
+      record_feature_use (&fpu_neon_ext_v8_1);
+    }
+
+  if (!inst.operands[2].isscalar)
+    {
+      /* Vector form: all three operands are D or Q registers.  */
+      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      NEON_ENCODE (INTEGER, inst);
+      /* The U bit (rounding) comes from bit mask.  */
+      neon_three_same (neon_quad (rs), 0, et.size);
+      return;
+    }
+
+  /* By-scalar form: the last operand is a scalar.  */
+  rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  et = neon_check_type (3, rs, N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+  NEON_ENCODE (SCALAR, inst);
+  neon_mul_mac (et, neon_quad (rs));
+}
+
  static void
  do_neon_fcmp_absolute (void)
  {
  static void
  do_neon_fcmp_absolute (void)
  {
@@ -14926,6 +15309,13 @@ do_neon_shll (void)
    /* Half-precision conversions.  */                                         \
    CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
    CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
    /* Half-precision conversions.  */                                         \
    CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
    CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
+  /* New VCVT instructions introduced by ARMv8.2 fp16 extension.             \
+     Compared with single/double precision variants, only the co-processor    \
+     field is different, so the encoding flow is reused here.  */            \
+  CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL)    \
+  CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
+  CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
+  CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
@@ -15000,7 +15390,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  {
    const char *opname = 0;
  
  {
    const char *opname = 0;
  
-  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI)
+  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI
+      || rs == NS_FHI || rs == NS_HFI)
      {
        /* Conversions with immediate bitshift.  */
        const char *enc[] =
      {
        /* Conversions with immediate bitshift.  */
        const char *enc[] =
@@ -15037,12 +15428,19 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  
    if (opname)
      do_vfp_nsyn_opcode (opname);
  
    if (opname)
      do_vfp_nsyn_opcode (opname);
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16
+      || flavour == neon_cvt_flavour_f16_u32
+      || flavour == neon_cvt_flavour_f16_s32)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
  do_vfp_nsyn_cvtz (void)
  {
  }
  
  static void
  do_vfp_nsyn_cvtz (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
    const char *enc[] =
      {
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
    const char *enc[] =
      {
@@ -15070,6 +15468,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+               _(BAD_FP16));
+
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
@@ -15082,6 +15485,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
        sz = 0;
        op = 1;
        break;
        sz = 0;
        op = 1;
        break;
+    case neon_cvt_flavour_s32_f16:
+      sz = 0;
+      op = 1;
+      break;
      case neon_cvt_flavour_u32_f64:
        sz = 1;
        op = 0;
      case neon_cvt_flavour_u32_f64:
        sz = 1;
        op = 0;
@@ -15090,6 +15497,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
        sz = 0;
        op = 0;
        break;
        sz = 0;
        op = 0;
        break;
+    case neon_cvt_flavour_u32_f16:
+      sz = 0;
+      op = 0;
+      break;
      default:
        first_error (_("invalid instruction shape"));
        return;
      default:
        first_error (_("invalid instruction shape"));
        return;
@@ -15108,6 +15519,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
    encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm);
    inst.instruction |= sz << 8;
    encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm);
    inst.instruction |= sz << 8;
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      ||flavour == neon_cvt_flavour_u32_f16)
+    do_scalar_fp16_v82_encode ();
    inst.instruction |= op << 7;
    inst.instruction |= rm << 16;
    inst.instruction |= 0xf0000000;
    inst.instruction |= op << 7;
    inst.instruction |= rm << 16;
    inst.instruction |= 0xf0000000;
@@ -15118,7 +15534,9 @@ static void
  do_neon_cvt_1 (enum neon_cvt_mode mode)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
  do_neon_cvt_1 (enum neon_cvt_mode mode)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
-    NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL);
+                                         NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ,
+                                         NS_FH, NS_HF, NS_FHI, NS_HFI,
+                                         NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
  
    /* PR11109: Handle round-to-zero for VCVT conversions.  */
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
  
    /* PR11109: Handle round-to-zero for VCVT conversions.  */
@@ -15134,6 +15552,18 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
        return;
      }
  
        return;
      }
  
+  /* ARMv8.2 fp16 VCVT conversions.  */
+  if (mode == neon_cvt_mode_z
+      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)
+      && (flavour == neon_cvt_flavour_s32_f16
+         || flavour == neon_cvt_flavour_u32_f16)
+      && (rs == NS_FH))
+    {
+      do_vfp_nsyn_cvtz ();
+      do_scalar_fp16_v82_encode ();
+      return;
+    }
+
    /* VFP rather than Neon conversions.  */
    if (flavour >= neon_cvt_flavour_first_fp)
      {
    /* VFP rather than Neon conversions.  */
    if (flavour >= neon_cvt_flavour_first_fp)
      {
@@ -15318,7 +15748,8 @@ do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double)
  static void
  do_neon_cvttb_1 (bfd_boolean t)
  {
  static void
  do_neon_cvttb_1 (bfd_boolean t)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_DF, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD,
+                                         NS_DF, NS_DH, NS_NULL);
  
    if (rs == NS_NULL)
      return;
  
    if (rs == NS_NULL)
      return;
@@ -15698,8 +16129,9 @@ static void
  do_neon_mov (void)
  {
    enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
  do_neon_mov (void)
  {
    enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
-    NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
-    NS_NULL);
+                                         NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR,
+                                         NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
+                                         NS_HR, NS_RH, NS_HI, NS_NULL);
    struct neon_type_el et;
    const char *ldconst = 0;
  
    struct neon_type_el et;
    const char *ldconst = 0;
  
@@ -15877,6 +16309,7 @@ do_neon_mov (void)
        do_vfp_nsyn_opcode ("fcpys");
        break;
  
        do_vfp_nsyn_opcode ("fcpys");
        break;
  
+    case NS_HI:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
        encode_fconstd:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
        encode_fconstd:
@@ -15884,17 +16317,29 @@ do_neon_mov (void)
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
           do_vfp_nsyn_opcode (ldconst);
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
           do_vfp_nsyn_opcode (ldconst);
+
+         /* ARMv8.2 fp16 vmov.f16 instruction.  */
+         if (rs == NS_HI)
+           do_scalar_fp16_v82_encode ();
         }
        else
         first_error (_("immediate out of range"));
        break;
  
         }
        else
         first_error (_("immediate out of range"));
        break;
  
+    case NS_RH:
      case NS_RF:  /* case 12 (fmrs).  */
        do_vfp_nsyn_opcode ("fmrs");
      case NS_RF:  /* case 12 (fmrs).  */
        do_vfp_nsyn_opcode ("fmrs");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_RH)
+       do_scalar_fp16_v82_encode ();
        break;
  
        break;
  
+    case NS_HR:
      case NS_FR:  /* case 13 (fmsr).  */
        do_vfp_nsyn_opcode ("fmsr");
      case NS_FR:  /* case 13 (fmsr).  */
        do_vfp_nsyn_opcode ("fmsr");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_HR)
+       do_scalar_fp16_v82_encode ();
        break;
  
      /* The encoders for the fmrrs and fmsrr instructions expect three operands
        break;
  
      /* The encoders for the fmrrs and fmsrr instructions expect three operands
@@ -15950,6 +16395,21 @@ do_neon_rshift_round_imm (void)
                   et.size - imm);
  }
  
                   et.size - imm);
  }
  
+/* Encode a move whose two operands must match the NS_HH shape.  Any
+   other shape is diagnosed as "invalid suffix", and a cpu_variant
+   without fpu_vfp_ext_armv8 is diagnosed with BAD_FPU.  The operands
+   are encoded by do_vfp_sp_monadic, after which the top four bits of
+   the opcode are set and the instruction is flagged as Neon.  */
+static void
+do_neon_movhf (void)
+{
+  enum neon_shape rs;
+
+  rs = neon_select_shape (NS_HH, NS_NULL);
+  constraint (rs != NS_HH, _("invalid suffix"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+             _(BAD_FPU));
+
+  do_vfp_sp_monadic ();
+
+  /* Force bits 28-31 of the opcode to ones.  */
+  inst.instruction |= 0xf0000000;
+  inst.is_neon = 1;
+}
+
  static void
  do_neon_movl (void)
  {
  static void
  do_neon_movl (void)
  {
@@ -16126,6 +16586,10 @@ do_neon_ldr_str (void)
         do_vfp_nsyn_opcode ("flds");
        else
         do_vfp_nsyn_opcode ("fsts");
         do_vfp_nsyn_opcode ("flds");
        else
         do_vfp_nsyn_opcode ("fsts");
+
+      /* ARMv8.2 vldr.16/vstr.16 instruction.  */
+      if (inst.vectype.el[0].size == 16)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -16483,8 +16947,14 @@ do_vfp_nsyn_fpv8 (enum neon_shape rs)
  
    NEON_ENCODE (FPV8, inst);
  
  
    NEON_ENCODE (FPV8, inst);
  
-  if (rs == NS_FFF)
-    do_vfp_sp_dyadic ();
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_dp_rd_rn_rm ();
  
    else
      do_vfp_dp_rd_rn_rm ();
  
@@ -16520,7 +16990,7 @@ do_vmaxnm (void)
  static void
  do_vrint_1 (enum neon_cvt_mode mode)
  {
  static void
  do_vrint_1 (enum neon_cvt_mode mode)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_QQ, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL);
    struct neon_type_el et;
  
    if (rs == NS_NULL)
    struct neon_type_el et;
  
    if (rs == NS_NULL)
@@ -16532,7 +17002,8 @@ do_vrint_1 (enum neon_cvt_mode mode)
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
-  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY
+                       | N_VFP);
    if (et.type != NT_invtype)
      {
        /* VFP encodings.  */
    if (et.type != NT_invtype)
      {
        /* VFP encodings.  */
@@ -16541,7 +17012,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
         set_it_insn_type (OUTSIDE_IT_INSN);
  
        NEON_ENCODE (FPV8, inst);
         set_it_insn_type (OUTSIDE_IT_INSN);
  
        NEON_ENCODE (FPV8, inst);
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         do_vfp_sp_monadic ();
        else
         do_vfp_dp_rd_rm ();
         do_vfp_sp_monadic ();
        else
         do_vfp_dp_rd_rm ();
@@ -16560,6 +17031,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
  
        inst.instruction |= (rs == NS_DD) << 8;
        do_vfp_cond_or_thumb ();
  
        inst.instruction |= (rs == NS_DD) << 8;
        do_vfp_cond_or_thumb ();
+
+      /* ARMv8.2 fp16 vrint instruction.  */
+      if (rs == NS_HH)
+      do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -17387,7 +17862,7 @@ handle_it_state (void)
           else
             {
               if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
           else
             {
               if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
-                 && ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2))
+                 && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
                 {
                   /* Automatically generate the IT instruction.  */
                   new_automatic_it_block (inst.cond);
                 {
                   /* Automatically generate the IT instruction.  */
                   new_automatic_it_block (inst.cond);
@@ -17619,6 +18094,56 @@ in_it_block (void)
    return now_it.state != OUTSIDE_IT_BLOCK;
  }
  
    return now_it.state != OUTSIDE_IT_BLOCK;
  }
  
+/* Return TRUE when OPCODE is known to exist only as a T32 (wide) encoding.
+   The sole caller is t32_insn_ok, so opcodes gated by the v6t2 extension bit
+   are deliberately not enumerated here; that is what "known" in the name
+   refers to.  */
+
+static bfd_boolean
+known_t32_only_insn (const struct asm_opcode *opcode)
+{
+  const arm_feature_set *thumb_feat = opcode->tvariant;
+
+  /* Wide instructions that already existed in original Thumb-1: the
+     BL/BLX style branches...  */
+  if (opcode->tencode == do_t_blx || opcode->tencode == do_t_branch23)
+    return TRUE;
+
+  /* ...and the MSR and barrier groups.  */
+  if (ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_msr)
+      || ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_barrier))
+    return TRUE;
+
+  /* Wide-only instructions added to ARMv8-M.  */
+  return (ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_v8m)
+          || ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_atomics)
+          || ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_v6t2_v8m)
+          || ARM_CPU_HAS_FEATURE (*thumb_feat, arm_ext_div)) ? TRUE : FALSE;
+}
+
+/* Return TRUE when the wide (32-bit) variant of OPCODE, assumed to be a
+   valid opcode for ARCH, may be used on ARCH.  */
+
+static bfd_boolean
+t32_insn_ok (arm_feature_set arch, const struct asm_opcode *opcode)
+{
+  bfd_boolean wide_ok;
+
+  if (known_t32_only_insn (opcode))
+    /* Wide-only opcode: being valid at all means the wide form exists.  */
+    wide_ok = TRUE;
+  else if (opcode->tencode == do_t_branch
+           && ARM_CPU_HAS_FEATURE (arch, arm_ext_v8m))
+    /* Branch has both narrow and wide encodings on ARMv8-M.  Whether
+       variant T3 of B.W is available is checked in do_t_branch.  */
+    wide_ok = TRUE;
+  else if (ARM_CPU_HAS_FEATURE (arch, arm_ext_v6t2))
+    /* From ARMv6t2 on, every instruction that has narrow *and* wide
+       variants has its wide variant available.  Anything else is either
+       narrow-only or wide-only and is thus usable whenever OPCODE is
+       valid.  */
+    wide_ok = TRUE;
+  else
+    /* Narrow-only opcode, or the wide variant is not available.  */
+    wide_ok = FALSE;
+
+  return wide_ok;
+}
+
  void
  md_assemble (char *str)
  {
  void
  md_assemble (char *str)
  {
@@ -17668,7 +18193,7 @@ md_assemble (char *str)
           || (thumb_mode == 1
               && !ARM_CPU_HAS_FEATURE (variant, *opcode->tvariant)))
         {
           || (thumb_mode == 1
               && !ARM_CPU_HAS_FEATURE (variant, *opcode->tvariant)))
         {
-         as_bad (_("selected processor does not support Thumb mode `%s'"), str);
+         as_bad (_("selected processor does not support `%s' in Thumb mode"), str);
           return;
         }
        if (inst.cond != COND_ALWAYS && !unified_syntax
           return;
         }
        if (inst.cond != COND_ALWAYS && !unified_syntax
@@ -17678,24 +18203,28 @@ md_assemble (char *str)
           return;
         }
  
           return;
         }
  
-      if (!ARM_CPU_HAS_FEATURE (variant, arm_ext_v6t2))
+      /* Two things are addressed here:
+        1) Implicit require narrow instructions on Thumb-1.
+           This avoids relaxation accidentally introducing Thumb-2
+           instructions.
+        2) Reject wide instructions in non Thumb-2 cores.
+
+        Only instructions with narrow and wide variants need to be handled
+        but selecting all non wide-only instructions is easier.  */
+      if (!ARM_CPU_HAS_FEATURE (variant, arm_ext_v6t2)
+         && !t32_insn_ok (variant, opcode))
         {
         {
-         if (opcode->tencode != do_t_blx && opcode->tencode != do_t_branch23
-             && !(ARM_CPU_HAS_FEATURE(*opcode->tvariant, arm_ext_msr)
-                  || ARM_CPU_HAS_FEATURE(*opcode->tvariant, arm_ext_barrier)))
+         if (inst.size_req == 0)
+           inst.size_req = 2;
+         else if (inst.size_req == 4)
             {
             {
-             /* Two things are addressed here.
-                1) Implicit require narrow instructions on Thumb-1.
-                   This avoids relaxation accidentally introducing Thumb-2
-                    instructions.
-                2) Reject wide instructions in non Thumb-2 cores.  */
-             if (inst.size_req == 0)
-               inst.size_req = 2;
-             else if (inst.size_req == 4)
-               {
-                 as_bad (_("selected processor does not support Thumb-2 mode `%s'"), str);
-                 return;
-               }
+             if (ARM_CPU_HAS_FEATURE (variant, arm_ext_v8m))
+               as_bad (_("selected processor does not support 32bit wide "
+                         "variant of instruction `%s'"), str);
+             else
+               as_bad (_("selected processor does not support `%s' in "
+                         "Thumb-2 mode"), str);
+             return;
             }
         }
  
             }
         }
  
@@ -17730,13 +18259,14 @@ md_assemble (char *str)
        ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                               *opcode->tvariant);
        /* Many Thumb-2 instructions also have Thumb-1 variants, so explicitly
        ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                               *opcode->tvariant);
        /* Many Thumb-2 instructions also have Thumb-1 variants, so explicitly
-        set those bits when Thumb-2 32-bit instructions are seen.  ie.
-        anything other than bl/blx and v6-M instructions.
-        The impact of relaxable instructions will be considered later after we
-        finish all relaxation.  */
-      if ((inst.size == 4 && (inst.instruction & 0xf800e800) != 0xf000e800)
-         && !(ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_msr)
-              || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_barrier)))
+        set those bits when Thumb-2 32-bit instructions are seen.  The impact
+        of relaxable instructions will be considered later after we finish all
+        relaxation.  */
+      if (ARM_FEATURE_CORE_EQUAL (cpu_variant, arm_arch_any))
+       variant = arm_arch_none;
+      else
+       variant = cpu_variant;
+      if (inst.size == 4 && !t32_insn_ok (variant, opcode))
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                                 arm_ext_v6t2);
  
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                                 arm_ext_v6t2);
  
@@ -17759,7 +18289,7 @@ md_assemble (char *str)
           && !(opcode->avariant &&
                ARM_CPU_HAS_FEATURE (cpu_variant, *opcode->avariant)))
         {
           && !(opcode->avariant &&
                ARM_CPU_HAS_FEATURE (cpu_variant, *opcode->avariant)))
         {
-         as_bad (_("selected processor does not support ARM mode `%s'"), str);
+         as_bad (_("selected processor does not support `%s' in ARM mode"), str);
           return;
         }
        if (inst.size_req)
           return;
         }
        if (inst.size_req)
@@ -17770,7 +18300,7 @@ md_assemble (char *str)
  
        inst.instruction = opcode->avalue;
        if (opcode->tag == OT_unconditionalF)
  
        inst.instruction = opcode->avalue;
        if (opcode->tag == OT_unconditionalF)
-       inst.instruction |= 0xF << 28;
+       inst.instruction |= 0xFU << 28;
        else
         inst.instruction |= inst.cond << 28;
        inst.size = INSN_SIZE;
        else
         inst.instruction |= inst.cond << 28;
        inst.size = INSN_SIZE;
@@ -18459,7 +18989,7 @@ static const struct asm_opcode insns[] =
    CL("cmnp",   170f000,           2, (RR, SH),      cmp),
  
   tCE("mov",    1a00000, _mov,     2, (RR, SH),      mov,  t_mov_cmp),
    CL("cmnp",   170f000,           2, (RR, SH),      cmp),
  
   tCE("mov",    1a00000, _mov,     2, (RR, SH),      mov,  t_mov_cmp),
- tC3("movs",   1b00000, _movs,    2, (RR, SH),      mov,  t_mov_cmp),
+ tC3("movs",   1b00000, _movs,    2, (RR, SHG),     mov,  t_mov_cmp),
   tCE("mvn",    1e00000, _mvn,     2, (RR, SH),      mov,  t_mvn_tst),
   tC3("mvns",   1f00000, _mvns,    2, (RR, SH),      mov,  t_mvn_tst),
  
   tCE("mvn",    1e00000, _mvn,     2, (RR, SH),      mov,  t_mvn_tst),
   tC3("mvns",   1f00000, _mvns,    2, (RR, SH),      mov,  t_mvn_tst),
  
@@ -18703,11 +19233,14 @@ static const struct asm_opcode insns[] =
   TUF("setend",    1010000, b650,     1, (ENDI),                     setend, t_setend),
  
  #undef  THUMB_VARIANT
   TUF("setend",    1010000, b650,     1, (ENDI),                     setend, t_setend),
  
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT  & arm_ext_v6t2
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
  
   TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc_npcsp, ADDR),        ldrex, t_ldrex),
   TCE("strex",  1800f90, e8400000, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
                                       strex,  t_strex),
  
   TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc_npcsp, ADDR),        ldrex, t_ldrex),
   TCE("strex",  1800f90, e8400000, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
                                       strex,  t_strex),
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
   TUF("mcrr2",  c400000, fc400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TUF("mrrc2",  c500000, fc500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
  
   TUF("mcrr2",  c400000, fc400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TUF("mrrc2",  c500000, fc500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
  
@@ -18735,11 +19268,11 @@ static const struct asm_opcode insns[] =
    UF(srsed,    8400500,           2, (oRRw, I31w),                srs),
   TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
   TUF("srsfd",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
    UF(srsed,    8400500,           2, (oRRw, I31w),                srs),
   TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
   TUF("srsfd",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
+ TUF("cps",    1020000, f3af8100, 1, (I31b),                     imm0, t_cps),
  
  /*  ARM V6 not included in V7M (eg. integer SIMD).  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_dsp
  
  /*  ARM V6 not included in V7M (eg. integer SIMD).  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_dsp
- TUF("cps",    1020000, f3af8100, 1, (I31b),                     imm0, t_cps),
   TCE("pkhbt",  6800010, eac00000, 4, (RRnpc, RRnpc, RRnpc, oSHll),   pkhbt, t_pkhbt),
   TCE("pkhtb",  6800050, eac00020, 4, (RRnpc, RRnpc, RRnpc, oSHar),   pkhtb, t_pkhtb),
   TCE("qadd16", 6200f10, fa90f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("pkhbt",  6800010, eac00000, 4, (RRnpc, RRnpc, RRnpc, oSHll),   pkhbt, t_pkhbt),
   TCE("pkhtb",  6800050, eac00020, 4, (RRnpc, RRnpc, RRnpc, oSHar),   pkhtb, t_pkhtb),
   TCE("qadd16", 6200f10, fa90f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
@@ -18853,7 +19386,7 @@ static const struct asm_opcode insns[] =
                                        RRnpcb), strexd, t_strexd),
  
  #undef  THUMB_VARIANT
                                        RRnpcb), strexd, t_strexd),
  
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT  & arm_ext_v6t2
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
   TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc_npcsp,RRnpcb),
       rd_rn,  rd_rn),
   TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc_npcsp, RRnpcb),
   TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc_npcsp,RRnpcb),
       rd_rn,  rd_rn),
   TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc_npcsp, RRnpcb),
@@ -18879,6 +19412,13 @@ static const struct asm_opcode insns[] =
   TCE("hvc",    1400070, f7e08000, 1, (EXPi), hvc, t_hvc),
   TCE("eret",   160006e, f3de8f00, 0, (), noargs, noargs),
  
   TCE("hvc",    1400070, f7e08000, 1, (EXPi), hvc, t_hvc),
   TCE("eret",   160006e, f3de8f00, 0, (), noargs, noargs),
  
+#undef ARM_VARIANT
+#define        ARM_VARIANT    & arm_ext_pan
+#undef THUMB_VARIANT
+#define        THUMB_VARIANT  & arm_ext_pan
+
+ TUF("setpan", 1100000, b610, 1, (I7), setpan, t_setpan),
+
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_v6t2
  #undef  THUMB_VARIANT
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_v6t2
  #undef  THUMB_VARIANT
@@ -18890,8 +19430,6 @@ static const struct asm_opcode insns[] =
   TCE("ubfx",   7e00050, f3c00000, 4, (RR, RR, I31, I32),          bfx, t_bfx),
  
   TCE("mls",    0600090, fb000010, 4, (RRnpc, RRnpc, RRnpc, RRnpc), mlas, t_mla),
   TCE("ubfx",   7e00050, f3c00000, 4, (RR, RR, I31, I32),          bfx, t_bfx),
  
   TCE("mls",    0600090, fb000010, 4, (RRnpc, RRnpc, RRnpc, RRnpc), mlas, t_mla),
- TCE("movw",   3000000, f2400000, 2, (RRnpc, HALF),                mov16, t_mov16),
- TCE("movt",   3400000, f2c00000, 2, (RRnpc, HALF),                mov16, t_mov16),
   TCE("rbit",   6ff0f30, fa90f0a0, 2, (RR, RR),                     rd_rm, t_rbit),
  
   TC3("ldrht",  03000b0, f8300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TCE("rbit",   6ff0f30, fa90f0a0, 2, (RR, RR),                     rd_rm, t_rbit),
  
   TC3("ldrht",  03000b0, f8300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
@@ -18899,6 +19437,11 @@ static const struct asm_opcode insns[] =
   TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
   TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
+ TCE("movw",   3000000, f2400000, 2, (RRnpc, HALF),                mov16, t_mov16),
+ TCE("movt",   3400000, f2c00000, 2, (RRnpc, HALF),                mov16, t_mov16),
+
   /* Thumb-only instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT NULL
   /* Thumb-only instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT NULL
@@ -18910,6 +19453,8 @@ static const struct asm_opcode insns[] =
      -mimplicit-it=[never | arm] modes.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v1
      -mimplicit-it=[never | arm] modes.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v1
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
  
   TUE("it",        bf08,        bf08,     1, (COND),   it,    t_it),
   TUE("itt",       bf0c,        bf0c,     1, (COND),   it,    t_it),
  
   TUE("it",        bf08,        bf08,     1, (COND),   it,    t_it),
   TUE("itt",       bf0c,        bf0c,     1, (COND),   it,    t_it),
@@ -18979,31 +19524,35 @@ static const struct asm_opcode insns[] =
   /* AArchv8 instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT   & arm_ext_v8
   /* AArchv8 instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT   & arm_ext_v8
+
+/* Instructions shared between armv8-a and armv8-m.  */
  #undef  THUMB_VARIANT
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & arm_ext_v8
+#define THUMB_VARIANT & arm_ext_atomics
  
  
- tCE("sevl",   320f005, _sevl,    0, (),               noargs, t_hint),
- TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
+ TCE("lda",    1900c9f, e8d00faf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("ldab",   1d00c9f, e8d00f8f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("ldah",   1f00c9f, e8d00f9f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("stl",    180fc90, e8c00faf, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+ TCE("stlb",   1c0fc90, e8c00f8f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+ TCE("stlh",   1e0fc90, e8c00f9f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
   TCE("ldaex",  1900e9f, e8d00fef, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("ldaex",  1900e9f, e8d00fef, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldaexd", 1b00e9f, e8d000ff, 3, (RRnpc, oRRnpc, RRnpcb),
-                                                       ldrexd, t_ldrexd),
   TCE("ldaexb", 1d00e9f, e8d00fcf, 2, (RRnpc,RRnpcb),   rd_rn,  rd_rn),
   TCE("ldaexh", 1f00e9f, e8d00fdf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("stlex",  1800e90, e8c00fe0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex,  t_stlex),
   TCE("ldaexb", 1d00e9f, e8d00fcf, 2, (RRnpc,RRnpcb),   rd_rn,  rd_rn),
   TCE("ldaexh", 1f00e9f, e8d00fdf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("stlex",  1800e90, e8c00fe0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex,  t_stlex),
- TCE("stlexd", 1a00e90, e8c000f0, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb),
-                                                       strexd, t_strexd),
   TCE("stlexb", 1c00e90, e8c00fc0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexh", 1e00e90, e8c00fd0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexb", 1c00e90, e8c00fc0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexh", 1e00e90, e8c00fd0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
- TCE("lda",    1900c9f, e8d00faf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldab",   1d00c9f, e8d00f8f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldah",   1f00c9f, e8d00f9f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("stl",    180fc90, e8c00faf, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
- TCE("stlb",   1c0fc90, e8c00f8f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
- TCE("stlh",   1e0fc90, e8c00f9f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8
  
  
+ tCE("sevl",   320f005, _sevl,    0, (),               noargs, t_hint),
+ TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
+ TCE("ldaexd", 1b00e9f, e8d000ff, 3, (RRnpc, oRRnpc, RRnpcb),
+                                                       ldrexd, t_ldrexd),
+ TCE("stlexd", 1a00e90, e8c000f0, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb),
+                                                       strexd, t_strexd),
   /* ARMv8 T32 only.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  NULL
   /* ARMv8 T32 only.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  NULL
@@ -19067,6 +19616,13 @@ static const struct asm_opcode insns[] =
    TUEc("crc32ch",1200240, fad0f090, 3, (RR, oRR, RR), crc32ch),
    TUEc("crc32cw",1400240, fad0f0a0, 3, (RR, oRR, RR), crc32cw),
  
    TUEc("crc32ch",1200240, fad0f090, 3, (RR, oRR, RR), crc32ch),
    TUEc("crc32cw",1400240, fad0f0a0, 3, (RR, oRR, RR), crc32cw),
  
+ /* ARMv8.2 RAS extension.  */
+#undef  ARM_VARIANT
+#define ARM_VARIANT   & arm_ext_v8_2
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8_2
+ TUE ("esb", 320f010, f3af8010, 0, (), noargs,  noargs),
+
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_fpa_ext_v1  /* Core FPA instruction set (V1).  */
  #undef  THUMB_VARIANT
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_fpa_ext_v1  /* Core FPA instruction set (V1).  */
  #undef  THUMB_VARIANT
@@ -19683,6 +20239,15 @@ static const struct asm_opcode insns[] =
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & arm_ext_fp16
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_fp16
+ /* New instructions added from v8.2, allowing the extraction and insertion of
+    the upper 16 bits of a 32-bit vector register.  */
+ NCE (vmovx,     eb00a40,       2, (RVS, RVS), neon_movhf),
+ NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
@@ -19784,6 +20349,11 @@ static const struct asm_opcode insns[] =
   NUF(vrecpsq,   0000f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrecpsq,   0000f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
+ /* ARM v8.1 extension.  */
+ nUF (vqrdmlah,  _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
  
    /* Two address, int/float. Types S8 S16 S32 F32.  */
   NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
  
    /* Two address, int/float. Types S8 S16 S32 F32.  */
   NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
@@ -20296,6 +20866,13 @@ static const struct asm_opcode insns[] =
   cCE("cfmsub32",e100600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
   cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsub32",e100600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
   cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
+
+#undef  ARM_VARIANT
+#define ARM_VARIANT NULL
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8m
+ TUE("tt", 0, e840f000, 2, (RRnpc, RRnpc), 0, tt),
+ TUE("ttt", 0, e840f040, 2, (RRnpc, RRnpc), 0, tt),
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
@@ -20850,7 +21427,7 @@ md_section_align (segT   segment ATTRIBUTE_UNUSED,
        int align;
  
        align = bfd_get_section_alignment (stdoutput, segment);
        int align;
  
        align = bfd_get_section_alignment (stdoutput, segment);
-      size = ((size + (1 << align) - 1) & ((valueT) -1 << align));
+      size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
      }
  #endif
  
      }
  #endif
  
@@ -21025,27 +21602,29 @@ arm_init_frag (fragS * fragP, int max_chars ATTRIBUTE_UNUSED)
  void
  arm_init_frag (fragS * fragP, int max_chars)
  {
  void
  arm_init_frag (fragS * fragP, int max_chars)
  {
+  int frag_thumb_mode;
+
    /* If the current ARM vs THUMB mode has not already
       been recorded into this frag then do so now.  */
    if ((fragP->tc_frag_data.thumb_mode & MODE_RECORDED) == 0)
    /* If the current ARM vs THUMB mode has not already
       been recorded into this frag then do so now.  */
    if ((fragP->tc_frag_data.thumb_mode & MODE_RECORDED) == 0)
-    {
-      fragP->tc_frag_data.thumb_mode = thumb_mode | MODE_RECORDED;
+    fragP->tc_frag_data.thumb_mode = thumb_mode | MODE_RECORDED;
  
  
-      /* Record a mapping symbol for alignment frags.  We will delete this
-        later if the alignment ends up empty.  */
-      switch (fragP->fr_type)
-       {
-         case rs_align:
-         case rs_align_test:
-         case rs_fill:
-           mapping_state_2 (MAP_DATA, max_chars);
-           break;
-         case rs_align_code:
-           mapping_state_2 (thumb_mode ? MAP_THUMB : MAP_ARM, max_chars);
-           break;
-         default:
-           break;
-       }
+  frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
+
+  /* Record a mapping symbol for alignment frags.  We will delete this
+     later if the alignment ends up empty.  */
+  switch (fragP->fr_type)
+    {
+    case rs_align:
+    case rs_align_test:
+    case rs_fill:
+      mapping_state_2 (MAP_DATA, max_chars);
+      break;
+    case rs_align_code:
+      mapping_state_2 (frag_thumb_mode ? MAP_THUMB : MAP_ARM, max_chars);
+      break;
+    default:
+      break;
      }
  }
  
      }
  }
  
@@ -21620,6 +22199,51 @@ md_pcrel_from_section (fixS * fixP, segT seg)
      }
  }
  
      }
  }
  
+static bfd_boolean flag_warn_syms = TRUE;
+
+/* Warn (once per name, and only while flag_warn_syms is set) when an
+   assignment creates a symbol whose NAME matches an ARM instruction
+   mnemonic.  C is unused.  Always returns FALSE.  */
+
+bfd_boolean
+arm_tc_equal_in_insn (int c ATTRIBUTE_UNUSED, char * name)
+{
+  /* PR 18347 - Warn if the user attempts to create a symbol with the same
+     name as an ARM instruction.  Whilst strictly speaking it is allowed, it
+     does mean that the resulting code might be very confusing to the reader.
+     Also this warning can be triggered if the user omits an operand before
+     an immediate address, eg:
+
+       LDR =foo
+
+     GAS treats this as an assignment of the value of the symbol foo to a
+     symbol LDR, and so (without this code) it will not issue any kind of
+     warning or error message.
+
+     Note - ARM instructions are case-insensitive but the strings in the hash
+     table are all stored in lower case, so we must first ensure that name is
+     lower case too.  */
+  if (flag_warn_syms && arm_ops_hsh)
+    {
+      static struct hash_control * already_warned = NULL;
+      char * nbuf = strdup (name);
+      char * p;
+
+      /* The warning is purely advisory, so on allocation failure just skip
+         it rather than dereferencing a NULL pointer below.  */
+      if (nbuf == NULL)
+        return FALSE;
+
+      for (p = nbuf; *p; p++)
+        *p = TOLOWER (*p);
+
+      if (hash_find (arm_ops_hsh, nbuf) == NULL)
+        free (nbuf);
+      else
+        {
+          if (already_warned == NULL)
+            already_warned = hash_new ();
+
+          /* Only warn about the symbol once.  To keep the code simple we
+             let hash_insert do the lookup for us.  The key handed to the
+             table is our private lower-cased copy, never NAME itself: NAME
+             belongs to the caller and may not outlive this call.
+             NOTE(review): this assumes the table keeps the key pointer
+             instead of copying it - confirm against hash_insert.  On a
+             successful insert NBUF is therefore left allocated; when the
+             name was already recorded the copy is released.  */
+          if (hash_insert (already_warned, nbuf, NULL) == NULL)
+            as_warn (_("[-mwarn-syms]: Assignment makes a symbol match an ARM instruction: %s"), name);
+          else
+            free (nbuf);
+        }
+    }
+
+  return FALSE;
+}
+
  /* Under ELF we need to default _GLOBAL_OFFSET_TABLE.
     Otherwise we have no need to default values of symbols.  */
  
  /* Under ELF we need to default _GLOBAL_OFFSET_TABLE.
     Otherwise we have no need to default values of symbols.  */
  
@@ -22645,7 +23269,7 @@ md_apply_fix (fixS *    fixP,
  
        if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
         {
  
        if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
         {
-         if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2)))
+         if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
           else if ((value & ~0x1ffffff)
                    && ((value & ~0x1ffffff) != ~0x1ffffff))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
           else if ((value & ~0x1ffffff)
                    && ((value & ~0x1ffffff) != ~0x1ffffff))
@@ -22744,7 +23368,20 @@ md_apply_fix (fixS *   fixP,
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
-      if (value < -1023 || value > 1023 || (value & 3))
+      if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM)
+       newval = md_chars_to_number (buf, INSN_SIZE);
+      else
+       newval = get_thumb32_insn (buf);
+      if ((newval & 0x0f200f00) == 0x0d000900)
+       {
+         /* This is a fp16 vstr/vldr.  The immediate offset in the mnemonic
+            has permitted values that are multiples of 2, in the range 0
+            to 510.  */
+         if (value < -510 || value > 510 || (value & 1))
+           as_bad_where (fixP->fx_file, fixP->fx_line,
+                         _("co-processor offset out of range"));
+       }
+      else if (value < -1023 || value > 1023 || (value & 3))
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
      cp_off_common:
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
      cp_off_common:
@@ -22761,6 +23398,17 @@ md_apply_fix (fixS *   fixP,
        else
         {
           newval &= 0xff7fff00;
        else
         {
           newval &= 0xff7fff00;
+         if ((newval & 0x0f200f00) == 0x0d000900)
+           {
+             /* This is a fp16 vstr/vldr.
+
+                It requires the immediate offset in the instruction is shifted
+                left by 1 to be a half-word offset.
+
+                Here, left shift by 1 first, and later right shift by 2
+                should get the right offset.  */
+             value <<= 1;
+           }
           newval |= (value >> 2) | (sign ? INDEX_UP : 0);
         }
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM
           newval |= (value >> 2) | (sign ? INDEX_UP : 0);
         }
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM
@@ -22886,7 +23534,7 @@ md_apply_fix (fixS *    fixP,
  
         if (rd == REG_SP)
           {
  
         if (rd == REG_SP)
           {
-           if (value & ~0x1fc)
+           if (value & ~0x1fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for stack address calculation"));
             newval = subtract ? T_OPCODE_SUB_ST : T_OPCODE_ADD_ST;
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for stack address calculation"));
             newval = subtract ? T_OPCODE_SUB_ST : T_OPCODE_ADD_ST;
@@ -22894,10 +23542,49 @@ md_apply_fix (fixS *  fixP,
           }
         else if (rs == REG_PC || rs == REG_SP)
           {
           }
         else if (rs == REG_PC || rs == REG_SP)
           {
+           /* PR gas/18541.  If the addition is for a defined symbol
+              within range of an ADR instruction then accept it.  */
+           if (subtract
+               && value == 4
+               && fixP->fx_addsy != NULL)
+             {
+               subtract = 0;
+
+               if (! S_IS_DEFINED (fixP->fx_addsy)
+                   || S_GET_SEGMENT (fixP->fx_addsy) != seg
+                   || S_IS_WEAK (fixP->fx_addsy))
+                 {
+                   as_bad_where (fixP->fx_file, fixP->fx_line,
+                                 _("address calculation needs a strongly defined nearby symbol"));
+                 }
+               else
+                 {
+                   offsetT v = fixP->fx_where + fixP->fx_frag->fr_address;
+
+                   /* Round up to the next 4-byte boundary.  */
+                   if (v & 3)
+                     v = (v + 3) & ~ 3;
+                   else
+                     v += 4;
+                   v = S_GET_VALUE (fixP->fx_addsy) - v;
+
+                   if (v & ~0x3fc)
+                     {
+                       as_bad_where (fixP->fx_file, fixP->fx_line,
+                                     _("symbol too far away"));
+                     }
+                   else
+                     {
+                       fixP->fx_done = 1;
+                       value = v;
+                     }
+                 }
+             }
+
             if (subtract || value & ~0x3fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for address calculation (value = 0x%08lX)"),
             if (subtract || value & ~0x3fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for address calculation (value = 0x%08lX)"),
-                           (unsigned long) value);
+                           (unsigned long) (subtract ? - value : value));
             newval = (rs == REG_PC ? T_OPCODE_ADD_PC : T_OPCODE_ADD_SP);
             newval |= rd << 8;
             newval |= value >> 2;
             newval = (rs == REG_PC ? T_OPCODE_ADD_PC : T_OPCODE_ADD_SP);
             newval |= rd << 8;
             newval |= value >> 2;
@@ -22995,6 +23682,68 @@ md_apply_fix (fixS *   fixP,
         }
        return;
  
         }
        return;
  
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC:
+      gas_assert (!fixP->fx_done);
+      {
+       bfd_vma insn;
+       bfd_boolean is_mov;
+       bfd_vma encoded_addend = value;
+
+       /* Check that addend can be encoded in instruction.  */
+       if (!seg->use_rela_p && (value < 0 || value > 255))
+         as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("the offset 0x%08lX is not representable"),
+                       (unsigned long) encoded_addend);
+
+       /* Extract the instruction.  */
+       insn = md_chars_to_number (buf, THUMB_SIZE);
+       is_mov = (insn & 0xf800) == 0x2000;
+
+       /* Encode insn.  */
+       if (is_mov)
+         {
+           if (!seg->use_rela_p)
+             insn |= encoded_addend;
+         }
+       else
+         {
+           int rd, rs;
+
+           /* Extract the register fields from the instruction.  */
+            /* Encoding is the following
+               0x8000  SUB
+               0x00F0  Rd
+               0x000F  Rs
+            */
+            /* The following conditions must be true :
+               - ADD
+               - Rd == Rs
+               - Rd <= 7
+            */
+           rd = (insn >> 4) & 0xf;
+           rs = insn & 0xf;
+           if ((insn & 0x8000) || (rd != rs) || rd > 7)
+             as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("Unable to process relocation for thumb opcode: %lx"),
+                       (unsigned long) insn);
+
+           /* Encode as ADD immediate8 thumb 1 code.  */
+           insn = 0x3000 | (rd << 8);
+
+           /* Place the encoded addend into the first 8 bits of the
+              instruction.  */
+           if (!seg->use_rela_p)
+             insn |= encoded_addend;
+         }
+
+       /* Update the instruction.  */
+       md_number_to_chars (buf, insn, THUMB_SIZE);
+      }
+      break;
+
     case BFD_RELOC_ARM_ALU_PC_G0_NC:
     case BFD_RELOC_ARM_ALU_PC_G0:
     case BFD_RELOC_ARM_ALU_PC_G1_NC:
     case BFD_RELOC_ARM_ALU_PC_G0_NC:
     case BFD_RELOC_ARM_ALU_PC_G0:
     case BFD_RELOC_ARM_ALU_PC_G1_NC:
@@ -23300,7 +24049,6 @@ tc_gen_reloc (asection *section, fixS *fixp)
      case BFD_RELOC_ARM_SBREL32:
      case BFD_RELOC_ARM_PREL31:
      case BFD_RELOC_ARM_TARGET2:
      case BFD_RELOC_ARM_SBREL32:
      case BFD_RELOC_ARM_PREL31:
      case BFD_RELOC_ARM_TARGET2:
-    case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_LDO32:
      case BFD_RELOC_ARM_PCREL_CALL:
      case BFD_RELOC_ARM_PCREL_JUMP:
      case BFD_RELOC_ARM_TLS_LDO32:
      case BFD_RELOC_ARM_PCREL_CALL:
      case BFD_RELOC_ARM_PCREL_JUMP:
@@ -23333,11 +24081,16 @@ tc_gen_reloc (asection *section, fixS *fixp)
      case BFD_RELOC_ARM_LDC_SB_G1:
      case BFD_RELOC_ARM_LDC_SB_G2:
      case BFD_RELOC_ARM_V4BX:
      case BFD_RELOC_ARM_LDC_SB_G1:
      case BFD_RELOC_ARM_LDC_SB_G2:
      case BFD_RELOC_ARM_V4BX:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC:
        code = fixp->fx_r_type;
        break;
  
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
        code = fixp->fx_r_type;
        break;
  
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
+    case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_IE32:
      case BFD_RELOC_ARM_TLS_LDM32:
        /* BFD will include the symbol's address in the addend.
      case BFD_RELOC_ARM_TLS_IE32:
      case BFD_RELOC_ARM_TLS_LDM32:
        /* BFD will include the symbol's address in the addend.
@@ -23381,7 +24134,7 @@ tc_gen_reloc (asection *section, fixS *fixp)
  
      default:
        {
  
      default:
        {
-       char * type;
+       const char * type;
  
         switch (fixp->fx_r_type)
           {
  
         switch (fixp->fx_r_type)
           {
@@ -23634,12 +24387,17 @@ arm_fix_adjustable (fixS * fixP)
        || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVT_PCREL)
      return FALSE;
  
        || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVT_PCREL)
      return FALSE;
  
+  /* BFD_RELOC_ARM_THUMB_ALU_ABS_Gx_NC relocations have VERY limited
+     offsets, so keep these symbols.  */
+  if (fixP->fx_r_type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+      && fixP->fx_r_type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+    return FALSE;
+
    return TRUE;
  }
  #endif /* defined (OBJ_ELF) || defined (OBJ_COFF) */
  
  #ifdef OBJ_ELF
    return TRUE;
  }
  #endif /* defined (OBJ_ELF) || defined (OBJ_COFF) */
  
  #ifdef OBJ_ELF
-
  const char *
  elf32_arm_target_format (void)
  {
  const char *
  elf32_arm_target_format (void)
  {
@@ -24113,6 +24871,7 @@ md_begin (void)
               -mthumb-interwork          Code supports ARM/Thumb interworking
  
               -m[no-]warn-deprecated     Warn about deprecated features
               -mthumb-interwork          Code supports ARM/Thumb interworking
  
               -m[no-]warn-deprecated     Warn about deprecated features
+             -m[no-]warn-syms           Warn when symbols match instructions
  
        For now we will also provide support for:
  
  
        For now we will also provide support for:
  
@@ -24181,15 +24940,16 @@ struct option md_longopts[] =
    {NULL, no_argument, NULL, 0}
  };
  
    {NULL, no_argument, NULL, 0}
  };
  
+
  size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
  size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
-  char *option;                /* Option name to match.  */
-  char *help;          /* Help information.  */
+  const char *option;          /* Option name to match.  */
+  const char *help;            /* Help information.  */
    int  *var;           /* Variable to change.  */
    int  value;          /* What to change it to.  */
    int  *var;           /* Variable to change.  */
    int  value;          /* What to change it to.  */
-  char *deprecated;    /* If non-null, print this message.  */
+  const char *deprecated;      /* If non-null, print this message.  */
  };
  
  struct arm_option_table arm_opts[] =
  };
  
  struct arm_option_table arm_opts[] =
@@ -24215,15 +24975,17 @@ struct arm_option_table arm_opts[] =
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
+  {"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
+  {"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
    {NULL, NULL, NULL, 0, NULL}
  };
  
  struct arm_legacy_option_table
  {
    {NULL, NULL, NULL, 0, NULL}
  };
  
  struct arm_legacy_option_table
  {
-  char *option;                                /* Option name to match.  */
+  const char *option;                          /* Option name to match.  */
    const arm_feature_set        **var;          /* Variable to change.  */
    const arm_feature_set        value;          /* What to change it to.  */
    const arm_feature_set        **var;          /* Variable to change.  */
    const arm_feature_set        value;          /* What to change it to.  */
-  char *deprecated;                    /* If non-null, print this message.  */
+  const char *deprecated;                      /* If non-null, print this message.  */
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
@@ -24341,7 +25103,7 @@ const struct arm_legacy_option_table arm_legacy_opts[] =
  
  struct arm_cpu_option_table
  {
  
  struct arm_cpu_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    /* For some CPUs we assume an FPU unless the user explicitly sets
    size_t name_len;
    const arm_feature_set        value;
    /* For some CPUs we assume an FPU unless the user explicitly sets
@@ -24441,8 +25203,8 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("mpcorenovfp",  ARM_ARCH_V6K,    FPU_NONE,        "MPCore"),
    ARM_CPU_OPT ("arm1156t2-s",  ARM_ARCH_V6T2,   FPU_NONE,        NULL),
    ARM_CPU_OPT ("arm1156t2f-s", ARM_ARCH_V6T2,   FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("mpcorenovfp",  ARM_ARCH_V6K,    FPU_NONE,        "MPCore"),
    ARM_CPU_OPT ("arm1156t2-s",  ARM_ARCH_V6T2,   FPU_NONE,        NULL),
    ARM_CPU_OPT ("arm1156t2f-s", ARM_ARCH_V6T2,   FPU_ARCH_VFP_V2, NULL),
-  ARM_CPU_OPT ("arm1176jz-s",  ARM_ARCH_V6ZK,   FPU_NONE,        NULL),
-  ARM_CPU_OPT ("arm1176jzf-s", ARM_ARCH_V6ZK,   FPU_ARCH_VFP_V2, NULL),
+  ARM_CPU_OPT ("arm1176jz-s",  ARM_ARCH_V6KZ,   FPU_NONE,        NULL),
+  ARM_CPU_OPT ("arm1176jzf-s", ARM_ARCH_V6KZ,   FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("cortex-a5",    ARM_ARCH_V7A_MP_SEC,
                                                  FPU_NONE,        "Cortex-A5"),
    ARM_CPU_OPT ("cortex-a7",    ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
    ARM_CPU_OPT ("cortex-a5",    ARM_ARCH_V7A_MP_SEC,
                                                  FPU_NONE,        "Cortex-A5"),
    ARM_CPU_OPT ("cortex-a7",    ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
@@ -24461,6 +25223,10 @@ static const struct arm_cpu_option_table arm_cpus[] =
                                                                   "Cortex-A15"),
    ARM_CPU_OPT ("cortex-a17",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A17"),
                                                                   "Cortex-A15"),
    ARM_CPU_OPT ("cortex-a17",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A17"),
+  ARM_CPU_OPT ("cortex-a32",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A32"),
+  ARM_CPU_OPT ("cortex-a35",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A35"),
    ARM_CPU_OPT ("cortex-a53",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A53"),
    ARM_CPU_OPT ("cortex-a57",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
    ARM_CPU_OPT ("cortex-a53",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A53"),
    ARM_CPU_OPT ("cortex-a57",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
@@ -24484,6 +25250,10 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("exynos-m1",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Samsung " \
                                                                   "Exynos M1"),
    ARM_CPU_OPT ("exynos-m1",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Samsung " \
                                                                   "Exynos M1"),
+  ARM_CPU_OPT ("qdf24xx",      ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Qualcomm "
+                                                                 "QDF24XX"),
+
    /* ??? XSCALE is really an architecture.  */
    ARM_CPU_OPT ("xscale",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* ??? iwmmxt is not a processor.  */
    /* ??? XSCALE is really an architecture.  */
    ARM_CPU_OPT ("xscale",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* ??? iwmmxt is not a processor.  */
@@ -24494,11 +25264,13 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("ep9312",       ARM_FEATURE_LOW (ARM_AEXT_V4T, ARM_CEXT_MAVERICK),
                                                  FPU_ARCH_MAVERICK, "ARM920T"),
    /* Marvell processors.  */
    ARM_CPU_OPT ("ep9312",       ARM_FEATURE_LOW (ARM_AEXT_V4T, ARM_CEXT_MAVERICK),
                                                  FPU_ARCH_MAVERICK, "ARM920T"),
    /* Marvell processors.  */
-  ARM_CPU_OPT ("marvell-pj4",   ARM_FEATURE_CORE_LOW (ARM_AEXT_V7A | ARM_EXT_MP
-                                                     | ARM_EXT_SEC),
+  ARM_CPU_OPT ("marvell-pj4",   ARM_FEATURE_CORE (ARM_AEXT_V7A | ARM_EXT_MP
+                                                 | ARM_EXT_SEC,
+                                                 ARM_EXT2_V6T2_V8M),
                                                 FPU_ARCH_VFP_V3D16, NULL),
                                                 FPU_ARCH_VFP_V3D16, NULL),
-  ARM_CPU_OPT ("marvell-whitney", ARM_FEATURE_CORE_LOW (ARM_AEXT_V7A | ARM_EXT_MP
-                                                       | ARM_EXT_SEC),
+  ARM_CPU_OPT ("marvell-whitney", ARM_FEATURE_CORE (ARM_AEXT_V7A | ARM_EXT_MP
+                                                   | ARM_EXT_SEC,
+                                                   ARM_EXT2_V6T2_V8M),
                                                FPU_ARCH_NEON_VFP_V4, NULL),
    /* APM X-Gene family.  */
    ARM_CPU_OPT ("xgene1",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                FPU_ARCH_NEON_VFP_V4, NULL),
    /* APM X-Gene family.  */
    ARM_CPU_OPT ("xgene1",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
@@ -24512,7 +25284,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
  
  struct arm_arch_option_table
  {
  
  struct arm_arch_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    const arm_feature_set        default_fpu;
    size_t name_len;
    const arm_feature_set        value;
    const arm_feature_set        default_fpu;
@@ -24544,11 +25316,17 @@ static const struct arm_arch_option_table arm_archs[] =
    ARM_ARCH_OPT ("armv6j",      ARM_ARCH_V6,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6k",      ARM_ARCH_V6K,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6z",      ARM_ARCH_V6Z,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6j",      ARM_ARCH_V6,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6k",      ARM_ARCH_V6K,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6z",      ARM_ARCH_V6Z,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zk",     ARM_ARCH_V6ZK,   FPU_ARCH_VFP),
+  /* The official spelling of this variant is ARMv6KZ, the name "armv6zk" is
+     kept to preserve existing behaviour.  */
+  ARM_ARCH_OPT ("armv6kz",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv6zk",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6t2",     ARM_ARCH_V6T2,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6kt2",    ARM_ARCH_V6KT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6zt2",    ARM_ARCH_V6ZT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6t2",     ARM_ARCH_V6T2,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6kt2",    ARM_ARCH_V6KT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6zt2",    ARM_ARCH_V6ZT2,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zkt2",   ARM_ARCH_V6ZKT2, FPU_ARCH_VFP),
+  /* The official spelling of this variant is ARMv6KZT2; the name "armv6zkt2"
+     is kept to preserve existing behaviour.  */
+  ARM_ARCH_OPT ("armv6kzt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv6zkt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6-m",     ARM_ARCH_V6M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6s-m",    ARM_ARCH_V6SM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7",       ARM_ARCH_V7,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6-m",     ARM_ARCH_V6M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6s-m",    ARM_ARCH_V6SM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7",       ARM_ARCH_V7,     FPU_ARCH_VFP),
@@ -24562,7 +25340,11 @@ static const struct arm_arch_option_table arm_archs[] =
    ARM_ARCH_OPT ("armv7-r",     ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-m",     ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7e-m",    ARM_ARCH_V7EM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-r",     ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-m",     ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7e-m",    ARM_ARCH_V7EM,   FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8-m.base",        ARM_ARCH_V8M_BASE, FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8-m.main",        ARM_ARCH_V8M_MAIN, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv8-a",     ARM_ARCH_V8A,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv8-a",     ARM_ARCH_V8A,    FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8.1-a",   ARM_ARCH_V8_1A,  FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8.2-a",   ARM_ARCH_V8_2A,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("xscale",      ARM_ARCH_XSCALE, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",      ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",     ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
    ARM_ARCH_OPT ("xscale",      ARM_ARCH_XSCALE, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",      ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",     ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
@@ -24573,7 +25355,7 @@ static const struct arm_arch_option_table arm_archs[] =
  /* ISA extensions in the co-processor and main instruction set space.  */
  struct arm_option_extension_value_table
  {
  /* ISA extensions in the co-processor and main instruction set space.  */
  struct arm_option_extension_value_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set merge_value;
    const arm_feature_set clear_value;
    size_t name_len;
    const arm_feature_set merge_value;
    const arm_feature_set clear_value;
@@ -24592,6 +25374,9 @@ static const struct arm_option_extension_value_table arm_extensions[] =
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_VFP_ARMV8),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_VFP_ARMV8),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("fp16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+                       ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+                       ARM_ARCH_V8_2A),
    ARM_EXT_OPT ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
    ARM_EXT_OPT ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
@@ -24604,15 +25389,21 @@ static const struct arm_option_extension_value_table arm_extensions[] =
    ARM_EXT_OPT ("mp",   ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
    ARM_EXT_OPT ("mp",   ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
-  ARM_EXT_OPT ("simd",   FPU_ARCH_NEON_VFP_ARMV8,
-                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
-                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("os",   ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)),
    ARM_EXT_OPT ("os",   ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)),
+  ARM_EXT_OPT ("pan",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN),
+                       ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8_1,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("sec",  ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)),
    ARM_EXT_OPT ("sec",  ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)),
+  ARM_EXT_OPT ("simd",  FPU_ARCH_NEON_VFP_ARMV8,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("virt", ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT | ARM_EXT_ADIV
                                      | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
    ARM_EXT_OPT ("virt", ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT | ARM_EXT_ADIV
                                      | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
@@ -24626,7 +25417,7 @@ static const struct arm_option_extension_value_table arm_extensions[] =
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
-  char *name;
+  const char *name;
    const arm_feature_set value;
  };
  
    const arm_feature_set value;
  };
  
@@ -24674,12 +25465,15 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
    {"neon-fp-armv8",    FPU_ARCH_NEON_VFP_ARMV8},
    {"crypto-neon-fp-armv8",
                         FPU_ARCH_CRYPTO_NEON_VFP_ARMV8},
    {"neon-fp-armv8",    FPU_ARCH_NEON_VFP_ARMV8},
    {"crypto-neon-fp-armv8",
                         FPU_ARCH_CRYPTO_NEON_VFP_ARMV8},
+  {"neon-fp-armv8.1",  FPU_ARCH_NEON_VFP_ARMV8_1},
+  {"crypto-neon-fp-armv8.1",
+                       FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1},
    {NULL,               ARM_ARCH_NONE}
  };
  
  struct arm_option_value_table
  {
    {NULL,               ARM_ARCH_NONE}
  };
  
  struct arm_option_value_table
  {
-  char *name;
+  const char *name;
    long value;
  };
  
    long value;
  };
  
@@ -24704,10 +25498,10 @@ static const struct arm_option_value_table arm_eabis[] =
  
  struct arm_long_option_table
  {
  
  struct arm_long_option_table
  {
-  char * option;               /* Substring to match.  */
-  char * help;                 /* Help information.  */
+  const char * option;         /* Substring to match.  */
+  const char * help;                   /* Help information.  */
    int (* func) (char * subopt);        /* Function to decode sub-option.  */
    int (* func) (char * subopt);        /* Function to decode sub-option.  */
-  char * deprecated;           /* If non-null, print this message.  */
+  const char * deprecated;             /* If non-null, print this message.  */
  };
  
  static bfd_boolean
  };
  
  static bfd_boolean
@@ -24857,11 +25651,17 @@ arm_parse_cpu (char *str)
         mcpu_cpu_opt = &opt->value;
         mcpu_fpu_opt = &opt->default_fpu;
         if (opt->canonical_name)
         mcpu_cpu_opt = &opt->value;
         mcpu_fpu_opt = &opt->default_fpu;
         if (opt->canonical_name)
-         strcpy (selected_cpu_name, opt->canonical_name);
+         {
+           gas_assert (sizeof selected_cpu_name > strlen (opt->canonical_name));
+           strcpy (selected_cpu_name, opt->canonical_name);
+         }
         else
           {
             size_t i;
  
         else
           {
             size_t i;
  
+           if (len >= sizeof selected_cpu_name)
+             len = (sizeof selected_cpu_name) - 1;
+
             for (i = 0; i < len; i++)
               selected_cpu_name[i] = TOUPPER (opt->name[i]);
             selected_cpu_name[i] = 0;
             for (i = 0; i < len; i++)
               selected_cpu_name[i] = TOUPPER (opt->name[i]);
             selected_cpu_name[i] = 0;
@@ -25143,8 +25943,9 @@ typedef struct
    arm_feature_set flags;
  } cpu_arch_ver_table;
  
    arm_feature_set flags;
  } cpu_arch_ver_table;
  
-/* Mapping from CPU features to EABI CPU arch values.  Table must be sorted
-   least features first.  */
+/* Mapping from CPU features to EABI CPU arch values.  As a general rule, the
+   table must be sorted least features first, but some reordering is needed,
+   e.g. for Thumb-2 instructions to be detected as coming from ARMv6T2.  */
  static const cpu_arch_ver_table cpu_arch_ver[] =
  {
      {1, ARM_ARCH_V4},
  static const cpu_arch_ver_table cpu_arch_ver[] =
  {
      {1, ARM_ARCH_V4},
@@ -25163,6 +25964,8 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
      {10, ARM_ARCH_V7R},
      {10, ARM_ARCH_V7M},
      {14, ARM_ARCH_V8A},
      {10, ARM_ARCH_V7R},
      {10, ARM_ARCH_V7M},
      {14, ARM_ARCH_V8A},
+    {16, ARM_ARCH_V8M_BASE},
+    {17, ARM_ARCH_V8M_MAIN},
      {0, ARM_ARCH_NONE}
  };
  
      {0, ARM_ARCH_NONE}
  };
  
@@ -25195,6 +25998,7 @@ aeabi_set_public_attributes (void)
    int fp16_optional = 0;
    arm_feature_set flags;
    arm_feature_set tmp;
    int fp16_optional = 0;
    arm_feature_set flags;
    arm_feature_set tmp;
+  arm_feature_set arm_arch_v8m_base = ARM_ARCH_V8M_BASE;
    const cpu_arch_ver_table *p;
  
    /* Choose the architecture based on the capabilities of the requested cpu
    const cpu_arch_ver_table *p;
  
    /* Choose the architecture based on the capabilities of the requested cpu
@@ -25243,11 +26047,22 @@ aeabi_set_public_attributes (void)
       actually used.  Perhaps we should separate out the specified
       and implicit cases.  Avoid taking this path for -march=all by
       checking for contradictory v7-A / v7-M features.  */
       actually used.  Perhaps we should separate out the specified
       and implicit cases.  Avoid taking this path for -march=all by
       checking for contradictory v7-A / v7-M features.  */
-  if (arch == 10
+  if (arch == TAG_CPU_ARCH_V7
        && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
        && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
-    arch = 13;
+    arch = TAG_CPU_ARCH_V7E_M;
+
+  ARM_CLEAR_FEATURE (tmp, flags, arm_arch_v8m_base);
+  if (arch == TAG_CPU_ARCH_V8M_BASE && ARM_CPU_HAS_FEATURE (tmp, arm_arch_any))
+    arch = TAG_CPU_ARCH_V8M_MAIN;
+
+  /* In cpu_arch_ver ARMv8-A is before ARMv8-M for atomics to be detected as
+     coming from ARMv8-A.  However, since ARMv8-A has more instructions than
+     ARMv8-M, -march=all must be detected as ARMv8-A.  */
+  if (arch == TAG_CPU_ARCH_V8M_MAIN
+      && ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any))
+    arch = TAG_CPU_ARCH_V8;
  
    /* Tag_CPU_name.  */
    if (selected_cpu_name[0])
  
    /* Tag_CPU_name.  */
    if (selected_cpu_name[0])
@@ -25270,7 +26085,10 @@ aeabi_set_public_attributes (void)
    aeabi_set_attribute_int (Tag_CPU_arch, arch);
  
    /* Tag_CPU_arch_profile.  */
    aeabi_set_attribute_int (Tag_CPU_arch, arch);
  
    /* Tag_CPU_arch_profile.  */
-  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a))
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
+      || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+      || (ARM_CPU_HAS_FEATURE (flags, arm_ext_atomics)
+         && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m)))
      profile = 'A';
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7r))
      profile = 'R';
      profile = 'A';
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7r))
      profile = 'R';
@@ -25290,8 +26108,18 @@ aeabi_set_public_attributes (void)
    /* Tag_THUMB_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
        || arch == 0)
    /* Tag_THUMB_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
        || arch == 0)
-    aeabi_set_attribute_int (Tag_THUMB_ISA_use,
-       ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
+    {
+      int thumb_isa_use;
+
+      if (!ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+         && ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
+       thumb_isa_use = 3;
+      else if (ARM_CPU_HAS_FEATURE (flags, arm_arch_t2))
+       thumb_isa_use = 2;
+      else
+       thumb_isa_use = 1;
+      aeabi_set_attribute_int (Tag_THUMB_ISA_use, thumb_isa_use);
+    }
  
    /* Tag_VFP_arch.  */
    if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8xd))
  
    /* Tag_VFP_arch.  */
    if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8xd))
@@ -25355,12 +26183,15 @@ aeabi_set_public_attributes (void)
       in ARM state, or when Thumb integer divide instructions have been used,
       but we have no architecture profile set, nor have we any ARM instructions.
  
       in ARM state, or when Thumb integer divide instructions have been used,
       but we have no architecture profile set, nor have we any ARM instructions.
  
-     For ARMv8 we set the tag to 0 as integer divide is implied by the base
-     architecture.
+     For ARMv8-A and ARMv8-M we set the tag to 0 as integer divide is implied
+     by the base architecture.
  
       For new architectures we will have to check these tests.  */
  
       For new architectures we will have to check these tests.  */
-  gas_assert (arch <= TAG_CPU_ARCH_V8);
-  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v8))
+  gas_assert (arch <= TAG_CPU_ARCH_V8
+             || (arch >= TAG_CPU_ARCH_V8M_BASE
+                 && arch <= TAG_CPU_ARCH_V8M_MAIN));
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+      || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
      aeabi_set_attribute_int (Tag_DIV_use, 0);
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_adiv)
            || (profile == '\0'
      aeabi_set_attribute_int (Tag_DIV_use, 0);
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_adiv)
            || (profile == '\0'