[ARM] Add feature check for ARMv8.1 AdvSIMD instructions.

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index d2085a3f75f1f6d3a0c0747dfd8388b7852e8e5e..e725f41abb4404cc37f0fbc186f6690510891c64 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
  /* tc-arm.c -- Assemble for the ARM
-   Copyright 1994-2013 Free Software Foundation, Inc.
+   Copyright (C) 1994-2016 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -137,6 +137,8 @@ static int fix_v4bx      = FALSE;
  /* Warn on using deprecated features.  */
  static int warn_on_deprecated = TRUE;
  
  /* Warn on using deprecated features.  */
  static int warn_on_deprecated = TRUE;
  
+/* Understand CodeComposer Studio assembly syntax.  */
+bfd_boolean codecomposer_syntax = FALSE;
  
  /* Variables that we set while parsing command-line options.  Once all
     options have been read we re-process these values to set the real
  
  /* Variables that we set while parsing command-line options.  Once all
     options have been read we re-process these values to set the real
@@ -166,93 +168,124 @@ static const arm_feature_set fpu_endian_pure = FPU_ARCH_ENDIAN_PURE;
  static const arm_feature_set cpu_default = CPU_DEFAULT;
  #endif
  
  static const arm_feature_set cpu_default = CPU_DEFAULT;
  #endif
  
-static const arm_feature_set arm_ext_v1 = ARM_FEATURE (ARM_EXT_V1, 0);
-static const arm_feature_set arm_ext_v2 = ARM_FEATURE (ARM_EXT_V1, 0);
-static const arm_feature_set arm_ext_v2s = ARM_FEATURE (ARM_EXT_V2S, 0);
-static const arm_feature_set arm_ext_v3 = ARM_FEATURE (ARM_EXT_V3, 0);
-static const arm_feature_set arm_ext_v3m = ARM_FEATURE (ARM_EXT_V3M, 0);
-static const arm_feature_set arm_ext_v4 = ARM_FEATURE (ARM_EXT_V4, 0);
-static const arm_feature_set arm_ext_v4t = ARM_FEATURE (ARM_EXT_V4T, 0);
-static const arm_feature_set arm_ext_v5 = ARM_FEATURE (ARM_EXT_V5, 0);
+static const arm_feature_set arm_ext_v1 = ARM_FEATURE_CORE_LOW (ARM_EXT_V1);
+static const arm_feature_set arm_ext_v2 = ARM_FEATURE_CORE_LOW (ARM_EXT_V1);
+static const arm_feature_set arm_ext_v2s = ARM_FEATURE_CORE_LOW (ARM_EXT_V2S);
+static const arm_feature_set arm_ext_v3 = ARM_FEATURE_CORE_LOW (ARM_EXT_V3);
+static const arm_feature_set arm_ext_v3m = ARM_FEATURE_CORE_LOW (ARM_EXT_V3M);
+static const arm_feature_set arm_ext_v4 = ARM_FEATURE_CORE_LOW (ARM_EXT_V4);
+static const arm_feature_set arm_ext_v4t = ARM_FEATURE_CORE_LOW (ARM_EXT_V4T);
+static const arm_feature_set arm_ext_v5 = ARM_FEATURE_CORE_LOW (ARM_EXT_V5);
  static const arm_feature_set arm_ext_v4t_5 =
  static const arm_feature_set arm_ext_v4t_5 =
-  ARM_FEATURE (ARM_EXT_V4T | ARM_EXT_V5, 0);
-static const arm_feature_set arm_ext_v5t = ARM_FEATURE (ARM_EXT_V5T, 0);
-static const arm_feature_set arm_ext_v5e = ARM_FEATURE (ARM_EXT_V5E, 0);
-static const arm_feature_set arm_ext_v5exp = ARM_FEATURE (ARM_EXT_V5ExP, 0);
-static const arm_feature_set arm_ext_v5j = ARM_FEATURE (ARM_EXT_V5J, 0);
-static const arm_feature_set arm_ext_v6 = ARM_FEATURE (ARM_EXT_V6, 0);
-static const arm_feature_set arm_ext_v6k = ARM_FEATURE (ARM_EXT_V6K, 0);
-static const arm_feature_set arm_ext_v6t2 = ARM_FEATURE (ARM_EXT_V6T2, 0);
-static const arm_feature_set arm_ext_v6m = ARM_FEATURE (ARM_EXT_V6M, 0);
-static const arm_feature_set arm_ext_v6_notm = ARM_FEATURE (ARM_EXT_V6_NOTM, 0);
-static const arm_feature_set arm_ext_v6_dsp = ARM_FEATURE (ARM_EXT_V6_DSP, 0);
-static const arm_feature_set arm_ext_barrier = ARM_FEATURE (ARM_EXT_BARRIER, 0);
-static const arm_feature_set arm_ext_msr = ARM_FEATURE (ARM_EXT_THUMB_MSR, 0);
-static const arm_feature_set arm_ext_div = ARM_FEATURE (ARM_EXT_DIV, 0);
-static const arm_feature_set arm_ext_v7 = ARM_FEATURE (ARM_EXT_V7, 0);
-static const arm_feature_set arm_ext_v7a = ARM_FEATURE (ARM_EXT_V7A, 0);
-static const arm_feature_set arm_ext_v7r = ARM_FEATURE (ARM_EXT_V7R, 0);
-static const arm_feature_set arm_ext_v7m = ARM_FEATURE (ARM_EXT_V7M, 0);
-static const arm_feature_set arm_ext_v8 = ARM_FEATURE (ARM_EXT_V8, 0);
+  ARM_FEATURE_CORE_LOW (ARM_EXT_V4T | ARM_EXT_V5);
+static const arm_feature_set arm_ext_v5t = ARM_FEATURE_CORE_LOW (ARM_EXT_V5T);
+static const arm_feature_set arm_ext_v5e = ARM_FEATURE_CORE_LOW (ARM_EXT_V5E);
+static const arm_feature_set arm_ext_v5exp = ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP);
+static const arm_feature_set arm_ext_v5j = ARM_FEATURE_CORE_LOW (ARM_EXT_V5J);
+static const arm_feature_set arm_ext_v6 = ARM_FEATURE_CORE_LOW (ARM_EXT_V6);
+static const arm_feature_set arm_ext_v6k = ARM_FEATURE_CORE_LOW (ARM_EXT_V6K);
+static const arm_feature_set arm_ext_v6t2 = ARM_FEATURE_CORE_LOW (ARM_EXT_V6T2);
+static const arm_feature_set arm_ext_v6m = ARM_FEATURE_CORE_LOW (ARM_EXT_V6M);
+static const arm_feature_set arm_ext_v6_notm =
+  ARM_FEATURE_CORE_LOW (ARM_EXT_V6_NOTM);
+static const arm_feature_set arm_ext_v6_dsp =
+  ARM_FEATURE_CORE_LOW (ARM_EXT_V6_DSP);
+static const arm_feature_set arm_ext_barrier =
+  ARM_FEATURE_CORE_LOW (ARM_EXT_BARRIER);
+static const arm_feature_set arm_ext_msr =
+  ARM_FEATURE_CORE_LOW (ARM_EXT_THUMB_MSR);
+static const arm_feature_set arm_ext_div = ARM_FEATURE_CORE_LOW (ARM_EXT_DIV);
+static const arm_feature_set arm_ext_v7 = ARM_FEATURE_CORE_LOW (ARM_EXT_V7);
+static const arm_feature_set arm_ext_v7a = ARM_FEATURE_CORE_LOW (ARM_EXT_V7A);
+static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R);
+static const arm_feature_set arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M);
+static const arm_feature_set arm_ext_v8 = ARM_FEATURE_CORE_LOW (ARM_EXT_V8);
  static const arm_feature_set arm_ext_m =
  static const arm_feature_set arm_ext_m =
-  ARM_FEATURE (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M, 0);
-static const arm_feature_set arm_ext_mp = ARM_FEATURE (ARM_EXT_MP, 0);
-static const arm_feature_set arm_ext_sec = ARM_FEATURE (ARM_EXT_SEC, 0);
-static const arm_feature_set arm_ext_os = ARM_FEATURE (ARM_EXT_OS, 0);
-static const arm_feature_set arm_ext_adiv = ARM_FEATURE (ARM_EXT_ADIV, 0);
-static const arm_feature_set arm_ext_virt = ARM_FEATURE (ARM_EXT_VIRT, 0);
+  ARM_FEATURE_CORE (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M, ARM_EXT2_V8M);
+static const arm_feature_set arm_ext_mp = ARM_FEATURE_CORE_LOW (ARM_EXT_MP);
+static const arm_feature_set arm_ext_sec = ARM_FEATURE_CORE_LOW (ARM_EXT_SEC);
+static const arm_feature_set arm_ext_os = ARM_FEATURE_CORE_LOW (ARM_EXT_OS);
+static const arm_feature_set arm_ext_adiv = ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV);
+static const arm_feature_set arm_ext_virt = ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT);
+static const arm_feature_set arm_ext_pan = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN);
+static const arm_feature_set arm_ext_v8m = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M);
+static const arm_feature_set arm_ext_v6t2_v8m =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_V6T2_V8M);
+/* Instructions shared between ARMv8-A and ARMv8-M.  */
+static const arm_feature_set arm_ext_atomics =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_ATOMICS);
+static const arm_feature_set arm_ext_v8_2 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_2A);
+/* FP16 instructions.  */
+static const arm_feature_set arm_ext_fp16 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
-static const arm_feature_set arm_arch_full = ARM_FEATURE (-1, -1);
+static const arm_feature_set arm_arch_full = ARM_FEATURE (-1, -1, -1);
  static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
  static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
  static const arm_feature_set arm_arch_v6m_only = ARM_ARCH_V6M_ONLY;
  
  static const arm_feature_set arm_cext_iwmmxt2 =
  static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
  static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
  static const arm_feature_set arm_arch_v6m_only = ARM_ARCH_V6M_ONLY;
  
  static const arm_feature_set arm_cext_iwmmxt2 =
-  ARM_FEATURE (0, ARM_CEXT_IWMMXT2);
+  ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2);
  static const arm_feature_set arm_cext_iwmmxt =
  static const arm_feature_set arm_cext_iwmmxt =
-  ARM_FEATURE (0, ARM_CEXT_IWMMXT);
+  ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT);
  static const arm_feature_set arm_cext_xscale =
  static const arm_feature_set arm_cext_xscale =
-  ARM_FEATURE (0, ARM_CEXT_XSCALE);
+  ARM_FEATURE_COPROC (ARM_CEXT_XSCALE);
  static const arm_feature_set arm_cext_maverick =
  static const arm_feature_set arm_cext_maverick =
-  ARM_FEATURE (0, ARM_CEXT_MAVERICK);
-static const arm_feature_set fpu_fpa_ext_v1 = ARM_FEATURE (0, FPU_FPA_EXT_V1);
-static const arm_feature_set fpu_fpa_ext_v2 = ARM_FEATURE (0, FPU_FPA_EXT_V2);
+  ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK);
+static const arm_feature_set fpu_fpa_ext_v1 =
+  ARM_FEATURE_COPROC (FPU_FPA_EXT_V1);
+static const arm_feature_set fpu_fpa_ext_v2 =
+  ARM_FEATURE_COPROC (FPU_FPA_EXT_V2);
  static const arm_feature_set fpu_vfp_ext_v1xd =
  static const arm_feature_set fpu_vfp_ext_v1xd =
-  ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
-static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
-static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
-static const arm_feature_set fpu_vfp_ext_v3xd = ARM_FEATURE (0, FPU_VFP_EXT_V3xD);
-static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_V1xD);
+static const arm_feature_set fpu_vfp_ext_v1 =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_V1);
+static const arm_feature_set fpu_vfp_ext_v2 =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3xd =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_V3xD);
+static const arm_feature_set fpu_vfp_ext_v3 =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_V3);
  static const arm_feature_set fpu_vfp_ext_d32 =
  static const arm_feature_set fpu_vfp_ext_d32 =
-  ARM_FEATURE (0, FPU_VFP_EXT_D32);
-static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_D32);
+static const arm_feature_set fpu_neon_ext_v1 =
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_V1);
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
-  ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
-static const arm_feature_set fpu_vfp_fp16 = ARM_FEATURE (0, FPU_VFP_EXT_FP16);
-static const arm_feature_set fpu_neon_ext_fma = ARM_FEATURE (0, FPU_NEON_EXT_FMA);
-static const arm_feature_set fpu_vfp_ext_fma = ARM_FEATURE (0, FPU_VFP_EXT_FMA);
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
+static const arm_feature_set fpu_vfp_fp16 =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
+static const arm_feature_set fpu_neon_ext_fma =
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA);
+static const arm_feature_set fpu_vfp_ext_fma =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_FMA);
  static const arm_feature_set fpu_vfp_ext_armv8 =
  static const arm_feature_set fpu_vfp_ext_armv8 =
-  ARM_FEATURE (0, FPU_VFP_EXT_ARMV8);
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_ARMV8);
+static const arm_feature_set fpu_vfp_ext_armv8xd =
+  ARM_FEATURE_COPROC (FPU_VFP_EXT_ARMV8xD);
  static const arm_feature_set fpu_neon_ext_armv8 =
  static const arm_feature_set fpu_neon_ext_armv8 =
-  ARM_FEATURE (0, FPU_NEON_EXT_ARMV8);
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8);
  static const arm_feature_set fpu_crypto_ext_armv8 =
  static const arm_feature_set fpu_crypto_ext_armv8 =
-  ARM_FEATURE (0, FPU_CRYPTO_EXT_ARMV8);
+  ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
  static const arm_feature_set crc_ext_armv8 =
  static const arm_feature_set crc_ext_armv8 =
-  ARM_FEATURE (0, CRC_EXT_ARMV8);
+  ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
+static const arm_feature_set fpu_neon_ext_v8_1 =
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
  static arm_feature_set selected_cpu = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
  static arm_feature_set selected_cpu = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
-static char selected_cpu_name[16];
+static char selected_cpu_name[20];
+
+extern FLONUM_TYPE generic_floating_point_number;
  
  /* Return if no cpu was selected on command-line.  */
  static bfd_boolean
  no_cpu_selected (void)
  {
  
  /* Return if no cpu was selected on command-line.  */
  static bfd_boolean
  no_cpu_selected (void)
  {
-  return selected_cpu.core == arm_arch_none.core
-    && selected_cpu.coproc == arm_arch_none.coproc;
+  return ARM_FEATURE_EQUAL (selected_cpu, arm_arch_none);
  }
  
  #ifdef OBJ_ELF
  }
  
  #ifdef OBJ_ELF
@@ -483,7 +516,7 @@ struct asm_barrier_opt
  
  struct reloc_entry
  {
  
  struct reloc_entry
  {
-  char *                    name;
+  const char *                    name;
    bfd_reloc_code_real_type  reloc;
  };
  
    bfd_reloc_code_real_type  reloc;
  };
  
@@ -628,6 +661,7 @@ struct asm_opcode
  #define LITERAL_MASK   0xf000f000
  #define OPCODE_MASK    0xfe1fffff
  #define V4_STR_BIT     0x00000020
  #define LITERAL_MASK   0xf000f000
  #define OPCODE_MASK    0xfe1fffff
  #define V4_STR_BIT     0x00000020
+#define VLDR_VMOV_SAME 0x0040f000
  
  #define T2_SUBS_PC_LR  0xf3de8f00
  
  
  #define T2_SUBS_PC_LR  0xf3de8f00
  
@@ -750,7 +784,8 @@ struct asm_opcode
         _("cannot use register index with PC-relative addressing")
  #define BAD_PC_WRITEBACK \
         _("cannot use writeback with PC-relative addressing")
         _("cannot use register index with PC-relative addressing")
  #define BAD_PC_WRITEBACK \
         _("cannot use writeback with PC-relative addressing")
-#define BAD_RANGE     _("branch out of range")
+#define BAD_RANGE      _("branch out of range")
+#define BAD_FP16       _("selected processor does not support fp16 instruction")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  
  static struct hash_control * arm_ops_hsh;
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  
  static struct hash_control * arm_ops_hsh;
@@ -790,11 +825,21 @@ typedef struct literal_pool
    struct dwarf2_line_info locs [MAX_LITERAL_POOL_SIZE];
  #endif
    struct literal_pool *  next;
    struct dwarf2_line_info locs [MAX_LITERAL_POOL_SIZE];
  #endif
    struct literal_pool *  next;
+  unsigned int          alignment;
  } literal_pool;
  
  /* Pointer to a linked list of literal pools.  */
  literal_pool * list_of_pools = NULL;
  
  } literal_pool;
  
  /* Pointer to a linked list of literal pools.  */
  literal_pool * list_of_pools = NULL;
  
+/* Progress through a CodeComposer Studio .asmfunc/.endasmfunc pair.  */
+typedef enum asmfunc_states
+{
+  /* No .asmfunc is pending; s_ccs_asmfunc moves on to
+     WAITING_ASMFUNC_NAME from here.  */
+  OUTSIDE_ASMFUNC,
+  /* .asmfunc has been seen; tc_start_label_without_colon moves on to
+     WAITING_ENDASMFUNC from here.  */
+  WAITING_ASMFUNC_NAME,
+  /* The function name has been recorded; s_ccs_endasmfunc returns to
+     OUTSIDE_ASMFUNC from here.  */
+  WAITING_ENDASMFUNC
+} asmfunc_states;
+
+static asmfunc_states asmfunc_state = OUTSIDE_ASMFUNC;
+
  #ifdef OBJ_ELF
  #  define now_it seg_info (now_seg)->tc_segment_info_data.current_it
  #else
  #ifdef OBJ_ELF
  #  define now_it seg_info (now_seg)->tc_segment_info_data.current_it
  #else
@@ -853,7 +898,7 @@ static void it_fsm_post_encode (void);
  
  /* This array holds the chars that always start a comment.  If the
     pre-processor is disabled, these aren't very useful.         */
  
  /* This array holds the chars that always start a comment.  If the
     pre-processor is disabled, these aren't very useful.         */
-const char comment_chars[] = "@";
+char arm_comment_chars[] = "@";
  
  /* This array holds the chars that only start a comment at the beginning of
     a line.  If the line seems to have the form '# 123 filename'
  
  /* This array holds the chars that only start a comment at the beginning of
     a line.  If the line seems to have the form '# 123 filename'
@@ -864,7 +909,7 @@ const char comment_chars[] = "@";
  /* Also note that comments like this one will always work.  */
  const char line_comment_chars[] = "#";
  
  /* Also note that comments like this one will always work.  */
  const char line_comment_chars[] = "#";
  
-const char line_separator_chars[] = ";";
+char arm_line_separator_chars[] = ";";
  
  /* Chars that can be used to separate mant
     from exp in floating point numbers. */
  
  /* Chars that can be used to separate mant
     from exp in floating point numbers. */
@@ -1641,7 +1686,7 @@ parse_reg_list (char ** strp)
                  || (in_range = 1, *str++ == '-'));
           str--;
  
                  || (in_range = 1, *str++ == '-'));
           str--;
  
-         if (*str++ != '}')
+         if (skip_past_char (&str, '}') == FAIL)
             {
               first_error (_("missing `}'"));
               return FAIL;
             {
               first_error (_("missing `}'"));
               return FAIL;
@@ -2615,13 +2660,12 @@ static void mapping_state_2 (enum mstate state, int max_chars);
  /* Set the mapping state to STATE.  Only call this when about to
     emit some STATE bytes to the file.  */
  
  /* Set the mapping state to STATE.  Only call this when about to
     emit some STATE bytes to the file.  */
  
+#define TRANSITION(from, to) (mapstate == (from) && state == (to))
  void
  mapping_state (enum mstate state)
  {
    enum mstate mapstate = seg_info (now_seg)->tc_segment_info_data.mapstate;
  
  void
  mapping_state (enum mstate state)
  {
    enum mstate mapstate = seg_info (now_seg)->tc_segment_info_data.mapstate;
  
-#define TRANSITION(from, to) (mapstate == (from) && state == (to))
-
    if (mapstate == state)
      /* The mapping symbol has already been emitted.
         There is nothing else to do.  */
    if (mapstate == state)
      /* The mapping symbol has already been emitted.
         There is nothing else to do.  */
@@ -2644,24 +2688,10 @@ mapping_state (enum mstate state)
      record_alignment (now_seg, state == MAP_ARM ? 2 : 1);
  
    if (TRANSITION (MAP_UNDEFINED, MAP_DATA))
      record_alignment (now_seg, state == MAP_ARM ? 2 : 1);
  
    if (TRANSITION (MAP_UNDEFINED, MAP_DATA))
-    /* This case will be evaluated later in the next else.  */
+    /* This case will be evaluated later.  */
      return;
      return;
-  else if (TRANSITION (MAP_UNDEFINED, MAP_ARM)
-         || TRANSITION (MAP_UNDEFINED, MAP_THUMB))
-    {
-      /* Only add the symbol if the offset is > 0:
-        if we're at the first frag, check it's size > 0;
-        if we're not at the first frag, then for sure
-           the offset is > 0.  */
-      struct frag * const frag_first = seg_info (now_seg)->frchainP->frch_root;
-      const int add_symbol = (frag_now != frag_first) || (frag_now_fix () > 0);
-
-      if (add_symbol)
-       make_mapping_symbol (MAP_DATA, (valueT) 0, frag_first);
-    }
  
    mapping_state_2 (state, 0);
  
    mapping_state_2 (state, 0);
-#undef TRANSITION
  }
  
  /* Same as mapping_state, but MAX_CHARS bytes have already been
  }
  
  /* Same as mapping_state, but MAX_CHARS bytes have already been
@@ -2680,9 +2710,20 @@ mapping_state_2 (enum mstate state, int max_chars)
         There is nothing else to do.  */
      return;
  
         There is nothing else to do.  */
      return;
  
+  if (TRANSITION (MAP_UNDEFINED, MAP_ARM)
+         || TRANSITION (MAP_UNDEFINED, MAP_THUMB))
+    {
+      struct frag * const frag_first = seg_info (now_seg)->frchainP->frch_root;
+      const int add_symbol = (frag_now != frag_first) || (frag_now_fix () > 0);
+
+      if (add_symbol)
+       make_mapping_symbol (MAP_DATA, (valueT) 0, frag_first);
+    }
+
    seg_info (now_seg)->tc_segment_info_data.mapstate = state;
    make_mapping_symbol (state, (valueT) frag_now_fix () - max_chars, frag_now);
  }
    seg_info (now_seg)->tc_segment_info_data.mapstate = state;
    make_mapping_symbol (state, (valueT) frag_now_fix () - max_chars, frag_now);
  }
+#undef TRANSITION
  #else
  #define mapping_state(x) ((void)0)
  #define mapping_state_2(x, y) ((void)0)
  #else
  #define mapping_state(x) ((void)0)
  #define mapping_state_2(x, y) ((void)0)
@@ -2839,10 +2880,9 @@ s_thumb_set (int equiv)
    /* Especial apologies for the random logic:
       This just grew, and could be parsed much more simply!
       Dean - in haste.  */
    /* Especial apologies for the random logic:
       This just grew, and could be parsed much more simply!
       Dean - in haste.  */
-  name     = input_line_pointer;
-  delim            = get_symbol_end ();
+  delim            = get_symbol_name (& name);
    end_name  = input_line_pointer;
    end_name  = input_line_pointer;
-  *end_name = delim;
+  (void) restore_line_pointer (delim);
  
    if (*input_line_pointer != ',')
      {
  
    if (*input_line_pointer != ',')
      {
@@ -2922,8 +2962,7 @@ s_syntax (int unused ATTRIBUTE_UNUSED)
  {
    char *name, delim;
  
  {
    char *name, delim;
  
-  name = input_line_pointer;
-  delim = get_symbol_end ();
+  delim = get_symbol_name (& name);
  
    if (!strcasecmp (name, "unified"))
      unified_syntax = TRUE;
  
    if (!strcasecmp (name, "unified"))
      unified_syntax = TRUE;
@@ -2934,59 +2973,12 @@ s_syntax (int unused ATTRIBUTE_UNUSED)
        as_bad (_("unrecognized syntax mode \"%s\""), name);
        return;
      }
        as_bad (_("unrecognized syntax mode \"%s\""), name);
        return;
      }
-  *input_line_pointer = delim;
+  (void) restore_line_pointer (delim);
    demand_empty_rest_of_line ();
  }
  
  /* Directives: sectioning and alignment.  */
  
    demand_empty_rest_of_line ();
  }
  
  /* Directives: sectioning and alignment.  */
  
-/* Same as s_align_ptwo but align 0 => align 2.         */
-
-static void
-s_align (int unused ATTRIBUTE_UNUSED)
-{
-  int temp;
-  bfd_boolean fill_p;
-  long temp_fill;
-  long max_alignment = 15;
-
-  temp = get_absolute_expression ();
-  if (temp > max_alignment)
-    as_bad (_("alignment too large: %d assumed"), temp = max_alignment);
-  else if (temp < 0)
-    {
-      as_bad (_("alignment negative. 0 assumed."));
-      temp = 0;
-    }
-
-  if (*input_line_pointer == ',')
-    {
-      input_line_pointer++;
-      temp_fill = get_absolute_expression ();
-      fill_p = TRUE;
-    }
-  else
-    {
-      fill_p = FALSE;
-      temp_fill = 0;
-    }
-
-  if (!temp)
-    temp = 2;
-
-  /* Only make a frag if we HAVE to.  */
-  if (temp && !need_pass_2)
-    {
-      if (!fill_p && subseg_text_p (now_seg))
-       frag_align_code (temp, 0);
-      else
-       frag_align (temp, (int) temp_fill, 0);
-    }
-  demand_empty_rest_of_line ();
-
-  record_alignment (now_seg, temp);
-}
-
  static void
  s_bss (int ignore ATTRIBUTE_UNUSED)
  {
  static void
  s_bss (int ignore ATTRIBUTE_UNUSED)
  {
@@ -3012,6 +3004,104 @@ s_even (int ignore ATTRIBUTE_UNUSED)
    demand_empty_rest_of_line ();
  }
  
    demand_empty_rest_of_line ();
  }
  
+/* Directives: CodeComposer Studio.  */
+
+/* Handler for the ".ref" directive.  With CodeComposer Studio syntax
+   enabled the remainder of the line is skipped; otherwise the
+   directive is reported as an error.  */
+static void
+s_ccs_ref (int unused ATTRIBUTE_UNUSED)
+{
+  if (!codecomposer_syntax)
+    {
+      as_bad (_(".ref pseudo-op only available with -mccs flag."));
+      return;
+    }
+
+  ignore_rest_of_line ();
+}
+
+/* Report the start or the end of an assembly function to the debug
+   output.  A non-NULL NAME marks the beginning of a function, whereas
+   a NULL NAME marks the end of the function most recently begun.
+   Stabs records are generated only when debug_type is DEBUG_STABS.
+   NOTE(review): NAME is remembered by pointer, not copied, and is read
+   again by the matching end call; confirm that the caller's string
+   outlives that call.  */
+static void
+asmfunc_debug (const char * name)
+{
+  /* Name given by the last begin call that has not been ended yet.  */
+  static const char * open_name = NULL;
+
+  if (name == NULL)
+    {
+      /* End of function: there must be one in progress.  */
+      gas_assert (open_name != NULL);
+
+      if (debug_type == DEBUG_STABS)
+        stabs_generate_asm_endfunc (open_name, open_name);
+
+      open_name = NULL;
+      return;
+    }
+
+  /* Start of function: nesting is not allowed.  */
+  gas_assert (open_name == NULL);
+  open_name = name;
+
+  if (debug_type == DEBUG_STABS)
+    stabs_generate_asm_func (name, name);
+}
+
+/* Handler for the ".asmfunc" directive.  Without CodeComposer Studio
+   syntax the directive is rejected.  Otherwise, when no .asmfunc is
+   pending it starts waiting for the function name, and in any other
+   state it reports an error; the rest of the line must be empty.  */
+static void
+s_ccs_asmfunc (int unused ATTRIBUTE_UNUSED)
+{
+  if (!codecomposer_syntax)
+    {
+      as_bad (_(".asmfunc pseudo-op only available with -mccs flag."));
+      return;
+    }
+
+  if (asmfunc_state == OUTSIDE_ASMFUNC)
+    asmfunc_state = WAITING_ASMFUNC_NAME;
+  else if (asmfunc_state == WAITING_ASMFUNC_NAME)
+    as_bad (_(".asmfunc repeated."));
+  else if (asmfunc_state == WAITING_ENDASMFUNC)
+    as_bad (_(".asmfunc without function."));
+
+  demand_empty_rest_of_line ();
+}
+
+/* Handler for the ".endasmfunc" directive.  Without CodeComposer
+   Studio syntax the directive is rejected.  Otherwise it closes the
+   function that is currently open, or reports an error when there is
+   none; the rest of the line must be empty.  */
+static void
+s_ccs_endasmfunc (int unused ATTRIBUTE_UNUSED)
+{
+  if (!codecomposer_syntax)
+    {
+      as_bad (_(".endasmfunc pseudo-op only available with -mccs flag."));
+      return;
+    }
+
+  if (asmfunc_state == WAITING_ENDASMFUNC)
+    {
+      /* Leave the function, then emit its end-of-function record.  */
+      asmfunc_state = OUTSIDE_ASMFUNC;
+      asmfunc_debug (NULL);
+    }
+  else if (asmfunc_state == WAITING_ASMFUNC_NAME)
+    as_bad (_(".endasmfunc without function."));
+  else if (asmfunc_state == OUTSIDE_ASMFUNC)
+    as_bad (_(".endasmfunc without a .asmfunc."));
+
+  demand_empty_rest_of_line ();
+}
+
+/* Handler for the ".def" directive: forwards NAME to s_globl when
+   CodeComposer Studio syntax is enabled and reports an error
+   otherwise.  */
+static void
+s_ccs_def (int name)
+{
+  if (!codecomposer_syntax)
+    {
+      as_bad (_(".def pseudo-op only available with -mccs flag."));
+      return;
+    }
+
+  s_globl (name);
+}
+
  /* Directives: Literal pools.  */
  
  static literal_pool *
  /* Directives: Literal pools.  */
  
  static literal_pool *
@@ -3050,6 +3140,7 @@ find_or_make_literal_pool (void)
        pool->sub_section            = now_subseg;
        pool->next           = list_of_pools;
        pool->symbol         = NULL;
        pool->sub_section            = now_subseg;
        pool->next           = list_of_pools;
        pool->symbol         = NULL;
+      pool->alignment      = 2;
  
        /* Add it to the list.  */
        list_of_pools = pool;
  
        /* Add it to the list.  */
        list_of_pools = pool;
@@ -3071,33 +3162,74 @@ find_or_make_literal_pool (void)
     structure to the relevant literal pool.  */
  
  static int
     structure to the relevant literal pool.  */
  
  static int
-add_to_lit_pool (void)
+add_to_lit_pool (unsigned int nbytes)
  {
  {
+#define PADDING_SLOT 0x1
+#define LIT_ENTRY_SIZE_MASK 0xFF
    literal_pool * pool;
    literal_pool * pool;
-  unsigned int entry;
+  unsigned int entry, pool_size = 0;
+  bfd_boolean padding_slot_p = FALSE;
+  unsigned imm1 = 0;
+  unsigned imm2 = 0;
+
+  if (nbytes == 8)
+    {
+      imm1 = inst.operands[1].imm;
+      imm2 = (inst.operands[1].regisimm ? inst.operands[1].reg
+              : inst.reloc.exp.X_unsigned ? 0
+              : ((bfd_int64_t) inst.operands[1].imm) >> 32);
+      if (target_big_endian)
+       {
+         imm1 = imm2;
+         imm2 = inst.operands[1].imm;
+       }
+    }
  
    pool = find_or_make_literal_pool ();
  
    /* Check if this literal value is already in the pool.  */
    for (entry = 0; entry < pool->next_free_entry; entry ++)
      {
  
    pool = find_or_make_literal_pool ();
  
    /* Check if this literal value is already in the pool.  */
    for (entry = 0; entry < pool->next_free_entry; entry ++)
      {
-      if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-         && (inst.reloc.exp.X_op == O_constant)
-         && (pool->literals[entry].X_add_number
-             == inst.reloc.exp.X_add_number)
-         && (pool->literals[entry].X_unsigned
-             == inst.reloc.exp.X_unsigned))
+      if (nbytes == 4)
+       {
+         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
+             && (inst.reloc.exp.X_op == O_constant)
+             && (pool->literals[entry].X_add_number
+                 == inst.reloc.exp.X_add_number)
+             && (pool->literals[entry].X_md == nbytes)
+             && (pool->literals[entry].X_unsigned
+                 == inst.reloc.exp.X_unsigned))
+           break;
+
+         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
+             && (inst.reloc.exp.X_op == O_symbol)
+             && (pool->literals[entry].X_add_number
+                 == inst.reloc.exp.X_add_number)
+             && (pool->literals[entry].X_add_symbol
+                 == inst.reloc.exp.X_add_symbol)
+             && (pool->literals[entry].X_op_symbol
+                 == inst.reloc.exp.X_op_symbol)
+             && (pool->literals[entry].X_md == nbytes))
+           break;
+       }
+      else if ((nbytes == 8)
+              && !(pool_size & 0x7)
+              && ((entry + 1) != pool->next_free_entry)
+              && (pool->literals[entry].X_op == O_constant)
+              && (pool->literals[entry].X_add_number == (offsetT) imm1)
+              && (pool->literals[entry].X_unsigned
+                  == inst.reloc.exp.X_unsigned)
+              && (pool->literals[entry + 1].X_op == O_constant)
+              && (pool->literals[entry + 1].X_add_number == (offsetT) imm2)
+              && (pool->literals[entry + 1].X_unsigned
+                  == inst.reloc.exp.X_unsigned))
         break;
  
         break;
  
-      if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-         && (inst.reloc.exp.X_op == O_symbol)
-         && (pool->literals[entry].X_add_number
-             == inst.reloc.exp.X_add_number)
-         && (pool->literals[entry].X_add_symbol
-             == inst.reloc.exp.X_add_symbol)
-         && (pool->literals[entry].X_op_symbol
-             == inst.reloc.exp.X_op_symbol))
+      padding_slot_p = ((pool->literals[entry].X_md >> 8) == PADDING_SLOT);
+      if (padding_slot_p && (nbytes == 4))
         break;
         break;
+
+      pool_size += 4;
      }
  
    /* Do we need to create a new entry? */
      }
  
    /* Do we need to create a new entry? */
@@ -3109,7 +3241,64 @@ add_to_lit_pool (void)
           return FAIL;
         }
  
           return FAIL;
         }
  
-      pool->literals[entry] = inst.reloc.exp;
+      if (nbytes == 8)
+       {
+         /* For 8-byte entries, we align to an 8-byte boundary,
+            and split it into two 4-byte entries, because on 32-bit
+            host, 8-byte constants are treated as big num, thus
+            saved in "generic_bignum" which will be overwritten
+            by later assignments.
+
+            We also need to make sure there is enough space for
+            the split.
+
+            We also check to make sure the literal operand is a
+            constant number.  */
+         if (!(inst.reloc.exp.X_op == O_constant
+               || inst.reloc.exp.X_op == O_big))
+           {
+             inst.error = _("invalid type for literal pool");
+             return FAIL;
+           }
+         else if (pool_size & 0x7)
+           {
+             if ((entry + 2) >= MAX_LITERAL_POOL_SIZE)
+               {
+                 inst.error = _("literal pool overflow");
+                 return FAIL;
+               }
+
+             pool->literals[entry] = inst.reloc.exp;
+             pool->literals[entry].X_add_number = 0;
+             pool->literals[entry++].X_md = (PADDING_SLOT << 8) | 4;
+             pool->next_free_entry += 1;
+             pool_size += 4;
+           }
+         else if ((entry + 1) >= MAX_LITERAL_POOL_SIZE)
+           {
+             inst.error = _("literal pool overflow");
+             return FAIL;
+           }
+
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_op = O_constant;
+         pool->literals[entry].X_add_number = imm1;
+         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry++].X_md = 4;
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_op = O_constant;
+         pool->literals[entry].X_add_number = imm2;
+         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry].X_md = 4;
+         pool->alignment = 3;
+         pool->next_free_entry += 1;
+       }
+      else
+       {
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_md = 4;
+       }
+
  #ifdef OBJ_ELF
        /* PR ld/12974: Record the location of the first source line to reference
          this entry in the literal pool.  If it turns out during linking that the
  #ifdef OBJ_ELF
        /* PR ld/12974: Record the location of the first source line to reference
          this entry in the literal pool.  If it turns out during linking that the
@@ -3120,14 +3309,45 @@ add_to_lit_pool (void)
  #endif
        pool->next_free_entry += 1;
      }
  #endif
        pool->next_free_entry += 1;
      }
+  else if (padding_slot_p)
+    {
+      pool->literals[entry] = inst.reloc.exp;
+      pool->literals[entry].X_md = nbytes;
+    }
  
    inst.reloc.exp.X_op        = O_symbol;
  
    inst.reloc.exp.X_op        = O_symbol;
-  inst.reloc.exp.X_add_number = ((int) entry) * 4;
+  inst.reloc.exp.X_add_number = pool_size;
    inst.reloc.exp.X_add_symbol = pool->symbol;
  
    return SUCCESS;
  }
  
    inst.reloc.exp.X_add_symbol = pool->symbol;
  
    return SUCCESS;
  }
  
+/* Label check used for CodeComposer Studio syntax.
+
+   When codecomposer_syntax is set and asmfunc_state is
+   WAITING_ASMFUNC_NAME, walk back from input_line_pointer to the start
+   of the current line and treat the text found there as the label.  A
+   label beginning with '.' is reported with as_bad.  The label is then
+   handed to asmfunc_debug and asmfunc_state advances to
+   WAITING_ENDASMFUNC.
+
+   Returns FALSE only when the label was rejected, TRUE otherwise
+   (including when no check was performed).  */
+bfd_boolean
+tc_start_label_without_colon (void)
+{
+  bfd_boolean ret = TRUE;
+
+  if (codecomposer_syntax && asmfunc_state == WAITING_ASMFUNC_NAME)
+    {
+      const char *label = input_line_pointer;
+
+      /* Index the table through unsigned char: plain char may be signed,
+        and a byte >= 0x80 would otherwise produce a negative index.  */
+      while (!is_end_of_line[(unsigned char) label[-1]])
+       --label;
+
+      if (*label == '.')
+       {
+         as_bad (_("Invalid label '%s'"), label);
+         ret = FALSE;
+       }
+
+      asmfunc_debug (label);
+
+      asmfunc_state = WAITING_ENDASMFUNC;
+    }
+
+  return ret;
+}
+
  /* Can't use symbol_new here, so have to create a symbol and then at
     a later date assign it a value.  That's what these functions do.  */
  
  /* Can't use symbol_new here, so have to create a symbol and then at
     a later date assign it a value.  That's what these functions do.  */
  
@@ -3138,7 +3358,7 @@ symbol_locate (symbolS *    symbolP,
                valueT       valu,       /* Symbol value.  */
                fragS *      frag)       /* Associated fragment.  */
  {
                valueT       valu,       /* Symbol value.  */
                fragS *      frag)       /* Associated fragment.  */
  {
-  unsigned int name_length;
+  size_t name_length;
    char * preserved_copy_of_name;
  
    name_length = strlen (name) + 1;   /* +1 for \0.  */
    char * preserved_copy_of_name;
  
    name_length = strlen (name) + 1;   /* +1 for \0.  */
@@ -3179,7 +3399,6 @@ symbol_locate (symbolS *    symbolP,
  #endif /* DEBUG_SYMS  */
  }
  
  #endif /* DEBUG_SYMS  */
  }
  
-
  static void
  s_ltorg (int ignored ATTRIBUTE_UNUSED)
  {
  static void
  s_ltorg (int ignored ATTRIBUTE_UNUSED)
  {
@@ -3193,15 +3412,17 @@ s_ltorg (int ignored ATTRIBUTE_UNUSED)
        || pool->next_free_entry == 0)
      return;
  
        || pool->next_free_entry == 0)
      return;
  
-  mapping_state (MAP_DATA);
-
    /* Align pool as you have word accesses.
       Only make a frag if we have to.  */
    if (!need_pass_2)
    /* Align pool as you have word accesses.
       Only make a frag if we have to.  */
    if (!need_pass_2)
-    frag_align (2, 0, 0);
+    frag_align (pool->alignment, 0, 0);
  
    record_alignment (now_seg, 2);
  
  
    record_alignment (now_seg, 2);
  
+#ifdef OBJ_ELF
+  seg_info (now_seg)->tc_segment_info_data.mapstate = MAP_DATA;
+  make_mapping_symbol (MAP_DATA, (valueT) frag_now_fix (), frag_now);
+#endif
    sprintf (sym_name, "$$lit_\002%x", pool->id);
  
    symbol_locate (pool->symbol, sym_name, now_seg,
    sprintf (sym_name, "$$lit_\002%x", pool->id);
  
    symbol_locate (pool->symbol, sym_name, now_seg,
@@ -3221,7 +3442,8 @@ s_ltorg (int ignored ATTRIBUTE_UNUSED)
         dwarf2_gen_line_info (frag_now_fix (), pool->locs + entry);
  #endif
        /* First output the expression in the instruction to the pool.  */
         dwarf2_gen_line_info (frag_now_fix (), pool->locs + entry);
  #endif
        /* First output the expression in the instruction to the pool.  */
-      emit_expr (&(pool->literals[entry]), 4); /* .word  */
+      emit_expr (&(pool->literals[entry]),
+                pool->literals[entry].X_md & LIT_ENTRY_SIZE_MASK);
      }
  
    /* Mark the pool as empty.  */
      }
  
    /* Mark the pool as empty.  */
@@ -3316,7 +3538,8 @@ s_arm_elf_cons (int nbytes)
                   memcpy (base, save_buf, p - base);
  
                   offset = nbytes - size;
                   memcpy (base, save_buf, p - base);
  
                   offset = nbytes - size;
-                 p = frag_more ((int) nbytes);
+                 p = frag_more (nbytes);
+                 memset (p, 0, nbytes);
                   fix_new_exp (frag_now, p - frag_now->fr_literal + offset,
                                size, &exp, 0, (enum bfd_reloc_code_real) reloc);
                 }
                   fix_new_exp (frag_now, p - frag_now->fr_literal + offset,
                                size, &exp, 0, (enum bfd_reloc_code_real) reloc);
                 }
@@ -3667,9 +3890,10 @@ s_arm_unwind_personality (int ignored ATTRIBUTE_UNUSED)
    if (unwind.personality_routine || unwind.personality_index != -1)
      as_bad (_("duplicate .personality directive"));
  
    if (unwind.personality_routine || unwind.personality_index != -1)
      as_bad (_("duplicate .personality directive"));
  
-  name = input_line_pointer;
-  c = get_symbol_end ();
+  c = get_symbol_name (& name);
    p = input_line_pointer;
    p = input_line_pointer;
+  if (c == '"')
+    ++ input_line_pointer;
    unwind.personality_routine = symbol_find_or_make (name);
    *p = c;
    demand_empty_rest_of_line ();
    unwind.personality_routine = symbol_find_or_make (name);
    *p = c;
    demand_empty_rest_of_line ();
@@ -3935,8 +4159,7 @@ s_arm_unwind_save_mmxwr (void)
      }
    while (skip_past_comma (&input_line_pointer) != FAIL);
  
      }
    while (skip_past_comma (&input_line_pointer) != FAIL);
  
-  if (*input_line_pointer == '}')
-    input_line_pointer++;
+  skip_past_char (&input_line_pointer, '}');
  
    demand_empty_rest_of_line ();
  
  
    demand_empty_rest_of_line ();
  
@@ -4070,8 +4293,7 @@ s_arm_unwind_save_mmxwcg (void)
      }
    while (skip_past_comma (&input_line_pointer) != FAIL);
  
      }
    while (skip_past_comma (&input_line_pointer) != FAIL);
  
-  if (*input_line_pointer == '}')
-    input_line_pointer++;
+  skip_past_char (&input_line_pointer, '}');
  
    demand_empty_rest_of_line ();
  
  
    demand_empty_rest_of_line ();
  
@@ -4136,15 +4358,24 @@ s_arm_unwind_save (int arch_v6)
        s_arm_unwind_save_fpa (reg->number);
        return;
  
        s_arm_unwind_save_fpa (reg->number);
        return;
  
-    case REG_TYPE_RN:    s_arm_unwind_save_core ();   return;
+    case REG_TYPE_RN:
+      s_arm_unwind_save_core ();
+      return;
+
      case REG_TYPE_VFD:
        if (arch_v6)
         s_arm_unwind_save_vfp_armv6 ();
        else
         s_arm_unwind_save_vfp ();
        return;
      case REG_TYPE_VFD:
        if (arch_v6)
         s_arm_unwind_save_vfp_armv6 ();
        else
         s_arm_unwind_save_vfp ();
        return;
-    case REG_TYPE_MMXWR:  s_arm_unwind_save_mmxwr ();  return;
-    case REG_TYPE_MMXWCG: s_arm_unwind_save_mmxwcg (); return;
+
+    case REG_TYPE_MMXWR:
+      s_arm_unwind_save_mmxwr ();
+      return;
+
+    case REG_TYPE_MMXWCG:
+      s_arm_unwind_save_mmxwcg ();
+      return;
  
      default:
        as_bad (_(".unwind_save does not support this kind of register"));
  
      default:
        as_bad (_(".unwind_save does not support this kind of register"));
@@ -4424,7 +4655,7 @@ const pseudo_typeS md_pseudo_table[] =
    { "qn",          s_qn,          0 },
    { "unreq",      s_unreq,       0 },
    { "bss",        s_bss,         0 },
    { "qn",          s_qn,          0 },
    { "unreq",      s_unreq,       0 },
    { "bss",        s_bss,         0 },
-  { "align",      s_align,       0 },
+  { "align",      s_align_ptwo,  2 },
    { "arm",        s_arm,         0 },
    { "thumb",      s_thumb,       0 },
    { "code",       s_code,        0 },
    { "arm",        s_arm,         0 },
    { "thumb",      s_thumb,       0 },
    { "code",       s_code,        0 },
@@ -4479,6 +4710,13 @@ const pseudo_typeS md_pseudo_table[] =
  #ifdef TE_PE
    {"secrel32", pe_directive_secrel, 0},
  #endif
  #ifdef TE_PE
    {"secrel32", pe_directive_secrel, 0},
  #endif
+
+  /* These are for compatibility with CodeComposer Studio.  */
+  {"ref",          s_ccs_ref,        0},
+  {"def",          s_ccs_def,        0},
+  {"asmfunc",      s_ccs_asmfunc,    0},
+  {"endasmfunc",   s_ccs_endasmfunc, 0},
+
    { 0, 0, 0 }
  };
  \f
    { 0, 0, 0 }
  };
  \f
@@ -4517,28 +4755,31 @@ parse_immediate (char **str, int *val, int min, int max,
     instructions. Puts the result directly in inst.operands[i].  */
  
  static int
     instructions. Puts the result directly in inst.operands[i].  */
  
  static int
-parse_big_immediate (char **str, int i)
+parse_big_immediate (char **str, int i, expressionS *in_exp,
+                    bfd_boolean allow_symbol_p)
  {
    expressionS exp;
  {
    expressionS exp;
+  expressionS *exp_p = in_exp ? in_exp : &exp;
    char *ptr = *str;
  
    char *ptr = *str;
  
-  my_get_expression (&exp, &ptr, GE_OPT_PREFIX_BIG);
+  my_get_expression (exp_p, &ptr, GE_OPT_PREFIX_BIG);
  
  
-  if (exp.X_op == O_constant)
+  if (exp_p->X_op == O_constant)
      {
      {
-      inst.operands[i].imm = exp.X_add_number & 0xffffffff;
+      inst.operands[i].imm = exp_p->X_add_number & 0xffffffff;
        /* If we're on a 64-bit host, then a 64-bit number can be returned using
          O_constant.  We have to be careful not to break compilation for
          32-bit X_add_number, though.  */
        /* If we're on a 64-bit host, then a 64-bit number can be returned using
          O_constant.  We have to be careful not to break compilation for
          32-bit X_add_number, though.  */
-      if ((exp.X_add_number & ~(offsetT)(0xffffffffU)) != 0)
+      if ((exp_p->X_add_number & ~(offsetT)(0xffffffffU)) != 0)
         {
         {
-         /* X >> 32 is illegal if sizeof (exp.X_add_number) == 4.  */
-         inst.operands[i].reg = ((exp.X_add_number >> 16) >> 16) & 0xffffffff;
+         /* X >> 32 is illegal if sizeof (exp_p->X_add_number) == 4.  */
+         inst.operands[i].reg = (((exp_p->X_add_number >> 16) >> 16)
+                                 & 0xffffffff);
           inst.operands[i].regisimm = 1;
         }
      }
           inst.operands[i].regisimm = 1;
         }
      }
-  else if (exp.X_op == O_big
-          && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 32)
+  else if (exp_p->X_op == O_big
+          && LITTLENUM_NUMBER_OF_BITS * exp_p->X_add_number > 32)
      {
        unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0;
  
      {
        unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0;
  
@@ -4551,7 +4792,7 @@ parse_big_immediate (char **str, int i)
          PR 11972: Bignums can now be sign-extended to the
          size of a .octa so check that the out of range bits
          are all zero or all one.  */
          PR 11972: Bignums can now be sign-extended to the
          size of a .octa so check that the out of range bits
          are all zero or all one.  */
-      if (LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 64)
+      if (LITTLENUM_NUMBER_OF_BITS * exp_p->X_add_number > 64)
         {
           LITTLENUM_TYPE m = -1;
  
         {
           LITTLENUM_TYPE m = -1;
  
@@ -4559,7 +4800,7 @@ parse_big_immediate (char **str, int i)
               && generic_bignum[parts * 2] != m)
             return FAIL;
  
               && generic_bignum[parts * 2] != m)
             return FAIL;
  
-         for (j = parts * 2 + 1; j < (unsigned) exp.X_add_number; j++)
+         for (j = parts * 2 + 1; j < (unsigned) exp_p->X_add_number; j++)
             if (generic_bignum[j] != generic_bignum[j-1])
               return FAIL;
         }
             if (generic_bignum[j] != generic_bignum[j-1])
               return FAIL;
         }
@@ -4574,7 +4815,7 @@ parse_big_immediate (char **str, int i)
                                 << (LITTLENUM_NUMBER_OF_BITS * j);
        inst.operands[i].regisimm = 1;
      }
                                 << (LITTLENUM_NUMBER_OF_BITS * j);
        inst.operands[i].regisimm = 1;
      }
-  else
+  else if (!(exp_p->X_op == O_symbol && allow_symbol_p))
      return FAIL;
  
    *str = ptr;
      return FAIL;
  
    *str = ptr;
@@ -4647,7 +4888,9 @@ parse_fpa_immediate (char ** str)
      {
        /* FIXME: 5 = X_PRECISION, should be #define'd where we can use it.
          Ditto for 15.  */
      {
        /* FIXME: 5 = X_PRECISION, should be #define'd where we can use it.
          Ditto for 15.  */
-      if (gen_to_words (words, 5, (long) 15) == 0)
+#define X_PRECISION 5
+#define E_PRECISION 15L
+      if (gen_to_words (words, X_PRECISION, E_PRECISION) == 0)
         {
           for (i = 0; i < NUM_FLOAT_VALS; i++)
             {
         {
           for (i = 0; i < NUM_FLOAT_VALS; i++)
             {
@@ -4683,6 +4926,41 @@ is_quarter_float (unsigned imm)
    return (imm & 0x7ffff) == 0 && ((imm & 0x7e000000) ^ bs) == 0;
  }
  
    return (imm & 0x7ffff) == 0 && ((imm & 0x7e000000) ^ bs) == 0;
  }
  
+
+/* Recognise an immediate zero operand at *IN, written either as an
+   integer (#0, #0x0) or as a floating point constant (#0.0).
+
+   Returns TRUE when such a zero was parsed and FALSE otherwise.  *IN is
+   advanced past the immediate prefix as soon as one is seen, even if
+   the rest of the operand is then rejected.  */
+
+static bfd_boolean
+parse_ifimm_zero (char **in)
+{
+  int val;
+
+  if (!is_immediate_prefix (**in))
+    return FALSE;
+
+  ++*in;
+
+  /* Hexadecimal spelling: parse_immediate is asked for the range 0..0,
+     so only #0x0 and its synonyms get through.  */
+  if (strncmp (*in, "0x", 2) == 0)
+    return parse_immediate (in, &val, 0, 0, TRUE) != FAIL;
+
+  /* Everything else goes through the generic floating point reader.  */
+  if (atof_generic (in, ".", EXP_CHARS, &generic_floating_point_number) != 0)
+    return FALSE;
+
+  /* Accept only a '+' signed number whose LOW pointer lies above its
+     LEADER pointer.  */
+  return (generic_floating_point_number.sign == '+'
+         && (generic_floating_point_number.low
+             > generic_floating_point_number.leader));
+}
+
  /* Parse an 8-bit "quarter-precision" floating point number of the form:
     0baBbbbbbc defgh000 00000000 00000000.
     The zero and minus-zero cases need special handling, since they can't be
  /* Parse an 8-bit "quarter-precision" floating point number of the form:
     0baBbbbbbc defgh000 00000000 00000000.
     The zero and minus-zero cases need special handling, since they can't be
@@ -5007,7 +5285,28 @@ static struct group_reloc_table_entry group_reloc_table[] =
        BFD_RELOC_ARM_ALU_SB_G2,         /* ALU */
        BFD_RELOC_ARM_LDR_SB_G2,         /* LDR */
        BFD_RELOC_ARM_LDRS_SB_G2,                /* LDRS */
        BFD_RELOC_ARM_ALU_SB_G2,         /* ALU */
        BFD_RELOC_ARM_LDR_SB_G2,         /* LDR */
        BFD_RELOC_ARM_LDRS_SB_G2,                /* LDRS */
-      BFD_RELOC_ARM_LDC_SB_G2 }        };      /* LDC */
+      BFD_RELOC_ARM_LDC_SB_G2 },       /* LDC */
+    /* Absolute thumb alu relocations.  */
+    { "lower0_7",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "lower8_15",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "upper0_7",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 },                             /* LDC.  */
+    { "upper8_15",
+      BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC,/* ALU.  */
+      0,                               /* LDR.  */
+      0,                               /* LDRS.  */
+      0 } };                           /* LDC.  */
  
  /* Given the address of a pointer pointing to the textual name of a group
     relocation as may appear in assembler source, attempt to find its details
  
  /* Given the address of a pointer pointing to the textual name of a group
     relocation as may appear in assembler source, attempt to find its details
@@ -5167,10 +5466,12 @@ parse_address_main (char **str, int i, int group_relocations,
           inst.operands[i].reg = REG_PC;
           inst.operands[i].isreg = 1;
           inst.operands[i].preind = 1;
           inst.operands[i].reg = REG_PC;
           inst.operands[i].isreg = 1;
           inst.operands[i].preind = 1;
-       }
-      /* Otherwise a load-constant pseudo op, no special treatment needed here.  */
  
  
-      if (my_get_expression (&inst.reloc.exp, &p, GE_NO_PREFIX))
+         if (my_get_expression (&inst.reloc.exp, &p, GE_OPT_PREFIX_BIG))
+           return PARSE_OPERAND_FAIL;
+       }
+      else if (parse_big_immediate (&p, i, &inst.reloc.exp,
+                                   /*allow_symbol_p=*/TRUE))
         return PARSE_OPERAND_FAIL;
  
        *str = p;
         return PARSE_OPERAND_FAIL;
  
        *str = p;
@@ -5471,7 +5772,7 @@ parse_psr (char **str, bfd_boolean lhs)
    /* PR gas/12698:  If the user has specified -march=all then m_profile will
       be TRUE, but we want to ignore it in this case as we are building for any
       CPU type, including non-m variants.  */
    /* PR gas/12698:  If the user has specified -march=all then m_profile will
       be TRUE, but we want to ignore it in this case as we are building for any
       CPU type, including non-m variants.  */
-  if (selected_cpu.core == arm_arch_any.core)
+  if (ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any))
      m_profile = FALSE;
  
    /* CPSR's and SPSR's can now be lowercase.  This is just a convenience
      m_profile = FALSE;
  
    /* CPSR's and SPSR's can now be lowercase.  This is just a convenience
@@ -5784,6 +6085,16 @@ parse_cond (char **str)
    return c->value;
  }
  
    return c->value;
  }
  
+/* Note that FEATURE has been used: merge it into arm_arch_used when
+   thumb_mode is clear, and into thumb_arch_used otherwise.  */
+static void
+record_feature_use (const arm_feature_set *feature)
+{
+  if (!thumb_mode)
+    {
+      ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+      return;
+    }
+
+  ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
+}
+
  /* If the given feature is available in the selected CPU, mark it as used.
     Returns TRUE iff the feature is available.  */
  static bfd_boolean
  /* If the given feature is available in the selected CPU, mark it as used.
     Returns TRUE iff the feature is available.  */
  static bfd_boolean
@@ -5795,10 +6106,7 @@ mark_feature_used (const arm_feature_set *feature)
  
    /* Add the appropriate architecture feature for the barrier option used.
       */
  
    /* Add the appropriate architecture feature for the barrier option used.
       */
-  if (thumb_mode)
-    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
-  else
-    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+  record_feature_use (feature);
  
    return TRUE;
  }
  
    return TRUE;
  }
@@ -6000,7 +6308,8 @@ parse_neon_mov (char **str, int *which_operand)
              Case 10: VMOV.F32 <Sd>, #<imm>
              Case 11: VMOV.F64 <Dd>, #<imm>  */
         inst.operands[i].immisfloat = 1;
              Case 10: VMOV.F32 <Sd>, #<imm>
              Case 11: VMOV.F64 <Dd>, #<imm>  */
         inst.operands[i].immisfloat = 1;
-      else if (parse_big_immediate (&ptr, i) == SUCCESS)
+      else if (parse_big_immediate (&ptr, i, NULL, /*allow_symbol_p=*/FALSE)
+              == SUCCESS)
           /* Case 2: VMOV<c><q>.<dt> <Qd>, #<imm>
              Case 3: VMOV<c><q>.<dt> <Dd>, #<imm>  */
         ;
           /* Case 2: VMOV<c><q>.<dt> <Qd>, #<imm>
              Case 3: VMOV<c><q>.<dt> <Dd>, #<imm>  */
         ;
@@ -6153,6 +6462,7 @@ enum operand_parse_code
  
    OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
    OP_RVSD_I0,  /* VFP S or D reg, or immediate zero.  */
  
    OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
    OP_RVSD_I0,  /* VFP S or D reg, or immediate zero.  */
+  OP_RSVD_FI0, /* VFP S or D reg, or floating point immediate zero.  */
    OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
    OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
@@ -6436,6 +6746,22 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           po_reg_or_goto (REG_TYPE_VFSD, try_imm0);
           break;
  
           po_reg_or_goto (REG_TYPE_VFSD, try_imm0);
           break;
  
+       case OP_RSVD_FI0:
+         {
+           po_reg_or_goto (REG_TYPE_VFSD, try_ifimm0);
+           break;
+           try_ifimm0:
+           if (parse_ifimm_zero (&str))
+             inst.operands[i].imm = 0;
+           else
+           {
+             inst.error
+               = _("only floating point zero is allowed as immediate value");
+             goto failure;
+           }
+         }
+         break;
+
         case OP_RR_RNSC:
           {
             po_scalar_or_goto (8, try_rr);
         case OP_RR_RNSC:
           {
             po_scalar_or_goto (8, try_rr);
@@ -6485,7 +6811,8 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
             try_immbig:
             /* There's a possibility of getting a 64-bit immediate here, so
                we need special handling.  */
             try_immbig:
             /* There's a possibility of getting a 64-bit immediate here, so
                we need special handling.  */
-           if (parse_big_immediate (&str, i) == FAIL)
+           if (parse_big_immediate (&str, i, NULL, /*allow_symbol_p=*/FALSE)
+               == FAIL)
               {
                 inst.error = _("immediate value is out of range");
                 goto failure;
               {
                 inst.error = _("immediate value is out of range");
                 goto failure;
@@ -6721,7 +7048,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           val = parse_reg_list (&str);
           if (*str == '^')
             {
           val = parse_reg_list (&str);
           if (*str == '^')
             {
-             inst.operands[1].writeback = 1;
+             inst.operands[i].writeback = 1;
               str++;
             }
           break;
               str++;
             }
           break;
@@ -6937,12 +7264,32 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  #define warn_deprecated_sp(reg)                        \
    do                                           \
      if (warn_on_deprecated && reg == REG_SP)   \
  #define warn_deprecated_sp(reg)                        \
    do                                           \
      if (warn_on_deprecated && reg == REG_SP)   \
-       as_warn (_("use of r13 is deprecated"));        \
+       as_tsktsk (_("use of r13 is deprecated"));      \
    while (0)
  
  /* Functions for operand encoding.  ARM, then Thumb.  */
  
    while (0)
  
  /* Functions for operand encoding.  ARM, then Thumb.  */
  
-#define rotate_left(v, n) (v << n | v >> (32 - n))
+/* Rotate V left by N bits, with both shift counts reduced modulo 32 so
+   that N == 0 never shifts by the full width.  Assumes V is an unsigned
+   32-bit value.  Both arguments are parenthesised so that expression
+   arguments such as `i + 2' expand correctly; note that V and N are
+   still evaluated twice.  */
+#define rotate_left(v, n) \
+  (((v) << ((n) & 31)) | ((v) >> ((32 - (n)) & 31)))
+
+/* If the current inst is scalar ARMv8.2 fp16 instruction, do special encoding.
+
+   The only binary encoding difference is the Coprocessor number.  Coprocessor
+   9 is used for half-precision calculations or conversions.  The format of the
+   instruction is the same as the equivalent Coprocessor 10 instruction that
+   exists for Single-Precision operation.  */
+
+static void
+do_scalar_fp16_v82_encode (void)
+{
+  /* A condition other than COND_ALWAYS only draws a warning; the code
+     below still runs.  */
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional,"
+              " the behaviour is UNPREDICTABLE"));
+  /* Requires arm_ext_fp16 in cpu_variant (constraint presumably records
+     BAD_FP16 and returns when the check fails -- confirm against the
+     macro definition).  */
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+             _(BAD_FP16));
+
+  /* Clear bits [11:8] and set them to 9, the coprocessor number
+     described in the header comment.  */
+  inst.instruction = (inst.instruction & 0xfffff0ff) | 0x900;
+  mark_feature_used (&arm_ext_fp16);
+}
  
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
  
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
@@ -6952,7 +7299,10 @@ encode_arm_immediate (unsigned int val)
  {
    unsigned int a, i;
  
  {
    unsigned int a, i;
  
-  for (i = 0; i < 32; i += 2)
+  if (val <= 0xff)
+    return val;
+
+  for (i = 2; i < 32; i += 2)
      if ((a = rotate_left (val, i)) <= 0xff)
        return a | (i << 7); /* 12-bit pack: [shift-cnt,const].  */
  
      if ((a = rotate_left (val, i)) <= 0xff)
        return a | (i << 7); /* 12-bit pack: [shift-cnt,const].  */
  
@@ -7169,7 +7519,7 @@ encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
           if (warn_on_deprecated
               && !is_load
               && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v7))
           if (warn_on_deprecated
               && !is_load
               && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v7))
-           as_warn (_("use of PC in this instruction is deprecated"));
+           as_tsktsk (_("use of PC in this instruction is deprecated"));
         }
  
        if (inst.reloc.type == BFD_RELOC_UNUSED)
         }
  
        if (inst.reloc.type == BFD_RELOC_UNUSED)
@@ -7226,70 +7576,253 @@ encode_arm_addr_mode_3 (int i, bfd_boolean is_t)
      }
  }
  
      }
  }
  
-/* inst.operands[i] was set up by parse_address.  Encode it into an
-   ARM-format instruction.  Reject all forms which cannot be encoded
-   into a coprocessor load/store instruction.  If wb_ok is false,
-   reject use of writeback; if unind_ok is false, reject use of
-   unindexed addressing.  If reloc_override is not 0, use it instead
-   of BFD_ARM_CP_OFF_IMM, unless the initial relocation is a group one
-   (in which case it is preserved).  */
+/* Write immediate bits [7:0] to the following locations:
  
  
-static int
-encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
+  |28/24|23     19|18 16|15                    4|3     0|
+  |  a  |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
+
+  This function is used by VMOV/VMVN/VORR/VBIC.  */
+
+static void
+neon_write_immbits (unsigned immbits)
  {
  {
-  inst.instruction |= inst.operands[i].reg << 16;
+  inst.instruction |= immbits & 0xf;
+  inst.instruction |= ((immbits >> 4) & 0x7) << 16;
+  inst.instruction |= ((immbits >> 7) & 0x1) << (thumb_mode ? 28 : 24);
+}
  
  
-  gas_assert (!(inst.operands[i].preind && inst.operands[i].postind));
+/* Invert low-order SIZE bits of XHI:XLO.  */
  
  
-  if (!inst.operands[i].preind && !inst.operands[i].postind) /* unindexed */
-    {
-      gas_assert (!inst.operands[i].writeback);
-      if (!unind_ok)
+static void
+neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
+{
+  /* Either pointer may be null; a missing word is treated as zero and
+     is not written back.  */
+  unsigned lo = xlo ? *xlo : 0;
+  unsigned hi = xhi ? *xhi : 0;
+
+  if (size == 8)
+    lo = ~lo & 0xff;
+  else if (size == 16)
+    lo = ~lo & 0xffff;
+  else if (size == 32 || size == 64)
+    {
+      /* A 64-bit element flips the high word as well as the low one.  */
+      if (size == 64)
+       hi = ~hi & 0xffffffff;
+      lo = ~lo & 0xffffffff;
+    }
+  else
+    abort ();
+
+  if (xlo)
+    *xlo = lo;
+
+  if (xhi)
+    *xhi = hi;
+}
+
+/* Return 1 when each of the four low bytes of IMM is either 0x00 or
+   0xff, i.e. IMM looks like 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for
+   single bits A, B, C, D; return 0 otherwise.  */
+
+static int
+neon_bits_same_in_bytes (unsigned imm)
+{
+  int shift;
+
+  for (shift = 0; shift < 32; shift += 8)
+    {
+      unsigned byte = (imm >> shift) & 0xff;
+
+      if (byte != 0 && byte != 0xff)
+       return 0;
+    }
+
+  return 1;
+}
+
+/* Collapse the lowest bit of each of the four bytes of IMM into a
+   nibble 0bABCD, where D comes from bit 0, C from bit 8, B from bit 16
+   and A from bit 24.  */
+
+static unsigned
+neon_squash_bits (unsigned imm)
+{
+  unsigned d = imm & 0x1;
+  unsigned c = (imm >> 7) & 0x2;
+  unsigned b = (imm >> 14) & 0x4;
+  unsigned a = (imm >> 21) & 0x8;
+
+  return a | b | c | d;
+}
+
+/* Pack a quarter-float bit pattern into eight bits 0b...000 abcdefgh:
+   bits [25:19] of IMM supply the low seven bits and bit 31 supplies
+   the top one.  */
+
+static unsigned
+neon_qfloat_bits (unsigned imm)
+{
+  unsigned low7 = (imm >> 19) & 0x7f;
+  unsigned top = (imm >> 24) & 0x80;
+
+  return top | low7;
+}
+
+/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
+   the instruction. *OP is passed as the initial value of the op field, and
+   may be set to a different value depending on the constant (i.e.
+   "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
+   MVN).  If the immediate looks like a repeated pattern then also
+   try smaller element sizes.  */
+
+static int
+neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
+                        unsigned *immbits, int *op, int size,
+                        enum neon_el_type type)
+{
+  /* Only permit float immediates (including 0.0/-0.0) if the operand type is
+     float.  */
+  if (type == NT_float && !float_p)
+    return FAIL;
+
+  if (type == NT_float && is_quarter_float (immlo) && immhi == 0)
+    {
+      if (size != 32 || *op == 1)
+       return FAIL;
+      *immbits = neon_qfloat_bits (immlo);
+      return 0xf;
+    }
+
+  if (size == 64)
+    {
+      if (neon_bits_same_in_bytes (immhi)
+         && neon_bits_same_in_bytes (immlo))
         {
         {
-         inst.error = _("instruction does not support unindexed addressing");
-         return FAIL;
+         if (*op == 1)
+           return FAIL;
+         *immbits = (neon_squash_bits (immhi) << 4)
+                    | neon_squash_bits (immlo);
+         *op = 1;
+         return 0xe;
         }
         }
-      inst.instruction |= inst.operands[i].imm;
-      inst.instruction |= INDEX_UP;
-      return SUCCESS;
+
+      if (immhi != immlo)
+       return FAIL;
      }
  
      }
  
-  if (inst.operands[i].preind)
-    inst.instruction |= PRE_INDEX;
+  if (size >= 32)
+    {
+      if (immlo == (immlo & 0x000000ff))
+       {
+         *immbits = immlo;
+         return 0x0;
+       }
+      else if (immlo == (immlo & 0x0000ff00))
+       {
+         *immbits = immlo >> 8;
+         return 0x2;
+       }
+      else if (immlo == (immlo & 0x00ff0000))
+       {
+         *immbits = immlo >> 16;
+         return 0x4;
+       }
+      else if (immlo == (immlo & 0xff000000))
+       {
+         *immbits = immlo >> 24;
+         return 0x6;
+       }
+      else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
+       {
+         *immbits = (immlo >> 8) & 0xff;
+         return 0xc;
+       }
+      else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
+       {
+         *immbits = (immlo >> 16) & 0xff;
+         return 0xd;
+       }
  
  
-  if (inst.operands[i].writeback)
+      if ((immlo & 0xffff) != (immlo >> 16))
+       return FAIL;
+      immlo &= 0xffff;
+    }
+
+  if (size >= 16)
      {
      {
-      if (inst.operands[i].reg == REG_PC)
+      if (immlo == (immlo & 0x000000ff))
         {
         {
-         inst.error = _("pc may not be used with write-back");
-         return FAIL;
+         *immbits = immlo;
+         return 0x8;
         }
         }
-      if (!wb_ok)
+      else if (immlo == (immlo & 0x0000ff00))
         {
         {
-         inst.error = _("instruction does not support writeback");
-         return FAIL;
+         *immbits = immlo >> 8;
+         return 0xa;
         }
         }
-      inst.instruction |= WRITE_BACK;
+
+      if ((immlo & 0xff) != (immlo >> 8))
+       return FAIL;
+      immlo &= 0xff;
      }
  
      }
  
-  if (reloc_override)
-    inst.reloc.type = (bfd_reloc_code_real_type) reloc_override;
-  else if ((inst.reloc.type < BFD_RELOC_ARM_ALU_PC_G0_NC
-           || inst.reloc.type > BFD_RELOC_ARM_LDC_SB_G2)
-          && inst.reloc.type != BFD_RELOC_ARM_LDR_PC_G0)
+  if (immlo == (immlo & 0x000000ff))
      {
      {
-      if (thumb_mode)
-       inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
-      else
-       inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+      /* Don't allow MVN with 8-bit immediate.  */
+      if (*op == 1)
+       return FAIL;
+      *immbits = immlo;
+      return 0xe;
      }
  
      }
  
-  /* Prefer + for zero encoded value.  */
-  if (!inst.operands[i].negative)
-    inst.instruction |= INDEX_UP;
+  return FAIL;
+}
  
  
-  return SUCCESS;
+#if defined BFD_HOST_64_BIT
+/* Returns TRUE if double precision value V may be cast
+   to single precision without loss of accuracy.
+
+   V holds the raw bit pattern of the double: bit 63 is the sign, bits
+   [62:52] the biased exponent and bits [51:0] the mantissa.  The value
+   converts exactly when its exponent fits the single precision range
+   and the 29 mantissa bits that single precision drops are all zero.  */
+
+static bfd_boolean
+is_double_a_single (bfd_int64_t v)
+{
+  int exp = (int)((v >> 52) & 0x7FF);
+  bfd_int64_t mantissa = (v & (bfd_int64_t)0xFFFFFFFFFFFFFULL);
+
+  /* A zero exponent means zero or a double subnormal.  Zero is exact;
+     a double subnormal is smaller than any single precision value, so
+     it cannot be converted without loss.  */
+  if (exp == 0)
+    return mantissa == 0;
+
+  return (exp == 0x7FF
+         || (exp >= 1023 - 126 && exp <= 1023 + 127))
+    && (mantissa & 0x1FFFFFFFl) == 0;
+}
+
+/* Returns a double precision value cast to single precision
+   (ignoring the least significant bits in exponent and mantissa).
+
+   V holds the raw bit pattern of the double and the result is the raw
+   bit pattern of the single.  An all-ones exponent is kept all-ones.
+   An exponent too large for single precision gives infinity, and one
+   below the rebiased range gives zero.  */
+
+static int
+double_to_single (bfd_int64_t v)
+{
+  /* Unsigned, so that moving the sign into bit 31 is well defined.  */
+  unsigned int sign = (unsigned int) ((v >> 63) & 1l);
+  int exp = (int) ((v >> 52) & 0x7FF);
+  bfd_int64_t mantissa = (v & (bfd_int64_t)0xFFFFFFFFFFFFFULL);
+
+  if (exp == 0x7FF)
+    exp = 0xFF;
+  else
+    {
+      exp = exp - 1023 + 127;
+      if (exp >= 0xFF)
+       {
+         /* Infinity: all-ones exponent with a zero mantissa.  */
+         exp = 0xFF;
+         mantissa = 0;
+       }
+      else if (exp < 0)
+       {
+         /* No denormalized numbers.  */
+         exp = 0;
+         mantissa = 0;
+       }
+    }
+  mantissa >>= 29;
+  return (int) ((sign << 31) | ((unsigned int) exp << 23)
+               | (unsigned int) mantissa);
  }
  }
+#endif /* BFD_HOST_64_BIT */
+
+/* Kind of literal load being handled; passed as the T argument of
+   move_or_literal_pool.  */
+enum lit_type
+{
+  CONST_THUMB, /* move_or_literal_pool sets thumb_p for this value.  */
+  CONST_ARM,   /* move_or_literal_pool sets arm_p for this value.  */
+  CONST_VEC    /* Neither flag set; presumably a vector load -- confirm.  */
+};
+
+static void do_vfp_nsyn_opcode (const char *);
  
  /* inst.reloc.exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
  
  /* inst.reloc.exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
@@ -7301,9 +7834,11 @@ encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
     inst.operands[i] describes the destination register.         */
  
  static bfd_boolean
     inst.operands[i] describes the destination register.         */
  
  static bfd_boolean
-move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
+move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
  {
    unsigned long tbit;
  {
    unsigned long tbit;
+  bfd_boolean thumb_p = (t == CONST_THUMB);
+  bfd_boolean arm_p   = (t == CONST_ARM);
  
    if (thumb_p)
      tbit = (inst.instruction > 0xffff) ? THUMB2_LOAD_BIT : THUMB_LOAD_BIT;
  
    if (thumb_p)
      tbit = (inst.instruction > 0xffff) ? THUMB2_LOAD_BIT : THUMB_LOAD_BIT;
@@ -7315,52 +7850,217 @@ move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
        inst.error = _("invalid pseudo operation");
        return TRUE;
      }
        inst.error = _("invalid pseudo operation");
        return TRUE;
      }
-  if (inst.reloc.exp.X_op != O_constant && inst.reloc.exp.X_op != O_symbol)
+
+  if (inst.reloc.exp.X_op != O_constant
+      && inst.reloc.exp.X_op != O_symbol
+      && inst.reloc.exp.X_op != O_big)
      {
        inst.error = _("constant expression expected");
        return TRUE;
      }
      {
        inst.error = _("constant expression expected");
        return TRUE;
      }
-  if (inst.reloc.exp.X_op == O_constant)
+
+  if (inst.reloc.exp.X_op == O_constant
+      || inst.reloc.exp.X_op == O_big)
      {
      {
-      if (thumb_p)
+#if defined BFD_HOST_64_BIT
+      bfd_int64_t v;
+#else
+      offsetT v;
+#endif
+      if (inst.reloc.exp.X_op == O_big)
         {
         {
-         if (!unified_syntax && (inst.reloc.exp.X_add_number & ~0xFF) == 0)
+         LITTLENUM_TYPE w[X_PRECISION];
+         LITTLENUM_TYPE * l;
+
+         if (inst.reloc.exp.X_add_number == -1)
             {
             {
-             /* This can be done with a mov(1) instruction.  */
-             inst.instruction  = T_OPCODE_MOV_I8 | (inst.operands[i].reg << 8);
-             inst.instruction |= inst.reloc.exp.X_add_number;
-             return TRUE;
+             gen_to_words (w, X_PRECISION, E_PRECISION);
+             l = w;
+             /* FIXME: Should we check words w[2..5] ?  */
             }
             }
+         else
+           l = generic_bignum;
+
+#if defined BFD_HOST_64_BIT
+         v =
+           ((((((((bfd_int64_t) l[3] & LITTLENUM_MASK)
+                 << LITTLENUM_NUMBER_OF_BITS)
+                | ((bfd_int64_t) l[2] & LITTLENUM_MASK))
+               << LITTLENUM_NUMBER_OF_BITS)
+              | ((bfd_int64_t) l[1] & LITTLENUM_MASK))
+             << LITTLENUM_NUMBER_OF_BITS)
+            | ((bfd_int64_t) l[0] & LITTLENUM_MASK));
+#else
+         v = ((l[1] & LITTLENUM_MASK) << LITTLENUM_NUMBER_OF_BITS)
+           |  (l[0] & LITTLENUM_MASK);
+#endif
         }
        else
         }
        else
+       v = inst.reloc.exp.X_add_number;
+
+      if (!inst.operands[i].issingle)
         {
         {
-         int value = encode_arm_immediate (inst.reloc.exp.X_add_number);
-         if (value != FAIL)
+         if (thumb_p)
             {
             {
-             /* This can be done with a mov instruction.  */
-             inst.instruction &= LITERAL_MASK;
-             inst.instruction |= INST_IMMEDIATE | (OPCODE_MOV << DATA_OP_SHIFT);
-             inst.instruction |= value & 0xfff;
-             return TRUE;
+             /* This can be encoded only for a low register.  */
+             if ((v & ~0xFF) == 0 && (inst.operands[i].reg < 8))
+               {
+                 /* This can be done with a mov(1) instruction.  */
+                 inst.instruction = T_OPCODE_MOV_I8 | (inst.operands[i].reg << 8);
+                 inst.instruction |= v;
+                 return TRUE;
+               }
+
+             if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)
+                 || ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2_v8m))
+               {
+                 /* Check if on thumb2 it can be done with a mov.w, mvn or
+                    movw instruction.  */
+                 unsigned int newimm;
+                 bfd_boolean isNegated;
+
+                 newimm = encode_thumb32_immediate (v);
+                 if (newimm != (unsigned int) FAIL)
+                   isNegated = FALSE;
+                 else
+                   {
+                     newimm = encode_thumb32_immediate (~v);
+                     if (newimm != (unsigned int) FAIL)
+                       isNegated = TRUE;
+                   }
+
+                 /* The number can be loaded with a mov.w or mvn
+                    instruction.  */
+                 if (newimm != (unsigned int) FAIL
+                     && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
+                   {
+                     inst.instruction = (0xf04f0000  /*  MOV.W.  */
+                                         | (inst.operands[i].reg << 8));
+                     /* Change to MVN.  */
+                     inst.instruction |= (isNegated ? 0x200000 : 0);
+                     inst.instruction |= (newimm & 0x800) << 15;
+                     inst.instruction |= (newimm & 0x700) << 4;
+                     inst.instruction |= (newimm & 0x0ff);
+                     return TRUE;
+                   }
+                 /* The number can be loaded with a movw instruction.  */
+                 else if ((v & ~0xFFFF) == 0
+                          && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2_v8m))
+                   {
+                     int imm = v & 0xFFFF;
+
+                     inst.instruction = 0xf2400000;  /* MOVW.  */
+                     inst.instruction |= (inst.operands[i].reg << 8);
+                     inst.instruction |= (imm & 0xf000) << 4;
+                     inst.instruction |= (imm & 0x0800) << 15;
+                     inst.instruction |= (imm & 0x0700) << 4;
+                     inst.instruction |= (imm & 0x00ff);
+                     return TRUE;
+                   }
+               }
             }
             }
+         else if (arm_p)
+           {
+             int value = encode_arm_immediate (v);
+
+             if (value != FAIL)
+               {
+                 /* This can be done with a mov instruction.  */
+                 inst.instruction &= LITERAL_MASK;
+                 inst.instruction |= INST_IMMEDIATE | (OPCODE_MOV << DATA_OP_SHIFT);
+                 inst.instruction |= value & 0xfff;
+                 return TRUE;
+               }
+
+             value = encode_arm_immediate (~ v);
+             if (value != FAIL)
+               {
+                 /* This can be done with a mvn instruction.  */
+                 inst.instruction &= LITERAL_MASK;
+                 inst.instruction |= INST_IMMEDIATE | (OPCODE_MVN << DATA_OP_SHIFT);
+                 inst.instruction |= value & 0xfff;
+                 return TRUE;
+               }
+           }
+         else if (t == CONST_VEC)
+           {
+             int op = 0;
+             unsigned immbits = 0;
+             unsigned immlo = inst.operands[1].imm;
+             unsigned immhi = inst.operands[1].regisimm
+               ? inst.operands[1].reg
+               : inst.reloc.exp.X_unsigned
+               ? 0
+               : ((bfd_int64_t)((int) immlo)) >> 32;
+             int cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                                  &op, 64, NT_invtype);
+
+             if (cmode == FAIL)
+               {
+                 neon_invert_size (&immlo, &immhi, 64);
+                 op = !op;
+                 cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                                  &op, 64, NT_invtype);
+               }
+
+             if (cmode != FAIL)
+               {
+                 inst.instruction = (inst.instruction & VLDR_VMOV_SAME)
+                   | (1 << 23)
+                   | (cmode << 8)
+                   | (op << 5)
+                   | (1 << 4);
+
+                 /* Fill other bits in vmov encoding for both thumb and arm.  */
+                 if (thumb_mode)
+                   inst.instruction |= (0x7U << 29) | (0xF << 24);
+                 else
+                   inst.instruction |= (0xFU << 28) | (0x1 << 25);
+                 neon_write_immbits (immbits);
+                 return TRUE;
+               }
+           }
+       }
  
  
-         value = encode_arm_immediate (~inst.reloc.exp.X_add_number);
-         if (value != FAIL)
+      if (t == CONST_VEC)
+       {
+         /* Check if vldr Rx, =constant could be optimized to vmov Rx, #constant.  */
+         if (inst.operands[i].issingle
+             && is_quarter_float (inst.operands[1].imm)
+             && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3xd))
             {
             {
-             /* This can be done with a mvn instruction.  */
-             inst.instruction &= LITERAL_MASK;
-             inst.instruction |= INST_IMMEDIATE | (OPCODE_MVN << DATA_OP_SHIFT);
-             inst.instruction |= value & 0xfff;
+             inst.operands[1].imm =
+               neon_qfloat_bits (v);
+             do_vfp_nsyn_opcode ("fconsts");
               return TRUE;
             }
               return TRUE;
             }
+
+         /* If our host does not support a 64-bit type then we cannot perform
+            the following optimization.  This means that there will be a
+            discrepancy between the output produced by an assembler built for
+            a 32-bit-only host and the output produced from a 64-bit host, but
+            this cannot be helped.  */
+#if defined BFD_HOST_64_BIT
+         else if (!inst.operands[1].issingle
+                  && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
+           {
+             if (is_double_a_single (v)
+                 && is_quarter_float (double_to_single (v)))
+               {
+                 inst.operands[1].imm =
+                   neon_qfloat_bits (double_to_single (v));
+                 do_vfp_nsyn_opcode ("fconstd");
+                 return TRUE;
+               }
+           }
+#endif
         }
      }
  
         }
      }
  
-  if (add_to_lit_pool () == FAIL)
-    {
-      inst.error = _("literal pool insertion failed");
-      return TRUE;
-    }
+  if (add_to_lit_pool ((!inst.operands[i].isvec
+                       || inst.operands[i].issingle) ? 4 : 8) == FAIL)
+    return TRUE;
+
    inst.operands[1].reg = REG_PC;
    inst.operands[1].isreg = 1;
    inst.operands[1].preind = 1;
    inst.operands[1].reg = REG_PC;
    inst.operands[1].isreg = 1;
    inst.operands[1].preind = 1;
@@ -7373,6 +8073,83 @@ move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
    return FALSE;
  }
  
    return FALSE;
  }
  
+/* inst.operands[i] was set up by parse_address.  Encode it into an
+   ARM-format instruction.  Reject all forms which cannot be encoded
+   into a coprocessor load/store instruction.  If wb_ok is false,
+   reject use of writeback; if unind_ok is false, reject use of
+   unindexed addressing.
+
+   If reloc_override is not 0 it becomes inst.reloc.type.  Otherwise
+   an initial relocation that lies in the range
+   BFD_RELOC_ARM_ALU_PC_G0_NC .. BFD_RELOC_ARM_LDC_SB_G2, or that is
+   BFD_RELOC_ARM_LDR_PC_G0, is preserved; any other relocation is
+   replaced by BFD_RELOC_ARM_T32_CP_OFF_IMM when thumb_mode is set and
+   by BFD_RELOC_ARM_CP_OFF_IMM when it is not.
+
+   Returns SUCCESS, or FAIL after storing a message in inst.error.
+   When the operand is not a register the work is handed to
+   move_or_literal_pool; note that this path looks at operand 0 and
+   passes index 0 rather than I.  */
+
+static int
+encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
+{
+  if (!inst.operands[i].isreg)
+    {
+      /* PR 18256: a non-register address is only accepted when
+         operand 0 has isvec set.  */
+      if (! inst.operands[0].isvec)
+       {
+         inst.error = _("invalid co-processor operand");
+         return FAIL;
+       }
+      /* A true result means move_or_literal_pool either set inst.error
+         or rewrote the instruction as an immediate move, so no address
+         is left to encode.  A false result means it queued a literal
+         and the address is encoded below.  */
+      if (move_or_literal_pool (0, CONST_VEC, /*mode_3=*/FALSE))
+       return SUCCESS;
+    }
+
+  inst.instruction |= inst.operands[i].reg << 16;
+
+  gas_assert (!(inst.operands[i].preind && inst.operands[i].postind));
+
+  if (!inst.operands[i].preind && !inst.operands[i].postind) /* unindexed */
+    {
+      gas_assert (!inst.operands[i].writeback);
+      if (!unind_ok)
+       {
+         inst.error = _("instruction does not support unindexed addressing");
+         return FAIL;
+       }
+      /* The unindexed form ORs the operand's imm field straight into
+         the instruction, sets INDEX_UP, and returns without touching
+         inst.reloc.type.  */
+      inst.instruction |= inst.operands[i].imm;
+      inst.instruction |= INDEX_UP;
+      return SUCCESS;
+    }
+
+  if (inst.operands[i].preind)
+    inst.instruction |= PRE_INDEX;
+
+  if (inst.operands[i].writeback)
+    {
+      if (inst.operands[i].reg == REG_PC)
+       {
+         inst.error = _("pc may not be used with write-back");
+         return FAIL;
+       }
+      if (!wb_ok)
+       {
+         inst.error = _("instruction does not support writeback");
+         return FAIL;
+       }
+      inst.instruction |= WRITE_BACK;
+    }
+
+  if (reloc_override)
+    inst.reloc.type = (bfd_reloc_code_real_type) reloc_override;
+  else if ((inst.reloc.type < BFD_RELOC_ARM_ALU_PC_G0_NC
+           || inst.reloc.type > BFD_RELOC_ARM_LDC_SB_G2)
+          && inst.reloc.type != BFD_RELOC_ARM_LDR_PC_G0)
+    {
+      if (thumb_mode)
+       inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
+      else
+       inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+    }
+
+  /* Prefer + for zero encoded value: INDEX_UP is set whenever the
+     operand is not flagged negative.  */
+  if (!inst.operands[i].negative)
+    inst.instruction |= INDEX_UP;
+
+  return SUCCESS;
+}
+
  /* Functions for instruction encoding, sorted by sub-architecture.
     First some generics; their names are taken from the conventional
     bit positions for register arguments in ARM format instructions.  */
  /* Functions for instruction encoding, sorted by sub-architecture.
     First some generics; their names are taken from the conventional
     bit positions for register arguments in ARM format instructions.  */
@@ -7416,12 +8193,19 @@ do_rn_rd (void)
    inst.instruction |= inst.operands[1].reg << 12;
  }
  
    inst.instruction |= inst.operands[1].reg << 12;
  }
  
+static void
+do_tt (void)  /* Encode two register operands into inst.instruction.  */
+{
+  inst.instruction |= inst.operands[0].reg << 8;   /* Operand 0's register number is ORed in at bit 8.  */
+  inst.instruction |= inst.operands[1].reg << 16;  /* Operand 1's register number is ORed in at bit 16.  */
+}
+
  static bfd_boolean
  check_obsolete (const arm_feature_set *feature, const char *msg)
  {
    if (ARM_CPU_IS_ANY (cpu_variant))
      {
  static bfd_boolean
  check_obsolete (const arm_feature_set *feature, const char *msg)
  {
    if (ARM_CPU_IS_ANY (cpu_variant))
      {
-      as_warn ("%s", msg);
+      as_tsktsk ("%s", msg);
        return TRUE;
      }
    else if (ARM_CPU_HAS_FEATURE (cpu_variant, *feature))
        return TRUE;
      }
    else if (ARM_CPU_HAS_FEATURE (cpu_variant, *feature))
@@ -7449,7 +8233,7 @@ do_rd_rm_rn (void)
                            _("swp{b} use is obsoleted for ARMv8 and later"))
           && warn_on_deprecated
           && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6))
                            _("swp{b} use is obsoleted for ARMv8 and later"))
           && warn_on_deprecated
           && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6))
-       as_warn (_("swp{b} use is deprecated for ARMv6 and ARMv7"));
+       as_tsktsk (_("swp{b} use is deprecated for ARMv6 and ARMv7"));
      }
  
    inst.instruction |= inst.operands[0].reg << 12;
      }
  
    inst.instruction |= inst.operands[0].reg << 12;
@@ -7767,19 +8551,19 @@ struct deprecated_coproc_regs_s
  static struct deprecated_coproc_regs_s deprecated_coproc_regs[] =
  {
      {15, 0, 7, 10, 5,                                  /* CP15DMB.  */
  static struct deprecated_coproc_regs_s deprecated_coproc_regs[] =
  {
      {15, 0, 7, 10, 5,                                  /* CP15DMB.  */
-     ARM_FEATURE (ARM_EXT_V8, 0), ARM_FEATURE (0, 0),
+     ARM_FEATURE_CORE_LOW (ARM_EXT_V8), ARM_ARCH_NONE,
       DEPR_ACCESS_V8, NULL},
      {15, 0, 7, 10, 4,                                  /* CP15DSB.  */
       DEPR_ACCESS_V8, NULL},
      {15, 0, 7, 10, 4,                                  /* CP15DSB.  */
-     ARM_FEATURE (ARM_EXT_V8, 0), ARM_FEATURE (0, 0),
+     ARM_FEATURE_CORE_LOW (ARM_EXT_V8), ARM_ARCH_NONE,
       DEPR_ACCESS_V8, NULL},
      {15, 0, 7,  5, 4,                                  /* CP15ISB.  */
       DEPR_ACCESS_V8, NULL},
      {15, 0, 7,  5, 4,                                  /* CP15ISB.  */
-     ARM_FEATURE (ARM_EXT_V8, 0), ARM_FEATURE (0, 0),
+     ARM_FEATURE_CORE_LOW (ARM_EXT_V8), ARM_ARCH_NONE,
       DEPR_ACCESS_V8, NULL},
      {14, 6, 1,  0, 0,                                  /* TEEHBR.  */
       DEPR_ACCESS_V8, NULL},
      {14, 6, 1,  0, 0,                                  /* TEEHBR.  */
-     ARM_FEATURE (ARM_EXT_V8, 0), ARM_FEATURE (0, 0),
+     ARM_FEATURE_CORE_LOW (ARM_EXT_V8), ARM_ARCH_NONE,
       DEPR_ACCESS_V8, NULL},
      {14, 6, 0,  0, 0,                                  /* TEECR.  */
       DEPR_ACCESS_V8, NULL},
      {14, 6, 0,  0, 0,                                  /* TEECR.  */
-     ARM_FEATURE (ARM_EXT_V8, 0), ARM_FEATURE (0, 0),
+     ARM_FEATURE_CORE_LOW (ARM_EXT_V8), ARM_ARCH_NONE,
       DEPR_ACCESS_V8, NULL},
  };
  
       DEPR_ACCESS_V8, NULL},
  };
  
@@ -7826,7 +8610,7 @@ do_co_reg (void)
             if (! ARM_CPU_IS_ANY (cpu_variant)
                 && warn_on_deprecated
                 && ARM_CPU_HAS_FEATURE (cpu_variant, r->deprecated))
             if (! ARM_CPU_IS_ANY (cpu_variant)
                 && warn_on_deprecated
                 && ARM_CPU_HAS_FEATURE (cpu_variant, r->deprecated))
-             as_warn ("%s", r->dep_msg);
+             as_tsktsk ("%s", r->dep_msg);
           }
        }
  
           }
        }
  
@@ -8103,7 +8887,7 @@ do_ldst (void)
  {
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
  {
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/FALSE, /*mode_3=*/FALSE))
+    if (move_or_literal_pool (0, CONST_ARM, /*mode_3=*/FALSE))
        return;
    encode_arm_addr_mode_2 (1, /*is_t=*/FALSE);
    check_ldr_r15_aligned ();
        return;
    encode_arm_addr_mode_2 (1, /*is_t=*/FALSE);
    check_ldr_r15_aligned ();
@@ -8136,7 +8920,7 @@ do_ldstv4 (void)
    constraint (inst.operands[0].reg == REG_PC, BAD_PC);
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
    constraint (inst.operands[0].reg == REG_PC, BAD_PC);
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/FALSE, /*mode_3=*/TRUE))
+    if (move_or_literal_pool (0, CONST_ARM, /*mode_3=*/TRUE))
        return;
    encode_arm_addr_mode_3 (1, /*is_t=*/FALSE);
  }
        return;
    encode_arm_addr_mode_3 (1, /*is_t=*/FALSE);
  }
@@ -8214,8 +8998,6 @@ do_mov16 (void)
      }
  }
  
      }
  }
  
-static void do_vfp_nsyn_opcode (const char *);
-
  static int
  do_vfp_nsyn_mrs (void)
  {
  static int
  do_vfp_nsyn_mrs (void)
  {
@@ -8472,6 +9254,8 @@ do_pli (void)
  static void
  do_push_pop (void)
  {
  static void
  do_push_pop (void)
  {
+  constraint (inst.operands[0].writeback,
+             _("push/pop do not support {reglist}^"));
    inst.operands[1] = inst.operands[0];
    memset (&inst.operands[0], 0, sizeof inst.operands[0]);
    inst.operands[0].isreg = 1;
    inst.operands[1] = inst.operands[0];
    memset (&inst.operands[0], 0, sizeof inst.operands[0]);
    inst.operands[0].isreg = 1;
@@ -8549,7 +9333,7 @@ do_setend (void)
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
-      as_warn (_("setend use is deprecated for ARMv8"));
+      as_tsktsk (_("setend use is deprecated for ARMv8"));
  
    if (inst.operands[0].imm)
      inst.instruction |= 0x200;
  
    if (inst.operands[0].imm)
      inst.instruction |= 0x200;
@@ -8597,6 +9381,24 @@ do_swi (void)
    inst.reloc.pc_rel = 0;
  }
  
    inst.reloc.pc_rel = 0;
  }
  
+static void
+do_setpan (void)  /* Encode the immediate operand of SETPAN (non-Thumb counterpart of do_t_setpan).  */
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_pan),  /* cpu_variant must include arm_ext_pan.  */
+             _("selected processor does not support SETPAN instruction"));
+
+  inst.instruction |= ((inst.operands[0].imm & 1) << 9);  /* Only bit 0 of the immediate is used; it is ORed in at bit 9.  */
+}
+
+/* Encode the immediate operand of the Thumb SETPAN instruction.
+   cpu_variant must include arm_ext_pan; otherwise the constraint
+   below reports that the selected processor does not support SETPAN.
+   Only bit 0 of the immediate is encoded, at bit 3 of the
+   instruction, matching the masking done by do_setpan.  */
+
+static void
+do_t_setpan (void)
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_pan),
+             _("selected processor does not support SETPAN instruction"));
+
+  /* Mask to one bit so that a larger immediate cannot spill into the
+     neighbouring opcode bits.  */
+  inst.instruction |= ((inst.operands[0].imm & 1) << 3);
+}
+
  /* ARM V5E (El Segundo) signed-multiply-accumulate (argument parse)
     SMLAxy{cond} Rd,Rm,Rs,Rn
     SMLAWy{cond} Rd,Rm,Rs,Rn
  /* ARM V5E (El Segundo) signed-multiply-accumulate (argument parse)
     SMLAxy{cond} Rd,Rm,Rs,Rn
     SMLAWy{cond} Rd,Rm,Rs,Rn
@@ -9192,7 +9994,7 @@ do_iwmmxt_wldstd (void)
        && inst.operands[1].immisreg)
      {
        inst.instruction &= ~0x1a000ff;
        && inst.operands[1].immisreg)
      {
        inst.instruction &= ~0x1a000ff;
-      inst.instruction |= (0xf << 28);
+      inst.instruction |= (0xfU << 28);
        if (inst.operands[1].preind)
         inst.instruction |= PRE_INDEX;
        if (!inst.operands[1].negative)
        if (inst.operands[1].preind)
         inst.instruction |= PRE_INDEX;
        if (!inst.operands[1].negative)
@@ -9271,7 +10073,7 @@ do_iwmmxt_wrwrwr_or_imm5 (void)
        }
      /* Map 32 -> 0, etc.  */
      inst.operands[2].imm &= 0x1f;
        }
      /* Map 32 -> 0, etc.  */
      inst.operands[2].imm &= 0x1f;
-    inst.instruction |= (0xf << 28) | ((inst.operands[2].imm & 0x10) << 4) | (inst.operands[2].imm & 0xf);
+    inst.instruction |= (0xfU << 28) | ((inst.operands[2].imm & 0x10) << 4) | (inst.operands[2].imm & 0xf);
    }
  }
  \f
    }
  }
  \f
@@ -9565,7 +10367,8 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_wfe,   bf20, f3af8002),                   \
    X(_wfi,   bf30, f3af8003),                   \
    X(_sev,   bf40, f3af8004),                    \
    X(_wfe,   bf20, f3af8002),                   \
    X(_wfi,   bf30, f3af8003),                   \
    X(_sev,   bf40, f3af8004),                    \
-  X(_sevl,  bf50, f3af8005)
+  X(_sevl,  bf50, f3af8005),                   \
+  X(_udf,   de00, f7f0a000)
  
  /* To catch errors in encoding functions, the codes are all offset by
     0xF800, putting them in one of the 32-bit prefix ranges, ergo undefined
  
  /* To catch errors in encoding functions, the codes are all offset by
     0xF800, putting them in one of the 32-bit prefix ranges, ergo undefined
@@ -9667,7 +10470,9 @@ do_t_add_sub (void)
                 {
                   inst.instruction = THUMB_OP16(opcode);
                   inst.instruction |= (Rd << 4) | Rs;
                 {
                   inst.instruction = THUMB_OP16(opcode);
                   inst.instruction |= (Rd << 4) | Rs;
-                 inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
+                 if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                     || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+                   inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD;
                   if (inst.size_req != 2)
                     inst.relax = opcode;
                 }
                   if (inst.size_req != 2)
                     inst.relax = opcode;
                 }
@@ -10175,6 +10980,10 @@ do_t_branch (void)
         reloc = BFD_RELOC_THUMB_PCREL_BRANCH25;
        else
         {
         reloc = BFD_RELOC_THUMB_PCREL_BRANCH25;
        else
         {
+         constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2),
+                     _("selected architecture does not support "
+                       "wide conditional branch instruction"));
+
           gas_assert (cond != 0xF);
           inst.instruction |= cond << 22;
           reloc = BFD_RELOC_THUMB_PCREL_BRANCH20;
           gas_assert (cond != 0xF);
           inst.instruction |= cond << 22;
           reloc = BFD_RELOC_THUMB_PCREL_BRANCH20;
@@ -10679,7 +11488,7 @@ do_t_ldst (void)
         {
           if (opcode <= 0xffff)
             inst.instruction = THUMB_OP32 (opcode);
         {
           if (opcode <= 0xffff)
             inst.instruction = THUMB_OP32 (opcode);
-         if (move_or_literal_pool (0, /*thumb_p=*/TRUE, /*mode_3=*/FALSE))
+         if (move_or_literal_pool (0, CONST_THUMB, /*mode_3=*/FALSE))
             return;
         }
        if (inst.operands[1].isreg
             return;
         }
        if (inst.operands[1].isreg
@@ -10785,7 +11594,7 @@ do_t_ldst (void)
  
    inst.instruction = THUMB_OP16 (inst.instruction);
    if (!inst.operands[1].isreg)
  
    inst.instruction = THUMB_OP16 (inst.instruction);
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/TRUE, /*mode_3=*/FALSE))
+    if (move_or_literal_pool (0, CONST_THUMB, /*mode_3=*/FALSE))
        return;
  
    constraint (!inst.operands[1].preind
        return;
  
    constraint (!inst.operands[1].preind
@@ -10991,7 +11800,7 @@ do_t_mov_cmp (void)
                   if ((Rn == REG_SP || Rn == REG_PC)
                       && (Rm == REG_SP || Rm == REG_PC))
                     {
                   if ((Rn == REG_SP || Rn == REG_PC)
                       && (Rm == REG_SP || Rm == REG_PC))
                     {
-                     as_warn (_("Use of r%u as a source register is "
+                     as_tsktsk (_("Use of r%u as a source register is "
                                  "deprecated when r%u is the destination "
                                  "register."), Rm, Rn);
                     }
                                  "deprecated when r%u is the destination "
                                  "register."), Rm, Rn);
                     }
@@ -11018,9 +11827,13 @@ do_t_mov_cmp (void)
               inst.instruction = THUMB_OP16 (opcode);
               inst.instruction |= Rn << 8;
               if (inst.size_req == 2)
               inst.instruction = THUMB_OP16 (opcode);
               inst.instruction |= Rn << 8;
               if (inst.size_req == 2)
-               inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+               {
+                 if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+                     || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+                   inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM;
+               }
               else
               else
-               inst.relax = opcode;
+                 inst.relax = opcode;
             }
           else
             {
             }
           else
             {
@@ -11268,7 +12081,8 @@ do_t_mvn_tst (void)
           || inst.operands[1].shifted
           || Rn > 7 || Rm > 7)
         narrow = FALSE;
           || inst.operands[1].shifted
           || Rn > 7 || Rm > 7)
         narrow = FALSE;
-      else if (inst.instruction == T_MNEM_cmn)
+      else if (inst.instruction == T_MNEM_cmn
+              || inst.instruction == T_MNEM_tst)
         narrow = TRUE;
        else if (THUMB_SETS_FLAGS (inst.instruction))
         narrow = !in_it_block ();
         narrow = TRUE;
        else if (THUMB_SETS_FLAGS (inst.instruction))
         narrow = !in_it_block ();
@@ -11352,7 +12166,8 @@ do_t_mrs (void)
           /* PR gas/12698:  The constraint is only applied for m_profile.
              If the user has specified -march=all, we want to ignore it as
              we are building for any CPU type, including non-m variants.  */
           /* PR gas/12698:  The constraint is only applied for m_profile.
              If the user has specified -march=all, we want to ignore it as
              we are building for any CPU type, including non-m variants.  */
-         bfd_boolean m_profile = selected_cpu.core != arm_arch_any.core;
+         bfd_boolean m_profile =
+           !ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any);
           constraint ((flags != 0) && m_profile, _("selected processor does "
                                                    "not support requested special purpose register"));
         }
           constraint ((flags != 0) && m_profile, _("selected processor does "
                                                    "not support requested special purpose register"));
         }
@@ -11392,7 +12207,8 @@ do_t_msr (void)
        /* PR gas/12698:  The constraint is only applied for m_profile.
          If the user has specified -march=all, we want to ignore it as
          we are building for any CPU type, including non-m variants.  */
        /* PR gas/12698:  The constraint is only applied for m_profile.
          If the user has specified -march=all, we want to ignore it as
          we are building for any CPU type, including non-m variants.  */
-      bfd_boolean m_profile = selected_cpu.core != arm_arch_any.core;
+      bfd_boolean m_profile =
+       !ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any);
        constraint (((ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6_dsp)
            && (bits & ~(PSR_s | PSR_f)) != 0)
           || (!ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6_dsp)
        constraint (((ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6_dsp)
            && (bits & ~(PSR_s | PSR_f)) != 0)
           || (!ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6_dsp)
@@ -11672,12 +12488,11 @@ do_t_push_pop (void)
               _("expression too complex"));
  
    mask = inst.operands[0].imm;
               _("expression too complex"));
  
    mask = inst.operands[0].imm;
-  if ((mask & ~0xff) == 0)
+  if (inst.size_req != 4 && (mask & ~0xff) == 0)
      inst.instruction = THUMB_OP16 (inst.instruction) | mask;
      inst.instruction = THUMB_OP16 (inst.instruction) | mask;
-  else if ((inst.instruction == T_MNEM_push
-           && (mask & ~0xff) == 1 << REG_LR)
-          || (inst.instruction == T_MNEM_pop
-              && (mask & ~0xff) == 1 << REG_PC))
+  else if (inst.size_req != 4
+          && (mask & ~0xff) == (1 << (inst.instruction == T_MNEM_push
+                                      ? REG_LR : REG_PC)))
      {
        inst.instruction = THUMB_OP16 (inst.instruction);
        inst.instruction |= THUMB_PP_PC_LR;
      {
        inst.instruction = THUMB_OP16 (inst.instruction);
        inst.instruction |= THUMB_PP_PC_LR;
@@ -11815,7 +12630,7 @@ do_t_setend (void)
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
-      as_warn (_("setend use is deprecated for ARMv8"));
+      as_tsktsk (_("setend use is deprecated for ARMv8"));
  
    set_it_insn_type (OUTSIDE_IT_INSN);
    if (inst.operands[0].imm)
  
    set_it_insn_type (OUTSIDE_IT_INSN);
    if (inst.operands[0].imm)
@@ -12220,6 +13035,30 @@ do_t_tb (void)
    inst.instruction |= (Rn << 16) | Rm;
  }
  
    inst.instruction |= (Rn << 16) | Rm;
  }
  
+static void
+do_t_udf (void)  /* Encode Thumb UDF, choosing between the 16-bit and 32-bit forms.  */
+{
+  if (!inst.operands[0].present)
+    inst.operands[0].imm = 0;  /* A missing operand is treated as immediate 0.  */
+
+  if ((unsigned int) inst.operands[0].imm > 255 || inst.size_req == 4)  /* Too big for 8 bits, or size 4 requested.  */
+    {
+      constraint (inst.size_req == 2,  /* A requested size of 2 cannot hold an immediate above 255.  */
+                  _("immediate value out of range"));
+      inst.instruction = THUMB_OP32 (inst.instruction);
+      inst.instruction |= (inst.operands[0].imm & 0xf000u) << 4;  /* Immediate bits 12-15 go to bits 16-19.  */
+      inst.instruction |= (inst.operands[0].imm & 0x0fffu) << 0;  /* Immediate bits 0-11 stay at bits 0-11.  */
+    }
+  else
+    {
+      inst.instruction = THUMB_OP16 (inst.instruction);
+      inst.instruction |= inst.operands[0].imm;  /* Here the immediate is at most 255 and is ORed in unshifted.  */
+    }
+
+  set_it_insn_type (NEUTRAL_IT_INSN);
+}
+
+
  static void
  do_t_usat (void)
  {
  static void
  do_t_usat (void)
  {
@@ -12286,6 +13125,8 @@ struct neon_tab_entry
    X(vqdmull,   0x0800d00, N_INV,     0x0800b40),       \
    X(vqdmulh,    0x0000b00, N_INV,     0x0800c40),      \
    X(vqrdmulh,   0x1000b00, N_INV,     0x0800d40),      \
    X(vqdmull,   0x0800d00, N_INV,     0x0800b40),       \
    X(vqdmulh,    0x0000b00, N_INV,     0x0800c40),      \
    X(vqrdmulh,   0x1000b00, N_INV,     0x0800d40),      \
+  X(vqrdmlah,   0x3000b10, N_INV,     0x0800e40),      \
+  X(vqrdmlsh,   0x3000c10, N_INV,     0x0800f40),      \
    X(vshl,      0x0000400, N_INV,     0x0800510),       \
    X(vqshl,     0x0000410, N_INV,     0x0800710),       \
    X(vand,      0x0000110, N_INV,     0x0800030),       \
    X(vshl,      0x0000400, N_INV,     0x0800510),       \
    X(vqshl,     0x0000410, N_INV,     0x0800710),       \
    X(vand,      0x0000110, N_INV,     0x0800030),       \
@@ -12439,7 +13280,19 @@ NEON_ENC_TAB
    X(2, (S, R), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
    X(2, (S, R), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
-  X(2, (R, F), SINGLE)
+  X(2, (R, F), SINGLE),                        \
+/* Half-float shapes supported so far.  */\
+  X (2, (H, D), MIXED),                        \
+  X (2, (D, H), MIXED),                        \
+  X (2, (H, F), MIXED),                        \
+  X (2, (F, H), MIXED),                        \
+  X (2, (H, H), HALF),                 \
+  X (2, (H, R), HALF),                 \
+  X (2, (R, H), HALF),                 \
+  X (2, (H, I), HALF),                 \
+  X (3, (H, H, H), HALF),              \
+  X (3, (H, F, I), MIXED),             \
+  X (3, (F, H, I), MIXED)
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
@@ -12460,6 +13313,7 @@ enum neon_shape
  
  enum neon_shape_class
  {
  
  enum neon_shape_class
  {
+  SC_HALF,
    SC_SINGLE,
    SC_DOUBLE,
    SC_QUAD,
    SC_SINGLE,
    SC_DOUBLE,
    SC_QUAD,
@@ -12477,6 +13331,7 @@ static enum neon_shape_class neon_shape_class[] =
  
  enum neon_shape_el
  {
  
  enum neon_shape_el
  {
+  SE_H,
    SE_F,
    SE_D,
    SE_Q,
    SE_F,
    SE_D,
    SE_Q,
@@ -12489,6 +13344,7 @@ enum neon_shape_el
  /* Register widths of above.  */
  static unsigned neon_shape_el_size[] =
  {
  /* Register widths of above.  */
  static unsigned neon_shape_el_size[] =
  {
+  16,
    32,
    64,
    128,
    32,
    64,
    128,
@@ -12573,6 +13429,7 @@ enum neon_type_mask
  #define N_SUF_32   (N_SU_32 | N_F32)
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
  #define N_SUF_32   (N_SU_32 | N_F32)
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_F_ALL    (N_F16 | N_F32 | N_F64)
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
@@ -12614,11 +13471,56 @@ neon_select_shape (enum neon_shape shape, ...)
  
           switch (neon_shape_tab[shape].el[j])
             {
  
           switch (neon_shape_tab[shape].el[j])
             {
+             /* If a  .f16,  .16,  .u16,  .s16 type specifier is given over
+                a VFP single precision register operand, it essentially
+                means that only half of the register is used.
+
+                If the type specifier is given after the mnemonic, the
+                information is stored in inst.vectype.  If the type specifier
+                is given after a register operand, the information is stored
+                in inst.operands[].vectype.
+
+                When there is only one type specifier, and all the register
+                operands are the same type of hardware register, the type
+                specifier applies to all register operands.
+
+                If no type specifier is given, the shape is inferred from
+                operand information.
+
+                for example:
+                vadd.f16 s0, s1, s2:           NS_HHH
+                vabs.f16 s0, s1:               NS_HH
+                vmov.f16 s0, r1:               NS_HR
+                vmov.f16 r0, s1:               NS_RH
+                vcvt.f16 r0, s1:               NS_RH
+                vcvt.f16.s32   s2, s2, #29:    NS_HFI
+                vcvt.f16.s32   s2, s2:         NS_HF
+             */
+           case SE_H:
+             if (!(inst.operands[j].isreg
+                   && inst.operands[j].isvec
+                   && inst.operands[j].issingle
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1
+                        && inst.vectype.el[0].size == 16)
+                       || (inst.vectype.elems > 1
+                           && inst.vectype.el[j].size == 16)
+                       || (inst.vectype.elems == 0
+                           && inst.operands[j].vectype.type != NT_invtype
+                           && inst.operands[j].vectype.size == 16))))
+               matches = 0;
+             break;
+
             case SE_F:
               if (!(inst.operands[j].isreg
                     && inst.operands[j].isvec
                     && inst.operands[j].issingle
             case SE_F:
               if (!(inst.operands[j].isreg
                     && inst.operands[j].isvec
                     && inst.operands[j].issingle
-                   && !inst.operands[j].isquad))
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1 && inst.vectype.el[0].size == 32)
+                       || (inst.vectype.elems > 1 && inst.vectype.el[j].size == 32)
+                       || (inst.vectype.elems == 0
+                           && (inst.operands[j].vectype.size == 32
+                               || inst.operands[j].vectype.type == NT_invtype)))))
                 matches = 0;
               break;
  
                 matches = 0;
               break;
  
@@ -12834,7 +13736,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
      *type = NT_untyped;
    else if ((mask & (N_P8 | N_P16 | N_P64)) != 0)
      *type = NT_poly;
      *type = NT_untyped;
    else if ((mask & (N_P8 | N_P16 | N_P64)) != 0)
      *type = NT_poly;
-  else if ((mask & (N_F16 | N_F32 | N_F64)) != 0)
+  else if ((mask & (N_F_ALL)) != 0)
      *type = NT_float;
    else
      return FAIL;
      *type = NT_float;
    else
      return FAIL;
@@ -13022,6 +13924,18 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
                   else
                     match = g_size;
  
                   else
                     match = g_size;
  
+                 /* FP16 will use a single precision register.  */
+                 if (regwidth == 32 && match == 16)
+                   {
+                     if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+                       match = regwidth;
+                     else
+                       {
+                         inst.error = _(BAD_FP16);
+                         return badtype;
+                       }
+                   }
+
                   if (regwidth != match)
                     {
                       first_error (_("operand size must match register width"));
                   if (regwidth != match)
                     {
                       first_error (_("operand size must match register width"));
@@ -13113,12 +14027,16 @@ do_vfp_nsyn_add_sub (enum neon_shape rs)
  {
    int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
  
  {
    int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_add)
         do_vfp_nsyn_opcode ("fadds");
        else
         do_vfp_nsyn_opcode ("fsubs");
      {
        if (is_add)
         do_vfp_nsyn_opcode ("fadds");
        else
         do_vfp_nsyn_opcode ("fsubs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13141,15 +14059,14 @@ try_vfp_nsyn (int args, void (*pfn) (enum neon_shape))
    switch (args)
      {
      case 2:
    switch (args)
      {
      case 2:
-      rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      et = neon_check_type (2, rs,
-       N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
        break;
  
      case 3:
        break;
  
      case 3:
-      rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
-      et = neon_check_type (3, rs,
-       N_EQK | N_VFP, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
+                           N_F_ALL | N_KEY | N_VFP);
        break;
  
      default:
        break;
  
      default:
@@ -13171,12 +14088,16 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
  {
    int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
  
  {
    int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_mla)
         do_vfp_nsyn_opcode ("fmacs");
        else
         do_vfp_nsyn_opcode ("fnmacs");
      {
        if (is_mla)
         do_vfp_nsyn_opcode ("fmacs");
        else
         do_vfp_nsyn_opcode ("fnmacs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13192,12 +14113,16 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  {
    int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
  
  {
    int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_fma)
         do_vfp_nsyn_opcode ("ffmas");
        else
         do_vfp_nsyn_opcode ("ffnmas");
      {
        if (is_fma)
         do_vfp_nsyn_opcode ("ffmas");
        else
         do_vfp_nsyn_opcode ("ffnmas");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13211,8 +14136,14 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  static void
  do_vfp_nsyn_mul (enum neon_shape rs)
  {
  static void
  do_vfp_nsyn_mul (enum neon_shape rs)
  {
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fmuls");
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fmuls");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fmuld");
  }
    else
      do_vfp_nsyn_opcode ("fmuld");
  }
@@ -13221,14 +14152,18 @@ static void
  do_vfp_nsyn_abs_neg (enum neon_shape rs)
  {
    int is_neg = (inst.instruction & 0x80) != 0;
  do_vfp_nsyn_abs_neg (enum neon_shape rs)
  {
    int is_neg = (inst.instruction & 0x80) != 0;
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_VFP | N_KEY);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_VFP | N_KEY);
  
  
-  if (rs == NS_FF)
+  if (rs == NS_FF || rs == NS_HH)
      {
        if (is_neg)
         do_vfp_nsyn_opcode ("fnegs");
        else
         do_vfp_nsyn_opcode ("fabss");
      {
        if (is_neg)
         do_vfp_nsyn_opcode ("fnegs");
        else
         do_vfp_nsyn_opcode ("fabss");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13265,11 +14200,17 @@ do_vfp_nsyn_ldm_stm (int is_dbmode)
  static void
  do_vfp_nsyn_sqrt (void)
  {
  static void
  do_vfp_nsyn_sqrt (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
+
+  if (rs == NS_FF || rs == NS_HH)
+    {
+      do_vfp_nsyn_opcode ("fsqrts");
  
  
-  if (rs == NS_FF)
-    do_vfp_nsyn_opcode ("fsqrts");
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fsqrtd");
  }
    else
      do_vfp_nsyn_opcode ("fsqrtd");
  }
@@ -13277,12 +14218,18 @@ do_vfp_nsyn_sqrt (void)
  static void
  do_vfp_nsyn_div (void)
  {
  static void
  do_vfp_nsyn_div (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
+
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fdivs");
  
  
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fdivs");
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fdivd");
  }
    else
      do_vfp_nsyn_opcode ("fdivd");
  }
@@ -13290,14 +14237,18 @@ do_vfp_nsyn_div (void)
  static void
  do_vfp_nsyn_nmul (void)
  {
  static void
  do_vfp_nsyn_nmul (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
  
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
      {
        NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -13305,17 +14256,19 @@ do_vfp_nsyn_nmul (void)
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
+
  }
  
  static void
  do_vfp_nsyn_cmp (void)
  {
  }
  
  static void
  do_vfp_nsyn_cmp (void)
  {
+  enum neon_shape rs;
    if (inst.operands[1].isreg)
      {
    if (inst.operands[1].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
  
  
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_monadic ();
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_monadic ();
@@ -13328,8 +14281,8 @@ do_vfp_nsyn_cmp (void)
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_FI, NS_DI, NS_NULL);
-      neon_check_type (2, rs, N_F32 | N_F64 | N_KEY | N_VFP, N_EQK);
+      rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL);
+      neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK);
  
        switch (inst.instruction & 0x0fffffff)
         {
  
        switch (inst.instruction & 0x0fffffff)
         {
@@ -13343,7 +14296,7 @@ do_vfp_nsyn_cmp (void)
           abort ();
         }
  
           abort ();
         }
  
-      if (rs == NS_FI)
+      if (rs == NS_FI || rs == NS_HI)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_compare_z ();
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_compare_z ();
@@ -13355,6 +14308,10 @@ do_vfp_nsyn_cmp (void)
         }
      }
    do_vfp_cond_or_thumb ();
         }
      }
    do_vfp_cond_or_thumb ();
+
+  /* ARMv8.2 fp16 instruction.  */
+  if (rs == NS_HI || rs == NS_HH)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
  }
  
  static void
@@ -13519,8 +14476,12 @@ do_neon_shl_imm (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+      int imm = inst.operands[2].imm;
+
+      constraint (imm < 0 || (unsigned)imm >= et.size,
+                 _("immediate out of range for shift"));
        NEON_ENCODE (IMMED, inst);
        NEON_ENCODE (IMMED, inst);
-      neon_imm_shift (FALSE, 0, neon_quad (rs), et, inst.operands[2].imm);
+      neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm);
      }
    else
      {
      }
    else
      {
@@ -13551,10 +14512,12 @@ do_neon_qshl_imm (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+      int imm = inst.operands[2].imm;
  
  
+      constraint (imm < 0 || (unsigned)imm >= et.size,
+                 _("immediate out of range for shift"));
        NEON_ENCODE (IMMED, inst);
        NEON_ENCODE (IMMED, inst);
-      neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
-                     inst.operands[2].imm);
+      neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et, imm);
      }
    else
      {
      }
    else
      {
@@ -13642,197 +14605,6 @@ neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
    return FAIL;
  }
  
    return FAIL;
  }
  
-/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits
-   A, B, C, D.  */
-
-static int
-neon_bits_same_in_bytes (unsigned imm)
-{
-  return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff)
-        && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00)
-        && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000)
-        && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000);
-}
-
-/* For immediate of above form, return 0bABCD.  */
-
-static unsigned
-neon_squash_bits (unsigned imm)
-{
-  return (imm & 0x01) | ((imm & 0x0100) >> 7) | ((imm & 0x010000) >> 14)
-        | ((imm & 0x01000000) >> 21);
-}
-
-/* Compress quarter-float representation to 0b...000 abcdefgh.  */
-
-static unsigned
-neon_qfloat_bits (unsigned imm)
-{
-  return ((imm >> 19) & 0x7f) | ((imm >> 24) & 0x80);
-}
-
-/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
-   the instruction. *OP is passed as the initial value of the op field, and
-   may be set to a different value depending on the constant (i.e.
-   "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
-   MVN).  If the immediate looks like a repeated pattern then also
-   try smaller element sizes.  */
-
-static int
-neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
-                        unsigned *immbits, int *op, int size,
-                        enum neon_el_type type)
-{
-  /* Only permit float immediates (including 0.0/-0.0) if the operand type is
-     float.  */
-  if (type == NT_float && !float_p)
-    return FAIL;
-
-  if (type == NT_float && is_quarter_float (immlo) && immhi == 0)
-    {
-      if (size != 32 || *op == 1)
-       return FAIL;
-      *immbits = neon_qfloat_bits (immlo);
-      return 0xf;
-    }
-
-  if (size == 64)
-    {
-      if (neon_bits_same_in_bytes (immhi)
-         && neon_bits_same_in_bytes (immlo))
-       {
-         if (*op == 1)
-           return FAIL;
-         *immbits = (neon_squash_bits (immhi) << 4)
-                    | neon_squash_bits (immlo);
-         *op = 1;
-         return 0xe;
-       }
-
-      if (immhi != immlo)
-       return FAIL;
-    }
-
-  if (size >= 32)
-    {
-      if (immlo == (immlo & 0x000000ff))
-       {
-         *immbits = immlo;
-         return 0x0;
-       }
-      else if (immlo == (immlo & 0x0000ff00))
-       {
-         *immbits = immlo >> 8;
-         return 0x2;
-       }
-      else if (immlo == (immlo & 0x00ff0000))
-       {
-         *immbits = immlo >> 16;
-         return 0x4;
-       }
-      else if (immlo == (immlo & 0xff000000))
-       {
-         *immbits = immlo >> 24;
-         return 0x6;
-       }
-      else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
-       {
-         *immbits = (immlo >> 8) & 0xff;
-         return 0xc;
-       }
-      else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
-       {
-         *immbits = (immlo >> 16) & 0xff;
-         return 0xd;
-       }
-
-      if ((immlo & 0xffff) != (immlo >> 16))
-       return FAIL;
-      immlo &= 0xffff;
-    }
-
-  if (size >= 16)
-    {
-      if (immlo == (immlo & 0x000000ff))
-       {
-         *immbits = immlo;
-         return 0x8;
-       }
-      else if (immlo == (immlo & 0x0000ff00))
-       {
-         *immbits = immlo >> 8;
-         return 0xa;
-       }
-
-      if ((immlo & 0xff) != (immlo >> 8))
-       return FAIL;
-      immlo &= 0xff;
-    }
-
-  if (immlo == (immlo & 0x000000ff))
-    {
-      /* Don't allow MVN with 8-bit immediate.  */
-      if (*op == 1)
-       return FAIL;
-      *immbits = immlo;
-      return 0xe;
-    }
-
-  return FAIL;
-}
-
-/* Write immediate bits [7:0] to the following locations:
-
-  |28/24|23     19|18 16|15                    4|3     0|
-  |  a  |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
-
-  This function is used by VMOV/VMVN/VORR/VBIC.  */
-
-static void
-neon_write_immbits (unsigned immbits)
-{
-  inst.instruction |= immbits & 0xf;
-  inst.instruction |= ((immbits >> 4) & 0x7) << 16;
-  inst.instruction |= ((immbits >> 7) & 0x1) << 24;
-}
-
-/* Invert low-order SIZE bits of XHI:XLO.  */
-
-static void
-neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
-{
-  unsigned immlo = xlo ? *xlo : 0;
-  unsigned immhi = xhi ? *xhi : 0;
-
-  switch (size)
-    {
-    case 8:
-      immlo = (~immlo) & 0xff;
-      break;
-
-    case 16:
-      immlo = (~immlo) & 0xffff;
-      break;
-
-    case 64:
-      immhi = (~immhi) & 0xffffffff;
-      /* fall through.  */
-
-    case 32:
-      immlo = (~immlo) & 0xffffffff;
-      break;
-
-    default:
-      abort ();
-    }
-
-  if (xlo)
-    *xlo = immlo;
-
-  if (xhi)
-    *xhi = immhi;
-}
-
  static void
  do_neon_logic (void)
  {
  static void
  do_neon_logic (void)
  {
@@ -14257,6 +15029,38 @@ do_neon_qdmulh (void)
      }
  }
  
      }
  }
  
+static void  /* Encode a Neon vqrdmlah-class insn (by-scalar or three-same form).  */
+do_neon_qrdmlah (void)
+{
+  /* Check we're on the correct architecture.  */
+  if (!mark_feature_used (&fpu_neon_ext_armv8))
+    inst.error =
+      _("instruction form not available on this architecture.");  /* NOTE(review): no return here; encoding below still runs.  */
+  else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+    {  /* ARMv8 Neon check passed but the v8.1 one did not: warn, then record the use.  */
+      as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
+      record_feature_use (&fpu_neon_ext_v8_1);
+    }
+
+  if (inst.operands[2].isscalar)
+    {  /* Third operand is a scalar: shapes DDS or QQS.  */
+      enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+      struct neon_type_el et = neon_check_type (3, rs,
+       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);  /* Key type mask: N_S16 | N_S32.  */
+      NEON_ENCODE (SCALAR, inst);
+      neon_mul_mac (et, neon_quad (rs));
+    }
+  else
+    {  /* Third operand is not a scalar: shapes DDD or QQQ.  */
+      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      struct neon_type_el et = neon_check_type (3, rs,
+       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);  /* Same type constraints as the scalar form.  */
+      NEON_ENCODE (INTEGER, inst);
+      /* The U bit (rounding) comes from the bit mask, so 0 is passed here.  */
+      neon_three_same (neon_quad (rs), 0, et.size);
+    }
+}
+
  static void
  do_neon_fcmp_absolute (void)
  {
  static void
  do_neon_fcmp_absolute (void)
  {
@@ -14505,6 +15309,13 @@ do_neon_shll (void)
    /* Half-precision conversions.  */                                         \
    CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
    CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
    /* Half-precision conversions.  */                                         \
    CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
    CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
+  /* New VCVT instructions introduced by ARMv8.2 fp16 extension.             \
+     Compared with single/double precision variants, only the co-processor    \
+     field is different, so the encoding flow is reused here.  */            \
+  CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL)    \
+  CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
+  CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
+  CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
@@ -14579,7 +15390,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  {
    const char *opname = 0;
  
  {
    const char *opname = 0;
  
-  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI)
+  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI
+      || rs == NS_FHI || rs == NS_HFI)
      {
        /* Conversions with immediate bitshift.  */
        const char *enc[] =
      {
        /* Conversions with immediate bitshift.  */
        const char *enc[] =
@@ -14616,12 +15428,19 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  
    if (opname)
      do_vfp_nsyn_opcode (opname);
  
    if (opname)
      do_vfp_nsyn_opcode (opname);
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16
+      || flavour == neon_cvt_flavour_f16_u32
+      || flavour == neon_cvt_flavour_f16_s32)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
  do_vfp_nsyn_cvtz (void)
  {
  }
  
  static void
  do_vfp_nsyn_cvtz (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
    const char *enc[] =
      {
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
    const char *enc[] =
      {
@@ -14642,18 +15461,34 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
    int sz, op;
    int rm;
  
    int sz, op;
    int rm;
  
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (flavour == neon_cvt_flavour_s32_f64
+      || flavour == neon_cvt_flavour_u32_f64)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+               _(BAD_FP16));
+
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
      {
      case neon_cvt_flavour_s32_f64:
        sz = 1;
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
      {
      case neon_cvt_flavour_s32_f64:
        sz = 1;
-      op = 0;
+      op = 1;
        break;
      case neon_cvt_flavour_s32_f32:
        sz = 0;
        op = 1;
        break;
        break;
      case neon_cvt_flavour_s32_f32:
        sz = 0;
        op = 1;
        break;
+    case neon_cvt_flavour_s32_f16:
+      sz = 0;
+      op = 1;
+      break;
      case neon_cvt_flavour_u32_f64:
        sz = 1;
        op = 0;
      case neon_cvt_flavour_u32_f64:
        sz = 1;
        op = 0;
@@ -14662,6 +15497,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
        sz = 0;
        op = 0;
        break;
        sz = 0;
        op = 0;
        break;
+    case neon_cvt_flavour_u32_f16:
+      sz = 0;
+      op = 0;
+      break;
      default:
        first_error (_("invalid instruction shape"));
        return;
      default:
        first_error (_("invalid instruction shape"));
        return;
@@ -14680,6 +15519,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
    encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm);
    inst.instruction |= sz << 8;
    encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm);
    inst.instruction |= sz << 8;
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      ||flavour == neon_cvt_flavour_u32_f16)
+    do_scalar_fp16_v82_encode ();
    inst.instruction |= op << 7;
    inst.instruction |= rm << 16;
    inst.instruction |= 0xf0000000;
    inst.instruction |= op << 7;
    inst.instruction |= rm << 16;
    inst.instruction |= 0xf0000000;
@@ -14690,7 +15534,9 @@ static void
  do_neon_cvt_1 (enum neon_cvt_mode mode)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
  do_neon_cvt_1 (enum neon_cvt_mode mode)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
-    NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL);
+                                         NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ,
+                                         NS_FH, NS_HF, NS_FHI, NS_HFI,
+                                         NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
  
    /* PR11109: Handle round-to-zero for VCVT conversions.  */
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
  
    /* PR11109: Handle round-to-zero for VCVT conversions.  */
@@ -14706,6 +15552,18 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
        return;
      }
  
        return;
      }
  
+  /* ARMv8.2 fp16 VCVT conversions.  */
+  if (mode == neon_cvt_mode_z
+      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)
+      && (flavour == neon_cvt_flavour_s32_f16
+         || flavour == neon_cvt_flavour_u32_f16)
+      && (rs == NS_FH))
+    {
+      do_vfp_nsyn_cvtz ();
+      do_scalar_fp16_v82_encode ();
+      return;
+    }
+
    /* VFP rather than Neon conversions.  */
    if (flavour >= neon_cvt_flavour_first_fp)
      {
    /* VFP rather than Neon conversions.  */
    if (flavour >= neon_cvt_flavour_first_fp)
      {
@@ -14890,7 +15748,8 @@ do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double)
  static void
  do_neon_cvttb_1 (bfd_boolean t)
  {
  static void
  do_neon_cvttb_1 (bfd_boolean t)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_DF, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD,
+                                         NS_DF, NS_DH, NS_NULL);
  
    if (rs == NS_NULL)
      return;
  
    if (rs == NS_NULL)
      return;
@@ -14906,11 +15765,21 @@ do_neon_cvttb_1 (bfd_boolean t)
      }
    else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype)
      {
      }
    else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype)
      {
+      /* The VCVTB and VCVTT instructions with D-register operands
+         don't work for SP only targets.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE);
      }
    else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype)
      {
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE);
      }
    else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype)
      {
+      /* The VCVTB and VCVTT instructions with D-register operands
+         don't work for SP only targets.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
      }
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
      }
@@ -15260,8 +16129,9 @@ static void
  do_neon_mov (void)
  {
    enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
  do_neon_mov (void)
  {
    enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
-    NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
-    NS_NULL);
+                                         NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR,
+                                         NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
+                                         NS_HR, NS_RH, NS_HI, NS_NULL);
    struct neon_type_el et;
    const char *ldconst = 0;
  
    struct neon_type_el et;
    const char *ldconst = 0;
  
@@ -15439,6 +16309,7 @@ do_neon_mov (void)
        do_vfp_nsyn_opcode ("fcpys");
        break;
  
        do_vfp_nsyn_opcode ("fcpys");
        break;
  
+    case NS_HI:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
        encode_fconstd:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
        encode_fconstd:
@@ -15446,17 +16317,29 @@ do_neon_mov (void)
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
           do_vfp_nsyn_opcode (ldconst);
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
           do_vfp_nsyn_opcode (ldconst);
+
+         /* ARMv8.2 fp16 vmov.f16 instruction.  */
+         if (rs == NS_HI)
+           do_scalar_fp16_v82_encode ();
         }
        else
         first_error (_("immediate out of range"));
        break;
  
         }
        else
         first_error (_("immediate out of range"));
        break;
  
+    case NS_RH:
      case NS_RF:  /* case 12 (fmrs).  */
        do_vfp_nsyn_opcode ("fmrs");
      case NS_RF:  /* case 12 (fmrs).  */
        do_vfp_nsyn_opcode ("fmrs");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_RH)
+       do_scalar_fp16_v82_encode ();
        break;
  
        break;
  
+    case NS_HR:
      case NS_FR:  /* case 13 (fmsr).  */
        do_vfp_nsyn_opcode ("fmsr");
      case NS_FR:  /* case 13 (fmsr).  */
        do_vfp_nsyn_opcode ("fmsr");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_HR)
+       do_scalar_fp16_v82_encode ();
        break;
  
      /* The encoders for the fmrrs and fmsrr instructions expect three operands
        break;
  
      /* The encoders for the fmrrs and fmsrr instructions expect three operands
@@ -15506,10 +16389,25 @@ do_neon_rshift_round_imm (void)
        return;
      }
  
        return;
      }
  
-  constraint (imm < 1 || (unsigned)imm > et.size,
-             _("immediate out of range for shift"));
-  neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
-                 et.size - imm);
+  constraint (imm < 1 || (unsigned)imm > et.size,
+             _("immediate out of range for shift"));
+  neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
+                 et.size - imm);
+}
+
+/* Encode an instruction that accepts only the NS_HH operand shape.  Any
+   other shape is rejected with "invalid suffix", and the instruction is
+   rejected with BAD_FPU unless cpu_variant has fpu_vfp_ext_armv8.
+   NOTE(review): NS_HH presumably denotes two half-precision registers;
+   confirm against the neon_shape definitions.  */
+
+static void
+do_neon_movhf (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL);
+  constraint (rs != NS_HH, _("invalid suffix"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+             _(BAD_FPU));
+
+  /* Reuse do_vfp_sp_monadic, which do_vrint_1 also uses for the NS_FF and
+     NS_HH shapes.  */
+  do_vfp_sp_monadic ();
+
+  inst.is_neon = 1;
+  /* Set bits 28-31 of the encoding.  For ARM mode, md_assemble puts the
+     condition code (or 0xF for OT_unconditionalF opcodes) in those bits.  */
+  inst.instruction |= 0xf0000000;
  }
  
  static void
  }
  
  static void
@@ -15679,7 +16577,7 @@ do_neon_ldr_str (void)
        if (thumb_mode)
         inst.error = _("Use of PC here is UNPREDICTABLE");
        else if (warn_on_deprecated)
        if (thumb_mode)
         inst.error = _("Use of PC here is UNPREDICTABLE");
        else if (warn_on_deprecated)
-       as_warn (_("Use of PC here is deprecated"));
+       as_tsktsk (_("Use of PC here is deprecated"));
      }
  
    if (inst.operands[0].issingle)
      }
  
    if (inst.operands[0].issingle)
@@ -15688,6 +16586,10 @@ do_neon_ldr_str (void)
         do_vfp_nsyn_opcode ("flds");
        else
         do_vfp_nsyn_opcode ("fsts");
         do_vfp_nsyn_opcode ("flds");
        else
         do_vfp_nsyn_opcode ("fsts");
+
+      /* ARMv8.2 vldr.16/vstr.16 instruction.  */
+      if (inst.vectype.el[0].size == 16)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -15760,6 +16662,8 @@ do_neon_ld_st_interleave (void)
    typebits = typetable[idx];
  
    constraint (typebits == -1, _("bad list type for instruction"));
    typebits = typetable[idx];
  
    constraint (typebits == -1, _("bad list type for instruction"));
+  constraint (((inst.instruction >> 8) & 3) && et.size == 64,
+             _("bad element type for instruction"));
  
    inst.instruction &= ~0xf00;
    inst.instruction |= typebits << 8;
  
    inst.instruction &= ~0xf00;
    inst.instruction |= typebits << 8;
@@ -16010,12 +16914,20 @@ do_neon_ldx_stx (void)
                   _("bad register for post-index"));
        inst.instruction |= postreg;
      }
                   _("bad register for post-index"));
        inst.instruction |= postreg;
      }
-  else if (inst.operands[1].writeback)
+  else
      {
      {
-      inst.instruction |= 0xd;
+      constraint (inst.operands[1].immisreg, BAD_ADDR_MODE);
+      constraint (inst.reloc.exp.X_op != O_constant
+                 || inst.reloc.exp.X_add_number != 0,
+                 BAD_ADDR_MODE);
+
+      if (inst.operands[1].writeback)
+       {
+         inst.instruction |= 0xd;
+       }
+      else
+       inst.instruction |= 0xf;
      }
      }
-  else
-    inst.instruction |= 0xf;
  
    if (thumb_mode)
      inst.instruction |= 0xf9000000;
  
    if (thumb_mode)
      inst.instruction |= 0xf9000000;
@@ -16027,10 +16939,22 @@ do_neon_ldx_stx (void)
  static void
  do_vfp_nsyn_fpv8 (enum neon_shape rs)
  {
  static void
  do_vfp_nsyn_fpv8 (enum neon_shape rs)
  {
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (neon_shape_class[rs] == SC_DOUBLE)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
    NEON_ENCODE (FPV8, inst);
  
    NEON_ENCODE (FPV8, inst);
  
-  if (rs == NS_FFF)
-    do_vfp_sp_dyadic ();
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_dp_rd_rn_rm ();
  
    else
      do_vfp_dp_rd_rn_rm ();
  
@@ -16066,13 +16990,20 @@ do_vmaxnm (void)
  static void
  do_vrint_1 (enum neon_cvt_mode mode)
  {
  static void
  do_vrint_1 (enum neon_cvt_mode mode)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_QQ, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL);
    struct neon_type_el et;
  
    if (rs == NS_NULL)
      return;
  
    struct neon_type_el et;
  
    if (rs == NS_NULL)
      return;
  
-  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (neon_shape_class[rs] == SC_DOUBLE)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
+  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY
+                       | N_VFP);
    if (et.type != NT_invtype)
      {
        /* VFP encodings.  */
    if (et.type != NT_invtype)
      {
        /* VFP encodings.  */
@@ -16081,7 +17012,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
         set_it_insn_type (OUTSIDE_IT_INSN);
  
        NEON_ENCODE (FPV8, inst);
         set_it_insn_type (OUTSIDE_IT_INSN);
  
        NEON_ENCODE (FPV8, inst);
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         do_vfp_sp_monadic ();
        else
         do_vfp_dp_rd_rm ();
         do_vfp_sp_monadic ();
        else
         do_vfp_dp_rd_rm ();
@@ -16100,6 +17031,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
  
        inst.instruction |= (rs == NS_DD) << 8;
        do_vfp_cond_or_thumb ();
  
        inst.instruction |= (rs == NS_DD) << 8;
        do_vfp_cond_or_thumb ();
+
+      /* ARMv8.2 fp16 vrint instruction.  */
+      if (rs == NS_HH)
+      do_scalar_fp16_v82_encode ();
      }
    else
      {
      }
    else
      {
@@ -16684,7 +17619,7 @@ opcode_lookup (char **str)
         }
  
        if (warn_on_deprecated && unified_syntax)
         }
  
        if (warn_on_deprecated && unified_syntax)
-       as_warn (_("conditional infixes are deprecated in unified syntax"));
+       as_tsktsk (_("conditional infixes are deprecated in unified syntax"));
        affix = base + (opcode->tag - OT_odd_infix_0);
        cond = (const struct asm_cond *) hash_find_n (arm_cond_hsh, affix, 2);
        gas_assert (cond);
        affix = base + (opcode->tag - OT_odd_infix_0);
        cond = (const struct asm_cond *) hash_find_n (arm_cond_hsh, affix, 2);
        gas_assert (cond);
@@ -16770,7 +17705,7 @@ opcode_lookup (char **str)
        if (warn_on_deprecated && unified_syntax
           && (opcode->tag == OT_cinfix3
               || opcode->tag == OT_cinfix3_deprecated))
        if (warn_on_deprecated && unified_syntax
           && (opcode->tag == OT_cinfix3
               || opcode->tag == OT_cinfix3_deprecated))
-       as_warn (_("conditional infixes are deprecated in unified syntax"));
+       as_tsktsk (_("conditional infixes are deprecated in unified syntax"));
  
        inst.cond = cond->value;
        return opcode;
  
        inst.cond = cond->value;
        return opcode;
@@ -16927,7 +17862,7 @@ handle_it_state (void)
           else
             {
               if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
           else
             {
               if ((implicit_it_mode & IMPLICIT_IT_MODE_THUMB)
-                 && ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2))
+                 && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2))
                 {
                   /* Automatically generate the IT instruction.  */
                   new_automatic_it_block (inst.cond);
                 {
                   /* Automatically generate the IT instruction.  */
                   new_automatic_it_block (inst.cond);
@@ -17079,6 +18014,9 @@ static const struct depr_insn_mask depr_it_insns[] = {
    { 0x4800, 0xf800, N_("Literal loads") },
    { 0x4478, 0xf478, N_("Hi-register ADD, MOV, CMP, BX, BLX using pc") },
    { 0x4487, 0xfc87, N_("Hi-register ADD, MOV, CMP using pc") },
    { 0x4800, 0xf800, N_("Literal loads") },
    { 0x4478, 0xf478, N_("Hi-register ADD, MOV, CMP, BX, BLX using pc") },
    { 0x4487, 0xfc87, N_("Hi-register ADD, MOV, CMP using pc") },
+  /* NOTE: 0x00dd is not the real encoding; it is the 'tvalue' field in
+     asm_opcode.  'tvalue' is what is used at the stage where this check
+     happens.  */
+  { 0x00dd, 0x7fff, N_("ADD/SUB sp, sp #imm") },
    { 0, 0, NULL }
  };
  
    { 0, 0, NULL }
  };
  
@@ -17097,7 +18035,7 @@ it_fsm_post_encode (void)
      {
        if (inst.instruction >= 0x10000)
         {
      {
        if (inst.instruction >= 0x10000)
         {
-         as_warn (_("IT blocks containing 32-bit Thumb instructions are "
+         as_tsktsk (_("IT blocks containing 32-bit Thumb instructions are "
                      "deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
                      "deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
@@ -17109,7 +18047,7 @@ it_fsm_post_encode (void)
             {
               if ((inst.instruction & p->mask) == p->pattern)
                 {
             {
               if ((inst.instruction & p->mask) == p->pattern)
                 {
-                 as_warn (_("IT blocks containing 16-bit Thumb instructions "
+                 as_tsktsk (_("IT blocks containing 16-bit Thumb instructions "
                              "of the following class are deprecated in ARMv8: "
                              "%s"), p->description);
                   now_it.warn_deprecated = TRUE;
                              "of the following class are deprecated in ARMv8: "
                              "%s"), p->description);
                   now_it.warn_deprecated = TRUE;
@@ -17122,7 +18060,7 @@ it_fsm_post_encode (void)
  
        if (now_it.block_length > 1)
         {
  
        if (now_it.block_length > 1)
         {
-         as_warn (_("IT blocks containing more than one conditional "
+         as_tsktsk (_("IT blocks containing more than one conditional "
                      "instruction are deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
                      "instruction are deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
@@ -17156,6 +18094,56 @@ in_it_block (void)
    return now_it.state != OUTSIDE_IT_BLOCK;
  }
  
    return now_it.state != OUTSIDE_IT_BLOCK;
  }
  
+/* Return TRUE if OPCODE is known to have only a T32 encoding.  Since this
+   function is only used by t32_insn_ok, opcodes enabled by the v6t2
+   extension bit do not need to be listed here, hence the "known" in the
+   function name.  */
+
+static bfd_boolean
+known_t32_only_insn (const struct asm_opcode *opcode)
+{
+  /* Wide instructions from the original Thumb-1 set: anything encoded by
+     do_t_blx or do_t_branch23, or whose Thumb variant tests positive for
+     the arm_ext_msr or arm_ext_barrier feature.  */
+  if (opcode->tencode == do_t_blx
+      || opcode->tencode == do_t_branch23
+      || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_msr)
+      || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_barrier))
+    return TRUE;
+
+  /* Wide-only instructions added to ARMv8-M, recognised from the feature
+     bits of the opcode's Thumb variant.  */
+  if (ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_v8m)
+      || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_atomics)
+      || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_v6t2_v8m)
+      || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_div))
+    return TRUE;
+
+  return FALSE;
+}
+
+/* Return TRUE if the wide instruction variant of OPCODE, where one exists,
+   can be used in ARCH.  OPCODE is expected to be valid for ARCH already.
+   Opcodes reported by known_t32_only_insn are always accepted.  */
+
+static bfd_boolean
+t32_insn_ok (arm_feature_set arch, const struct asm_opcode *opcode)
+{
+  if (known_t32_only_insn (opcode))
+    return TRUE;
+
+  /* Instruction with both narrow and wide encodings added to ARMv8-M.
+     Availability of variant T3 of B.W is checked in do_t_branch.  */
+  if (ARM_CPU_HAS_FEATURE (arch, arm_ext_v8m)
+      && opcode->tencode == do_t_branch)
+    return TRUE;
+
+  /* Wide instruction variants of all instructions with narrow *and* wide
+     variants become available with ARMv6t2.  Other opcodes are either
+     narrow-only or wide-only and are thus available if OPCODE is valid.  */
+  if (ARM_CPU_HAS_FEATURE (arch, arm_ext_v6t2))
+    return TRUE;
+
+  /* Either OPCODE has only a narrow instruction variant, or its wide
+     variant is not available in ARCH.  */
+  return FALSE;
+}
+
  void
  md_assemble (char *str)
  {
  void
  md_assemble (char *str)
  {
@@ -17186,7 +18174,7 @@ md_assemble (char *str)
      }
  
    if (warn_on_deprecated && opcode->tag == OT_cinfix3_deprecated)
      }
  
    if (warn_on_deprecated && opcode->tag == OT_cinfix3_deprecated)
-    as_warn (_("s suffix on comparison instruction is deprecated"));
+    as_tsktsk (_("s suffix on comparison instruction is deprecated"));
  
    /* The value which unconditional instructions should have in place of the
       condition field.  */
  
    /* The value which unconditional instructions should have in place of the
       condition field.  */
@@ -17205,7 +18193,7 @@ md_assemble (char *str)
           || (thumb_mode == 1
               && !ARM_CPU_HAS_FEATURE (variant, *opcode->tvariant)))
         {
           || (thumb_mode == 1
               && !ARM_CPU_HAS_FEATURE (variant, *opcode->tvariant)))
         {
-         as_bad (_("selected processor does not support Thumb mode `%s'"), str);
+         as_bad (_("selected processor does not support `%s' in Thumb mode"), str);
           return;
         }
        if (inst.cond != COND_ALWAYS && !unified_syntax
           return;
         }
        if (inst.cond != COND_ALWAYS && !unified_syntax
@@ -17215,24 +18203,28 @@ md_assemble (char *str)
           return;
         }
  
           return;
         }
  
-      if (!ARM_CPU_HAS_FEATURE (variant, arm_ext_v6t2))
+      /* Two things are addressed here:
+        1) Implicitly require narrow instructions on Thumb-1.
+           This avoids relaxation accidentally introducing Thumb-2
+           instructions.
+        2) Reject wide instructions on non-Thumb-2 cores.
+
+        Only instructions with narrow and wide variants need to be handled
+        but selecting all non wide-only instructions is easier.  */
+      if (!ARM_CPU_HAS_FEATURE (variant, arm_ext_v6t2)
+         && !t32_insn_ok (variant, opcode))
         {
         {
-         if (opcode->tencode != do_t_blx && opcode->tencode != do_t_branch23
-             && !(ARM_CPU_HAS_FEATURE(*opcode->tvariant, arm_ext_msr)
-                  || ARM_CPU_HAS_FEATURE(*opcode->tvariant, arm_ext_barrier)))
+         if (inst.size_req == 0)
+           inst.size_req = 2;
+         else if (inst.size_req == 4)
             {
             {
-             /* Two things are addressed here.
-                1) Implicit require narrow instructions on Thumb-1.
-                   This avoids relaxation accidentally introducing Thumb-2
-                    instructions.
-                2) Reject wide instructions in non Thumb-2 cores.  */
-             if (inst.size_req == 0)
-               inst.size_req = 2;
-             else if (inst.size_req == 4)
-               {
-                 as_bad (_("selected processor does not support Thumb-2 mode `%s'"), str);
-                 return;
-               }
+             if (ARM_CPU_HAS_FEATURE (variant, arm_ext_v8m))
+               as_bad (_("selected processor does not support 32bit wide "
+                         "variant of instruction `%s'"), str);
+             else
+               as_bad (_("selected processor does not support `%s' in "
+                         "Thumb-2 mode"), str);
+             return;
             }
         }
  
             }
         }
  
@@ -17267,13 +18259,14 @@ md_assemble (char *str)
        ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                               *opcode->tvariant);
        /* Many Thumb-2 instructions also have Thumb-1 variants, so explicitly
        ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                               *opcode->tvariant);
        /* Many Thumb-2 instructions also have Thumb-1 variants, so explicitly
-        set those bits when Thumb-2 32-bit instructions are seen.  ie.
-        anything other than bl/blx and v6-M instructions.
-        This is overly pessimistic for relaxable instructions.  */
-      if (((inst.size == 4 && (inst.instruction & 0xf800e800) != 0xf000e800)
-          || inst.relax)
-         && !(ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_msr)
-              || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_barrier)))
+        set those bits when Thumb-2 32-bit instructions are seen.  The impact
+        of relaxable instructions will be considered later after we finish all
+        relaxation.  */
+      if (ARM_FEATURE_CORE_EQUAL (cpu_variant, arm_arch_any))
+       variant = arm_arch_none;
+      else
+       variant = cpu_variant;
+      if (inst.size == 4 && !t32_insn_ok (variant, opcode))
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                                 arm_ext_v6t2);
  
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
                                 arm_ext_v6t2);
  
@@ -17296,7 +18289,7 @@ md_assemble (char *str)
           && !(opcode->avariant &&
                ARM_CPU_HAS_FEATURE (cpu_variant, *opcode->avariant)))
         {
           && !(opcode->avariant &&
                ARM_CPU_HAS_FEATURE (cpu_variant, *opcode->avariant)))
         {
-         as_bad (_("selected processor does not support ARM mode `%s'"), str);
+         as_bad (_("selected processor does not support `%s' in ARM mode"), str);
           return;
         }
        if (inst.size_req)
           return;
         }
        if (inst.size_req)
@@ -17307,7 +18300,7 @@ md_assemble (char *str)
  
        inst.instruction = opcode->avalue;
        if (opcode->tag == OT_unconditionalF)
  
        inst.instruction = opcode->avalue;
        if (opcode->tag == OT_unconditionalF)
-       inst.instruction |= 0xF << 28;
+       inst.instruction |= 0xFU << 28;
        else
         inst.instruction |= inst.cond << 28;
        inst.size = INSN_SIZE;
        else
         inst.instruction |= inst.cond << 28;
        inst.size = INSN_SIZE;
@@ -17759,8 +18752,8 @@ static const struct asm_cond conds[] =
  };
  
  #define UL_BARRIER(L,U,CODE,FEAT) \
  };
  
  #define UL_BARRIER(L,U,CODE,FEAT) \
-  { L, CODE, ARM_FEATURE (FEAT, 0) }, \
-  { U, CODE, ARM_FEATURE (FEAT, 0) }
+  { L, CODE, ARM_FEATURE_CORE_LOW (FEAT) }, \
+  { U, CODE, ARM_FEATURE_CORE_LOW (FEAT) }
  
  static struct asm_barrier_opt barrier_opt_names[] =
  {
  
  static struct asm_barrier_opt barrier_opt_names[] =
  {
@@ -17963,8 +18956,8 @@ static struct asm_barrier_opt barrier_opt_names[] =
  
  static const struct asm_opcode insns[] =
  {
  
  static const struct asm_opcode insns[] =
  {
-#define ARM_VARIANT &arm_ext_v1 /* Core ARM Instructions.  */
-#define THUMB_VARIANT &arm_ext_v4t
+#define ARM_VARIANT    & arm_ext_v1 /* Core ARM Instructions.  */
+#define THUMB_VARIANT  & arm_ext_v4t
   tCE("and",    0000000, _and,     3, (RR, oRR, SH), arit, t_arit3c),
   tC3("ands",   0100000, _ands,    3, (RR, oRR, SH), arit, t_arit3c),
   tCE("eor",    0200000, _eor,     3, (RR, oRR, SH), arit, t_arit3c),
   tCE("and",    0000000, _and,     3, (RR, oRR, SH), arit, t_arit3c),
   tC3("ands",   0100000, _ands,    3, (RR, oRR, SH), arit, t_arit3c),
   tCE("eor",    0200000, _eor,     3, (RR, oRR, SH), arit, t_arit3c),
@@ -17996,7 +18989,7 @@ static const struct asm_opcode insns[] =
    CL("cmnp",   170f000,           2, (RR, SH),      cmp),
  
   tCE("mov",    1a00000, _mov,     2, (RR, SH),      mov,  t_mov_cmp),
    CL("cmnp",   170f000,           2, (RR, SH),      cmp),
  
   tCE("mov",    1a00000, _mov,     2, (RR, SH),      mov,  t_mov_cmp),
- tC3("movs",   1b00000, _movs,    2, (RR, SH),      mov,  t_mov_cmp),
+ tC3("movs",   1b00000, _movs,    2, (RR, SHG),     mov,  t_mov_cmp),
   tCE("mvn",    1e00000, _mvn,     2, (RR, SH),      mov,  t_mvn_tst),
   tC3("mvns",   1f00000, _mvns,    2, (RR, SH),      mov,  t_mvn_tst),
  
   tCE("mvn",    1e00000, _mvn,     2, (RR, SH),      mov,  t_mvn_tst),
   tC3("mvns",   1f00000, _mvns,    2, (RR, SH),      mov,  t_mvn_tst),
  
@@ -18023,6 +19016,7 @@ static const struct asm_opcode insns[] =
   tCE("adr",    28f0000, _adr,     2, (RR, EXP),     adr,  t_adr),
    C3(adrl,     28f0000,           2, (RR, EXP),     adrl),
   tCE("nop",    1a00000, _nop,     1, (oI255c),      nop,  t_nop),
   tCE("adr",    28f0000, _adr,     2, (RR, EXP),     adr,  t_adr),
    C3(adrl,     28f0000,           2, (RR, EXP),     adrl),
   tCE("nop",    1a00000, _nop,     1, (oI255c),      nop,  t_nop),
+ tCE("udf",    7f000f0, _udf,     1, (oIffffb),     bkpt, t_udf),
  
    /* Thumb-compatibility pseudo ops.  */
   tCE("lsl",    1a00000, _lsl,     3, (RR, oRR, SH), shift, t_shift),
  
    /* Thumb-compatibility pseudo ops.  */
   tCE("lsl",    1a00000, _lsl,     3, (RR, oRR, SH), shift, t_shift),
@@ -18173,9 +19167,9 @@ static const struct asm_opcode insns[] =
   TUF("mrc2",   e100010, fe100010, 6, (RCP, I7b, RR, RCN, RCN, oI7b),   co_reg, co_reg),
  
  #undef  ARM_VARIANT
   TUF("mrc2",   e100010, fe100010, 6, (RCP, I7b, RR, RCN, RCN, oI7b),   co_reg, co_reg),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT  & arm_ext_v5exp /*  ARM Architecture 5TExP.  */
-#undef THUMB_VARIANT
-#define THUMB_VARIANT &arm_ext_v5exp
+#define ARM_VARIANT    & arm_ext_v5exp /*  ARM Architecture 5TExP.  */
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v5exp
  
   TCE("smlabb", 1000080, fb100000, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
   TCE("smlatb", 10000a0, fb100020, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
  
   TCE("smlabb", 1000080, fb100000, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
   TCE("smlatb", 10000a0, fb100020, 4, (RRnpc, RRnpc, RRnpc, RRnpc),   smla, t_mla),
@@ -18204,9 +19198,9 @@ static const struct asm_opcode insns[] =
   TCE("qdsub",  1600050, fa80f0b0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
  
  #undef  ARM_VARIANT
   TCE("qdsub",  1600050, fa80f0b0, 3, (RRnpc, RRnpc, RRnpc),        rd_rm_rn, t_simd2),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT  & arm_ext_v5e /*  ARM Architecture 5TE.  */
-#undef THUMB_VARIANT
-#define THUMB_VARIANT &arm_ext_v6t2
+#define ARM_VARIANT    & arm_ext_v5e /*  ARM Architecture 5TE.  */
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
  
   TUF("pld",    450f000, f810f000, 1, (ADDR),                pld,  t_pld),
   TC3("ldrd",   00000d0, e8500000, 3, (RRnpc_npcsp, oRRnpc_npcsp, ADDRGLDRS),
  
   TUF("pld",    450f000, f810f000, 1, (ADDR),                pld,  t_pld),
   TC3("ldrd",   00000d0, e8500000, 3, (RRnpc_npcsp, oRRnpc_npcsp, ADDRGLDRS),
@@ -18239,11 +19233,14 @@ static const struct asm_opcode insns[] =
   TUF("setend",    1010000, b650,     1, (ENDI),                     setend, t_setend),
  
  #undef  THUMB_VARIANT
   TUF("setend",    1010000, b650,     1, (ENDI),                     setend, t_setend),
  
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT  & arm_ext_v6t2
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
  
   TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc_npcsp, ADDR),        ldrex, t_ldrex),
   TCE("strex",  1800f90, e8400000, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
                                       strex,  t_strex),
  
   TCE("ldrex",  1900f9f, e8500f00, 2, (RRnpc_npcsp, ADDR),        ldrex, t_ldrex),
   TCE("strex",  1800f90, e8400000, 3, (RRnpc_npcsp, RRnpc_npcsp, ADDR),
                                       strex,  t_strex),
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
   TUF("mcrr2",  c400000, fc400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TUF("mrrc2",  c500000, fc500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
  
   TUF("mcrr2",  c400000, fc400000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
   TUF("mrrc2",  c500000, fc500000, 5, (RCP, I15b, RRnpc, RRnpc, RCN), co_reg2c, co_reg2c),
  
@@ -18271,51 +19268,51 @@ static const struct asm_opcode insns[] =
    UF(srsed,    8400500,           2, (oRRw, I31w),                srs),
   TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
   TUF("srsfd",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
    UF(srsed,    8400500,           2, (oRRw, I31w),                srs),
   TUF("srsdb",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
   TUF("srsfd",  9400500, e800c000, 2, (oRRw, I31w),                srs,  srs),
+ TUF("cps",    1020000, f3af8100, 1, (I31b),                     imm0, t_cps),
  
  /*  ARM V6 not included in V7M (eg. integer SIMD).  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_dsp
  
  /*  ARM V6 not included in V7M (eg. integer SIMD).  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6_dsp
- TUF("cps",    1020000, f3af8100, 1, (I31b),                     imm0, t_cps),
   TCE("pkhbt",  6800010, eac00000, 4, (RRnpc, RRnpc, RRnpc, oSHll),   pkhbt, t_pkhbt),
   TCE("pkhtb",  6800050, eac00020, 4, (RRnpc, RRnpc, RRnpc, oSHar),   pkhtb, t_pkhtb),
   TCE("qadd16", 6200f10, fa90f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qadd8",  6200f90, fa80f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qasx",   6200f30, faa0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for QASX.  */
   TCE("pkhbt",  6800010, eac00000, 4, (RRnpc, RRnpc, RRnpc, oSHll),   pkhbt, t_pkhbt),
   TCE("pkhtb",  6800050, eac00020, 4, (RRnpc, RRnpc, RRnpc, oSHar),   pkhtb, t_pkhtb),
   TCE("qadd16", 6200f10, fa90f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qadd8",  6200f90, fa80f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qasx",   6200f30, faa0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for QASX.  */
- TCE("qaddsubx",       6200f30, faa0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("qaddsubx",6200f30, faa0f010, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("qsax",   6200f50, fae0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for QSAX.  */
   TCE("qsax",   6200f50, fae0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for QSAX.  */
- TCE("qsubaddx",       6200f50, fae0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("qsubaddx",6200f50, fae0f010, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("qsub16", 6200f70, fad0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qsub8",  6200ff0, fac0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sadd16", 6100f10, fa90f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sadd8",  6100f90, fa80f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sasx",   6100f30, faa0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for SASX.  */
   TCE("qsub16", 6200f70, fad0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("qsub8",  6200ff0, fac0f010, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sadd16", 6100f10, fa90f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sadd8",  6100f90, fa80f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sasx",   6100f30, faa0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for SASX.  */
- TCE("saddsubx",       6100f30, faa0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("saddsubx",6100f30, faa0f000, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("shadd16",        6300f10, fa90f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("shadd8", 6300f90, fa80f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("shadd16",        6300f10, fa90f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("shadd8", 6300f90, fa80f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
- TCE("shasx",     6300f30, faa0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
+ TCE("shasx",   6300f30, faa0f020, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   /* Old name for SHASX.  */
   TCE("shaddsubx", 6300f30, faa0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   /* Old name for SHASX.  */
   TCE("shaddsubx", 6300f30, faa0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
- TCE("shsax",      6300f50, fae0f020, 3, (RRnpc, RRnpc, RRnpc),           rd_rn_rm, t_simd),
+ TCE("shsax",     6300f50, fae0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   /* Old name for SHSAX.  */
   TCE("shsubaddx", 6300f50, fae0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("shsub16",        6300f70, fad0f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("shsub8", 6300ff0, fac0f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("ssax",   6100f50, fae0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for SSAX.  */
   /* Old name for SHSAX.  */
   TCE("shsubaddx", 6300f50, fae0f020, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("shsub16",        6300f70, fad0f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("shsub8", 6300ff0, fac0f020, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("ssax",   6100f50, fae0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for SSAX.  */
- TCE("ssubaddx",       6100f50, fae0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("ssubaddx",6100f50, fae0f000, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("ssub16", 6100f70, fad0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("ssub8",  6100ff0, fac0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uadd16", 6500f10, fa90f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uadd8",  6500f90, fa80f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uasx",   6500f30, faa0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for UASX.  */
   TCE("ssub16", 6100f70, fad0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("ssub8",  6100ff0, fac0f000, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uadd16", 6500f10, fa90f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uadd8",  6500f90, fa80f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uasx",   6500f30, faa0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for UASX.  */
- TCE("uaddsubx",       6500f30, faa0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("uaddsubx",6500f30, faa0f040, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("uhadd16",        6700f10, fa90f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uhadd8", 6700f90, fa80f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uhadd16",        6700f10, fa90f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uhadd8", 6700f90, fa80f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
- TCE("uhasx",     6700f30, faa0f060, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
+ TCE("uhasx",   6700f30, faa0f060, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   /* Old name for UHASX.  */
   TCE("uhaddsubx", 6700f30, faa0f060, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("uhsax",     6700f50, fae0f060, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   /* Old name for UHASX.  */
   TCE("uhaddsubx", 6700f30, faa0f060, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("uhsax",     6700f50, fae0f060, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
@@ -18325,7 +19322,7 @@ static const struct asm_opcode insns[] =
   TCE("uhsub8", 6700ff0, fac0f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uqadd16",        6600f10, fa90f050, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uqadd8", 6600f90, fa80f050, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uhsub8", 6700ff0, fac0f060, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uqadd16",        6600f10, fa90f050, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("uqadd8", 6600f90, fa80f050, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
- TCE("uqasx",     6600f30, faa0f050, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
+ TCE("uqasx",   6600f30, faa0f050, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   /* Old name for UQASX.  */
   TCE("uqaddsubx", 6600f30, faa0f050, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("uqsax",     6600f50, fae0f050, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   /* Old name for UQASX.  */
   TCE("uqaddsubx", 6600f30, faa0f050, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
   TCE("uqsax",     6600f50, fae0f050, 3, (RRnpc, RRnpc, RRnpc),    rd_rn_rm, t_simd),
@@ -18336,7 +19333,7 @@ static const struct asm_opcode insns[] =
   TCE("usub16", 6500f70, fad0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("usax",   6500f50, fae0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for USAX.  */
   TCE("usub16", 6500f70, fad0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("usax",   6500f50, fae0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   /* Old name for USAX.  */
- TCE("usubaddx",       6500f50, fae0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
+ TCE("usubaddx",6500f50, fae0f040, 3, (RRnpc, RRnpc, RRnpc),      rd_rn_rm, t_simd),
   TCE("usub8",  6500ff0, fac0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sxtah",  6b00070, fa00f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
   TCE("sxtab16",        6800070, fa20f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
   TCE("usub8",  6500ff0, fac0f040, 3, (RRnpc, RRnpc, RRnpc),       rd_rn_rm, t_simd),
   TCE("sxtah",  6b00070, fa00f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
   TCE("sxtab16",        6800070, fa20f080, 4, (RRnpc, RRnpc, RRnpc, oROR), sxtah, t_sxtah),
@@ -18389,7 +19386,7 @@ static const struct asm_opcode insns[] =
                                        RRnpcb), strexd, t_strexd),
  
  #undef  THUMB_VARIANT
                                        RRnpcb), strexd, t_strexd),
  
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT  & arm_ext_v6t2
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
   TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc_npcsp,RRnpcb),
       rd_rn,  rd_rn),
   TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc_npcsp, RRnpcb),
   TCE("ldrexb", 1d00f9f, e8d00f4f, 2, (RRnpc_npcsp,RRnpcb),
       rd_rn,  rd_rn),
   TCE("ldrexh", 1f00f9f, e8d00f5f, 2, (RRnpc_npcsp, RRnpcb),
@@ -18402,7 +19399,7 @@ static const struct asm_opcode insns[] =
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_sec
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_sec
-#undef THUMB_VARIANT
+#undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_sec
  
   TCE("smc",    1600070, f7f08000, 1, (EXPi), smc, t_smc),
  #define THUMB_VARIANT  & arm_ext_sec
  
   TCE("smc",    1600070, f7f08000, 1, (EXPi), smc, t_smc),
@@ -18415,8 +19412,15 @@ static const struct asm_opcode insns[] =
   TCE("hvc",    1400070, f7e08000, 1, (EXPi), hvc, t_hvc),
   TCE("eret",   160006e, f3de8f00, 0, (), noargs, noargs),
  
   TCE("hvc",    1400070, f7e08000, 1, (EXPi), hvc, t_hvc),
   TCE("eret",   160006e, f3de8f00, 0, (), noargs, noargs),
  
+#undef ARM_VARIANT
+#define        ARM_VARIANT    & arm_ext_pan
+#undef THUMB_VARIANT
+#define        THUMB_VARIANT  & arm_ext_pan
+
+ TUF("setpan", 1100000, b610, 1, (I7), setpan, t_setpan),
+
  #undef  ARM_VARIANT
  #undef  ARM_VARIANT
-#define ARM_VARIANT  & arm_ext_v6t2
+#define ARM_VARIANT    & arm_ext_v6t2
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
  
@@ -18426,8 +19430,6 @@ static const struct asm_opcode insns[] =
   TCE("ubfx",   7e00050, f3c00000, 4, (RR, RR, I31, I32),          bfx, t_bfx),
  
   TCE("mls",    0600090, fb000010, 4, (RRnpc, RRnpc, RRnpc, RRnpc), mlas, t_mla),
   TCE("ubfx",   7e00050, f3c00000, 4, (RR, RR, I31, I32),          bfx, t_bfx),
  
   TCE("mls",    0600090, fb000010, 4, (RRnpc, RRnpc, RRnpc, RRnpc), mlas, t_mla),
- TCE("movw",   3000000, f2400000, 2, (RRnpc, HALF),                mov16, t_mov16),
- TCE("movt",   3400000, f2c00000, 2, (RRnpc, HALF),                mov16, t_mov16),
   TCE("rbit",   6ff0f30, fa90f0a0, 2, (RR, RR),                     rd_rm, t_rbit),
  
   TC3("ldrht",  03000b0, f8300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TCE("rbit",   6ff0f30, fa90f0a0, 2, (RR, RR),                     rd_rm, t_rbit),
  
   TC3("ldrht",  03000b0, f8300e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
@@ -18435,8 +19437,13 @@ static const struct asm_opcode insns[] =
   TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
   TC3("ldrsbt", 03000d0, f9100e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
   TC3("strht",  02000b0, f8200e00, 2, (RRnpc_npcsp, ADDR), ldsttv4, t_ldstt),
  
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2_v8m
+ TCE("movw",   3000000, f2400000, 2, (RRnpc, HALF),                mov16, t_mov16),
+ TCE("movt",   3400000, f2c00000, 2, (RRnpc, HALF),                mov16, t_mov16),
+
   /* Thumb-only instructions.  */
   /* Thumb-only instructions.  */
-#undef ARM_VARIANT
+#undef  ARM_VARIANT
  #define ARM_VARIANT NULL
    TUE("cbnz",     0,           b900,     2, (RR, EXP), 0, t_cbz),
    TUE("cbz",      0,           b100,     2, (RR, EXP), 0, t_cbz),
  #define ARM_VARIANT NULL
    TUE("cbnz",     0,           b900,     2, (RR, EXP), 0, t_cbz),
    TUE("cbz",      0,           b100,     2, (RR, EXP), 0, t_cbz),
@@ -18446,6 +19453,8 @@ static const struct asm_opcode insns[] =
      -mimplicit-it=[never | arm] modes.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v1
      -mimplicit-it=[never | arm] modes.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & arm_ext_v1
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
  
   TUE("it",        bf08,        bf08,     1, (COND),   it,    t_it),
   TUE("itt",       bf0c,        bf0c,     1, (COND),   it,    t_it),
  
   TUE("it",        bf08,        bf08,     1, (COND),   it,    t_it),
   TUE("itt",       bf0c,        bf0c,     1, (COND),   it,    t_it),
@@ -18505,9 +19514,9 @@ static const struct asm_opcode insns[] =
   TUF("pli",    450f000, f910f000, 1, (ADDR),     pli,      t_pld),
   TCE("dbg",    320f0f0, f3af80f0, 1, (I15),      dbg,      t_dbg),
  
   TUF("pli",    450f000, f910f000, 1, (ADDR),     pli,      t_pld),
   TCE("dbg",    320f0f0, f3af80f0, 1, (I15),      dbg,      t_dbg),
  
-#undef ARM_VARIANT
+#undef  ARM_VARIANT
  #define ARM_VARIANT    & arm_ext_mp
  #define ARM_VARIANT    & arm_ext_mp
-#undef THUMB_VARIANT
+#undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_mp
  
   TUF("pldw",   410f000, f830f000, 1, (ADDR),   pld,    t_pld),
  #define THUMB_VARIANT  & arm_ext_mp
  
   TUF("pldw",   410f000, f830f000, 1, (ADDR),   pld,    t_pld),
@@ -18515,33 +19524,37 @@ static const struct asm_opcode insns[] =
   /* AArchv8 instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT   & arm_ext_v8
   /* AArchv8 instructions.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT   & arm_ext_v8
+
+/* Instructions shared between armv8-a and armv8-m.  */
  #undef  THUMB_VARIANT
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & arm_ext_v8
+#define THUMB_VARIANT & arm_ext_atomics
  
  
- tCE("sevl",   320f005, _sevl,    0, (),               noargs, t_hint),
- TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
+ TCE("lda",    1900c9f, e8d00faf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("ldab",   1d00c9f, e8d00f8f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("ldah",   1f00c9f, e8d00f9f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
+ TCE("stl",    180fc90, e8c00faf, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+ TCE("stlb",   1c0fc90, e8c00f8f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+ TCE("stlh",   1e0fc90, e8c00f9f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
   TCE("ldaex",  1900e9f, e8d00fef, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("ldaex",  1900e9f, e8d00fef, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldaexd", 1b00e9f, e8d000ff, 3, (RRnpc, oRRnpc, RRnpcb),
-                                                       ldrexd, t_ldrexd),
   TCE("ldaexb", 1d00e9f, e8d00fcf, 2, (RRnpc,RRnpcb),   rd_rn,  rd_rn),
   TCE("ldaexh", 1f00e9f, e8d00fdf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("stlex",  1800e90, e8c00fe0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex,  t_stlex),
   TCE("ldaexb", 1d00e9f, e8d00fcf, 2, (RRnpc,RRnpcb),   rd_rn,  rd_rn),
   TCE("ldaexh", 1f00e9f, e8d00fdf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
   TCE("stlex",  1800e90, e8c00fe0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex,  t_stlex),
- TCE("stlexd", 1a00e90, e8c000f0, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb),
-                                                       strexd, t_strexd),
   TCE("stlexb", 1c00e90, e8c00fc0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexh", 1e00e90, e8c00fd0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexb", 1c00e90, e8c00fc0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
   TCE("stlexh", 1e00e90, e8c00fd0, 3, (RRnpc, RRnpc, RRnpcb),
                                                         stlex, t_stlex),
- TCE("lda",    1900c9f, e8d00faf, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldab",   1d00c9f, e8d00f8f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("ldah",   1f00c9f, e8d00f9f, 2, (RRnpc, RRnpcb),  rd_rn,  rd_rn),
- TCE("stl",    180fc90, e8c00faf, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
- TCE("stlb",   1c0fc90, e8c00f8f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
- TCE("stlh",   1e0fc90, e8c00f9f, 2, (RRnpc, RRnpcb),  rm_rn,  rd_rn),
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8
  
  
+ tCE("sevl",   320f005, _sevl,    0, (),               noargs, t_hint),
+ TUE("hlt",    1000070, ba80,     1, (oIffffb),        bkpt,   t_hlt),
+ TCE("ldaexd", 1b00e9f, e8d000ff, 3, (RRnpc, oRRnpc, RRnpcb),
+                                                       ldrexd, t_ldrexd),
+ TCE("stlexd", 1a00e90, e8c000f0, 4, (RRnpc, RRnpc, oRRnpc, RRnpcb),
+                                                       strexd, t_strexd),
   /* ARMv8 T32 only.  */
   /* ARMv8 T32 only.  */
-#undef ARM_VARIANT
+#undef  ARM_VARIANT
  #define ARM_VARIANT  NULL
   TUF("dcps1",  0,       f78f8001, 0, (),       noargs, noargs),
   TUF("dcps2",  0,       f78f8002, 0, (),       noargs, noargs),
  #define ARM_VARIANT  NULL
   TUF("dcps1",  0,       f78f8001, 0, (),       noargs, noargs),
   TUF("dcps2",  0,       f78f8002, 0, (),       noargs, noargs),
@@ -18549,9 +19562,9 @@ static const struct asm_opcode insns[] =
  
    /* FP for ARMv8.  */
  #undef  ARM_VARIANT
  
    /* FP for ARMv8.  */
  #undef  ARM_VARIANT
-#define ARM_VARIANT & fpu_vfp_ext_armv8
+#define ARM_VARIANT   & fpu_vfp_ext_armv8xd
  #undef  THUMB_VARIANT
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & fpu_vfp_ext_armv8
+#define THUMB_VARIANT & fpu_vfp_ext_armv8xd
  
    nUF(vseleq, _vseleq, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD),          vsel),
  
    nUF(vseleq, _vseleq, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD),          vsel),
@@ -18593,7 +19606,7 @@ static const struct asm_opcode insns[] =
    nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
  
  #undef  ARM_VARIANT
    nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT & crc_ext_armv8
+#define ARM_VARIANT   & crc_ext_armv8
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & crc_ext_armv8
    TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & crc_ext_armv8
    TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
@@ -18603,6 +19616,13 @@ static const struct asm_opcode insns[] =
    TUEc("crc32ch",1200240, fad0f090, 3, (RR, oRR, RR), crc32ch),
    TUEc("crc32cw",1400240, fad0f0a0, 3, (RR, oRR, RR), crc32cw),
  
    TUEc("crc32ch",1200240, fad0f090, 3, (RR, oRR, RR), crc32ch),
    TUEc("crc32cw",1400240, fad0f0a0, 3, (RR, oRR, RR), crc32cw),
  
+ /* ARMv8.2 RAS extension.  */
+#undef  ARM_VARIANT
+#define ARM_VARIANT   & arm_ext_v8_2
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8_2
+ TUE ("esb", 320f010, f3af8010, 0, (), noargs,  noargs),
+
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_fpa_ext_v1  /* Core FPA instruction set (V1).  */
  #undef  THUMB_VARIANT
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_fpa_ext_v1  /* Core FPA instruction set (V1).  */
  #undef  THUMB_VARIANT
@@ -19183,8 +20203,8 @@ static const struct asm_opcode insns[] =
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- nCE(vcmp,      _vcmp,    2, (RVSD, RVSD_I0),    vfp_nsyn_cmp),
- nCE(vcmpe,     _vcmpe,   2, (RVSD, RVSD_I0),    vfp_nsyn_cmp),
+ nCE(vcmp,      _vcmp,    2, (RVSD, RSVD_FI0),    vfp_nsyn_cmp),
+ nCE(vcmpe,     _vcmpe,   2, (RVSD, RSVD_FI0),    vfp_nsyn_cmp),
   NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
   NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
   NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
   NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
@@ -19219,6 +20239,15 @@ static const struct asm_opcode insns[] =
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & arm_ext_fp16
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_fp16
+ /* New instructions added from v8.2, allowing the extraction and insertion of
+    the upper 16 bits of a 32-bit vector register.  */
+ NCE (vmovx,     eb00a40,       2, (RVS, RVS), neon_movhf),
+ NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
@@ -19320,6 +20349,11 @@ static const struct asm_opcode insns[] =
   NUF(vrecpsq,   0000f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrecpsq,   0000f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
+ /* ARM v8.1 extension.  */
+ nUF (vqrdmlah,  _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
  
    /* Two address, int/float. Types S8 S16 S32 F32.  */
   NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
  
    /* Two address, int/float. Types S8 S16 S32 F32.  */
   NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
@@ -19467,9 +20501,9 @@ static const struct asm_opcode insns[] =
   nUF(vst4,      _vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
  
  #undef  THUMB_VARIANT
   nUF(vst4,      _vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
  
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT &fpu_vfp_ext_v3xd
-#undef ARM_VARIANT
-#define ARM_VARIANT &fpu_vfp_ext_v3xd
+#define THUMB_VARIANT & fpu_vfp_ext_v3xd
+#undef  ARM_VARIANT
+#define ARM_VARIANT   & fpu_vfp_ext_v3xd
   cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
   cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
   cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
   cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
@@ -19480,7 +20514,7 @@ static const struct asm_opcode insns[] =
   cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  
   cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
   cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  
-#undef THUMB_VARIANT
+#undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_v3
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v3
  #define THUMB_VARIANT  & fpu_vfp_ext_v3
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v3
@@ -19495,10 +20529,10 @@ static const struct asm_opcode insns[] =
   cCE("ftouhd",    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
   cCE("ftould",    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
  
   cCE("ftouhd",    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
   cCE("ftould",    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
  
-#undef ARM_VARIANT
-#define ARM_VARIANT &fpu_vfp_ext_fma
-#undef THUMB_VARIANT
-#define THUMB_VARIANT &fpu_vfp_ext_fma
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & fpu_vfp_ext_fma
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & fpu_vfp_ext_fma
   /* Mnemonics shared by Neon and VFP.  These are included in the
      VFP FMA variant; NEON and VFP FMA always includes the NEON
      FMA instructions.  */
   /* Mnemonics shared by Neon and VFP.  These are included in the
      VFP FMA variant; NEON and VFP FMA always includes the NEON
      FMA instructions.  */
@@ -19538,12 +20572,12 @@ static const struct asm_opcode insns[] =
   cCE("textrcb",        e130170, 2, (RR, I7),               iwmmxt_textrc),
   cCE("textrch",        e530170, 2, (RR, I7),               iwmmxt_textrc),
   cCE("textrcw",        e930170, 2, (RR, I7),               iwmmxt_textrc),
   cCE("textrcb",        e130170, 2, (RR, I7),               iwmmxt_textrc),
   cCE("textrch",        e530170, 2, (RR, I7),               iwmmxt_textrc),
   cCE("textrcw",        e930170, 2, (RR, I7),               iwmmxt_textrc),
- cCE("textrmub",       e100070, 3, (RR, RIWR, I7),         iwmmxt_textrm),
- cCE("textrmuh",       e500070, 3, (RR, RIWR, I7),         iwmmxt_textrm),
- cCE("textrmuw",       e900070, 3, (RR, RIWR, I7),         iwmmxt_textrm),
- cCE("textrmsb",       e100078, 3, (RR, RIWR, I7),         iwmmxt_textrm),
- cCE("textrmsh",       e500078, 3, (RR, RIWR, I7),         iwmmxt_textrm),
- cCE("textrmsw",       e900078, 3, (RR, RIWR, I7),         iwmmxt_textrm),
+ cCE("textrmub",e100070, 3, (RR, RIWR, I7),        iwmmxt_textrm),
+ cCE("textrmuh",e500070, 3, (RR, RIWR, I7),        iwmmxt_textrm),
+ cCE("textrmuw",e900070, 3, (RR, RIWR, I7),        iwmmxt_textrm),
+ cCE("textrmsb",e100078, 3, (RR, RIWR, I7),        iwmmxt_textrm),
+ cCE("textrmsh",e500078, 3, (RR, RIWR, I7),        iwmmxt_textrm),
+ cCE("textrmsw",e900078, 3, (RR, RIWR, I7),        iwmmxt_textrm),
   cCE("tinsrb", e600010, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
   cCE("tinsrh", e600050, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
   cCE("tinsrw", e600090, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
   cCE("tinsrb", e600010, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
   cCE("tinsrh", e600050, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
   cCE("tinsrw", e600090, 3, (RIWR, RR, I7),         iwmmxt_tinsr),
@@ -19555,9 +20589,9 @@ static const struct asm_opcode insns[] =
   cCE("tmiabt", e2d0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
   cCE("tmiatb", e2e0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
   cCE("tmiatt", e2f0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
   cCE("tmiabt", e2d0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
   cCE("tmiatb", e2e0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
   cCE("tmiatt", e2f0010, 3, (RIWR, RR, RR),         iwmmxt_tmia),
- cCE("tmovmskb",       e100030, 2, (RR, RIWR),             rd_rn),
- cCE("tmovmskh",       e500030, 2, (RR, RIWR),             rd_rn),
- cCE("tmovmskw",       e900030, 2, (RR, RIWR),             rd_rn),
+ cCE("tmovmskb",e100030, 2, (RR, RIWR),                    rd_rn),
+ cCE("tmovmskh",e500030, 2, (RR, RIWR),                    rd_rn),
+ cCE("tmovmskw",e900030, 2, (RR, RIWR),                    rd_rn),
   cCE("tmrc",   e100110, 2, (RR, RIWC_RIWG),        rd_rn),
   cCE("tmrrc",  c500000, 3, (RR, RR, RIWR),         rd_rn_rm),
   cCE("torcb",  e13f150, 1, (RR),                   iwmmxt_tandorc),
   cCE("tmrc",   e100110, 2, (RR, RIWC_RIWG),        rd_rn),
   cCE("tmrrc",  c500000, 3, (RR, RR, RIWR),         rd_rn_rm),
   cCE("torcb",  e13f150, 1, (RR),                   iwmmxt_tandorc),
@@ -19576,10 +20610,10 @@ static const struct asm_opcode insns[] =
   cCE("waddw",  e800180, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("waddwus",        e900180, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("waligni",        e000020, 4, (RIWR, RIWR, RIWR, I7), iwmmxt_waligni),
   cCE("waddw",  e800180, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("waddwus",        e900180, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("waligni",        e000020, 4, (RIWR, RIWR, RIWR, I7), iwmmxt_waligni),
- cCE("walignr0",       e800020, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("walignr1",       e900020, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("walignr2",       ea00020, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("walignr3",       eb00020, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
+ cCE("walignr0",e800020, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("walignr1",e900020, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("walignr2",ea00020, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("walignr3",eb00020, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
   cCE("wand",   e200000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wandn",  e300000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wavg2b", e800000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wand",   e200000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wandn",  e300000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wavg2b", e800000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
@@ -19589,12 +20623,12 @@ static const struct asm_opcode insns[] =
   cCE("wcmpeqb",        e000060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wcmpeqh",        e400060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wcmpeqw",        e800060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wcmpeqb",        e000060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wcmpeqh",        e400060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wcmpeqw",        e800060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtub",       e100060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtuh",       e500060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtuw",       e900060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtsb",       e300060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtsh",       e700060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wcmpgtsw",       eb00060, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
+ cCE("wcmpgtub",e100060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wcmpgtuh",e500060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wcmpgtuw",e900060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wcmpgtsb",e300060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wcmpgtsh",e700060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wcmpgtsw",eb00060, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
   cCE("wldrb",  c100000, 2, (RIWR, ADDR),           iwmmxt_wldstbh),
   cCE("wldrh",  c500000, 2, (RIWR, ADDR),           iwmmxt_wldstbh),
   cCE("wldrw",  c100100, 2, (RIWR_RIWC, ADDR),      iwmmxt_wldstw),
   cCE("wldrb",  c100000, 2, (RIWR, ADDR),           iwmmxt_wldstbh),
   cCE("wldrh",  c500000, 2, (RIWR, ADDR),           iwmmxt_wldstbh),
   cCE("wldrw",  c100100, 2, (RIWR_RIWC, ADDR),      iwmmxt_wldstw),
@@ -19623,12 +20657,12 @@ static const struct asm_opcode insns[] =
   cCE("wmulum", e100100, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wmulul", e000100, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wor",    e000000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wmulum", e100100, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wmulul", e000100, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
   cCE("wor",    e000000, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackhss",       e700080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackhus",       e500080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackwss",       eb00080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackwus",       e900080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackdss",       ef00080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
- cCE("wpackdus",       ed00080, 3, (RIWR, RIWR, RIWR),     rd_rn_rm),
+ cCE("wpackhss",e700080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wpackhus",e500080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wpackwss",eb00080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wpackwus",e900080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wpackdss",ef00080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
+ cCE("wpackdus",ed00080, 3, (RIWR, RIWR, RIWR),            rd_rn_rm),
   cCE("wrorh",  e700040, 3, (RIWR, RIWR, RIWR_I32z),iwmmxt_wrwrwr_or_imm5),
   cCE("wrorhg", e700148, 3, (RIWR, RIWR, RIWG),     rd_rn_rm),
   cCE("wrorw",  eb00040, 3, (RIWR, RIWR, RIWR_I32z),iwmmxt_wrwrwr_or_imm5),
   cCE("wrorh",  e700040, 3, (RIWR, RIWR, RIWR_I32z),iwmmxt_wrwrwr_or_imm5),
   cCE("wrorhg", e700148, 3, (RIWR, RIWR, RIWG),     rd_rn_rm),
   cCE("wrorw",  eb00040, 3, (RIWR, RIWR, RIWR_I32z),iwmmxt_wrwrwr_or_imm5),
@@ -19770,36 +20804,36 @@ static const struct asm_opcode insns[] =
   cCE("cfmvrdl",        e100410, 2, (RR, RMD),                rd_rn),
   cCE("cfmvdhr",        e000430, 2, (RMD, RR),                rn_rd),
   cCE("cfmvrdh",        e100430, 2, (RR, RMD),                rd_rn),
   cCE("cfmvrdl",        e100410, 2, (RR, RMD),                rd_rn),
   cCE("cfmvdhr",        e000430, 2, (RMD, RR),                rn_rd),
   cCE("cfmvrdh",        e100430, 2, (RR, RMD),                rd_rn),
- cCE("cfmv64lr",       e000510, 2, (RMDX, RR),               rn_rd),
- cCE("cfmvr64l",       e100510, 2, (RR, RMDX),               rd_rn),
- cCE("cfmv64hr",       e000530, 2, (RMDX, RR),               rn_rd),
- cCE("cfmvr64h",       e100530, 2, (RR, RMDX),               rd_rn),
- cCE("cfmval32",       e200440, 2, (RMAX, RMFX),             rd_rn),
- cCE("cfmv32al",       e100440, 2, (RMFX, RMAX),             rd_rn),
- cCE("cfmvam32",       e200460, 2, (RMAX, RMFX),             rd_rn),
- cCE("cfmv32am",       e100460, 2, (RMFX, RMAX),             rd_rn),
- cCE("cfmvah32",       e200480, 2, (RMAX, RMFX),             rd_rn),
- cCE("cfmv32ah",       e100480, 2, (RMFX, RMAX),             rd_rn),
+ cCE("cfmv64lr",e000510, 2, (RMDX, RR),                      rn_rd),
+ cCE("cfmvr64l",e100510, 2, (RR, RMDX),                      rd_rn),
+ cCE("cfmv64hr",e000530, 2, (RMDX, RR),                      rn_rd),
+ cCE("cfmvr64h",e100530, 2, (RR, RMDX),                      rd_rn),
+ cCE("cfmval32",e200440, 2, (RMAX, RMFX),            rd_rn),
+ cCE("cfmv32al",e100440, 2, (RMFX, RMAX),            rd_rn),
+ cCE("cfmvam32",e200460, 2, (RMAX, RMFX),            rd_rn),
+ cCE("cfmv32am",e100460, 2, (RMFX, RMAX),            rd_rn),
+ cCE("cfmvah32",e200480, 2, (RMAX, RMFX),            rd_rn),
+ cCE("cfmv32ah",e100480, 2, (RMFX, RMAX),            rd_rn),
   cCE("cfmva32",        e2004a0, 2, (RMAX, RMFX),             rd_rn),
   cCE("cfmv32a",        e1004a0, 2, (RMFX, RMAX),             rd_rn),
   cCE("cfmva64",        e2004c0, 2, (RMAX, RMDX),             rd_rn),
   cCE("cfmv64a",        e1004c0, 2, (RMDX, RMAX),             rd_rn),
   cCE("cfmva32",        e2004a0, 2, (RMAX, RMFX),             rd_rn),
   cCE("cfmv32a",        e1004a0, 2, (RMFX, RMAX),             rd_rn),
   cCE("cfmva64",        e2004c0, 2, (RMAX, RMDX),             rd_rn),
   cCE("cfmv64a",        e1004c0, 2, (RMDX, RMAX),             rd_rn),
- cCE("cfmvsc32",       e2004e0, 2, (RMDS, RMDX),             mav_dspsc),
- cCE("cfmv32sc",       e1004e0, 2, (RMDX, RMDS),             rd),
+ cCE("cfmvsc32",e2004e0, 2, (RMDS, RMDX),            mav_dspsc),
+ cCE("cfmv32sc",e1004e0, 2, (RMDX, RMDS),            rd),
   cCE("cfcpys", e000400, 2, (RMF, RMF),               rd_rn),
   cCE("cfcpyd", e000420, 2, (RMD, RMD),               rd_rn),
   cCE("cfcvtsd",        e000460, 2, (RMD, RMF),               rd_rn),
   cCE("cfcvtds",        e000440, 2, (RMF, RMD),               rd_rn),
   cCE("cfcpys", e000400, 2, (RMF, RMF),               rd_rn),
   cCE("cfcpyd", e000420, 2, (RMD, RMD),               rd_rn),
   cCE("cfcvtsd",        e000460, 2, (RMD, RMF),               rd_rn),
   cCE("cfcvtds",        e000440, 2, (RMF, RMD),               rd_rn),
- cCE("cfcvt32s",       e000480, 2, (RMF, RMFX),              rd_rn),
- cCE("cfcvt32d",       e0004a0, 2, (RMD, RMFX),              rd_rn),
- cCE("cfcvt64s",       e0004c0, 2, (RMF, RMDX),              rd_rn),
- cCE("cfcvt64d",       e0004e0, 2, (RMD, RMDX),              rd_rn),
- cCE("cfcvts32",       e100580, 2, (RMFX, RMF),              rd_rn),
- cCE("cfcvtd32",       e1005a0, 2, (RMFX, RMD),              rd_rn),
+ cCE("cfcvt32s",e000480, 2, (RMF, RMFX),             rd_rn),
+ cCE("cfcvt32d",e0004a0, 2, (RMD, RMFX),             rd_rn),
+ cCE("cfcvt64s",e0004c0, 2, (RMF, RMDX),             rd_rn),
+ cCE("cfcvt64d",e0004e0, 2, (RMD, RMDX),             rd_rn),
+ cCE("cfcvts32",e100580, 2, (RMFX, RMF),             rd_rn),
+ cCE("cfcvtd32",e1005a0, 2, (RMFX, RMD),             rd_rn),
   cCE("cftruncs32",e1005c0, 2, (RMFX, RMF),           rd_rn),
   cCE("cftruncd32",e1005e0, 2, (RMFX, RMD),           rd_rn),
   cCE("cftruncs32",e1005c0, 2, (RMFX, RMF),           rd_rn),
   cCE("cftruncd32",e1005e0, 2, (RMFX, RMD),           rd_rn),
- cCE("cfrshl32",       e000550, 3, (RMFX, RMFX, RR),         mav_triple),
- cCE("cfrshl64",       e000570, 3, (RMDX, RMDX, RR),         mav_triple),
+ cCE("cfrshl32",e000550, 3, (RMFX, RMFX, RR),        mav_triple),
+ cCE("cfrshl64",e000570, 3, (RMDX, RMDX, RR),        mav_triple),
   cCE("cfsh32", e000500, 3, (RMFX, RMFX, I63s),       mav_shift),
   cCE("cfsh64", e200500, 3, (RMDX, RMDX, I63s),       mav_shift),
   cCE("cfcmps", e100490, 3, (RR, RMF, RMF),           rd_rn_rm),
   cCE("cfsh32", e000500, 3, (RMFX, RMFX, I63s),       mav_shift),
   cCE("cfsh64", e200500, 3, (RMDX, RMDX, I63s),       mav_shift),
   cCE("cfcmps", e100490, 3, (RR, RMF, RMF),           rd_rn_rm),
@@ -19828,10 +20862,17 @@ static const struct asm_opcode insns[] =
   cCE("cfmul64",        e100520, 3, (RMDX, RMDX, RMDX),       rd_rn_rm),
   cCE("cfmac32",        e100540, 3, (RMFX, RMFX, RMFX),       rd_rn_rm),
   cCE("cfmsc32",        e100560, 3, (RMFX, RMFX, RMFX),       rd_rn_rm),
   cCE("cfmul64",        e100520, 3, (RMDX, RMDX, RMDX),       rd_rn_rm),
   cCE("cfmac32",        e100540, 3, (RMFX, RMFX, RMFX),       rd_rn_rm),
   cCE("cfmsc32",        e100560, 3, (RMFX, RMFX, RMFX),       rd_rn_rm),
- cCE("cfmadd32",       e000600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
- cCE("cfmsub32",       e100600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
+ cCE("cfmadd32",e000600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
+ cCE("cfmsub32",e100600, 4, (RMAX, RMFX, RMFX, RMFX), mav_quad),
   cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
   cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad),
+
+#undef  ARM_VARIANT
+#define ARM_VARIANT NULL
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT & arm_ext_v8m
+ TUE("tt", 0, e840f000, 2, (RRnpc, RRnpc), 0, tt),
+ TUE("ttt", 0, e840f040, 2, (RRnpc, RRnpc), 0, tt),
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
@@ -20104,6 +21145,11 @@ md_convert_frag (bfd *abfd, segT asec ATTRIBUTE_UNUSED, fragS *fragp)
    fixp->fx_file = fragp->fr_file;
    fixp->fx_line = fragp->fr_line;
    fragp->fr_fix += fragp->fr_var;
    fixp->fx_file = fragp->fr_file;
    fixp->fx_line = fragp->fr_line;
    fragp->fr_fix += fragp->fr_var;
+
+  /* Set whether we use thumb-2 ISA based on final relaxation results.  */
+  if (thumb_mode && fragp->fr_var == 4 && no_cpu_selected ()
+      && !ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_t2))
+    ARM_MERGE_FEATURE_SETS (arm_arch_used, thumb_arch_used, arm_ext_v6t2);
  }
  
  /* Return the size of a relaxable immediate operand instruction.
  }
  
  /* Return the size of a relaxable immediate operand instruction.
@@ -20381,7 +21427,7 @@ md_section_align (segT   segment ATTRIBUTE_UNUSED,
        int align;
  
        align = bfd_get_section_alignment (stdoutput, segment);
        int align;
  
        align = bfd_get_section_alignment (stdoutput, segment);
-      size = ((size + (1 << align) - 1) & ((valueT) -1 << align));
+      size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
      }
  #endif
  
      }
  #endif
  
@@ -20444,7 +21490,8 @@ arm_handle_align (fragS * fragP)
  
    if (fragP->tc_frag_data.thumb_mode & (~ MODE_RECORDED))
      {
  
    if (fragP->tc_frag_data.thumb_mode & (~ MODE_RECORDED))
      {
-      if (ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6t2))
+      if (ARM_CPU_HAS_FEATURE (selected_cpu_name[0]
+                              ? selected_cpu : arm_arch_none, arm_ext_v6t2))
         {
           narrow_noop = thumb_noop[1][target_big_endian];
           noop = wide_thumb_noop[target_big_endian];
         {
           narrow_noop = thumb_noop[1][target_big_endian];
           noop = wide_thumb_noop[target_big_endian];
@@ -20458,7 +21505,9 @@ arm_handle_align (fragS * fragP)
      }
    else
      {
      }
    else
      {
-      noop = arm_noop[ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6k) != 0]
+      noop = arm_noop[ARM_CPU_HAS_FEATURE (selected_cpu_name[0]
+                                          ? selected_cpu : arm_arch_none,
+                                          arm_ext_v6k) != 0]
                      [target_big_endian];
        noop_size = 4;
  #ifdef OBJ_ELF
                      [target_big_endian];
        noop_size = 4;
  #ifdef OBJ_ELF
@@ -20553,27 +21602,29 @@ arm_init_frag (fragS * fragP, int max_chars ATTRIBUTE_UNUSED)
  void
  arm_init_frag (fragS * fragP, int max_chars)
  {
  void
  arm_init_frag (fragS * fragP, int max_chars)
  {
+  int frag_thumb_mode;
+
    /* If the current ARM vs THUMB mode has not already
       been recorded into this frag then do so now.  */
    if ((fragP->tc_frag_data.thumb_mode & MODE_RECORDED) == 0)
    /* If the current ARM vs THUMB mode has not already
       been recorded into this frag then do so now.  */
    if ((fragP->tc_frag_data.thumb_mode & MODE_RECORDED) == 0)
-    {
-      fragP->tc_frag_data.thumb_mode = thumb_mode | MODE_RECORDED;
+    fragP->tc_frag_data.thumb_mode = thumb_mode | MODE_RECORDED;
  
  
-      /* Record a mapping symbol for alignment frags.  We will delete this
-        later if the alignment ends up empty.  */
-      switch (fragP->fr_type)
-       {
-         case rs_align:
-         case rs_align_test:
-         case rs_fill:
-           mapping_state_2 (MAP_DATA, max_chars);
-           break;
-         case rs_align_code:
-           mapping_state_2 (thumb_mode ? MAP_THUMB : MAP_ARM, max_chars);
-           break;
-         default:
-           break;
-       }
+  frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
+
+  /* Record a mapping symbol for alignment frags.  We will delete this
+     later if the alignment ends up empty.  */
+  switch (fragP->fr_type)
+    {
+    case rs_align:
+    case rs_align_test:
+    case rs_fill:
+      mapping_state_2 (MAP_DATA, max_chars);
+      break;
+    case rs_align_code:
+      mapping_state_2 (frag_thumb_mode ? MAP_THUMB : MAP_ARM, max_chars);
+      break;
+    default:
+      break;
      }
  }
  
      }
  }
  
@@ -20807,7 +21858,7 @@ start_unwind_section (const segT text_seg, int idx)
  
  /* Start an unwind table entry.         HAVE_DATA is nonzero if we have additional
     personality routine data.  Returns zero, or the index table value for
  
  /* Start an unwind table entry.         HAVE_DATA is nonzero if we have additional
     personality routine data.  Returns zero, or the index table value for
-   and inline entry.  */
+   an inline entry.  */
  
  static valueT
  create_unwind_entry (int have_data)
  
  static valueT
  create_unwind_entry (int have_data)
@@ -20878,7 +21929,12 @@ create_unwind_entry (int have_data)
      }
    else
      {
      }
    else
      {
-      gas_assert (unwind.personality_index == -1);
+      /* PR 16765: Missing or misplaced unwind directives can trigger this.  */
+      if (unwind.personality_index != -1)
+       {
+         as_bad (_("attempt to recreate an unwind entry"));
+         return 1;
+       }
  
        /* An extra byte is required for the opcode count.       */
        size = unwind.opcode_count + 1;
  
        /* An extra byte is required for the opcode count.       */
        size = unwind.opcode_count + 1;
@@ -20985,11 +22041,19 @@ int
  tc_arm_regname_to_dw2regnum (char *regname)
  {
    int reg = arm_reg_parse (&regname, REG_TYPE_RN);
  tc_arm_regname_to_dw2regnum (char *regname)
  {
    int reg = arm_reg_parse (&regname, REG_TYPE_RN);
+  if (reg != FAIL)
+    return reg;
  
  
-  if (reg == FAIL)
-    return -1;
+  /* PR 16694: Allow VFP registers as well.  */
+  reg = arm_reg_parse (&regname, REG_TYPE_VFS);
+  if (reg != FAIL)
+    return 64 + reg;
  
  
-  return reg;
+  reg = arm_reg_parse (&regname, REG_TYPE_VFD);
+  if (reg != FAIL)
+    return reg + 256;
+
+  return -1;
  }
  
  #ifdef TE_PE
  }
  
  #ifdef TE_PE
@@ -21135,6 +22199,51 @@ md_pcrel_from_section (fixS * fixP, segT seg)
      }
  }
  
      }
  }
  
+static bfd_boolean flag_warn_syms = TRUE;
+
+/* Hook run when an assignment to the symbol NAME is seen (C is unused).
+   Optionally warns when NAME matches an ARM instruction mnemonic.
+   Always returns FALSE.  */
+
+bfd_boolean
+arm_tc_equal_in_insn (int c ATTRIBUTE_UNUSED, char * name)
+{
+  /* PR 18347 - Warn if the user attempts to create a symbol with the same
+     name as an ARM instruction.  Whilst strictly speaking it is allowed, it
+     does mean that the resulting code might be very confusing to the reader.
+     Also this warning can be triggered if the user omits an operand before
+     an immediate address, eg:
+
+       LDR =foo
+
+     GAS treats this as an assignment of the value of the symbol foo to a
+     symbol LDR, and so (without this code) it will not issue any kind of
+     warning or error message.
+
+     Note - ARM instructions are case-insensitive but the strings in the hash
+     table are all stored in lower case, so we must first ensure that name is
+     lower case too.  */
+  if (flag_warn_syms && arm_ops_hsh)
+    {
+      char * nbuf = strdup (name);
+      char * p;
+
+      /* Out of memory: skip the (purely advisory) check rather than
+        dereference a NULL pointer below.  */
+      if (nbuf == NULL)
+       return FALSE;
+
+      for (p = nbuf; *p; p++)
+       *p = TOLOWER (*p);
+      if (hash_find (arm_ops_hsh, nbuf) != NULL)
+       {
+         static struct hash_control * already_warned = NULL;
+
+         if (already_warned == NULL)
+           already_warned = hash_new ();
+         /* Only warn about the symbol once.  To keep the code
+            simple we let hash_insert do the lookup for us.  The key
+            handed over is our private lower-cased copy, never NAME:
+            NAME is the caller's buffer and the table is expected to
+            keep the key pointer rather than copy the string.  */
+         if (hash_insert (already_warned, nbuf, NULL) == NULL)
+           as_warn (_("[-mwarn-syms]: Assignment makes a symbol match an ARM instruction: %s"), name);
+         else
+           /* Already recorded, so this copy is not retained.  */
+           free (nbuf);
+       }
+      else
+       free (nbuf);
+    }
+
+  return FALSE;
+}
+
  /* Under ELF we need to default _GLOBAL_OFFSET_TABLE.
     Otherwise we have no need to default values of symbols.  */
  
  /* Under ELF we need to default _GLOBAL_OFFSET_TABLE.
     Otherwise we have no need to default values of symbols.  */
  
@@ -22160,7 +23269,7 @@ md_apply_fix (fixS *    fixP,
  
        if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
         {
  
        if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
         {
-         if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_arch_t2)))
+         if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
           else if ((value & ~0x1ffffff)
                    && ((value & ~0x1ffffff) != ~0x1ffffff))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
           else if ((value & ~0x1ffffff)
                    && ((value & ~0x1ffffff) != ~0x1ffffff))
@@ -22184,7 +23293,7 @@ md_apply_fix (fixS *    fixP,
  
      case BFD_RELOC_8:
        if (fixP->fx_done || !seg->use_rela_p)
  
      case BFD_RELOC_8:
        if (fixP->fx_done || !seg->use_rela_p)
-       md_number_to_chars (buf, value, 1);
+       *buf = value;
        break;
  
      case BFD_RELOC_16:
        break;
  
      case BFD_RELOC_16:
@@ -22197,9 +23306,6 @@ md_apply_fix (fixS *    fixP,
      case BFD_RELOC_ARM_THM_TLS_CALL:
      case BFD_RELOC_ARM_TLS_DESCSEQ:
      case BFD_RELOC_ARM_THM_TLS_DESCSEQ:
      case BFD_RELOC_ARM_THM_TLS_CALL:
      case BFD_RELOC_ARM_TLS_DESCSEQ:
      case BFD_RELOC_ARM_THM_TLS_DESCSEQ:
-      S_SET_THREAD_LOCAL (fixP->fx_addsy);
-      break;
-
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
      case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
      case BFD_RELOC_ARM_TLS_LE32:
@@ -22207,12 +23313,10 @@ md_apply_fix (fixS *  fixP,
      case BFD_RELOC_ARM_TLS_LDM32:
      case BFD_RELOC_ARM_TLS_LDO32:
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
      case BFD_RELOC_ARM_TLS_LDM32:
      case BFD_RELOC_ARM_TLS_LDO32:
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
-      /* fall through */
+      break;
  
      case BFD_RELOC_ARM_GOT32:
      case BFD_RELOC_ARM_GOTOFF:
  
      case BFD_RELOC_ARM_GOT32:
      case BFD_RELOC_ARM_GOTOFF:
-      if (fixP->fx_done || !seg->use_rela_p)
-       md_number_to_chars (buf, 0, 4);
        break;
  
      case BFD_RELOC_ARM_GOT_PREL:
        break;
  
      case BFD_RELOC_ARM_GOT_PREL:
@@ -22264,7 +23368,20 @@ md_apply_fix (fixS *   fixP,
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
-      if (value < -1023 || value > 1023 || (value & 3))
+      if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM)
+       newval = md_chars_to_number (buf, INSN_SIZE);
+      else
+       newval = get_thumb32_insn (buf);
+      if ((newval & 0x0f200f00) == 0x0d000900)
+       {
+         /* This is a fp16 vstr/vldr.  The immediate offset in the mnemonic
+            has permitted values that are multiples of 2, in the range 0
+            to 510.  */
+         if (value < -510 || value > 510 || (value & 1))
+           as_bad_where (fixP->fx_file, fixP->fx_line,
+                         _("co-processor offset out of range"));
+       }
+      else if (value < -1023 || value > 1023 || (value & 3))
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
      cp_off_common:
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
      cp_off_common:
@@ -22281,6 +23398,17 @@ md_apply_fix (fixS *   fixP,
        else
         {
           newval &= 0xff7fff00;
        else
         {
           newval &= 0xff7fff00;
+         if ((newval & 0x0f200f00) == 0x0d000900)
+           {
+             /* This is a fp16 vstr/vldr.
+
+                It requires the immediate offset in the instruction is shifted
+                left by 1 to be a half-word offset.
+
+                Here, left shift by 1 first, and later right shift by 2
+                should get the right offset.  */
+             value <<= 1;
+           }
           newval |= (value >> 2) | (sign ? INDEX_UP : 0);
         }
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM
           newval |= (value >> 2) | (sign ? INDEX_UP : 0);
         }
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM
@@ -22406,7 +23534,7 @@ md_apply_fix (fixS *    fixP,
  
         if (rd == REG_SP)
           {
  
         if (rd == REG_SP)
           {
-           if (value & ~0x1fc)
+           if (value & ~0x1fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for stack address calculation"));
             newval = subtract ? T_OPCODE_SUB_ST : T_OPCODE_ADD_ST;
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for stack address calculation"));
             newval = subtract ? T_OPCODE_SUB_ST : T_OPCODE_ADD_ST;
@@ -22414,10 +23542,49 @@ md_apply_fix (fixS *  fixP,
           }
         else if (rs == REG_PC || rs == REG_SP)
           {
           }
         else if (rs == REG_PC || rs == REG_SP)
           {
+           /* PR gas/18541.  If the addition is for a defined symbol
+              within range of an ADR instruction then accept it.  */
+           if (subtract
+               && value == 4
+               && fixP->fx_addsy != NULL)
+             {
+               subtract = 0;
+
+               if (! S_IS_DEFINED (fixP->fx_addsy)
+                   || S_GET_SEGMENT (fixP->fx_addsy) != seg
+                   || S_IS_WEAK (fixP->fx_addsy))
+                 {
+                   as_bad_where (fixP->fx_file, fixP->fx_line,
+                                 _("address calculation needs a strongly defined nearby symbol"));
+                 }
+               else
+                 {
+                   offsetT v = fixP->fx_where + fixP->fx_frag->fr_address;
+
+                   /* Round up to the next 4-byte boundary.  */
+                   if (v & 3)
+                     v = (v + 3) & ~ 3;
+                   else
+                     v += 4;
+                   v = S_GET_VALUE (fixP->fx_addsy) - v;
+
+                   if (v & ~0x3fc)
+                     {
+                       as_bad_where (fixP->fx_file, fixP->fx_line,
+                                     _("symbol too far away"));
+                     }
+                   else
+                     {
+                       fixP->fx_done = 1;
+                       value = v;
+                     }
+                 }
+             }
+
             if (subtract || value & ~0x3fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for address calculation (value = 0x%08lX)"),
             if (subtract || value & ~0x3fc)
               as_bad_where (fixP->fx_file, fixP->fx_line,
                             _("invalid immediate for address calculation (value = 0x%08lX)"),
-                           (unsigned long) value);
+                           (unsigned long) (subtract ? - value : value));
             newval = (rs == REG_PC ? T_OPCODE_ADD_PC : T_OPCODE_ADD_SP);
             newval |= rd << 8;
             newval |= value >> 2;
             newval = (rs == REG_PC ? T_OPCODE_ADD_PC : T_OPCODE_ADD_SP);
             newval |= rd << 8;
             newval |= value >> 2;
@@ -22515,6 +23682,68 @@ md_apply_fix (fixS *   fixP,
         }
        return;
  
         }
        return;
  
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC:
+   case BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC:
+      gas_assert (!fixP->fx_done);
+      {
+       bfd_vma insn;
+       bfd_boolean is_mov;
+       bfd_vma encoded_addend = value;
+
+       /* Check that addend can be encoded in instruction.  */
+       if (!seg->use_rela_p && (value < 0 || value > 255))
+         as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("the offset 0x%08lX is not representable"),
+                       (unsigned long) encoded_addend);
+
+       /* Extract the instruction.  */
+       insn = md_chars_to_number (buf, THUMB_SIZE);
+       is_mov = (insn & 0xf800) == 0x2000;
+
+       /* Encode insn.  */
+       if (is_mov)
+         {
+           if (!seg->use_rela_p)
+             insn |= encoded_addend;
+         }
+       else
+         {
+           int rd, rs;
+
+           /* Extract the instruction.  */
+            /* Encoding is the following
+               0x8000  SUB
+               0x00F0  Rd
+               0x000F  Rs
+            */
+            /* The following conditions must be true :
+               - ADD
+               - Rd == Rs
+               - Rd <= 7
+            */
+           rd = (insn >> 4) & 0xf;
+           rs = insn & 0xf;
+           if ((insn & 0x8000) || (rd != rs) || rd > 7)
+             as_bad_where (fixP->fx_file, fixP->fx_line,
+                       _("Unable to process relocation for thumb opcode: %lx"),
+                       (unsigned long) insn);
+
+           /* Encode as ADD immediate8 thumb 1 code.  */
+           insn = 0x3000 | (rd << 8);
+
+           /* Place the encoded addend into the first 8 bits of the
+              instruction.  */
+           if (!seg->use_rela_p)
+             insn |= encoded_addend;
+         }
+
+       /* Update the instruction.  */
+       md_number_to_chars (buf, insn, THUMB_SIZE);
+      }
+      break;
+
     case BFD_RELOC_ARM_ALU_PC_G0_NC:
     case BFD_RELOC_ARM_ALU_PC_G0:
     case BFD_RELOC_ARM_ALU_PC_G1_NC:
     case BFD_RELOC_ARM_ALU_PC_G0_NC:
     case BFD_RELOC_ARM_ALU_PC_G0:
     case BFD_RELOC_ARM_ALU_PC_G1_NC:
@@ -22820,7 +24049,6 @@ tc_gen_reloc (asection *section, fixS *fixp)
      case BFD_RELOC_ARM_SBREL32:
      case BFD_RELOC_ARM_PREL31:
      case BFD_RELOC_ARM_TARGET2:
      case BFD_RELOC_ARM_SBREL32:
      case BFD_RELOC_ARM_PREL31:
      case BFD_RELOC_ARM_TARGET2:
-    case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_LDO32:
      case BFD_RELOC_ARM_PCREL_CALL:
      case BFD_RELOC_ARM_PCREL_JUMP:
      case BFD_RELOC_ARM_TLS_LDO32:
      case BFD_RELOC_ARM_PCREL_CALL:
      case BFD_RELOC_ARM_PCREL_JUMP:
@@ -22853,11 +24081,16 @@ tc_gen_reloc (asection *section, fixS *fixp)
      case BFD_RELOC_ARM_LDC_SB_G1:
      case BFD_RELOC_ARM_LDC_SB_G2:
      case BFD_RELOC_ARM_V4BX:
      case BFD_RELOC_ARM_LDC_SB_G1:
      case BFD_RELOC_ARM_LDC_SB_G2:
      case BFD_RELOC_ARM_V4BX:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G1_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G2_NC:
+    case BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC:
        code = fixp->fx_r_type;
        break;
  
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
        code = fixp->fx_r_type;
        break;
  
      case BFD_RELOC_ARM_TLS_GOTDESC:
      case BFD_RELOC_ARM_TLS_GD32:
+    case BFD_RELOC_ARM_TLS_LE32:
      case BFD_RELOC_ARM_TLS_IE32:
      case BFD_RELOC_ARM_TLS_LDM32:
        /* BFD will include the symbol's address in the addend.
      case BFD_RELOC_ARM_TLS_IE32:
      case BFD_RELOC_ARM_TLS_LDM32:
        /* BFD will include the symbol's address in the addend.
@@ -22901,7 +24134,7 @@ tc_gen_reloc (asection *section, fixS *fixp)
  
      default:
        {
  
      default:
        {
-       char * type;
+       const char * type;
  
         switch (fixp->fx_r_type)
           {
  
         switch (fixp->fx_r_type)
           {
@@ -22961,9 +24194,9 @@ void
  cons_fix_new_arm (fragS *      frag,
                   int           where,
                   int           size,
  cons_fix_new_arm (fragS *      frag,
                   int           where,
                   int           size,
-                 expressionS * exp)
+                 expressionS * exp,
+                 bfd_reloc_code_real_type reloc)
  {
  {
-  bfd_reloc_code_real_type type;
    int pcrel = 0;
  
    /* Pick a reloc.
    int pcrel = 0;
  
    /* Pick a reloc.
@@ -22971,17 +24204,17 @@ cons_fix_new_arm (fragS *     frag,
    switch (size)
      {
      case 1:
    switch (size)
      {
      case 1:
-      type = BFD_RELOC_8;
+      reloc = BFD_RELOC_8;
        break;
      case 2:
        break;
      case 2:
-      type = BFD_RELOC_16;
+      reloc = BFD_RELOC_16;
        break;
      case 4:
      default:
        break;
      case 4:
      default:
-      type = BFD_RELOC_32;
+      reloc = BFD_RELOC_32;
        break;
      case 8:
        break;
      case 8:
-      type = BFD_RELOC_64;
+      reloc = BFD_RELOC_64;
        break;
      }
  
        break;
      }
  
@@ -22989,11 +24222,11 @@ cons_fix_new_arm (fragS *     frag,
    if (exp->X_op == O_secrel)
    {
      exp->X_op = O_symbol;
    if (exp->X_op == O_secrel)
    {
      exp->X_op = O_symbol;
-    type = BFD_RELOC_32_SECREL;
+    reloc = BFD_RELOC_32_SECREL;
    }
  #endif
  
    }
  #endif
  
-  fix_new_exp (frag, where, (int) size, exp, pcrel, type);
+  fix_new_exp (frag, where, size, exp, pcrel, reloc);
  }
  
  #if defined (OBJ_COFF)
  }
  
  #if defined (OBJ_COFF)
@@ -23154,12 +24387,17 @@ arm_fix_adjustable (fixS * fixP)
        || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVT_PCREL)
      return FALSE;
  
        || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVT_PCREL)
      return FALSE;
  
+  /* BFD_RELOC_ARM_THUMB_ALU_ABS_Gx_NC relocations have VERY limited
+     offsets, so keep these symbols.  */
+  if (fixP->fx_r_type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC
+      && fixP->fx_r_type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC)
+    return FALSE;
+
    return TRUE;
  }
  #endif /* defined (OBJ_ELF) || defined (OBJ_COFF) */
  
  #ifdef OBJ_ELF
    return TRUE;
  }
  #endif /* defined (OBJ_ELF) || defined (OBJ_COFF) */
  
  #ifdef OBJ_ELF
-
  const char *
  elf32_arm_target_format (void)
  {
  const char *
  elf32_arm_target_format (void)
  {
@@ -23484,6 +24722,8 @@ md_begin (void)
        mcpu_cpu_opt = &cpu_default;
        selected_cpu = cpu_default;
      }
        mcpu_cpu_opt = &cpu_default;
        selected_cpu = cpu_default;
      }
+  else if (no_cpu_selected ())
+    selected_cpu = cpu_default;
  #else
    if (mcpu_cpu_opt)
      selected_cpu = *mcpu_cpu_opt;
  #else
    if (mcpu_cpu_opt)
      selected_cpu = *mcpu_cpu_opt;
@@ -23631,6 +24871,7 @@ md_begin (void)
               -mthumb-interwork          Code supports ARM/Thumb interworking
  
               -m[no-]warn-deprecated     Warn about deprecated features
               -mthumb-interwork          Code supports ARM/Thumb interworking
  
               -m[no-]warn-deprecated     Warn about deprecated features
+             -m[no-]warn-syms           Warn when symbols match instructions
  
        For now we will also provide support for:
  
  
        For now we will also provide support for:
  
@@ -23699,15 +24940,16 @@ struct option md_longopts[] =
    {NULL, no_argument, NULL, 0}
  };
  
    {NULL, no_argument, NULL, 0}
  };
  
+
  size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
  size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
-  char *option;                /* Option name to match.  */
-  char *help;          /* Help information.  */
+  const char *option;          /* Option name to match.  */
+  const char *help;            /* Help information.  */
    int  *var;           /* Variable to change.  */
    int  value;          /* What to change it to.  */
    int  *var;           /* Variable to change.  */
    int  value;          /* What to change it to.  */
-  char *deprecated;    /* If non-null, print this message.  */
+  const char *deprecated;      /* If non-null, print this message.  */
  };
  
  struct arm_option_table arm_opts[] =
  };
  
  struct arm_option_table arm_opts[] =
@@ -23733,15 +24975,17 @@ struct arm_option_table arm_opts[] =
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
+  {"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
+  {"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
    {NULL, NULL, NULL, 0, NULL}
  };
  
  struct arm_legacy_option_table
  {
    {NULL, NULL, NULL, 0, NULL}
  };
  
  struct arm_legacy_option_table
  {
-  char *option;                                /* Option name to match.  */
+  const char *option;                          /* Option name to match.  */
    const arm_feature_set        **var;          /* Variable to change.  */
    const arm_feature_set        value;          /* What to change it to.  */
    const arm_feature_set        **var;          /* Variable to change.  */
    const arm_feature_set        value;          /* What to change it to.  */
-  char *deprecated;                    /* If non-null, print this message.  */
+  const char *deprecated;                      /* If non-null, print this message.  */
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
@@ -23859,7 +25103,7 @@ const struct arm_legacy_option_table arm_legacy_opts[] =
  
  struct arm_cpu_option_table
  {
  
  struct arm_cpu_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    /* For some CPUs we assume an FPU unless the user explicitly sets
    size_t name_len;
    const arm_feature_set        value;
    /* For some CPUs we assume an FPU unless the user explicitly sets
@@ -23959,31 +25203,36 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("mpcorenovfp",  ARM_ARCH_V6K,    FPU_NONE,        "MPCore"),
    ARM_CPU_OPT ("arm1156t2-s",  ARM_ARCH_V6T2,   FPU_NONE,        NULL),
    ARM_CPU_OPT ("arm1156t2f-s", ARM_ARCH_V6T2,   FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("mpcorenovfp",  ARM_ARCH_V6K,    FPU_NONE,        "MPCore"),
    ARM_CPU_OPT ("arm1156t2-s",  ARM_ARCH_V6T2,   FPU_NONE,        NULL),
    ARM_CPU_OPT ("arm1156t2f-s", ARM_ARCH_V6T2,   FPU_ARCH_VFP_V2, NULL),
-  ARM_CPU_OPT ("arm1176jz-s",  ARM_ARCH_V6ZK,   FPU_NONE,        NULL),
-  ARM_CPU_OPT ("arm1176jzf-s", ARM_ARCH_V6ZK,   FPU_ARCH_VFP_V2, NULL),
+  ARM_CPU_OPT ("arm1176jz-s",  ARM_ARCH_V6KZ,   FPU_NONE,        NULL),
+  ARM_CPU_OPT ("arm1176jzf-s", ARM_ARCH_V6KZ,   FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("cortex-a5",    ARM_ARCH_V7A_MP_SEC,
                                                  FPU_NONE,        "Cortex-A5"),
    ARM_CPU_OPT ("cortex-a5",    ARM_ARCH_V7A_MP_SEC,
                                                  FPU_NONE,        "Cortex-A5"),
-  ARM_CPU_OPT ("cortex-a7",    ARM_ARCH_V7A_IDIV_MP_SEC_VIRT,
-                                                FPU_ARCH_NEON_VFP_V4,
+  ARM_CPU_OPT ("cortex-a7",    ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A7"),
    ARM_CPU_OPT ("cortex-a8",    ARM_ARCH_V7A_SEC,
                                                                   "Cortex-A7"),
    ARM_CPU_OPT ("cortex-a8",    ARM_ARCH_V7A_SEC,
-                                                ARM_FEATURE (0, FPU_VFP_V3
+                                                ARM_FEATURE_COPROC (FPU_VFP_V3
                                                         | FPU_NEON_EXT_V1),
                                                                   "Cortex-A8"),
    ARM_CPU_OPT ("cortex-a9",    ARM_ARCH_V7A_MP_SEC,
                                                         | FPU_NEON_EXT_V1),
                                                                   "Cortex-A8"),
    ARM_CPU_OPT ("cortex-a9",    ARM_ARCH_V7A_MP_SEC,
-                                                ARM_FEATURE (0, FPU_VFP_V3
+                                                ARM_FEATURE_COPROC (FPU_VFP_V3
                                                         | FPU_NEON_EXT_V1),
                                                                   "Cortex-A9"),
                                                         | FPU_NEON_EXT_V1),
                                                                   "Cortex-A9"),
-  ARM_CPU_OPT ("cortex-a12",   ARM_ARCH_V7A_IDIV_MP_SEC_VIRT,
-                                                FPU_ARCH_NEON_VFP_V4,
+  ARM_CPU_OPT ("cortex-a12",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A12"),
                                                                   "Cortex-A12"),
-  ARM_CPU_OPT ("cortex-a15",   ARM_ARCH_V7A_IDIV_MP_SEC_VIRT,
-                                                FPU_ARCH_NEON_VFP_V4,
+  ARM_CPU_OPT ("cortex-a15",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A15"),
                                                                   "Cortex-A15"),
+  ARM_CPU_OPT ("cortex-a17",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
+                                                                 "Cortex-A17"),
+  ARM_CPU_OPT ("cortex-a32",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A32"),
+  ARM_CPU_OPT ("cortex-a35",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A35"),
    ARM_CPU_OPT ("cortex-a53",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A53"),
    ARM_CPU_OPT ("cortex-a57",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A57"),
    ARM_CPU_OPT ("cortex-a53",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A53"),
    ARM_CPU_OPT ("cortex-a57",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A57"),
+  ARM_CPU_OPT ("cortex-a72",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A72"),
    ARM_CPU_OPT ("cortex-r4",    ARM_ARCH_V7R,    FPU_NONE,        "Cortex-R4"),
    ARM_CPU_OPT ("cortex-r4f",   ARM_ARCH_V7R,    FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R4F"),
    ARM_CPU_OPT ("cortex-r4",    ARM_ARCH_V7R,    FPU_NONE,        "Cortex-R4"),
    ARM_CPU_OPT ("cortex-r4f",   ARM_ARCH_V7R,    FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R4F"),
@@ -23992,11 +25241,19 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("cortex-r7",    ARM_ARCH_V7R_IDIV,
                                                  FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R7"),
    ARM_CPU_OPT ("cortex-r7",    ARM_ARCH_V7R_IDIV,
                                                  FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R7"),
+  ARM_CPU_OPT ("cortex-m7",    ARM_ARCH_V7EM,   FPU_NONE,        "Cortex-M7"),
    ARM_CPU_OPT ("cortex-m4",    ARM_ARCH_V7EM,   FPU_NONE,        "Cortex-M4"),
    ARM_CPU_OPT ("cortex-m3",    ARM_ARCH_V7M,    FPU_NONE,        "Cortex-M3"),
    ARM_CPU_OPT ("cortex-m1",    ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M1"),
    ARM_CPU_OPT ("cortex-m0",    ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M0"),
    ARM_CPU_OPT ("cortex-m0plus",        ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M0+"),
    ARM_CPU_OPT ("cortex-m4",    ARM_ARCH_V7EM,   FPU_NONE,        "Cortex-M4"),
    ARM_CPU_OPT ("cortex-m3",    ARM_ARCH_V7M,    FPU_NONE,        "Cortex-M3"),
    ARM_CPU_OPT ("cortex-m1",    ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M1"),
    ARM_CPU_OPT ("cortex-m0",    ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M0"),
    ARM_CPU_OPT ("cortex-m0plus",        ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M0+"),
+  ARM_CPU_OPT ("exynos-m1",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Samsung " \
+                                                                 "Exynos M1"),
+  ARM_CPU_OPT ("qdf24xx",      ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Qualcomm "
+                                                                 "QDF24XX"),
+
    /* ??? XSCALE is really an architecture.  */
    ARM_CPU_OPT ("xscale",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* ??? iwmmxt is not a processor.  */
    /* ??? XSCALE is really an architecture.  */
    ARM_CPU_OPT ("xscale",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* ??? iwmmxt is not a processor.  */
@@ -24004,11 +25261,22 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("iwmmxt2",      ARM_ARCH_IWMMXT2,FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("i80200",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* Maverick */
    ARM_CPU_OPT ("iwmmxt2",      ARM_ARCH_IWMMXT2,FPU_ARCH_VFP_V2, NULL),
    ARM_CPU_OPT ("i80200",       ARM_ARCH_XSCALE, FPU_ARCH_VFP_V2, NULL),
    /* Maverick */
-  ARM_CPU_OPT ("ep9312",       ARM_FEATURE (ARM_AEXT_V4T, ARM_CEXT_MAVERICK),
+  ARM_CPU_OPT ("ep9312",       ARM_FEATURE_LOW (ARM_AEXT_V4T, ARM_CEXT_MAVERICK),
                                                  FPU_ARCH_MAVERICK, "ARM920T"),
    /* Marvell processors.  */
                                                  FPU_ARCH_MAVERICK, "ARM920T"),
    /* Marvell processors.  */
-  ARM_CPU_OPT ("marvell-pj4",   ARM_FEATURE (ARM_AEXT_V7A | ARM_EXT_MP | ARM_EXT_SEC, 0),
+  ARM_CPU_OPT ("marvell-pj4",   ARM_FEATURE_CORE (ARM_AEXT_V7A | ARM_EXT_MP
+                                                 | ARM_EXT_SEC,
+                                                 ARM_EXT2_V6T2_V8M),
                                                 FPU_ARCH_VFP_V3D16, NULL),
                                                 FPU_ARCH_VFP_V3D16, NULL),
+  ARM_CPU_OPT ("marvell-whitney", ARM_FEATURE_CORE (ARM_AEXT_V7A | ARM_EXT_MP
+                                                   | ARM_EXT_SEC,
+                                                   ARM_EXT2_V6T2_V8M),
+                                              FPU_ARCH_NEON_VFP_V4, NULL),
+  /* APM X-Gene family.  */
+  ARM_CPU_OPT ("xgene1",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "APM X-Gene 1"),
+  ARM_CPU_OPT ("xgene2",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "APM X-Gene 2"),
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
  };
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
  };
@@ -24016,7 +25284,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
  
  struct arm_arch_option_table
  {
  
  struct arm_arch_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    const arm_feature_set        default_fpu;
    size_t name_len;
    const arm_feature_set        value;
    const arm_feature_set        default_fpu;
@@ -24048,24 +25316,35 @@ static const struct arm_arch_option_table arm_archs[] =
    ARM_ARCH_OPT ("armv6j",      ARM_ARCH_V6,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6k",      ARM_ARCH_V6K,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6z",      ARM_ARCH_V6Z,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6j",      ARM_ARCH_V6,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6k",      ARM_ARCH_V6K,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6z",      ARM_ARCH_V6Z,    FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zk",     ARM_ARCH_V6ZK,   FPU_ARCH_VFP),
+  /* The official spelling of this variant is ARMv6KZ; the name "armv6zk" is
+     kept to preserve existing behaviour.  */
+  ARM_ARCH_OPT ("armv6kz",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv6zk",     ARM_ARCH_V6KZ,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6t2",     ARM_ARCH_V6T2,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6kt2",    ARM_ARCH_V6KT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6zt2",    ARM_ARCH_V6ZT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6t2",     ARM_ARCH_V6T2,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6kt2",    ARM_ARCH_V6KT2,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6zt2",    ARM_ARCH_V6ZT2,  FPU_ARCH_VFP),
-  ARM_ARCH_OPT ("armv6zkt2",   ARM_ARCH_V6ZKT2, FPU_ARCH_VFP),
+  /* The official spelling of this variant is ARMv6KZT2; the name "armv6zkt2"
+     is kept to preserve existing behaviour.  */
+  ARM_ARCH_OPT ("armv6kzt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv6zkt2",   ARM_ARCH_V6KZT2, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6-m",     ARM_ARCH_V6M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6s-m",    ARM_ARCH_V6SM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7",       ARM_ARCH_V7,     FPU_ARCH_VFP),
    /* The official spelling of the ARMv7 profile variants is the dashed form.
       Accept the non-dashed form for compatibility with old toolchains.  */
    ARM_ARCH_OPT ("armv7a",      ARM_ARCH_V7A,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6-m",     ARM_ARCH_V6M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv6s-m",    ARM_ARCH_V6SM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7",       ARM_ARCH_V7,     FPU_ARCH_VFP),
    /* The official spelling of the ARMv7 profile variants is the dashed form.
       Accept the non-dashed form for compatibility with old toolchains.  */
    ARM_ARCH_OPT ("armv7a",      ARM_ARCH_V7A,    FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv7ve",     ARM_ARCH_V7VE,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7r",      ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7m",      ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-a",     ARM_ARCH_V7A,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-r",     ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-m",     ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7e-m",    ARM_ARCH_V7EM,   FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7r",      ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7m",      ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-a",     ARM_ARCH_V7A,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-r",     ARM_ARCH_V7R,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7-m",     ARM_ARCH_V7M,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv7e-m",    ARM_ARCH_V7EM,   FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8-m.base",        ARM_ARCH_V8M_BASE, FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8-m.main",        ARM_ARCH_V8M_MAIN, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv8-a",     ARM_ARCH_V8A,    FPU_ARCH_VFP),
    ARM_ARCH_OPT ("armv8-a",     ARM_ARCH_V8A,    FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8.1-a",   ARM_ARCH_V8_1A,  FPU_ARCH_VFP),
+  ARM_ARCH_OPT ("armv8.2-a",   ARM_ARCH_V8_2A,  FPU_ARCH_VFP),
    ARM_ARCH_OPT ("xscale",      ARM_ARCH_XSCALE, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",      ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",     ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
    ARM_ARCH_OPT ("xscale",      ARM_ARCH_XSCALE, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",      ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",     ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
@@ -24076,49 +25355,69 @@ static const struct arm_arch_option_table arm_archs[] =
  /* ISA extensions in the co-processor and main instruction set space.  */
  struct arm_option_extension_value_table
  {
  /* ISA extensions in the co-processor and main instruction set space.  */
  struct arm_option_extension_value_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    size_t name_len;
-  const arm_feature_set value;
+  const arm_feature_set merge_value;
+  const arm_feature_set clear_value;
    const arm_feature_set allowed_archs;
  };
  
  /* The following table must be in alphabetical order with a NULL last entry.
     */
    const arm_feature_set allowed_archs;
  };
  
  /* The following table must be in alphabetical order with a NULL last entry.
     */
-#define ARM_EXT_OPT(N, V, AA) { N, sizeof (N) - 1, V, AA }
+#define ARM_EXT_OPT(N, M, C, AA) { N, sizeof (N) - 1, M, C, AA }
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
-  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE (ARM_EXT_V8, 0)),
+  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+                        ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
-                                  ARM_FEATURE (ARM_EXT_V8, 0)),
-  ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8,
-                                  ARM_FEATURE (ARM_EXT_V8, 0)),
-  ARM_EXT_OPT ("idiv", ARM_FEATURE (ARM_EXT_ADIV | ARM_EXT_DIV, 0),
-                                  ARM_FEATURE (ARM_EXT_V7A | ARM_EXT_V7R, 0)),
-  ARM_EXT_OPT ("iwmmxt",ARM_FEATURE (0, ARM_CEXT_IWMMXT),      ARM_ANY),
-  ARM_EXT_OPT ("iwmmxt2",
-                       ARM_FEATURE (0, ARM_CEXT_IWMMXT2),      ARM_ANY),
-  ARM_EXT_OPT ("maverick",
-                       ARM_FEATURE (0, ARM_CEXT_MAVERICK),     ARM_ANY),
-  ARM_EXT_OPT ("mp",   ARM_FEATURE (ARM_EXT_MP, 0),
-                                  ARM_FEATURE (ARM_EXT_V7A | ARM_EXT_V7R, 0)),
-  ARM_EXT_OPT ("simd",   FPU_ARCH_NEON_VFP_ARMV8,
-                                  ARM_FEATURE (ARM_EXT_V8, 0)),
-  ARM_EXT_OPT ("os",   ARM_FEATURE (ARM_EXT_OS, 0),
-                                  ARM_FEATURE (ARM_EXT_V6M, 0)),
-  ARM_EXT_OPT ("sec",  ARM_FEATURE (ARM_EXT_SEC, 0),
-                                  ARM_FEATURE (ARM_EXT_V6K | ARM_EXT_V7A, 0)),
-  ARM_EXT_OPT ("virt", ARM_FEATURE (ARM_EXT_VIRT | ARM_EXT_ADIV
-                                    | ARM_EXT_DIV, 0),
-                                  ARM_FEATURE (ARM_EXT_V7A, 0)),
-  ARM_EXT_OPT ("xscale",ARM_FEATURE (0, ARM_CEXT_XSCALE),      ARM_ANY),
-  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+                        ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_VFP_ARMV8),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("fp16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+                       ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+                       ARM_ARCH_V8_2A),
+  ARM_EXT_OPT ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
+  ARM_EXT_OPT ("iwmmxt",ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT),
+                       ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), ARM_ANY),
+  ARM_EXT_OPT ("iwmmxt2", ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2),
+                       ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), ARM_ANY),
+  ARM_EXT_OPT ("maverick", ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK),
+                       ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), ARM_ANY),
+  ARM_EXT_OPT ("mp",   ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
+  ARM_EXT_OPT ("os",   ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)),
+  ARM_EXT_OPT ("pan",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN),
+                       ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8_1,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("sec",  ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)),
+  ARM_EXT_OPT ("simd",  FPU_ARCH_NEON_VFP_ARMV8,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("virt", ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT | ARM_EXT_ADIV
+                                    | ARM_EXT_DIV),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
+                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)),
+  ARM_EXT_OPT ("xscale",ARM_FEATURE_COPROC (ARM_CEXT_XSCALE),
+                       ARM_FEATURE_COPROC (ARM_CEXT_XSCALE), ARM_ANY),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  #undef ARM_EXT_OPT
  
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
  };
  #undef ARM_EXT_OPT
  
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
-  char *name;
+  const char *name;
    const arm_feature_set value;
  };
  
    const arm_feature_set value;
  };
  
@@ -24159,17 +25458,22 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
    {"vfpv4",            FPU_ARCH_VFP_V4},
    {"vfpv4-d16",                FPU_ARCH_VFP_V4D16},
    {"fpv4-sp-d16",      FPU_ARCH_VFP_V4_SP_D16},
    {"vfpv4",            FPU_ARCH_VFP_V4},
    {"vfpv4-d16",                FPU_ARCH_VFP_V4D16},
    {"fpv4-sp-d16",      FPU_ARCH_VFP_V4_SP_D16},
+  {"fpv5-d16",         FPU_ARCH_VFP_V5D16},
+  {"fpv5-sp-d16",      FPU_ARCH_VFP_V5_SP_D16},
    {"neon-vfpv4",       FPU_ARCH_NEON_VFP_V4},
    {"fp-armv8",         FPU_ARCH_VFP_ARMV8},
    {"neon-fp-armv8",    FPU_ARCH_NEON_VFP_ARMV8},
    {"crypto-neon-fp-armv8",
                         FPU_ARCH_CRYPTO_NEON_VFP_ARMV8},
    {"neon-vfpv4",       FPU_ARCH_NEON_VFP_V4},
    {"fp-armv8",         FPU_ARCH_VFP_ARMV8},
    {"neon-fp-armv8",    FPU_ARCH_NEON_VFP_ARMV8},
    {"crypto-neon-fp-armv8",
                         FPU_ARCH_CRYPTO_NEON_VFP_ARMV8},
+  {"neon-fp-armv8.1",  FPU_ARCH_NEON_VFP_ARMV8_1},
+  {"crypto-neon-fp-armv8.1",
+                       FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1},
    {NULL,               ARM_ARCH_NONE}
  };
  
  struct arm_option_value_table
  {
    {NULL,               ARM_ARCH_NONE}
  };
  
  struct arm_option_value_table
  {
-  char *name;
+  const char *name;
    long value;
  };
  
    long value;
  };
  
@@ -24194,10 +25498,10 @@ static const struct arm_option_value_table arm_eabis[] =
  
  struct arm_long_option_table
  {
  
  struct arm_long_option_table
  {
-  char * option;               /* Substring to match.  */
-  char * help;                 /* Help information.  */
+  const char * option;         /* Substring to match.  */
+  const char * help;                   /* Help information.  */
    int (* func) (char * subopt);        /* Function to decode sub-option.  */
    int (* func) (char * subopt);        /* Function to decode sub-option.  */
-  char * deprecated;           /* If non-null, print this message.  */
+  const char * deprecated;             /* If non-null, print this message.  */
  };
  
  static bfd_boolean
  };
  
  static bfd_boolean
@@ -24286,9 +25590,9 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
  
             /* Add or remove the extension.  */
             if (adding_value)
  
             /* Add or remove the extension.  */
             if (adding_value)
-             ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, opt->value);
+             ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, opt->merge_value);
             else
             else
-             ARM_CLEAR_FEATURE (*ext_set, *ext_set, opt->value);
+             ARM_CLEAR_FEATURE (*ext_set, *ext_set, opt->clear_value);
  
             break;
           }
  
             break;
           }
@@ -24347,11 +25651,17 @@ arm_parse_cpu (char *str)
         mcpu_cpu_opt = &opt->value;
         mcpu_fpu_opt = &opt->default_fpu;
         if (opt->canonical_name)
         mcpu_cpu_opt = &opt->value;
         mcpu_fpu_opt = &opt->default_fpu;
         if (opt->canonical_name)
-         strcpy (selected_cpu_name, opt->canonical_name);
+         {
+           gas_assert (sizeof selected_cpu_name > strlen (opt->canonical_name));
+           strcpy (selected_cpu_name, opt->canonical_name);
+         }
         else
           {
             size_t i;
  
         else
           {
             size_t i;
  
+           if (len >= sizeof selected_cpu_name)
+             len = (sizeof selected_cpu_name) - 1;
+
             for (i = 0; i < len; i++)
               selected_cpu_name[i] = TOUPPER (opt->name[i]);
             selected_cpu_name[i] = 0;
             for (i = 0; i < len; i++)
               selected_cpu_name[i] = TOUPPER (opt->name[i]);
             selected_cpu_name[i] = 0;
@@ -24474,6 +25784,15 @@ arm_parse_it_mode (char * str)
    return ret;
  }
  
    return ret;
  }
  
+static bfd_boolean  /* Option handler listed for "mccs" in arm_long_opts.  */
+arm_ccs_mode (char * unused ATTRIBUTE_UNUSED)  /* Argument is ignored.  */
+{
+  codecomposer_syntax = TRUE;  /* Turn on CodeComposer Studio syntax mode.  */
+  arm_comment_chars[0] = ';';  /* First comment character becomes ';'.  */
+  arm_line_separator_chars[0] = 0;  /* Zero the first line-separator slot.  */
+  return TRUE;  /* Unconditionally reports success.  */
+}
+
  struct arm_long_option_table arm_long_opts[] =
  {
    {"mcpu=", N_("<cpu name>\t  assemble for CPU <cpu name>"),
  struct arm_long_option_table arm_long_opts[] =
  {
    {"mcpu=", N_("<cpu name>\t  assemble for CPU <cpu name>"),
@@ -24490,6 +25809,8 @@ struct arm_long_option_table arm_long_opts[] =
  #endif
    {"mimplicit-it=", N_("<mode>\t  controls implicit insertion of IT instructions"),
     arm_parse_it_mode, NULL},
  #endif
    {"mimplicit-it=", N_("<mode>\t  controls implicit insertion of IT instructions"),
     arm_parse_it_mode, NULL},
+  {"mccs", N_("\t\t\t  TI CodeComposer Studio syntax compatibility mode"),
+   arm_ccs_mode, NULL},
    {NULL, NULL, 0, NULL}
  };
  
    {NULL, NULL, 0, NULL}
  };
  
@@ -24622,8 +25943,9 @@ typedef struct
    arm_feature_set flags;
  } cpu_arch_ver_table;
  
    arm_feature_set flags;
  } cpu_arch_ver_table;
  
-/* Mapping from CPU features to EABI CPU arch values.  Table must be sorted
-   least features first.  */
+/* Mapping from CPU features to EABI CPU arch values.  As a general rule, the
+   table must be sorted least features first, but some reordering is needed,
+   e.g. for Thumb-2 instructions to be detected as coming from ARMv6T2.  */
  static const cpu_arch_ver_table cpu_arch_ver[] =
  {
      {1, ARM_ARCH_V4},
  static const cpu_arch_ver_table cpu_arch_ver[] =
  {
      {1, ARM_ARCH_V4},
@@ -24638,10 +25960,12 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
      {11, ARM_ARCH_V6M},
      {12, ARM_ARCH_V6SM},
      {8, ARM_ARCH_V6T2},
      {11, ARM_ARCH_V6M},
      {12, ARM_ARCH_V6SM},
      {8, ARM_ARCH_V6T2},
-    {10, ARM_ARCH_V7A_IDIV_MP_SEC_VIRT},
+    {10, ARM_ARCH_V7VE},
      {10, ARM_ARCH_V7R},
      {10, ARM_ARCH_V7M},
      {14, ARM_ARCH_V8A},
      {10, ARM_ARCH_V7R},
      {10, ARM_ARCH_V7M},
      {14, ARM_ARCH_V8A},
+    {16, ARM_ARCH_V8M_BASE},
+    {17, ARM_ARCH_V8M_MAIN},
      {0, ARM_ARCH_NONE}
  };
  
      {0, ARM_ARCH_NONE}
  };
  
@@ -24665,7 +25989,7 @@ aeabi_set_attribute_string (int tag, const char *value)
  }
  
  /* Set the public EABI object attributes.  */
  }
  
  /* Set the public EABI object attributes.  */
-static void
+void
  aeabi_set_public_attributes (void)
  {
    int arch;
  aeabi_set_public_attributes (void)
  {
    int arch;
@@ -24674,6 +25998,7 @@ aeabi_set_public_attributes (void)
    int fp16_optional = 0;
    arm_feature_set flags;
    arm_feature_set tmp;
    int fp16_optional = 0;
    arm_feature_set flags;
    arm_feature_set tmp;
+  arm_feature_set arm_arch_v8m_base = ARM_ARCH_V8M_BASE;
    const cpu_arch_ver_table *p;
  
    /* Choose the architecture based on the capabilities of the requested cpu
    const cpu_arch_ver_table *p;
  
    /* Choose the architecture based on the capabilities of the requested cpu
@@ -24688,6 +26013,8 @@ aeabi_set_public_attributes (void)
    if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_any))
      ARM_MERGE_FEATURE_SETS (flags, flags, arm_ext_v4t);
  
    if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_any))
      ARM_MERGE_FEATURE_SETS (flags, flags, arm_ext_v4t);
  
+  selected_cpu = flags;
+
    /* Allow the user to override the reported architecture.  */
    if (object_arch)
      {
    /* Allow the user to override the reported architecture.  */
    if (object_arch)
      {
@@ -24720,11 +26047,22 @@ aeabi_set_public_attributes (void)
       actually used.  Perhaps we should separate out the specified
       and implicit cases.  Avoid taking this path for -march=all by
       checking for contradictory v7-A / v7-M features.  */
       actually used.  Perhaps we should separate out the specified
       and implicit cases.  Avoid taking this path for -march=all by
       checking for contradictory v7-A / v7-M features.  */
-  if (arch == 10
+  if (arch == TAG_CPU_ARCH_V7
        && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
        && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
        && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
-    arch = 13;
+    arch = TAG_CPU_ARCH_V7E_M;
+
+  ARM_CLEAR_FEATURE (tmp, flags, arm_arch_v8m_base);
+  if (arch == TAG_CPU_ARCH_V8M_BASE && ARM_CPU_HAS_FEATURE (tmp, arm_arch_any))
+    arch = TAG_CPU_ARCH_V8M_MAIN;
+
+  /* In cpu_arch_ver ARMv8-A is before ARMv8-M for atomics to be detected as
+     coming from ARMv8-A.  However, since ARMv8-A has more instructions than
+     ARMv8-M, -march=all must be detected as ARMv8-A.  */
+  if (arch == TAG_CPU_ARCH_V8M_MAIN
+      && ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any))
+    arch = TAG_CPU_ARCH_V8;
  
    /* Tag_CPU_name.  */
    if (selected_cpu_name[0])
  
    /* Tag_CPU_name.  */
    if (selected_cpu_name[0])
@@ -24747,7 +26085,10 @@ aeabi_set_public_attributes (void)
    aeabi_set_attribute_int (Tag_CPU_arch, arch);
  
    /* Tag_CPU_arch_profile.  */
    aeabi_set_attribute_int (Tag_CPU_arch, arch);
  
    /* Tag_CPU_arch_profile.  */
-  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a))
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
+      || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+      || (ARM_CPU_HAS_FEATURE (flags, arm_ext_atomics)
+         && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m)))
      profile = 'A';
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7r))
      profile = 'R';
      profile = 'A';
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7r))
      profile = 'R';
@@ -24767,12 +26108,24 @@ aeabi_set_public_attributes (void)
    /* Tag_THUMB_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
        || arch == 0)
    /* Tag_THUMB_ISA_use.  */
    if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
        || arch == 0)
-    aeabi_set_attribute_int (Tag_THUMB_ISA_use,
-       ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
+    {
+      int thumb_isa_use;
+
+      if (!ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+         && ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
+       thumb_isa_use = 3;
+      else if (ARM_CPU_HAS_FEATURE (flags, arm_arch_t2))
+       thumb_isa_use = 2;
+      else
+       thumb_isa_use = 1;
+      aeabi_set_attribute_int (Tag_THUMB_ISA_use, thumb_isa_use);
+    }
  
    /* Tag_VFP_arch.  */
  
    /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8))
-    aeabi_set_attribute_int (Tag_VFP_arch, 7);
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8xd))
+    aeabi_set_attribute_int (Tag_VFP_arch,
+                            ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
+                            ? 7 : 8);
    else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
      aeabi_set_attribute_int (Tag_VFP_arch,
                              ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
    else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
      aeabi_set_attribute_int (Tag_VFP_arch,
                              ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
@@ -24830,12 +26183,15 @@ aeabi_set_public_attributes (void)
       in ARM state, or when Thumb integer divide instructions have been used,
       but we have no architecture profile set, nor have we any ARM instructions.
  
       in ARM state, or when Thumb integer divide instructions have been used,
       but we have no architecture profile set, nor have we any ARM instructions.
  
-     For ARMv8 we set the tag to 0 as integer divide is implied by the base
-     architecture.
+     For ARMv8-A and ARMv8-M we set the tag to 0 as integer divide is implied
+     by the base architecture.
  
       For new architectures we will have to check these tests.  */
  
       For new architectures we will have to check these tests.  */
-  gas_assert (arch <= TAG_CPU_ARCH_V8);
-  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v8))
+  gas_assert (arch <= TAG_CPU_ARCH_V8
+             || (arch >= TAG_CPU_ARCH_V8M_BASE
+                 && arch <= TAG_CPU_ARCH_V8M_MAIN));
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
+      || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
      aeabi_set_attribute_int (Tag_DIV_use, 0);
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_adiv)
            || (profile == '\0'
      aeabi_set_attribute_int (Tag_DIV_use, 0);
    else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_adiv)
            || (profile == '\0'
@@ -25008,9 +26364,10 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
           }
  
         if (adding_value)
           }
  
         if (adding_value)
-         ARM_MERGE_FEATURE_SETS (selected_cpu, selected_cpu, opt->value);
+         ARM_MERGE_FEATURE_SETS (selected_cpu, selected_cpu,
+                                 opt->merge_value);
         else
         else
-         ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, opt->value);
+         ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, opt->clear_value);
  
         mcpu_cpu_opt = &selected_cpu;
         ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
  
         mcpu_cpu_opt = &selected_cpu;
         ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
@@ -25020,7 +26377,7 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
        }
  
    if (opt->name == NULL)
        }
  
    if (opt->name == NULL)
-    as_bad (_("unknown architecture `%s'\n"), name);
+    as_bad (_("unknown architecture extension `%s'\n"), name);
  
    *input_line_pointer = saved_char;
    ignore_rest_of_line ();
  
    *input_line_pointer = saved_char;
    ignore_rest_of_line ();
@@ -25140,14 +26497,18 @@ arm_convert_symbolic_attribute (const char *name)
  }
  
  
  }
  
  
-/* Apply sym value for relocations only in the case that
-   they are for local symbols and you have the respective
-   architectural feature for blx and simple switches.  */
+/* Apply sym value for relocations only in the case that they are for
+   local symbols in the same segment as the fixup and you have the
+   respective architectural feature for blx and simple switches.  */
  int
  int
-arm_apply_sym_value (struct fix * fixP)
+arm_apply_sym_value (struct fix * fixP, segT this_seg)
  {
    if (fixP->fx_addsy
        && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t)
  {
    if (fixP->fx_addsy
        && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t)
+      /* PR 17444: If the local symbol is in a different section then a reloc
+        will always be generated for it, so applying the symbol value now
+        will result in a double offset being stored in the relocation.  */
+      && (S_GET_SEGMENT (fixP->fx_addsy) == this_seg)
        && !S_FORCE_RELOC (fixP->fx_addsy, TRUE))
      {
        switch (fixP->fx_r_type)
        && !S_FORCE_RELOC (fixP->fx_addsy, TRUE))
      {
        switch (fixP->fx_r_type)
@@ -25161,7 +26522,7 @@ arm_apply_sym_value (struct fix * fixP)
         case BFD_RELOC_ARM_PCREL_CALL:
         case BFD_RELOC_THUMB_PCREL_BLX:
           if (THUMB_IS_FUNC (fixP->fx_addsy))
         case BFD_RELOC_ARM_PCREL_CALL:
         case BFD_RELOC_THUMB_PCREL_BLX:
           if (THUMB_IS_FUNC (fixP->fx_addsy))
-             return 1;
+           return 1;
           break;
  
         default:
           break;
  
         default: