x86: drop further pointless/bogus DefaultSize

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index 9196476c1f0e877ca816fb95aa1661fb90d17b9f..16cbac4279ef96d0edff57f83cb20935ae5196eb 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
  /* tc-arm.c -- Assemble for the ARM
-   Copyright (C) 1994-2019 Free Software Foundation, Inc.
+   Copyright (C) 1994-2020 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -32,6 +32,7 @@
  #include "obstack.h"
  #include "libiberty.h"
  #include "opcode/arm.h"
  #include "obstack.h"
  #include "libiberty.h"
  #include "opcode/arm.h"
+#include "cpu-arm.h"
  
  #ifdef OBJ_ELF
  #include "elf/arm.h"
  
  #ifdef OBJ_ELF
  #include "elf/arm.h"
@@ -106,6 +107,15 @@ enum arm_float_abi
     should define CPU_DEFAULT here.  */
  #endif
  
     should define CPU_DEFAULT here.  */
  #endif
  
+/* Perform range checks on positive and negative overflows by checking if the
+   VALUE given fits within the range of a BITS sized immediate.  Returns
+   non-zero when the bits of VALUE above its low BITS bits are neither all
+   zero nor all one, i.e. when VALUE lies outside [-2^BITS, 2^BITS - 1].
+   BITS must be smaller than the width of VALUE in bits.  */
+static bfd_boolean out_of_range_p (offsetT value, offsetT bits)
+{
+  valueT high_mask;
+
+  gas_assert (bits < (offsetT)(sizeof (value) * 8));
+  /* Build the mask at the full (unsigned) width of VALUE: shifting a plain
+     int `1' is undefined as soon as BITS reaches the width of int.  */
+  high_mask = ~(((valueT) 1 << bits) - 1);
+  return ((valueT) value & high_mask) != 0
+        && ((valueT) value & high_mask) != high_mask;
+}
+
  #ifndef FPU_DEFAULT
  # ifdef TE_LINUX
  #  define FPU_DEFAULT FPU_ARCH_FPA
  #ifndef FPU_DEFAULT
  # ifdef TE_LINUX
  #  define FPU_DEFAULT FPU_ARCH_FPA
@@ -144,6 +154,7 @@ static int pic_code      = FALSE;
  static int fix_v4bx         = FALSE;
  /* Warn on using deprecated features.  */
  static int warn_on_deprecated = TRUE;
  static int fix_v4bx         = FALSE;
  /* Warn on using deprecated features.  */
  static int warn_on_deprecated = TRUE;
+static int warn_on_restrict_it = FALSE;
  
  /* Understand CodeComposer Studio assembly syntax.  */
  bfd_boolean codecomposer_syntax = FALSE;
  
  /* Understand CodeComposer Studio assembly syntax.  */
  bfd_boolean codecomposer_syntax = FALSE;
@@ -265,11 +276,15 @@ static const arm_feature_set arm_ext_sb =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
  static const arm_feature_set arm_ext_predres =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
  static const arm_feature_set arm_ext_predres =
    ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
+static const arm_feature_set arm_ext_bf16 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
+static const arm_feature_set arm_ext_i8mm =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM);
+static const arm_feature_set arm_ext_crc =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC);
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
  
  static const arm_feature_set arm_arch_any = ARM_ANY;
-#ifdef OBJ_ELF
  static const arm_feature_set fpu_any = FPU_ANY;
  static const arm_feature_set fpu_any = FPU_ANY;
-#endif
  static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1);
  static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
  static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
  static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1);
  static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2;
  static const arm_feature_set arm_arch_none = ARM_ARCH_NONE;
@@ -303,9 +318,9 @@ static const arm_feature_set fpu_neon_ext_v1 =
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
  static const arm_feature_set mve_ext =
  static const arm_feature_set fpu_vfp_v3_or_neon_ext =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
  static const arm_feature_set mve_ext =
-  ARM_FEATURE_COPROC (FPU_MVE);
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE);
  static const arm_feature_set mve_fp_ext =
  static const arm_feature_set mve_fp_ext =
-  ARM_FEATURE_COPROC (FPU_MVE_FP);
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE_FP);
  #ifdef OBJ_ELF
  static const arm_feature_set fpu_vfp_fp16 =
    ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
  #ifdef OBJ_ELF
  static const arm_feature_set fpu_vfp_fp16 =
    ARM_FEATURE_COPROC (FPU_VFP_EXT_FP16);
@@ -322,8 +337,6 @@ static const arm_feature_set fpu_neon_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8);
  static const arm_feature_set fpu_crypto_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
    ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8);
  static const arm_feature_set fpu_crypto_ext_armv8 =
    ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
-static const arm_feature_set crc_ext_armv8 =
-  ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
  static const arm_feature_set fpu_neon_ext_v8_1 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  static const arm_feature_set fpu_neon_ext_dotprod =
  static const arm_feature_set fpu_neon_ext_v8_1 =
    ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  static const arm_feature_set fpu_neon_ext_dotprod =
@@ -345,6 +358,7 @@ static arm_feature_set selected_fpu = FPU_NONE;
  /* Feature bits selected by the last .object_arch directive.  */
  static arm_feature_set selected_object_arch = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
  /* Feature bits selected by the last .object_arch directive.  */
  static arm_feature_set selected_object_arch = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
+static const struct arm_ext_table * selected_ctx_ext_table = NULL;
  static char selected_cpu_name[20];
  
  extern FLONUM_TYPE generic_floating_point_number;
  static char selected_cpu_name[20];
  
  extern FLONUM_TYPE generic_floating_point_number;
@@ -436,6 +450,7 @@ enum neon_el_type
    NT_float,
    NT_poly,
    NT_signed,
    NT_float,
    NT_poly,
    NT_signed,
+  NT_bfloat,
    NT_unsigned
  };
  
    NT_unsigned
  };
  
@@ -883,6 +898,7 @@ struct asm_opcode
         _("cannot use writeback with PC-relative addressing")
  #define BAD_RANGE      _("branch out of range")
  #define BAD_FP16       _("selected processor does not support fp16 instruction")
         _("cannot use writeback with PC-relative addressing")
  #define BAD_RANGE      _("branch out of range")
  #define BAD_FP16       _("selected processor does not support fp16 instruction")
+#define BAD_BF16       _("selected processor does not support bf16 instruction")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
  #define MVE_NOT_IT     _("Warning: instruction is UNPREDICTABLE in an IT " \
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
  #define MVE_NOT_IT     _("Warning: instruction is UNPREDICTABLE in an IT " \
@@ -1009,6 +1025,9 @@ static void it_fsm_post_encode (void);
      }                                                  \
    while (0)
  
      }                                                  \
    while (0)
  
+/* Toggle value[pos]: yields VALUE with bit number POS inverted.  Both
+   arguments are parenthesised so that compound expressions expand safely.  */
+#define TOGGLE_BIT(value, pos) ((value) ^ (1 << (pos)))
+
  /* Pure syntax.         */
  
  /* This array holds the chars that always start a comment.  If the
  /* Pure syntax.         */
  
  /* This array holds the chars that always start a comment.  If the
@@ -1034,7 +1053,7 @@ const char EXP_CHARS[] = "eE";
  /* As in 0f12.456  */
  /* or   0d1.2345e12  */
  
  /* As in 0f12.456  */
  /* or   0d1.2345e12  */
  
-const char FLT_CHARS[] = "rRsSfFdDxXeEpP";
+const char FLT_CHARS[] = "rRsSfFdDxXeEpPHh";
  
  /* Prefix characters that indicate the start of an immediate
     value.  */
  
  /* Prefix characters that indicate the start of an immediate
     value.  */
@@ -1044,6 +1063,16 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP";
  
  #define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
  
  
  #define skip_whitespace(str)  do { if (*(str) == ' ') ++(str); } while (0)
  
+/* Half-precision (16-bit) floating-point formats that can be requested
+   with the .float16_format directive.  ARM_FP16_FORMAT_DEFAULT marks the
+   state where no format has been requested yet.  */
+enum fp_16bit_format
+{
+  ARM_FP16_FORMAT_IEEE         = 0x1,
+  ARM_FP16_FORMAT_ALTERNATIVE  = 0x2,
+  ARM_FP16_FORMAT_DEFAULT      = 0x3
+};
+
+/* The 16-bit float format currently in effect; updated by
+   set_fp16_format and consulted by arm_is_largest_exponent_ok.  */
+static enum fp_16bit_format fp16_format = ARM_FP16_FORMAT_DEFAULT;
+
+
  static inline int
  skip_past_char (char ** str, char c)
  {
  static inline int
  skip_past_char (char ** str, char c)
  {
@@ -1185,6 +1214,57 @@ md_atof (int type, char * litP, int * sizeP)
  
    switch (type)
      {
  
    switch (type)
      {
+    case 'H':
+    case 'h':
+      prec = 1;
+      break;
+
+    /* If this is a bfloat16, then parse it slightly differently, as it
+       does not follow the IEEE specification for floating point numbers
+       exactly.  */
+    case 'b':
+      {
+       FLONUM_TYPE generic_float;
+
+       t = atof_ieee_detail (input_line_pointer, 1, 8, words, &generic_float);
+
+       if (t)
+         input_line_pointer = t;
+       else
+         return _("invalid floating point number");
+
+       switch (generic_float.sign)
+         {
+         /* Is +Inf.  */
+         case 'P':
+           words[0] = 0x7f80;
+           break;
+
+         /* Is -Inf.  */
+         case 'N':
+           words[0] = 0xff80;
+           break;
+
+         /* Is NaN.  */
+         /* bfloat16 has two types of NaN - quiet and signalling.
+            Quiet NaN has bit[6] == 1 && fraction != 0, whereas
+            signalling NaN's have bit[6] == 0 && fraction != 0.
+            Chosen this specific encoding as it is the same form
+            as used by other IEEE 754 encodings in GAS.  */
+         case 0:
+           words[0] = 0x7fff;
+           break;
+
+         default:
+           break;
+         }
+
+       *sizeP = 2;
+
+       md_number_to_chars (litP, (valueT) words[0], sizeof (LITTLENUM_TYPE));
+
+       return NULL;
+      }
      case 'f':
      case 'F':
      case 's':
      case 'f':
      case 'F':
      case 's':
@@ -1219,34 +1299,29 @@ md_atof (int type, char * litP, int * sizeP)
      input_line_pointer = t;
    *sizeP = prec * sizeof (LITTLENUM_TYPE);
  
      input_line_pointer = t;
    *sizeP = prec * sizeof (LITTLENUM_TYPE);
  
-  if (target_big_endian)
-    {
-      for (i = 0; i < prec; i++)
-       {
-         md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
-         litP += sizeof (LITTLENUM_TYPE);
-       }
-    }
+  if (target_big_endian || prec == 1)
+    for (i = 0; i < prec; i++)
+      {
+       md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
+       litP += sizeof (LITTLENUM_TYPE);
+      }
+  else if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure))
+    for (i = prec - 1; i >= 0; i--)
+      {
+       md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
+       litP += sizeof (LITTLENUM_TYPE);
+      }
    else
    else
-    {
-      if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_endian_pure))
-       for (i = prec - 1; i >= 0; i--)
-         {
-           md_number_to_chars (litP, (valueT) words[i], sizeof (LITTLENUM_TYPE));
-           litP += sizeof (LITTLENUM_TYPE);
-         }
-      else
-       /* For a 4 byte float the order of elements in `words' is 1 0.
-          For an 8 byte float the order is 1 0 3 2.  */
-       for (i = 0; i < prec; i += 2)
-         {
-           md_number_to_chars (litP, (valueT) words[i + 1],
-                               sizeof (LITTLENUM_TYPE));
-           md_number_to_chars (litP + sizeof (LITTLENUM_TYPE),
-                               (valueT) words[i], sizeof (LITTLENUM_TYPE));
-           litP += 2 * sizeof (LITTLENUM_TYPE);
-         }
-    }
+    /* For a 4 byte float the order of elements in `words' is 1 0.
+       For an 8 byte float the order is 1 0 3 2.  */
+    for (i = 0; i < prec; i += 2)
+      {
+       md_number_to_chars (litP, (valueT) words[i + 1],
+                           sizeof (LITTLENUM_TYPE));
+       md_number_to_chars (litP + sizeof (LITTLENUM_TYPE),
+                           (valueT) words[i], sizeof (LITTLENUM_TYPE));
+       litP += 2 * sizeof (LITTLENUM_TYPE);
+      }
  
    return NULL;
  }
  
    return NULL;
  }
@@ -1445,6 +1520,28 @@ parse_neon_type (struct neon_type *type, char **str)
           thissize = 64;
           ptr++;
           goto done;
           thissize = 64;
           ptr++;
           goto done;
+       case 'b':
+         thistype = NT_bfloat;
+         switch (TOLOWER (*(++ptr)))
+           {
+           case 'f':
+             ptr += 1;
+             thissize = strtoul (ptr, &ptr, 10);
+             if (thissize != 16)
+               {
+                 as_bad (_("bad size %d in type specifier"), thissize);
+                 return FAIL;
+               }
+             goto done;
+           case '0': case '1': case '2': case '3': case '4':
+           case '5': case '6': case '7': case '8': case '9':
+           case ' ': case '.':
+             as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
+             return FAIL;
+           default:
+             break;
+           }
+         break;
         default:
           as_bad (_("unexpected character `%c' in type specifier"), *ptr);
           return FAIL;
         default:
           as_bad (_("unexpected character `%c' in type specifier"), *ptr);
           return FAIL;
@@ -1846,7 +1943,7 @@ parse_reg_list (char ** strp, enum reg_list_els etype)
               const char apsr_str[] = "apsr";
               int apsr_str_len = strlen (apsr_str);
  
               const char apsr_str[] = "apsr";
               int apsr_str_len = strlen (apsr_str);
  
-             reg = arm_reg_parse (&str, REGLIST_RN);
+             reg = arm_reg_parse (&str, REG_TYPE_RN);
               if (etype == REGLIST_CLRM)
                 {
                   if (reg == REG_SP || reg == REG_PC)
               if (etype == REGLIST_CLRM)
                 {
                   if (reg == REG_SP || reg == REG_PC)
@@ -4922,6 +5019,55 @@ pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
  }
  #endif /* TE_PE */
  
  }
  #endif /* TE_PE */
  
+/* Return non-zero when a float of PRECISION littlenums may use the
+   largest exponent value for ordinary numbers.  That is only the case for
+   16 bit floats (PRECISION == 1) while the alternative half-precision
+   format is selected.  */
+int
+arm_is_largest_exponent_ok (int precision)
+{
+  /* Anything wider than a single littlenum is not a 16 bit float.  */
+  if (precision != 1)
+    return 0;
+
+  return fp16_format == ARM_FP16_FORMAT_ALTERNATIVE;
+}
+
+/* Handler for the .float16_format pseudo-op.  Reads the format name
+   ("ieee" or "alternative", compared case-insensitively) from the input
+   line and records it in fp16_format.  The format can only be chosen
+   once: a later request for a different format is ignored with a
+   warning, and an unknown name is reported as an error.  */
+static void
+set_fp16_format (int dummy ATTRIBUTE_UNUSED)
+{
+  char *format_name = input_line_pointer;
+  char terminator;
+  enum fp_16bit_format requested = ARM_FP16_FORMAT_DEFAULT;
+
+  /* The name runs up to the first whitespace character or NUL.  */
+  for (; *input_line_pointer != 0; input_line_pointer++)
+    if (ISSPACE (*input_line_pointer))
+      break;
+
+  /* Temporarily terminate the name in place so it can be compared.  */
+  terminator = *input_line_pointer;
+  *input_line_pointer = 0;
+
+  if (strcasecmp (format_name, "ieee") == 0)
+    requested = ARM_FP16_FORMAT_IEEE;
+  else if (strcasecmp (format_name, "alternative") == 0)
+    requested = ARM_FP16_FORMAT_ALTERNATIVE;
+
+  /* REQUESTED is still the default only if the name was not recognised.  */
+  if (requested == ARM_FP16_FORMAT_DEFAULT)
+    as_bad (_("unrecognised float16 format \"%s\""), format_name);
+  else if (fp16_format == ARM_FP16_FORMAT_DEFAULT)
+    /* First request: remember it.  */
+    fp16_format = requested;
+  else if (requested != fp16_format)
+    as_warn (_("float16 format cannot be set more than once, ignoring."));
+
+  /* Undo the temporary termination before discarding the line.  */
+  *input_line_pointer = terminator;
+  ignore_rest_of_line ();
+}
+
  /* This table describes all the machine specific pseudo-ops the assembler
     has to support.  The fields are:
       pseudo-op name without dot
  /* This table describes all the machine specific pseudo-ops the assembler
     has to support.  The fields are:
       pseudo-op name without dot
@@ -4989,6 +5135,7 @@ const pseudo_typeS md_pseudo_table[] =
    { "extend",     float_cons, 'x' },
    { "ldouble",    float_cons, 'x' },
    { "packed",     float_cons, 'p' },
    { "extend",     float_cons, 'x' },
    { "ldouble",    float_cons, 'x' },
    { "packed",     float_cons, 'p' },
+  { "bfloat16",           float_cons, 'b' },
  #ifdef TE_PE
    {"secrel32", pe_directive_secrel, 0},
  #endif
  #ifdef TE_PE
    {"secrel32", pe_directive_secrel, 0},
  #endif
@@ -4999,9 +5146,12 @@ const pseudo_typeS md_pseudo_table[] =
    {"asmfunc",      s_ccs_asmfunc,    0},
    {"endasmfunc",   s_ccs_endasmfunc, 0},
  
    {"asmfunc",      s_ccs_asmfunc,    0},
    {"endasmfunc",   s_ccs_endasmfunc, 0},
  
+  {"float16", float_cons, 'h' },
+  {"float16_format", set_fp16_format, 0 },
+
    { 0, 0, 0 }
  };
    { 0, 0, 0 }
  };
-\f
+
  /* Parser functions used exclusively in instruction operands.  */
  
  /* Generic immediate-value read function for use in insn parsing.
  /* Parser functions used exclusively in instruction operands.  */
  
  /* Generic immediate-value read function for use in insn parsing.
@@ -6678,8 +6828,10 @@ parse_neon_mov (char **str, int *which_operand)
               inst.operands[i].present = 1;
             }
         }
               inst.operands[i].present = 1;
             }
         }
-      else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype,
-                                          &optype)) != FAIL)
+      else if (((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype,
+               &optype)) != FAIL)
+              || ((val = arm_typed_reg_parse (&ptr, REG_TYPE_MQ, &rtype,
+                  &optype)) != FAIL))
         {
           /* Case 0: VMOV<c><q> <Qd>, <Qm>
              Case 1: VMOV<c><q> <Dd>, <Dm>
         {
           /* Case 0: VMOV<c><q> <Qd>, <Qm>
              Case 1: VMOV<c><q> <Dd>, <Dm>
@@ -6902,6 +7054,7 @@ enum operand_parse_code
    OP_RNSD,      /* Neon single or double precision register */
    OP_RNDQ,      /* Neon double or quad precision register */
    OP_RNDQMQ,     /* Neon double, quad or MVE vector register.  */
    OP_RNSD,      /* Neon single or double precision register */
    OP_RNDQ,      /* Neon double or quad precision register */
    OP_RNDQMQ,     /* Neon double, quad or MVE vector register.  */
+  OP_RNDQMQR,   /* Neon double, quad, MVE vector or ARM register.  */
    OP_RNSDQ,    /* Neon single, double or quad precision register */
    OP_RNSC,      /* Neon scalar D[X] */
    OP_RVC,      /* VFP control register */
    OP_RNSDQ,    /* Neon single, double or quad precision register */
    OP_RNSC,      /* Neon scalar D[X] */
    OP_RVC,      /* VFP control register */
@@ -6922,12 +7075,14 @@ enum operand_parse_code
                    GPR (no SP/SP)  */
    OP_RMQ,      /* MVE vector register.  */
    OP_RMQRZ,    /* MVE vector or ARM register including ZR.  */
                    GPR (no SP/SP)  */
    OP_RMQ,      /* MVE vector register.  */
    OP_RMQRZ,    /* MVE vector or ARM register including ZR.  */
+  OP_RMQRR,     /* MVE vector or ARM register.  */
  
    /* New operands for Armv8.1-M Mainline.  */
    OP_LR,       /* ARM LR register */
    OP_RRe,      /* ARM register, only even numbered.  */
    OP_RRo,      /* ARM register, only odd numbered, not r13 or r15.  */
    OP_RRnpcsp_I32, /* ARM register (no BadReg) or literal 1 .. 32 */
  
    /* New operands for Armv8.1-M Mainline.  */
    OP_LR,       /* ARM LR register */
    OP_RRe,      /* ARM register, only even numbered.  */
    OP_RRo,      /* ARM register, only odd numbered, not r13 or r15.  */
    OP_RRnpcsp_I32, /* ARM register (no BadReg) or literal 1 .. 32 */
+  OP_RR_ZR,    /* ARM register or ZR but no PC */
  
    OP_REGLST,   /* ARM register list */
    OP_CLRMLST,  /* CLRM register list */
  
    OP_REGLST,   /* ARM register list */
    OP_CLRMLST,  /* CLRM register list */
@@ -6950,7 +7105,12 @@ enum operand_parse_code
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
    OP_RNSDQ_RNSC_MQ, /* Vector S, D or Q reg, Neon scalar or MVE vector register.
                      */
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
    OP_RNSDQ_RNSC_MQ, /* Vector S, D or Q reg, Neon scalar or MVE vector register.
                      */
+  OP_RNSDQ_RNSC_MQ_RR, /* Vector S, D or Q reg, or MVE vector reg , or Neon
+                         scalar, or ARM register.  */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
+  OP_RNDQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, or ARM register.  */
+  OP_RNDQMQ_RNSC_RR, /* Neon D or Q reg, Neon scalar, MVE vector or ARM
+                       register.  */
    OP_RNDQMQ_RNSC, /* Neon D, Q or MVE vector reg, or Neon scalar.  */
    OP_RND_RNSC,  /* Neon D reg, or Neon scalar.  */
    OP_VMOV,      /* Neon VMOV operands.  */
    OP_RNDQMQ_RNSC, /* Neon D, Q or MVE vector reg, or Neon scalar.  */
    OP_RND_RNSC,  /* Neon D reg, or Neon scalar.  */
    OP_VMOV,      /* Neon VMOV operands.  */
@@ -6958,6 +7118,8 @@ enum operand_parse_code
    /* Neon D, Q or MVE vector register, or big immediate for logic and VMVN.  */
    OP_RNDQMQ_Ibig,
    OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift.  */
    /* Neon D, Q or MVE vector register, or big immediate for logic and VMVN.  */
    OP_RNDQMQ_Ibig,
    OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift.  */
+  OP_RNDQMQ_I63b_RR, /* Neon D or Q reg, immediate for shift, MVE vector or
+                       ARM register.  */
    OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2.  */
    OP_VLDR,     /* VLDR operand.  */
  
    OP_RIWR_I32z, /* iWMMXt wR register, or immediate 0 .. 32 for iWMMXt2.  */
    OP_VLDR,     /* VLDR operand.  */
  
@@ -6970,6 +7132,7 @@ enum operand_parse_code
    OP_I31w,     /*                 0 .. 31, optional trailing ! */
    OP_I32,      /*                 1 .. 32 */
    OP_I32z,     /*                 0 .. 32 */
    OP_I31w,     /*                 0 .. 31, optional trailing ! */
    OP_I32,      /*                 1 .. 32 */
    OP_I32z,     /*                 0 .. 32 */
+  OP_I48_I64,  /*                 48 or 64 */
    OP_I63,      /*                 0 .. 63 */
    OP_I63s,     /*               -64 .. 63 */
    OP_I64,      /*                 1 .. 64 */
    OP_I63,      /*                 0 .. 63 */
    OP_I63s,     /*               -64 .. 63 */
    OP_I64,      /*                 1 .. 64 */
@@ -7121,6 +7284,25 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
      }                                                          \
    while (0)
  
      }                                                          \
    while (0)
  
+/* Parse an expression from STR (POPT is handed on to my_get_expression)
+   and accept it only when it is a constant equal to IMM1 or IMM2, in
+   which case it is stored in inst.operands[i].imm.  Otherwise inst.error
+   is set and control jumps to the `failure' label.  Relies on `str', `i'
+   and `failure' being available at the point of use.
+   NOTE: the text of the second error message names 48 and 64 literally,
+   whatever IMM1 and IMM2 actually are.  */
+#define po_imm1_or_imm2_or_fail(imm1, imm2, popt)              \
+  do                                                           \
+    {                                                          \
+      expressionS exp;                                         \
+      my_get_expression (&exp, &str, popt);                    \
+      if (exp.X_op != O_constant)                              \
+       {                                                       \
+         inst.error = _("constant expression required");       \
+         goto failure;                                         \
+       }                                                       \
+      if (exp.X_add_number != imm1 && exp.X_add_number != imm2) \
+       {                                                       \
+         inst.error = _("immediate value 48 or 64 expected");  \
+         goto failure;                                         \
+       }                                                       \
+      inst.operands[i].imm = exp.X_add_number;                 \
+    }                                                          \
+  while (0)
+
  #define po_scalar_or_goto(elsz, label, reg_type)                       \
    do                                                                   \
      {                                                                  \
  #define po_scalar_or_goto(elsz, label, reg_type)                       \
    do                                                                   \
      {                                                                  \
@@ -7223,7 +7405,20 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           break;
           /* Also accept generic coprocessor regs for unknown registers.  */
           coproc_reg:
           break;
           /* Also accept generic coprocessor regs for unknown registers.  */
           coproc_reg:
-         po_reg_or_fail (REG_TYPE_CN);
+         po_reg_or_goto (REG_TYPE_CN, vpr_po);
+         break;
+         /* Also accept P0 or p0 for VPR.P0.  Since P0 is already an
+            existing register with a value of 0, this seems like the
+            best way to parse P0.  */
+         vpr_po:
+         if (strncasecmp (str, "P0", 2) == 0)
+           {
+             str += 2;
+             inst.operands[i].isreg = 1;
+             inst.operands[i].reg = 13;
+           }
+         else
+           goto failure;
           break;
         case OP_RMF:   po_reg_or_fail (REG_TYPE_MVF);     break;
         case OP_RMD:   po_reg_or_fail (REG_TYPE_MVD);     break;
           break;
         case OP_RMF:   po_reg_or_fail (REG_TYPE_MVF);     break;
         case OP_RMD:   po_reg_or_fail (REG_TYPE_MVD);     break;
@@ -7242,6 +7437,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         try_nq:
         case OP_RNQ:   po_reg_or_fail (REG_TYPE_NQ);      break;
         case OP_RNSD:  po_reg_or_fail (REG_TYPE_NSD);     break;
         try_nq:
         case OP_RNQ:   po_reg_or_fail (REG_TYPE_NQ);      break;
         case OP_RNSD:  po_reg_or_fail (REG_TYPE_NSD);     break;
+       case OP_RNDQMQR:
+         po_reg_or_goto (REG_TYPE_RN, try_rndqmq);
+         break;
+       try_rndqmq:
         case OP_oRNDQMQ:
         case OP_RNDQMQ:
           po_reg_or_goto (REG_TYPE_MQ, try_rndq);
         case OP_oRNDQMQ:
         case OP_RNDQMQ:
           po_reg_or_goto (REG_TYPE_MQ, try_rndq);
@@ -7271,6 +7470,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           po_reg_or_fail (REG_TYPE_NSDQ);
           inst.error = 0;
           break;
           po_reg_or_fail (REG_TYPE_NSDQ);
           inst.error = 0;
           break;
+       case OP_RMQRR:
+         po_reg_or_goto (REG_TYPE_RN, try_rmq);
+         break;
+       try_rmq:
         case OP_RMQ:
           po_reg_or_fail (REG_TYPE_MQ);
           break;
         case OP_RMQ:
           po_reg_or_fail (REG_TYPE_MQ);
           break;
@@ -7320,6 +7523,10 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           }
           break;
  
           }
           break;
  
+       case OP_RNSDQ_RNSC_MQ_RR:
+         po_reg_or_goto (REG_TYPE_RN, try_rnsdq_rnsc_mq);
+         break;
+       try_rnsdq_rnsc_mq:
         case OP_RNSDQ_RNSC_MQ:
           po_reg_or_goto (REG_TYPE_MQ, try_rnsdq_rnsc);
           break;
         case OP_RNSDQ_RNSC_MQ:
           po_reg_or_goto (REG_TYPE_MQ, try_rnsdq_rnsc);
           break;
@@ -7347,6 +7554,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           }
           break;
  
           }
           break;
  
+       case OP_RNDQMQ_RNSC_RR:
+         po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc_rr);
+         break;
+       try_rndq_rnsc_rr:
+       case OP_RNDQ_RNSC_RR:
+         po_reg_or_goto (REG_TYPE_RN, try_rndq_rnsc);
+         break;
         case OP_RNDQMQ_RNSC:
           po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc);
           break;
         case OP_RNDQMQ_RNSC:
           po_reg_or_goto (REG_TYPE_MQ, try_rndq_rnsc);
           break;
@@ -7395,6 +7609,13 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           }
           break;
  
           }
           break;
  
+       case OP_RNDQMQ_I63b_RR:
+         po_reg_or_goto (REG_TYPE_MQ, try_rndq_i63b_rr);
+         break;
+       try_rndq_i63b_rr:
+         po_reg_or_goto (REG_TYPE_RN, try_rndq_i63b);
+         break;
+       try_rndq_i63b:
         case OP_RNDQ_I63b:
           {
             po_reg_or_goto (REG_TYPE_NDQ, try_shimm);
         case OP_RNDQ_I63b:
           {
             po_reg_or_goto (REG_TYPE_NDQ, try_shimm);
@@ -7426,6 +7647,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_I31:     po_imm_or_fail (  0,     31, FALSE);   break;
         case OP_I32:     po_imm_or_fail (  1,     32, FALSE);   break;
         case OP_I32z:    po_imm_or_fail (  0,     32, FALSE);   break;
         case OP_I31:     po_imm_or_fail (  0,     31, FALSE);   break;
         case OP_I32:     po_imm_or_fail (  1,     32, FALSE);   break;
         case OP_I32z:    po_imm_or_fail (  0,     32, FALSE);   break;
+       case OP_I48_I64: po_imm1_or_imm2_or_fail (48, 64, FALSE); break;
         case OP_I63s:    po_imm_or_fail (-64,     63, FALSE);   break;
         case OP_I63:     po_imm_or_fail (  0,     63, FALSE);   break;
         case OP_I64:     po_imm_or_fail (  1,     64, FALSE);   break;
         case OP_I63s:    po_imm_or_fail (-64,     63, FALSE);   break;
         case OP_I63:     po_imm_or_fail (  0,     63, FALSE);   break;
         case OP_I64:     po_imm_or_fail (  1,     64, FALSE);   break;
@@ -7524,6 +7746,9 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_RRnpc_I0: po_reg_or_goto (REG_TYPE_RN, I0);   break;
         I0:               po_imm_or_fail (0, 0, FALSE);       break;
  
         case OP_RRnpc_I0: po_reg_or_goto (REG_TYPE_RN, I0);   break;
         I0:               po_imm_or_fail (0, 0, FALSE);       break;
  
+       case OP_RRnpcsp_I32: po_reg_or_goto (REG_TYPE_RN, I32); break;
+       I32:                 po_imm_or_fail (1, 32, FALSE);     break;
+
         case OP_RF_IF:    po_reg_or_goto (REG_TYPE_FN, IF);   break;
         IF:
           if (!is_immediate_prefix (*str))
         case OP_RF_IF:    po_reg_or_goto (REG_TYPE_FN, IF);   break;
         IF:
           if (!is_immediate_prefix (*str))
@@ -7755,6 +7980,8 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
         case OP_oRMQRZ:
           po_reg_or_goto (REG_TYPE_MQ, try_rr_zr);
           break;
         case OP_oRMQRZ:
           po_reg_or_goto (REG_TYPE_MQ, try_rr_zr);
           break;
+
+       case OP_RR_ZR:
         try_rr_zr:
           po_reg_or_goto (REG_TYPE_RN, ZR);
           break;
         try_rr_zr:
           po_reg_or_goto (REG_TYPE_RN, ZR);
           break;
@@ -7783,6 +8010,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
         case OP_oRRnpcsp:
         case OP_RRnpcsp:
  
         case OP_oRRnpcsp:
         case OP_RRnpcsp:
+       case OP_RRnpcsp_I32:
           if (inst.operands[i].isreg)
             {
               if (inst.operands[i].reg == REG_PC)
           if (inst.operands[i].isreg)
             {
               if (inst.operands[i].reg == REG_PC)
@@ -7841,6 +8069,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
         case OP_RMQRZ:
         case OP_oRMQRZ:
  
         case OP_RMQRZ:
         case OP_oRMQRZ:
+       case OP_RR_ZR:
           if (!inst.operands[i].iszr && inst.operands[i].reg == REG_PC)
             inst.error = BAD_PC;
           break;
           if (!inst.operands[i].iszr && inst.operands[i].reg == REG_PC)
             inst.error = BAD_PC;
           break;
@@ -8661,6 +8890,11 @@ move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
                       inst.instruction |= (imm & 0x0800) << 15;
                       inst.instruction |= (imm & 0x0700) << 4;
                       inst.instruction |= (imm & 0x00ff);
                       inst.instruction |= (imm & 0x0800) << 15;
                       inst.instruction |= (imm & 0x0700) << 4;
                       inst.instruction |= (imm & 0x00ff);
+                     /*  In case this replacement is being done on Armv8-M
+                         Baseline we need to make sure to disable the
+                         instruction size check, as otherwise GAS will reject
+                         the use of this T32 instruction.  */
+                     inst.size_req = 0;
                       return TRUE;
                     }
                 }
                       return TRUE;
                     }
                 }
@@ -9785,10 +10019,42 @@ do_vmrs (void)
        return;
      }
  
        return;
      }
  
-  /* MVFR2 is only valid at ARMv8-A.  */
-  if (inst.operands[1].reg == 5)
-    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
-               _(BAD_FPU));
+  switch (inst.operands[1].reg)
+    {
+    /* MVFR2 is only valid for Armv8-A.  */
+    case 5:
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+      break;
+
+    /* Check for new Armv8.1-M Mainline changes to <spec_reg>.  */
+    case 1: /* fpscr.  */
+      constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+                   || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+                 _(BAD_FPU));
+      break;
+
+    case 14: /* fpcxt_ns.  */
+    case 15: /* fpcxt_s.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main),
+                 _("selected processor does not support instruction"));
+      break;
+
+    case  2: /* fpscr_nzcvqc.  */
+    case 12: /* vpr.  */
+    case 13: /* p0.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main)
+                 || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+                     && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+                 _("selected processor does not support instruction"));
+      if (inst.operands[0].reg != 2
+         && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE"));
+      break;
+
+    default:
+      break;
+    }
  
    /* APSR_ sets isvec. All other refs to PC are illegal.  */
    if (!inst.operands[0].isvec && Rt == REG_PC)
  
    /* APSR_ sets isvec. All other refs to PC are illegal.  */
    if (!inst.operands[0].isvec && Rt == REG_PC)
@@ -9816,10 +10082,42 @@ do_vmsr (void)
        return;
      }
  
        return;
      }
  
-  /* MVFR2 is only valid for ARMv8-A.  */
-  if (inst.operands[0].reg == 5)
-    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
-               _(BAD_FPU));
+  switch (inst.operands[0].reg)
+    {
+    /* MVFR2 is only valid for Armv8-A.  */
+    case 5:
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+      break;
+
+    /* Check for new Armv8.1-M Mainline changes to <spec_reg>.  */
+    case  1: /* fpscr.  */
+      constraint (!(ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+                   || ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+                 _(BAD_FPU));
+      break;
+
+    case 14: /* fpcxt_ns.  */
+    case 15: /* fpcxt_s.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main),
+                 _("selected processor does not support instruction"));
+      break;
+
+    case  2: /* fpscr_nzcvqc.  */
+    case 12: /* vpr.  */
+    case 13: /* p0.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_1m_main)
+                 || (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)
+                     && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)),
+                 _("selected processor does not support instruction"));
+      if (inst.operands[0].reg != 2
+         && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       as_warn (_("accessing MVE system register without MVE is UNPREDICTABLE"));
+      break;
+
+    default:
+      break;
+    }
  
    /* If we get through parsing the register name, we just insert the number
       generated into the instruction without further validation.  */
  
    /* If we get through parsing the register name, we just insert the number
       generated into the instruction without further validation.  */
@@ -10119,6 +10417,9 @@ do_shift (void)
  static void
  do_smc (void)
  {
  static void
  do_smc (void)
  {
+  unsigned int value = inst.relocs[0].exp.X_add_number;
+  constraint (value > 0xf, _("immediate too large (bigger than 0xF)"));
+
    inst.relocs[0].type = BFD_RELOC_ARM_SMC;
    inst.relocs[0].pc_rel = 0;
  }
    inst.relocs[0].type = BFD_RELOC_ARM_SMC;
    inst.relocs[0].pc_rel = 0;
  }
@@ -11063,7 +11364,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
      inst.error = _("instruction does not accept unindexed addressing");
  }
  
      inst.error = _("instruction does not accept unindexed addressing");
  }
  
-/* Table of Thumb instructions which exist in both 16- and 32-bit
+/* Table of Thumb instructions which exist in 16- and/or 32-bit
     encodings (the latter only in post-V6T2 cores).  The index is the
     value used in the insns table below.  When there is more than one
     possible 16-bit encoding for the instruction, this table always
     encodings (the latter only in post-V6T2 cores).  The index is the
     value used in the insns table below.  When there is more than one
     possible 16-bit encoding for the instruction, this table always
@@ -11092,16 +11393,27 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_bflx,  0000, f070e001),                   \
    X(_bic,   4380, ea200000),                   \
    X(_bics,  4380, ea300000),                   \
    X(_bflx,  0000, f070e001),                   \
    X(_bic,   4380, ea200000),                   \
    X(_bics,  4380, ea300000),                   \
+  X(_cinc,  0000, ea509000),                   \
+  X(_cinv,  0000, ea50a000),                   \
    X(_cmn,   42c0, eb100f00),                   \
    X(_cmp,   2800, ebb00f00),                   \
    X(_cmn,   42c0, eb100f00),                   \
    X(_cmp,   2800, ebb00f00),                   \
+  X(_cneg,  0000, ea50b000),                   \
    X(_cpsie, b660, f3af8400),                   \
    X(_cpsid, b670, f3af8600),                   \
    X(_cpy,   4600, ea4f0000),                   \
    X(_cpsie, b660, f3af8400),                   \
    X(_cpsid, b670, f3af8600),                   \
    X(_cpy,   4600, ea4f0000),                   \
+  X(_csel,  0000, ea508000),                   \
+  X(_cset,  0000, ea5f900f),                   \
+  X(_csetm, 0000, ea5fa00f),                   \
+  X(_csinc, 0000, ea509000),                   \
+  X(_csinv, 0000, ea50a000),                   \
+  X(_csneg, 0000, ea50b000),                   \
    X(_dec_sp,80dd, f1ad0d00),                   \
    X(_dls,   0000, f040e001),                   \
    X(_dec_sp,80dd, f1ad0d00),                   \
    X(_dls,   0000, f040e001),                   \
+  X(_dlstp, 0000, f000e001),                   \
    X(_eor,   4040, ea800000),                   \
    X(_eors,  4040, ea900000),                   \
    X(_inc_sp,00dd, f10d0d00),                   \
    X(_eor,   4040, ea800000),                   \
    X(_eors,  4040, ea900000),                   \
    X(_inc_sp,00dd, f10d0d00),                   \
+  X(_lctp,  0000, f00fe001),                   \
    X(_ldmia, c800, e8900000),                   \
    X(_ldr,   6800, f8500000),                   \
    X(_ldrb,  7800, f8100000),                   \
    X(_ldmia, c800, e8900000),                   \
    X(_ldr,   6800, f8500000),                   \
    X(_ldrb,  7800, f8100000),                   \
@@ -11112,6 +11424,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_ldr_pc2,4800, f85f0000),                  \
    X(_ldr_sp,9800, f85d0000),                   \
    X(_le,    0000, f00fc001),                   \
    X(_ldr_pc2,4800, f85f0000),                  \
    X(_ldr_sp,9800, f85d0000),                   \
    X(_le,    0000, f00fc001),                   \
+  X(_letp,  0000, f01fc001),                   \
    X(_lsl,   0000, fa00f000),                   \
    X(_lsls,  0000, fa10f000),                   \
    X(_lsr,   0800, fa20f000),                   \
    X(_lsl,   0000, fa00f000),                   \
    X(_lsls,  0000, fa10f000),                   \
    X(_lsr,   0800, fa20f000),                   \
@@ -11154,6 +11467,7 @@ encode_thumb32_addr_mode (int i, bfd_boolean is_t, bfd_boolean is_d)
    X(_wfe,   bf20, f3af8002),                   \
    X(_wfi,   bf30, f3af8003),                   \
    X(_wls,   0000, f040c001),                   \
    X(_wfe,   bf20, f3af8002),                   \
    X(_wfi,   bf30, f3af8003),                   \
    X(_wls,   0000, f040c001),                   \
+  X(_wlstp, 0000, f000c001),                   \
    X(_sev,   bf40, f3af8004),                    \
    X(_sevl,  bf50, f3af8005),                   \
    X(_udf,   de00, f7f0a000)
    X(_sev,   bf40, f3af8004),                    \
    X(_sevl,  bf50, f3af8005),                   \
    X(_udf,   de00, f7f0a000)
@@ -11907,6 +12221,60 @@ do_t_clz (void)
    inst.instruction |= Rm;
  }
  
    inst.instruction |= Rm;
  }
  
+/* For the Armv8.1-M conditional instructions.
+
+   Builds the 32-bit Thumb encoding for the mnemonic held in
+   inst.instruction.  The first operand's register is placed at bit 8, the
+   Rn field at bit 16, the Rm field at bit 0 and the condition at bit 4.
+   Mnemonics that take fewer operands supply the missing fields themselves,
+   as described on each case below.  The instruction is checked with
+   constraint () against being conditional itself (BAD_COND) and against
+   using SP as an explicit source register (BAD_SP).  */
+static void
+do_t_cond (void)
+{
+  unsigned Rd, Rn, Rm;
+  signed int cond;
+
+  constraint (inst.cond != COND_ALWAYS, BAD_COND);
+
+  Rd = inst.operands[0].reg;
+  switch (inst.instruction)
+    {
+      /* Four-operand forms: both sources and the condition are taken
+        directly from the operands.  */
+      case T_MNEM_csinc:
+      case T_MNEM_csinv:
+      case T_MNEM_csneg:
+      case T_MNEM_csel:
+       Rn = inst.operands[1].reg;
+       Rm = inst.operands[2].reg;
+       cond = inst.operands[3].imm;
+       constraint (Rn == REG_SP, BAD_SP);
+       constraint (Rm == REG_SP, BAD_SP);
+       break;
+
+      /* Three-operand forms: the single source register is used for both
+        the Rn and Rm fields, and the condition is encoded inverted.  */
+      case T_MNEM_cinc:
+      case T_MNEM_cinv:
+      case T_MNEM_cneg:
+       Rn = inst.operands[1].reg;
+       cond = inst.operands[2].imm;
+       /* Invert the last bit to invert the cond.  */
+       cond = TOGGLE_BIT (cond, 0);
+       constraint (Rn == REG_SP, BAD_SP);
+       Rm = Rn;
+       break;
+
+      /* Two-operand forms: there is no source operand, so REG_PC is written
+        into both the Rn and Rm fields; the condition is encoded
+        inverted.  */
+      case T_MNEM_csetm:
+      case T_MNEM_cset:
+       cond = inst.operands[1].imm;
+       /* Invert the last bit to invert the cond.  */
+       cond = TOGGLE_BIT (cond, 0);
+       Rn = REG_PC;
+       Rm = REG_PC;
+       break;
+
+      /* Any other mnemonic reaching this encoder is an internal error.  */
+      default: abort ();
+    }
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  /* Replace the mnemonic index with its 32-bit opcode before adding the
+     operand fields.  */
+  inst.instruction = THUMB_OP32 (inst.instruction);
+  inst.instruction |= Rd << 8;
+  inst.instruction |= Rn << 16;
+  inst.instruction |= Rm;
+  inst.instruction |= cond << 4;
+}
+
  static void
  do_t_csdb (void)
  {
  static void
  do_t_csdb (void)
  {
@@ -13682,10 +14050,11 @@ do_t_smc (void)
               _("SMC is not permitted on this architecture"));
    constraint (inst.relocs[0].exp.X_op != O_constant,
               _("expression too complex"));
               _("SMC is not permitted on this architecture"));
    constraint (inst.relocs[0].exp.X_op != O_constant,
               _("expression too complex"));
+  constraint (value > 0xf, _("immediate too large (bigger than 0xF)"));
+
    inst.relocs[0].type = BFD_RELOC_UNUSED;
    inst.relocs[0].type = BFD_RELOC_UNUSED;
-  inst.instruction |= (value & 0xf000) >> 12;
-  inst.instruction |= (value & 0x0ff0);
    inst.instruction |= (value & 0x000f) << 16;
    inst.instruction |= (value & 0x000f) << 16;
+
    /* PR gas/15623: SMC instructions must be last in an IT block.  */
    set_pred_insn_type_last ();
  }
    /* PR gas/15623: SMC instructions must be last in an IT block.  */
    set_pred_insn_type_last ();
  }
@@ -14079,35 +14448,52 @@ v8_1_loop_reloc (int is_le)
      }
  }
  
      }
  }
  
-/* To handle the Scalar Low Overhead Loop instructions
-   in Armv8.1-M Mainline.  */
+/* For shifts with four operands in MVE.  */
  static void
  static void
-do_t_loloop (void)
+do_mve_scalar_shift1 (void)
  {
  {
-  unsigned long insn = inst.instruction;
+  unsigned int value = inst.operands[2].imm;
  
  
-  set_pred_insn_type (OUTSIDE_PRED_INSN);
-  inst.instruction = THUMB_OP32 (inst.instruction);
+  inst.instruction |= inst.operands[0].reg << 16;
+  inst.instruction |= inst.operands[1].reg << 8;
  
  
-  switch (insn)
-    {
-    case T_MNEM_le:
-      /* le <label>.  */
-      if (!inst.operands[0].present)
-       inst.instruction |= 1 << 21;
+  /* Setting the bit for saturation.  */
+  inst.instruction |= ((value == 64) ? 0: 1) << 7;
  
  
-      v8_1_loop_reloc (TRUE);
-      break;
+  /* Assuming Rm is already checked not to be 11x1.  */
+  constraint (inst.operands[3].reg == inst.operands[0].reg, BAD_OVERLAP);
+  constraint (inst.operands[3].reg == inst.operands[1].reg, BAD_OVERLAP);
+  inst.instruction |= inst.operands[3].reg << 12;
+}
  
  
-    case T_MNEM_wls:
-      v8_1_loop_reloc (FALSE);
-      /* Fall through.  */
-    case T_MNEM_dls:
-      constraint (inst.operands[1].isreg != 1, BAD_ARGS);
-      inst.instruction |= (inst.operands[1].reg << 16);
-      break;
+/* For shifts in MVE.  */
+static void
+do_mve_scalar_shift (void)
+{
+  if (!inst.operands[2].present)
+    {
+      inst.operands[2] = inst.operands[1];
+      inst.operands[1].reg = 0xf;
+    }
+
+  inst.instruction |= inst.operands[0].reg << 16;
+  inst.instruction |= inst.operands[1].reg << 8;
  
  
-    default: abort();
+  if (inst.operands[2].isreg)
+    {
+      /* Assuming Rm is already checked not to be 11x1.  */
+      constraint (inst.operands[2].reg == inst.operands[0].reg, BAD_OVERLAP);
+      constraint (inst.operands[2].reg == inst.operands[1].reg, BAD_OVERLAP);
+      inst.instruction |= inst.operands[2].reg << 12;
+    }
+  else
+    {
+      /* Assuming imm is already checked as [1,32].  */
+      unsigned int value = inst.operands[2].imm;
+      inst.instruction |= (value & 0x1c) << 10;
+      inst.instruction |= (value & 0x03) << 6;
+      /* Change last 4 bits from 0xd to 0xf.  */
+      inst.instruction |= 0x2;
      }
  }
  
      }
  }
  
@@ -14123,6 +14509,7 @@ do_t_loloop (void)
  #define M_MNEM_vmlsdavax  0xeef01e21
  #define M_MNEM_vmullt  0xee011e00
  #define M_MNEM_vmullb  0xee010e00
  #define M_MNEM_vmlsdavax  0xeef01e21
  #define M_MNEM_vmullt  0xee011e00
  #define M_MNEM_vmullb  0xee010e00
+#define M_MNEM_vctp    0xf000e801
  #define M_MNEM_vst20   0xfc801e00
  #define M_MNEM_vst21   0xfc801e20
  #define M_MNEM_vst40   0xfc801e01
  #define M_MNEM_vst20   0xfc801e00
  #define M_MNEM_vst21   0xfc801e20
  #define M_MNEM_vst40   0xfc801e01
@@ -14158,6 +14545,44 @@ do_t_loloop (void)
  #define M_MNEM_vdwdup  0xee011f60
  #define M_MNEM_vidup   0xee010f6e
  #define M_MNEM_viwdup  0xee010f60
  #define M_MNEM_vdwdup  0xee011f60
  #define M_MNEM_vidup   0xee010f6e
  #define M_MNEM_viwdup  0xee010f60
+#define M_MNEM_vmaxv   0xeee20f00
+#define M_MNEM_vmaxav  0xeee00f00
+#define M_MNEM_vminv   0xeee20f80
+#define M_MNEM_vminav  0xeee00f80
+#define M_MNEM_vmlaldav          0xee800e00
+#define M_MNEM_vmlaldava  0xee800e20
+#define M_MNEM_vmlaldavx  0xee801e00
+#define M_MNEM_vmlaldavax 0xee801e20
+#define M_MNEM_vmlsldav          0xee800e01
+#define M_MNEM_vmlsldava  0xee800e21
+#define M_MNEM_vmlsldavx  0xee801e01
+#define M_MNEM_vmlsldavax 0xee801e21
+#define M_MNEM_vrmlaldavhx  0xee801f00
+#define M_MNEM_vrmlaldavhax 0xee801f20
+#define M_MNEM_vrmlsldavh   0xfe800e01
+#define M_MNEM_vrmlsldavha  0xfe800e21
+#define M_MNEM_vrmlsldavhx  0xfe801e01
+#define M_MNEM_vrmlsldavhax 0xfe801e21
+#define M_MNEM_vqmovnt   0xee331e01
+#define M_MNEM_vqmovnb   0xee330e01
+#define M_MNEM_vqmovunt          0xee311e81
+#define M_MNEM_vqmovunb          0xee310e81
+#define M_MNEM_vshrnt      0xee801fc1
+#define M_MNEM_vshrnb      0xee800fc1
+#define M_MNEM_vrshrnt     0xfe801fc1
+#define M_MNEM_vqshrnt     0xee801f40
+#define M_MNEM_vqshrnb     0xee800f40
+#define M_MNEM_vqshrunt            0xee801fc0
+#define M_MNEM_vqshrunb            0xee800fc0
+#define M_MNEM_vrshrnb     0xfe800fc1
+#define M_MNEM_vqrshrnt            0xee801f41
+#define M_MNEM_vqrshrnb            0xee800f41
+#define M_MNEM_vqrshrunt    0xfe801fc0
+#define M_MNEM_vqrshrunb    0xfe800fc0
+
+/* Bfloat16 instruction encoder helpers.  */
+#define B_MNEM_vfmat 0xfc300850
+#define B_MNEM_vfmab 0xfc300810
  
  /* Neon instruction encoder helpers.  */
  
  
  /* Neon instruction encoder helpers.  */
  
@@ -14322,6 +14747,7 @@ NEON_ENC_TAB
       - a table used to drive neon_select_shape.  */
  
  #define NEON_SHAPE_DEF                 \
       - a table used to drive neon_select_shape.  */
  
  #define NEON_SHAPE_DEF                 \
+  X(4, (R, R, Q, Q), QUAD),            \
    X(4, (Q, R, R, I), QUAD),            \
    X(4, (R, R, S, S), QUAD),            \
    X(4, (S, S, R, R), QUAD),            \
    X(4, (Q, R, R, I), QUAD),            \
    X(4, (R, R, S, S), QUAD),            \
    X(4, (S, S, R, R), QUAD),            \
@@ -14374,6 +14800,8 @@ NEON_ENC_TAB
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
    X(2, (R, F), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
    X(2, (R, F), SINGLE),                        \
+/* Used for MVE tail predicated loop instructions.  */\
+  X(2, (R, R), QUAD),                  \
  /* Half float shape supported so far.  */\
    X (2, (H, D), MIXED),                        \
    X (2, (D, H), MIXED),                        \
  /* Half float shape supported so far.  */\
    X (2, (H, D), MIXED),                        \
    X (2, (D, H), MIXED),                        \
@@ -14501,6 +14929,7 @@ enum neon_type_mask
    N_F32  = 0x0080000,
    N_F64  = 0x0100000,
    N_P64         = 0x0200000,
    N_F32  = 0x0080000,
    N_F64  = 0x0100000,
    N_P64         = 0x0200000,
+  N_BF16 = 0x0400000,
    N_KEY  = 0x1000000, /* Key element (main type specifier).  */
    N_EQK  = 0x2000000, /* Given operand has the same type & size as the key.  */
    N_VFP  = 0x4000000, /* VFP mode: operand size must match register width.  */
    N_KEY  = 0x1000000, /* Key element (main type specifier).  */
    N_EQK  = 0x2000000, /* Given operand has the same type & size as the key.  */
    N_VFP  = 0x4000000, /* VFP mode: operand size must match register width.  */
@@ -14799,6 +15228,10 @@ type_chk_of_el_type (enum neon_el_type type, unsigned size)
         }
        break;
  
         }
        break;
  
+    case NT_bfloat:
+      if (size == 16) return N_BF16;
+      break;
+
      default: ;
      }
  
      default: ;
      }
  
@@ -14817,7 +15250,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
  
    if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
      *size = 8;
  
    if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
      *size = 8;
-  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
+  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16 | N_BF16))
+          != 0)
      *size = 16;
    else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
      *size = 32;
      *size = 16;
    else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
      *size = 32;
@@ -14838,6 +15272,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
      *type = NT_poly;
    else if ((mask & (N_F_ALL)) != 0)
      *type = NT_float;
      *type = NT_poly;
    else if ((mask & (N_F_ALL)) != 0)
      *type = NT_float;
+  else if ((mask & (N_BF16)) != 0)
+    *type = NT_bfloat;
    else
      return FAIL;
  
    else
      return FAIL;
  
@@ -15455,6 +15891,45 @@ mve_get_vcmp_vpt_cond (struct neon_type_el et)
    abort ();
  }
  
    abort ();
  }
  
+/* For VCTP (create vector tail predicate) in MVE.
+
+   Maps the element size given in the type suffix (8, 16, 32 or 64) to the
+   two-bit value stored at bit 20 and places operand 0's register at
+   bit 16.  Any other size, or a suffix that is not untyped, is reported
+   through first_error.  */
+static void
+do_mve_vctp (void)
+{
+  int dt = 0;
+  unsigned size = 0x0;
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  /* This is a typical MVE instruction which has no type but has a size of
+     8, 16, 32 or 64.  For instructions with no type,
+     inst.vectype.el[j].type is set to NT_untyped and the size is stored in
+     inst.vectype.el[j].size.  DT stays 0 when that is not the case, which
+     the switch below treats as an error.  */
+  if ((inst.operands[0].present) && (inst.vectype.el[0].type == NT_untyped))
+    dt = inst.vectype.el[0].size;
+
+  /* Setting this does not indicate an actual NEON instruction, but only
+     indicates that the mnemonic accepts neon-style type suffixes.  */
+  inst.is_neon = 1;
+
+  /* Translate the element size into the encoded size field; size 8 keeps
+     the initial value of 0.  */
+  switch (dt)
+    {
+      case 8:
+       break;
+      case 16:
+       size = 0x1; break;
+      case 32:
+       size = 0x2; break;
+      case 64:
+       size = 0x3; break;
+      default:
+       first_error (_("Type is not allowed for this instruction"));
+    }
+  inst.instruction |= size << 20;
+  inst.instruction |= inst.operands[0].reg << 16;
+}
+
  static void
  do_mve_vpt (void)
  {
  static void
  do_mve_vpt (void)
  {
@@ -15586,28 +16061,48 @@ do_mve_vcmp (void)
  }
  
  static void
  }
  
  static void
-do_mve_vfmas (void)
+do_mve_vmaxa_vmina (void)
  {
  {
-  enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
-  struct neon_type_el et
-    = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK, N_EQK);
-
    if (inst.cond > COND_ALWAYS)
      inst.pred_insn_type = INSIDE_VPT_INSN;
    else
      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
  
    if (inst.cond > COND_ALWAYS)
      inst.pred_insn_type = INSIDE_VPT_INSN;
    else
      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
  
-  if (inst.operands[2].reg == REG_SP)
-    as_tsktsk (MVE_BAD_SP);
-  else if (inst.operands[2].reg == REG_PC)
-    as_tsktsk (MVE_BAD_PC);
+  enum neon_shape rs = neon_select_shape (NS_QQ, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (2, rs, N_EQK, N_KEY | N_S8 | N_S16 | N_S32);
  
  
-  inst.instruction |= (et.size == 16) << 28;
    inst.instruction |= HI1 (inst.operands[0].reg) << 22;
    inst.instruction |= HI1 (inst.operands[0].reg) << 22;
-  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= neon_logbits (et.size) << 18;
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
-  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
-  inst.instruction |= inst.operands[2].reg;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.is_neon = 1;
+}
+
+static void
+do_mve_vfmas (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK, N_EQK);
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  if (inst.operands[2].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+  else if (inst.operands[2].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+
+  inst.instruction |= (et.size == 16) << 28;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= inst.operands[2].reg;
    inst.is_neon = 1;
  }
  
    inst.is_neon = 1;
  }
  
@@ -15654,6 +16149,215 @@ do_mve_viddup (void)
    inst.is_neon = 1;
  }
  
    inst.is_neon = 1;
  }
  
+/* Encode an MVE instruction of shape NS_QQR whose element type is taken
+   from the third operand (N_SU_MVE key).  Operand 0 and operand 1 are the
+   vector registers and operand 2 is the general-purpose register; PC or SP
+   in operand 2 is reported through as_tsktsk but encoding still
+   continues.  */
+static void
+do_mve_vmlas (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+  if (inst.operands[2].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+  else if (inst.operands[2].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  /* Bit 28 is set for unsigned element types.  */
+  inst.instruction |= (et.type == NT_unsigned) << 28;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* The element size goes in at bit 20 as returned by neon_logbits.  */
+  inst.instruction |= neon_logbits (et.size) << 20;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= inst.operands[2].reg;
+  inst.is_neon = 1;
+}
+
+/* Encode an MVE shift of shape NS_QQI whose source element type is one of
+   S8, U8, S16 or U16.  The immediate (operand 2) must lie in
+   [1, element size]; two different encodings are produced depending on
+   whether the immediate equals the element size.  */
+static void
+do_mve_vshll (void)
+{
+  struct neon_type_el et
+    = neon_check_type (2, NS_QQI, N_EQK, N_S8 | N_U8 | N_S16 | N_U16 | N_KEY);
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  int imm = inst.operands[2].imm;
+  /* The "imm < 1" test runs on the signed value first, so a negative
+     immediate is rejected before the unsigned comparison.  */
+  constraint (imm < 1 || (unsigned)imm > et.size,
+             _("immediate value out of range"));
+
+  if ((unsigned)imm == et.size)
+    {
+      /* Shift by the full element size: only the size is encoded (at
+        bit 18) and the fixed bits 0x110001 select this form.  */
+      inst.instruction |= neon_logbits (et.size) << 18;
+      inst.instruction |= 0x110001;
+    }
+  else
+    {
+      /* Any smaller shift: size and immediate are combined into a single
+        field at bit 16 and the fixed bits 0x800140 select this form.  */
+      inst.instruction |= (et.size + imm) << 16;
+      inst.instruction |= 0x800140;
+    }
+
+  /* Bit 28 is set for unsigned element types.  */
+  inst.instruction |= (et.type == NT_unsigned) << 28;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.is_neon = 1;
+}
+
+/* Encode an MVE instruction taking a vector register (operand 0), a
+   general-purpose register (operand 1) and an immediate in [1, 32]
+   (operand 2).  PC or SP in operand 1 is reported through as_tsktsk but
+   encoding still continues.  */
+static void
+do_mve_vshlc (void)
+{
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  if (inst.operands[1].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+  else if (inst.operands[1].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+
+  int imm = inst.operands[2].imm;
+  constraint (imm < 1 || imm > 32, _("immediate value out of range"));
+
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* Only five bits are stored, so an immediate of 32 is encoded as 0.  */
+  inst.instruction |= (imm & 0x1f) << 16;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= inst.operands[1].reg;
+  inst.is_neon = 1;
+}
+
+/* Encode the MVE shift-right-and-narrow family (shape NS_QQI).  The set of
+   accepted source element types depends on the opcode currently held in
+   inst.instruction, and the immediate (operand 2) must lie in
+   [1, element size / 2].  */
+static void
+do_mve_vshrn (void)
+{
+  unsigned types;
+  /* Pick the element types this particular mnemonic accepts.  */
+  switch (inst.instruction)
+    {
+    case M_MNEM_vshrnt:
+    case M_MNEM_vshrnb:
+    case M_MNEM_vrshrnt:
+    case M_MNEM_vrshrnb:
+      types = N_I16 | N_I32;
+      break;
+    case M_MNEM_vqshrnt:
+    case M_MNEM_vqshrnb:
+    case M_MNEM_vqrshrnt:
+    case M_MNEM_vqrshrnb:
+      types = N_U16 | N_U32 | N_S16 | N_S32;
+      break;
+    case M_MNEM_vqshrunt:
+    case M_MNEM_vqshrunb:
+    case M_MNEM_vqrshrunt:
+    case M_MNEM_vqrshrunb:
+      types = N_S16 | N_S32;
+      break;
+    default:
+      /* Any other opcode reaching this encoder is an internal error.  */
+      abort ();
+    }
+
+  struct neon_type_el et = neon_check_type (2, NS_QQI, N_EQK, types | N_KEY);
+
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  unsigned Qd = inst.operands[0].reg;
+  unsigned Qm = inst.operands[1].reg;
+  unsigned imm = inst.operands[2].imm;
+  /* IMM is unsigned here, so a negative operand value wraps to a large
+     number and is caught by the upper-bound test.  The message names the
+     range that matches the element size.  */
+  constraint (imm < 1 || ((unsigned) imm) > (et.size / 2),
+             et.size == 16
+             ? _("immediate operand expected in the range [1,8]")
+             : _("immediate operand expected in the range [1,16]"));
+
+  /* Bit 28 is set for unsigned element types.  */
+  inst.instruction |= (et.type == NT_unsigned) << 28;
+  inst.instruction |= HI1 (Qd) << 22;
+  /* Size and shift amount share one field: element size minus the
+     immediate, placed at bit 16.  */
+  inst.instruction |= (et.size - imm) << 16;
+  inst.instruction |= LOW4 (Qd) << 12;
+  inst.instruction |= HI1 (Qm) << 5;
+  inst.instruction |= LOW4 (Qm);
+  inst.is_neon = 1;
+}
+
+/* Encode the MVE VQMOVN and VQMOVUN families (shape NS_QQ).  The
+   M_MNEM_vqmovnt and M_MNEM_vqmovnb opcodes accept signed and unsigned 16-
+   and 32-bit source elements; every other opcode routed here accepts only
+   the signed ones.  */
+static void
+do_mve_vqmovn (void)
+{
+  struct neon_type_el et;
+  unsigned Qd, Qm;
+
+  /* Type-check the operands against the set this opcode allows.  */
+  if (inst.instruction != M_MNEM_vqmovnt
+      && inst.instruction != M_MNEM_vqmovnb)
+    et = neon_check_type (2, NS_QQ, N_EQK, N_S16 | N_S32 | N_KEY);
+  else
+    et = neon_check_type (2, NS_QQ, N_EQK,
+                         N_U16 | N_U32 | N_S16 | N_S32 | N_KEY);
+
+  /* Record whether the instruction carries a predication suffix.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN
+                        : MVE_OUTSIDE_PRED_INSN);
+
+  Qd = inst.operands[0].reg;
+  Qm = inst.operands[1].reg;
+
+  /* Bit 28: unsigned element type.  Bit 18: 32-bit source elements.  */
+  inst.instruction |= (et.type == NT_unsigned) << 28;
+  inst.instruction |= (et.size == 32) << 18;
+
+  /* Destination register, split into its high bit and low nibble.  */
+  inst.instruction |= HI1 (Qd) << 22;
+  inst.instruction |= LOW4 (Qd) << 12;
+
+  /* Source register, split the same way.  */
+  inst.instruction |= HI1 (Qm) << 5;
+  inst.instruction |= LOW4 (Qm);
+
+  inst.is_neon = 1;
+}
+
+/* Encode an MVE instruction of shape NS_QQQ: the three vector register
+   operands are each split into a high bit and a low nibble and placed in
+   the opcode.  No element type is checked here.  */
+static void
+do_mve_vpsel (void)
+{
+  unsigned Qd, Qn, Qm;
+
+  neon_select_shape (NS_QQQ, NS_NULL);
+
+  /* Record whether the instruction carries a predication suffix.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN
+                        : MVE_OUTSIDE_PRED_INSN);
+
+  Qd = inst.operands[0].reg;
+  Qn = inst.operands[1].reg;
+  Qm = inst.operands[2].reg;
+
+  /* First operand: high bit at 22, low nibble at 12.  */
+  inst.instruction |= HI1 (Qd) << 22;
+  inst.instruction |= LOW4 (Qd) << 12;
+
+  /* Second operand: high bit at 7, low nibble at 16.  */
+  inst.instruction |= HI1 (Qn) << 7;
+  inst.instruction |= LOW4 (Qn) << 16;
+
+  /* Third operand: high bit at 5, low nibble at 0.  */
+  inst.instruction |= HI1 (Qm) << 5;
+  inst.instruction |= LOW4 (Qm);
+
+  inst.is_neon = 1;
+}
+
+/* Only records whether the instruction carries a predication suffix; no
+   bits are added to inst.instruction here.  */
+static void
+do_mve_vpnot (void)
+{
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN
+                        : MVE_OUTSIDE_PRED_INSN);
+}
+
+/* Encode a two-operand MVE floating-point instruction of shape NS_QQ.
+   The element type is keyed on the second operand (N_F_MVE); bit 28 is
+   set when the elements are 16 bits wide.  */
+static void
+do_mve_vmaxnma_vminnma (void)
+{
+  unsigned Qd, Qm;
+  enum neon_shape shape = neon_select_shape (NS_QQ, NS_NULL);
+  struct neon_type_el et = neon_check_type (2, shape, N_EQK,
+                                           N_F_MVE | N_KEY);
+
+  /* Record whether the instruction carries a predication suffix.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                        ? INSIDE_VPT_INSN
+                        : MVE_OUTSIDE_PRED_INSN);
+
+  Qd = inst.operands[0].reg;
+  Qm = inst.operands[1].reg;
+
+  inst.instruction |= (et.size == 16) << 28;
+
+  /* Destination register: high bit at 22, low nibble at 12.  */
+  inst.instruction |= HI1 (Qd) << 22;
+  inst.instruction |= LOW4 (Qd) << 12;
+
+  /* Source register: high bit at 5, low nibble at 0.  */
+  inst.instruction |= HI1 (Qm) << 5;
+  inst.instruction |= LOW4 (Qm);
+
+  inst.is_neon = 1;
+}
+
  static void
  do_mve_vcmul (void)
  {
  static void
  do_mve_vcmul (void)
  {
@@ -15686,6 +16390,66 @@ do_mve_vcmul (void)
    inst.is_neon = 1;
  }
  
    inst.is_neon = 1;
  }
  
+/* To handle the Low Overhead Loop instructions
+   in Armv8.1-M Mainline and MVE.
+
+   Handles the le, letp, wls, wlstp, dls, dlstp and lctp mnemonics.  The
+   mnemonic index is saved in INSN before inst.instruction is replaced by
+   the 32-bit opcode, because the rest of the function dispatches on it.  */
+static void
+do_t_loloop (void)
+{
+  unsigned long insn = inst.instruction;
+
+  inst.instruction = THUMB_OP32 (inst.instruction);
+
+  /* lctp is complete once the opcode is in place.  */
+  if (insn == T_MNEM_lctp)
+    return;
+
+  set_pred_insn_type (MVE_OUTSIDE_PRED_INSN);
+
+  /* The tail-predicated start forms take a size suffix (8, 16, 32 or 64),
+     which is encoded at bit 20.  */
+  if (insn == T_MNEM_wlstp || insn == T_MNEM_dlstp)
+    {
+      struct neon_type_el et
+       = neon_check_type (2, NS_RR, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+      inst.instruction |= neon_logbits (et.size) << 20;
+      inst.is_neon = 1;
+    }
+
+  switch (insn)
+    {
+    case T_MNEM_letp:
+      /* Unlike le, letp requires its first operand to be present.  */
+      constraint (!inst.operands[0].present,
+                 _("expected LR"));
+      /* fall through.  */
+    case T_MNEM_le:
+      /* le <label>.  */
+      /* Bit 21 marks the form in which the first operand was omitted.  */
+      if (!inst.operands[0].present)
+       inst.instruction |= 1 << 21;
+
+      v8_1_loop_reloc (TRUE);
+      break;
+
+    case T_MNEM_wls:
+    case T_MNEM_wlstp:
+      /* Only the wls forms get a relocation set up; dls and dlstp join
+        below for the register handling alone.  */
+      v8_1_loop_reloc (FALSE);
+      /* fall through.  */
+    case T_MNEM_dlstp:
+    case T_MNEM_dls:
+      constraint (inst.operands[1].isreg != 1, BAD_ARGS);
+
+      /* PC as the second operand is rejected outright for the
+        tail-predicated forms and only reported through as_tsktsk for the
+        others; SP is reported through as_tsktsk in every case.  */
+      if (insn == T_MNEM_wlstp || insn == T_MNEM_dlstp)
+       constraint (inst.operands[1].reg == REG_PC, BAD_PC);
+      else if (inst.operands[1].reg == REG_PC)
+       as_tsktsk (MVE_BAD_PC);
+      if (inst.operands[1].reg == REG_SP)
+       as_tsktsk (MVE_BAD_SP);
+
+      inst.instruction |= (inst.operands[1].reg << 16);
+      break;
+
+    default:
+      /* Any other mnemonic reaching this encoder is an internal error.  */
+      abort ();
+    }
+}
+
+
  static void
  do_vfp_nsyn_cmp (void)
  {
  static void
  do_vfp_nsyn_cmp (void)
  {
@@ -15764,36 +16528,6 @@ nsyn_insert_sp (void)
    inst.operands[0].present = 1;
  }
  
    inst.operands[0].present = 1;
  }
  
-static void
-do_vfp_nsyn_push (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-             _("register list must contain at least 1 and at most 16 "
-               "registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fstmdbs");
-  else
-    do_vfp_nsyn_opcode ("fstmdbd");
-}
-
-static void
-do_vfp_nsyn_pop (void)
-{
-  nsyn_insert_sp ();
-
-  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
-             _("register list must contain at least 1 and at most 16 "
-               "registers"));
-
-  if (inst.operands[1].issingle)
-    do_vfp_nsyn_opcode ("fldmias");
-  else
-    do_vfp_nsyn_opcode ("fldmiad");
-}
-
  /* Fix up Neon data-processing instructions, ORing in the correct bits for
     ARM mode or Thumb mode and moving the encoded bit 24 to bit 28.  */
  
  /* Fix up Neon data-processing instructions, ORing in the correct bits for
     ARM mode or Thumb mode and moving the encoded bit 24 to bit 28.  */
  
@@ -15820,7 +16554,7 @@ neon_dp_fixup (struct arm_it* insn)
  }
  
  static void
  }
  
  static void
-mve_encode_qqr (int size, int fp)
+mve_encode_qqr (int size, int U, int fp)
  {
    if (inst.operands[2].reg == REG_SP)
      as_tsktsk (MVE_BAD_SP);
  {
    if (inst.operands[2].reg == REG_SP)
      as_tsktsk (MVE_BAD_SP);
@@ -15835,6 +16569,9 @@ mve_encode_qqr (int size, int fp)
        /* vsub.  */
        else if (((unsigned)inst.instruction) == 0x200d00)
         inst.instruction = 0xee301f40;
        /* vsub.  */
        else if (((unsigned)inst.instruction) == 0x200d00)
         inst.instruction = 0xee301f40;
+      /* vmul.  */
+      else if (((unsigned)inst.instruction) == 0x1000d10)
+       inst.instruction = 0xee310e60;
  
        /* Setting size which is 1 for F16 and 0 for F32.  */
        inst.instruction |= (size == 16) << 28;
  
        /* Setting size which is 1 for F16 and 0 for F32.  */
        inst.instruction |= (size == 16) << 28;
@@ -15847,6 +16584,37 @@ mve_encode_qqr (int size, int fp)
        /* vsub.  */
        else if (((unsigned)inst.instruction) == 0x1000800)
         inst.instruction = 0xee011f40;
        /* vsub.  */
        else if (((unsigned)inst.instruction) == 0x1000800)
         inst.instruction = 0xee011f40;
+      /* vhadd.  */
+      else if (((unsigned)inst.instruction) == 0)
+       inst.instruction = 0xee000f40;
+      /* vhsub.  */
+      else if (((unsigned)inst.instruction) == 0x200)
+       inst.instruction = 0xee001f40;
+      /* vmla.  */
+      else if (((unsigned)inst.instruction) == 0x900)
+       inst.instruction = 0xee010e40;
+      /* vmul.  */
+      else if (((unsigned)inst.instruction) == 0x910)
+       inst.instruction = 0xee011e60;
+      /* vqadd.  */
+      else if (((unsigned)inst.instruction) == 0x10)
+       inst.instruction = 0xee000f60;
+      /* vqsub.  */
+      else if (((unsigned)inst.instruction) == 0x210)
+       inst.instruction = 0xee001f60;
+      /* vqrdmlah.  */
+      else if (((unsigned)inst.instruction) == 0x3000b10)
+       inst.instruction = 0xee000e40;
+      /* vqdmulh.  */
+      else if (((unsigned)inst.instruction) == 0x0000b00)
+       inst.instruction = 0xee010e60;
+      /* vqrdmulh.  */
+      else if (((unsigned)inst.instruction) == 0x1000b00)
+       inst.instruction = 0xfe010e60;
+
+      /* Set U-bit.  */
+      inst.instruction |= U << 28;
+
        /* Setting bits for size.  */
        inst.instruction |= neon_logbits (size) << 20;
      }
        /* Setting bits for size.  */
        inst.instruction |= neon_logbits (size) << 20;
      }
@@ -15897,16 +16665,24 @@ mve_encode_rq (unsigned bit28, unsigned size)
    inst.is_neon = 1;
  }
  
    inst.is_neon = 1;
  }
  
-/* Encode insns with bit pattern:
-
-  |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
-  |  U  |x |D  |size | Rn  | Rd  |x x x x|N|Q|M|x| Rm |
+static void
+mve_encode_rrqq (unsigned U, unsigned size)
+{
+  constraint (inst.operands[3].reg > 14, MVE_BAD_QREG);
  
  
-  SIZE is passed in bits. -1 means size field isn't changed, in case it has a
-  different meaning for some instruction.  */
+  inst.instruction |= U << 28;
+  inst.instruction |= (inst.operands[1].reg >> 1) << 20;
+  inst.instruction |= LOW4 (inst.operands[2].reg) << 16;
+  inst.instruction |= (size == 32) << 16;
+  inst.instruction |= inst.operands[0].reg << 12;
+  inst.instruction |= HI1 (inst.operands[2].reg) << 7;
+  inst.instruction |= inst.operands[3].reg;
+  inst.is_neon = 1;
+}
  
  
+/* Helper function for neon_three_same handling the operands.  */
  static void
  static void
-neon_three_same (int isquad, int ubit, int size)
+neon_three_args (int isquad)
  {
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
    inst.instruction |= HI1 (inst.operands[0].reg) << 22;
  {
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
    inst.instruction |= HI1 (inst.operands[0].reg) << 22;
@@ -15915,6 +16691,21 @@ neon_three_same (int isquad, int ubit, int size)
    inst.instruction |= LOW4 (inst.operands[2].reg);
    inst.instruction |= HI1 (inst.operands[2].reg) << 5;
    inst.instruction |= (isquad != 0) << 6;
    inst.instruction |= LOW4 (inst.operands[2].reg);
    inst.instruction |= HI1 (inst.operands[2].reg) << 5;
    inst.instruction |= (isquad != 0) << 6;
+  inst.is_neon = 1;
+}
+
+/* Encode insns with bit pattern:
+
+  |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
+  |  U  |x |D  |size | Rn  | Rd  |x x x x|N|Q|M|x| Rm |
+
+  SIZE is passed in bits. -1 means size field isn't changed, in case it has a
+  different meaning for some instruction.  */
+
+static void
+neon_three_same (int isquad, int ubit, int size)
+{
+  neon_three_args (isquad);
    inst.instruction |= (ubit != 0) << 24;
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 20;
    inst.instruction |= (ubit != 0) << 24;
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 20;
@@ -15942,7 +16733,97 @@ neon_two_same (int qbit, int ubit, int size)
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 18;
  
    if (size != -1)
      inst.instruction |= neon_logbits (size) << 18;
  
-  neon_dp_fixup (&inst);
+  neon_dp_fixup (&inst);
+}
+
+enum vfp_or_neon_is_neon_bits
+{
+NEON_CHECK_CC = 1,
+NEON_CHECK_ARCH = 2,
+NEON_CHECK_ARCH8 = 4
+};
+
+/* Call this function if an instruction which may have belonged to the VFP or
+ Neon instruction sets, but turned out to be a Neon instruction (due to the
+ operand types involved, etc.). We have to check and/or fix-up a couple of
+ things:
+
+   - Make sure the user hasn't attempted to make a Neon instruction
+     conditional.
+   - Alter the value in the condition code field if necessary.
+   - Make sure that the arch supports Neon instructions.
+
+ Which of these operations take place depends on bits from enum
+ vfp_or_neon_is_neon_bits.
+
+ WARNING: This function has side effects! If NEON_CHECK_CC is used and the
+ current instruction's condition is COND_ALWAYS, the condition field is
+ changed to inst.uncond_value.  This is necessary because instructions shared
+ between VFP and Neon may be conditional for the VFP variants only, and the
+ unconditional Neon version must have, e.g., 0xF in the condition field.  */
+
+static int
+vfp_or_neon_is_neon (unsigned check)
+{
+/* Conditions are always legal in Thumb mode (IT blocks).  */
+if (!thumb_mode && (check & NEON_CHECK_CC))
+  {
+    if (inst.cond != COND_ALWAYS)
+      {
+       first_error (_(BAD_COND));
+       return FAIL;
+      }
+    if (inst.uncond_value != -1)
+      inst.instruction |= inst.uncond_value << 28;
+  }
+
+
+  if (((check & NEON_CHECK_ARCH) && !mark_feature_used (&fpu_neon_ext_v1))
+      || ((check & NEON_CHECK_ARCH8)
+         && !mark_feature_used (&fpu_neon_ext_armv8)))
+    {
+      first_error (_(BAD_FPU));
+      return FAIL;
+    }
+
+return SUCCESS;
+}
+
+
+/* Return TRUE if the SIMD instruction is available for the current
+   cpu_variant.  FP is set to TRUE if this is a SIMD floating-point
+   instruction.  CHECK contains the set of bits to pass to
+   vfp_or_neon_is_neon for the NEON specific checks.  */
+
+static bfd_boolean
+check_simd_pred_availability (int fp, unsigned check)
+{
+if (inst.cond > COND_ALWAYS)
+  {
+    if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+      {
+       inst.error = BAD_FPU;
+       return FALSE;
+      }
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  }
+else if (inst.cond < COND_ALWAYS)
+  {
+    if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+    else if (vfp_or_neon_is_neon (check) == FAIL)
+      return FALSE;
+  }
+else
+  {
+    if (!ARM_CPU_HAS_FEATURE (cpu_variant, fp ? mve_fp_ext : mve_ext)
+       && vfp_or_neon_is_neon (check) == FAIL)
+      return FALSE;
+
+    if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+  }
+return TRUE;
  }
  
  /* Neon instruction encoders, in approximate order of appearance.  */
  }
  
  /* Neon instruction encoders, in approximate order of appearance.  */
@@ -15950,19 +16831,46 @@ neon_two_same (int qbit, int ubit, int size)
  static void
  do_neon_dyadic_i_su (void)
  {
  static void
  do_neon_dyadic_i_su (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  struct neon_type_el et = neon_check_type (3, rs,
-    N_EQK, N_EQK, N_SU_32 | N_KEY);
-  neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+  else
+    rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+
+  et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_32 | N_KEY);
+
+
+  if (rs != NS_QQR)
+    neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+  else
+    mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
  }
  
  static void
  do_neon_dyadic_i64_su (void)
  {
  }
  
  static void
  do_neon_dyadic_i64_su (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  struct neon_type_el et = neon_check_type (3, rs,
-    N_EQK, N_EQK, N_SU_ALL | N_KEY);
-  neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+    return;
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_ALL | N_KEY);
+    }
+  if (rs == NS_QQR)
+    mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
+  else
+    neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
  }
  
  static void
  }
  
  static void
@@ -15985,12 +16893,25 @@ neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et,
  }
  
  static void
  }
  
  static void
-do_neon_shl_imm (void)
+do_neon_shl (void)
  {
  {
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
    if (!inst.operands[2].isreg)
      {
    if (!inst.operands[2].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-      struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+      enum neon_shape rs;
+      struct neon_type_el et;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         rs = neon_select_shape (NS_QQI, NS_NULL);
+         et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_MVE);
+       }
+      else
+       {
+         rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+         et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+       }
        int imm = inst.operands[2].imm;
  
        constraint (imm < 0 || (unsigned)imm >= et.size,
        int imm = inst.operands[2].imm;
  
        constraint (imm < 0 || (unsigned)imm >= et.size,
@@ -16000,33 +16921,77 @@ do_neon_shl_imm (void)
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-      struct neon_type_el et = neon_check_type (3, rs,
-       N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
-      unsigned int tmp;
-
-      /* VSHL/VQSHL 3-register variants have syntax such as:
-          vshl.xx Dd, Dm, Dn
-        whereas other 3-register operations encoded by neon_three_same have
-        syntax like:
-          vadd.xx Dd, Dn, Dm
-        (i.e. with Dn & Dm reversed). Swap operands[1].reg and operands[2].reg
-        here.  */
-      tmp = inst.operands[2].reg;
-      inst.operands[2].reg = inst.operands[1].reg;
-      inst.operands[1].reg = tmp;
-      NEON_ENCODE (INTEGER, inst);
-      neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+      enum neon_shape rs;
+      struct neon_type_el et;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+         et = neon_check_type (3, rs, N_EQK, N_SU_MVE | N_KEY, N_EQK | N_EQK);
+       }
+      else
+       {
+         rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+         et = neon_check_type (3, rs, N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+       }
+
+
+      if (rs == NS_QQR)
+       {
+         constraint (inst.operands[0].reg != inst.operands[1].reg,
+                      _("invalid instruction shape"));
+         if (inst.operands[2].reg == REG_SP)
+           as_tsktsk (MVE_BAD_SP);
+         else if (inst.operands[2].reg == REG_PC)
+           as_tsktsk (MVE_BAD_PC);
+
+         inst.instruction = 0xee311e60;
+         inst.instruction |= (et.type == NT_unsigned) << 28;
+         inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+         inst.instruction |= neon_logbits (et.size) << 18;
+         inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+         inst.instruction |= inst.operands[2].reg;
+         inst.is_neon = 1;
+       }
+      else
+       {
+         unsigned int tmp;
+
+         /* VSHL/VQSHL 3-register variants have syntax such as:
+              vshl.xx Dd, Dm, Dn
+            whereas other 3-register operations encoded by neon_three_same have
+            syntax like:
+              vadd.xx Dd, Dn, Dm
+            (i.e. with Dn & Dm reversed). Swap operands[1].reg and
+            operands[2].reg here.  */
+         tmp = inst.operands[2].reg;
+         inst.operands[2].reg = inst.operands[1].reg;
+         inst.operands[1].reg = tmp;
+         NEON_ENCODE (INTEGER, inst);
+         neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+       }
      }
  }
  
  static void
      }
  }
  
  static void
-do_neon_qshl_imm (void)
+do_neon_qshl (void)
  {
  {
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
    if (!inst.operands[2].isreg)
      {
    if (!inst.operands[2].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-      struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+      enum neon_shape rs;
+      struct neon_type_el et;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         rs = neon_select_shape (NS_QQI, NS_NULL);
+         et = neon_check_type (2, rs, N_EQK, N_KEY | N_SU_MVE);
+       }
+      else
+       {
+         rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+         et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+       }
        int imm = inst.operands[2].imm;
  
        constraint (imm < 0 || (unsigned)imm >= et.size,
        int imm = inst.operands[2].imm;
  
        constraint (imm < 0 || (unsigned)imm >= et.size,
@@ -16036,32 +17001,103 @@ do_neon_qshl_imm (void)
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-      struct neon_type_el et = neon_check_type (3, rs,
-       N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
-      unsigned int tmp;
+      enum neon_shape rs;
+      struct neon_type_el et;
  
  
-      /* See note in do_neon_shl_imm.  */
-      tmp = inst.operands[2].reg;
-      inst.operands[2].reg = inst.operands[1].reg;
-      inst.operands[1].reg = tmp;
-      NEON_ENCODE (INTEGER, inst);
-      neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+         et = neon_check_type (3, rs, N_EQK, N_SU_MVE | N_KEY, N_EQK | N_EQK);
+       }
+      else
+       {
+         rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+         et = neon_check_type (3, rs, N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+       }
+
+      if (rs == NS_QQR)
+       {
+         constraint (inst.operands[0].reg != inst.operands[1].reg,
+                      _("invalid instruction shape"));
+         if (inst.operands[2].reg == REG_SP)
+           as_tsktsk (MVE_BAD_SP);
+         else if (inst.operands[2].reg == REG_PC)
+           as_tsktsk (MVE_BAD_PC);
+
+         inst.instruction = 0xee311ee0;
+         inst.instruction |= (et.type == NT_unsigned) << 28;
+         inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+         inst.instruction |= neon_logbits (et.size) << 18;
+         inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+         inst.instruction |= inst.operands[2].reg;
+         inst.is_neon = 1;
+       }
+      else
+       {
+         unsigned int tmp;
+
+         /* See note in do_neon_shl.  */
+         tmp = inst.operands[2].reg;
+         inst.operands[2].reg = inst.operands[1].reg;
+         inst.operands[1].reg = tmp;
+         NEON_ENCODE (INTEGER, inst);
+         neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+       }
      }
  }
  
  static void
  do_neon_rshl (void)
  {
      }
  }
  
  static void
  do_neon_rshl (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  struct neon_type_el et = neon_check_type (3, rs,
-    N_EQK, N_EQK, N_SU_ALL | N_KEY);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK, N_EQK, N_SU_ALL | N_KEY);
+    }
+
    unsigned int tmp;
  
    unsigned int tmp;
  
-  tmp = inst.operands[2].reg;
-  inst.operands[2].reg = inst.operands[1].reg;
-  inst.operands[1].reg = tmp;
-  neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+  if (rs == NS_QQR)
+    {
+      if (inst.operands[2].reg == REG_PC)
+       as_tsktsk (MVE_BAD_PC);
+      else if (inst.operands[2].reg == REG_SP)
+       as_tsktsk (MVE_BAD_SP);
+
+      constraint (inst.operands[0].reg != inst.operands[1].reg,
+                 _("invalid instruction shape"));
+
+      if (inst.instruction == 0x0000510)
+       /* We are dealing with vqrshl.  */
+       inst.instruction = 0xee331ee0;
+      else
+       /* We are dealing with vrshl.  */
+       inst.instruction = 0xee331e60;
+
+      inst.instruction |= (et.type == NT_unsigned) << 28;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+      inst.instruction |= neon_logbits (et.size) << 18;
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+      inst.instruction |= inst.operands[2].reg;
+      inst.is_neon = 1;
+    }
+  else
+    {
+      tmp = inst.operands[2].reg;
+      inst.operands[2].reg = inst.operands[1].reg;
+      inst.operands[1].reg = tmp;
+      neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+    }
  }
  
  static int
  }
  
  static int
@@ -16120,90 +17156,6 @@ neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
    return FAIL;
  }
  
    return FAIL;
  }
  
-enum vfp_or_neon_is_neon_bits
-{
-NEON_CHECK_CC = 1,
-NEON_CHECK_ARCH = 2,
-NEON_CHECK_ARCH8 = 4
-};
-
-/* Call this function if an instruction which may have belonged to the VFP or
- Neon instruction sets, but turned out to be a Neon instruction (due to the
- operand types involved, etc.). We have to check and/or fix-up a couple of
- things:
-
-   - Make sure the user hasn't attempted to make a Neon instruction
-     conditional.
-   - Alter the value in the condition code field if necessary.
-   - Make sure that the arch supports Neon instructions.
-
- Which of these operations take place depends on bits from enum
- vfp_or_neon_is_neon_bits.
-
- WARNING: This function has side effects! If NEON_CHECK_CC is used and the
- current instruction's condition is COND_ALWAYS, the condition field is
- changed to inst.uncond_value.  This is necessary because instructions shared
- between VFP and Neon may be conditional for the VFP variants only, and the
- unconditional Neon version must have, e.g., 0xF in the condition field.  */
-
-static int
-vfp_or_neon_is_neon (unsigned check)
-{
-/* Conditions are always legal in Thumb mode (IT blocks).  */
-if (!thumb_mode && (check & NEON_CHECK_CC))
-  {
-    if (inst.cond != COND_ALWAYS)
-      {
-       first_error (_(BAD_COND));
-       return FAIL;
-      }
-    if (inst.uncond_value != -1)
-      inst.instruction |= inst.uncond_value << 28;
-  }
-
-
-  if (((check & NEON_CHECK_ARCH) && !mark_feature_used (&fpu_neon_ext_v1))
-      || ((check & NEON_CHECK_ARCH8)
-         && !mark_feature_used (&fpu_neon_ext_armv8)))
-    {
-      first_error (_(BAD_FPU));
-      return FAIL;
-    }
-
-return SUCCESS;
-}
-
-static int
-check_simd_pred_availability (int fp, unsigned check)
-{
-if (inst.cond > COND_ALWAYS)
-  {
-    if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
-      {
-       inst.error = BAD_FPU;
-       return 1;
-      }
-    inst.pred_insn_type = INSIDE_VPT_INSN;
-  }
-else if (inst.cond < COND_ALWAYS)
-  {
-    if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
-      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
-    else if (vfp_or_neon_is_neon (check) == FAIL)
-      return 2;
-  }
-else
-  {
-    if (!ARM_CPU_HAS_FEATURE (cpu_variant, fp ? mve_fp_ext : mve_ext)
-       && vfp_or_neon_is_neon (check) == FAIL)
-      return 3;
-
-    if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
-      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
-  }
-return 0;
-}
-
  static void
  do_neon_logic (void)
  {
  static void
  do_neon_logic (void)
  {
@@ -16211,8 +17163,8 @@ do_neon_logic (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
        if (rs == NS_QQQ
      {
        enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
        if (rs == NS_QQQ
-         && check_simd_pred_availability (0, NEON_CHECK_ARCH | NEON_CHECK_CC)
-         == FAIL)
+         && !check_simd_pred_availability (FALSE,
+                                           NEON_CHECK_ARCH | NEON_CHECK_CC))
         return;
        else if (rs != NS_QQQ
                && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
         return;
        else if (rs != NS_QQQ
                && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
@@ -16234,8 +17186,8 @@ do_neon_logic (void)
        /* Because neon_select_shape makes the second operand a copy of the first
          if the second operand is not present.  */
        if (rs == NS_QQI
        /* Because neon_select_shape makes the second operand a copy of the first
          if the second operand is not present.  */
        if (rs == NS_QQI
-         && check_simd_pred_availability (0, NEON_CHECK_ARCH | NEON_CHECK_CC)
-         == FAIL)
+         && !check_simd_pred_availability (FALSE,
+                                           NEON_CHECK_ARCH | NEON_CHECK_CC))
         return;
        else if (rs != NS_QQI
                && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
         return;
        else if (rs != NS_QQI
                && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
@@ -16332,7 +17284,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
      {
        NEON_ENCODE (FLOAT, inst);
        if (rs == NS_QQR)
      {
        NEON_ENCODE (FLOAT, inst);
        if (rs == NS_QQR)
-       mve_encode_qqr (et.size, 1);
+       mve_encode_qqr (et.size, 0, 1);
        else
         neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
      }
        else
         neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
      }
@@ -16340,7 +17292,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
      {
        NEON_ENCODE (INTEGER, inst);
        if (rs == NS_QQR)
      {
        NEON_ENCODE (INTEGER, inst);
        if (rs == NS_QQR)
-       mve_encode_qqr (et.size, 0);
+       mve_encode_qqr (et.size, et.type == ubit_meaning, 0);
        else
         neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
      }
        else
         neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
      }
@@ -16404,6 +17356,7 @@ static void
  do_mve_vstr_vldr_RQ (int size, int elsize, int load)
  {
      unsigned os = inst.operands[1].imm >> 5;
  do_mve_vstr_vldr_RQ (int size, int elsize, int load)
  {
      unsigned os = inst.operands[1].imm >> 5;
+    unsigned type = inst.vectype.el[0].type;
      constraint (os != 0 && size == 8,
                 _("can not shift offsets when accessing less than half-word"));
      constraint (os && os != neon_logbits (size),
      constraint (os != 0 && size == 8,
                 _("can not shift offsets when accessing less than half-word"));
      constraint (os && os != neon_logbits (size),
@@ -16434,15 +17387,14 @@ do_mve_vstr_vldr_RQ (int size, int elsize, int load)
         constraint (inst.operands[0].reg == (inst.operands[1].imm & 0x1f),
                     _("destination register and offset register may not be"
                     " the same"));
         constraint (inst.operands[0].reg == (inst.operands[1].imm & 0x1f),
                     _("destination register and offset register may not be"
                     " the same"));
-       constraint (size == elsize && inst.vectype.el[0].type != NT_unsigned,
+       constraint (size == elsize && type == NT_signed, BAD_EL_TYPE);
+       constraint (size != elsize && type != NT_unsigned && type != NT_signed,
                     BAD_EL_TYPE);
                     BAD_EL_TYPE);
-       constraint (inst.vectype.el[0].type != NT_unsigned
-                   && inst.vectype.el[0].type != NT_signed, BAD_EL_TYPE);
-       inst.instruction |= (inst.vectype.el[0].type == NT_unsigned) << 28;
+       inst.instruction |= ((size == elsize) || (type == NT_unsigned)) << 28;
        }
      else
        {
        }
      else
        {
-       constraint (inst.vectype.el[0].type != NT_untyped, BAD_EL_TYPE);
+       constraint (type != NT_untyped, BAD_EL_TYPE);
        }
  
      inst.instruction |= 1 << 23;
        }
  
      inst.instruction |= 1 << 23;
@@ -16673,8 +17625,13 @@ do_neon_dyadic_if_su (void)
    struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
                                             N_SUF_32 | N_KEY);
  
    struct neon_type_el et = neon_check_type (3, rs, N_EQK , N_EQK,
                                             N_SUF_32 | N_KEY);
  
-  if (check_simd_pred_availability (et.type == NT_float,
-                                   NEON_CHECK_ARCH | NEON_CHECK_CC))
+  constraint ((inst.instruction == ((unsigned) N_MNEM_vmax)
+              || inst.instruction == ((unsigned) N_MNEM_vmin))
+             && et.type == NT_float
+             && !ARM_CPU_HAS_FEATURE (cpu_variant,fpu_neon_ext_v1), BAD_FPU);
+
+  if (!check_simd_pred_availability (et.type == NT_float,
+                                    NEON_CHECK_ARCH | NEON_CHECK_CC))
      return;
  
    neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
      return;
  
    neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
@@ -16698,8 +17655,8 @@ do_neon_addsub_if_i (void)
       they are predicated or not.  */
    if ((rs == NS_QQQ || rs == NS_QQR) && et.size != 64)
      {
       they are predicated or not.  */
    if ((rs == NS_QQQ || rs == NS_QQR) && et.size != 64)
      {
-      if (check_simd_pred_availability (et.type == NT_float,
-                                       NEON_CHECK_ARCH | NEON_CHECK_CC))
+      if (!check_simd_pred_availability (et.type == NT_float,
+                                        NEON_CHECK_ARCH | NEON_CHECK_CC))
         return;
      }
    else
         return;
      }
    else
@@ -16860,37 +17817,135 @@ do_neon_mac_maybe_scalar (void)
    if (try_vfp_nsyn (3, do_vfp_nsyn_mla_mls) == SUCCESS)
      return;
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_mla_mls) == SUCCESS)
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
      return;
  
    if (inst.operands[2].isscalar)
      {
      return;
  
    if (inst.operands[2].isscalar)
      {
+      constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
         N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY);
        NEON_ENCODE (SCALAR, inst);
        neon_mul_mac (et, neon_quad (rs));
      }
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
         N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY);
        NEON_ENCODE (SCALAR, inst);
        neon_mul_mac (et, neon_quad (rs));
      }
+  else if (!inst.operands[2].isvec)
+    {
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+
+      enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+      neon_dyadic_misc (NT_unsigned, N_SU_MVE, 0);
+    }
+  else
+    {
+      constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+      /* The "untyped" case can't happen.  Do this to stop the "U" bit being
+        affected if we specify unsigned args.  */
+      neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+    }
+}
+
+static void
+do_bfloat_vfma (void)
+{
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+  enum neon_shape rs;
+  int t_bit = 0;
+
+  if (inst.instruction != B_MNEM_vfmab)
+  {
+      t_bit = 1;
+      inst.instruction = B_MNEM_vfmat;
+  }
+
+  if (inst.operands[2].isscalar)
+    {
+      rs = neon_select_shape (NS_QQS, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+
+      inst.instruction |= (1 << 25);
+      int index = inst.operands[2].reg & 0xf;
+      constraint (!(index < 4), _("index must be in the range 0 to 3"));
+      inst.operands[2].reg >>= 4;
+      constraint (!(inst.operands[2].reg < 8),
+                 _("indexed register must be less than 8"));
+      neon_three_args (t_bit);
+      inst.instruction |= ((index & 1) << 3);
+      inst.instruction |= ((index & 2) << 4);
+    }
    else
      {
    else
      {
-      /* The "untyped" case can't happen.  Do this to stop the "U" bit being
-        affected if we specify unsigned args.  */
-      neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+      rs = neon_select_shape (NS_QQQ, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+      neon_three_args (t_bit);
      }
      }
+
  }
  
  static void
  do_neon_fmac (void)
  {
  }
  
  static void
  do_neon_fmac (void)
  {
-  if (try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_fma)
+      && try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
      return;
  
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+  if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH))
      return;
  
      return;
  
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext))
+    {
+      enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+      struct neon_type_el et = neon_check_type (3, rs, N_F_MVE | N_KEY, N_EQK,
+                                               N_EQK);
+
+      if (rs == NS_QQR)
+       {
+
+         if (inst.operands[2].reg == REG_SP)
+           as_tsktsk (MVE_BAD_SP);
+         else if (inst.operands[2].reg == REG_PC)
+           as_tsktsk (MVE_BAD_PC);
+
+         inst.instruction = 0xee310e40;
+         inst.instruction |= (et.size == 16) << 28;
+         inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+         inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+         inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+         inst.instruction |= HI1 (inst.operands[1].reg) << 6;
+         inst.instruction |= inst.operands[2].reg;
+         inst.is_neon = 1;
+         return;
+       }
+    }
+  else
+    {
+      constraint (!inst.operands[2].isvec, BAD_FPU);
+    }
+
    neon_dyadic_misc (NT_untyped, N_IF_32, 0);
  }
  
    neon_dyadic_misc (NT_untyped, N_IF_32, 0);
  }
  
+/* Handler for a vfma-style mnemonic that is encoded either as MVE VFMA
+   or as a bfloat16 fused multiply-accumulate.
+
+   When the selected CPU lacks arm_ext_bf16 and inst.cond is COND_ALWAYS,
+   the instruction is treated as MVE VFMA: mve_ext is required (BAD_FPU
+   otherwise), the mnemonic is reset to N_MNEM_vfma, the instruction is
+   marked INSIDE_VPT_INSN with inst.cond forced to 0xf, and the encoding
+   is delegated to do_neon_fmac.  In every other case do_bfloat_vfma does
+   the encoding.
+
+   NOTE(review): presumably this covers "vfmat" being read as VFMA plus a
+   VPT "then" suffix; confirm against the opcode table entry.  */
+static void
+do_mve_vfma (void)
+{
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16)
+      && inst.cond == COND_ALWAYS)
+    {
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+      inst.instruction = N_MNEM_vfma;
+      inst.pred_insn_type = INSIDE_VPT_INSN;
+      inst.cond = 0xf;
+      /* Plain call: a return statement with an expression is not allowed
+        in a function returning void.  */
+      do_neon_fmac ();
+    }
+  else
+    {
+      do_bfloat_vfma ();
+    }
+}
+
  static void
  do_neon_tst (void)
  {
  static void
  do_neon_tst (void)
  {
@@ -16910,20 +17965,45 @@ do_neon_mul (void)
    if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS)
      return;
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS)
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
      return;
  
    if (inst.operands[2].isscalar)
      return;
  
    if (inst.operands[2].isscalar)
-    do_neon_mac_maybe_scalar ();
+    {
+      constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+      do_neon_mac_maybe_scalar ();
+    }
    else
    else
-    neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
+    {
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         enum neon_shape rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+         struct neon_type_el et
+           = neon_check_type (3, rs, N_EQK, N_EQK, N_I_MVE | N_F_MVE | N_KEY);
+         if (et.type == NT_float)
+           constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_fp_ext),
+                       BAD_FPU);
+
+         neon_dyadic_misc (NT_float, N_I_MVE | N_F_MVE, 0);
+       }
+      else
+       {
+         constraint (!inst.operands[2].isvec, BAD_FPU);
+         neon_dyadic_misc (NT_poly,
+                           N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
+       }
+    }
  }
  
  static void
  do_neon_qdmulh (void)
  {
  }
  
  static void
  do_neon_qdmulh (void)
  {
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
    if (inst.operands[2].isscalar)
      {
    if (inst.operands[2].isscalar)
      {
+      constraint (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
         N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
        enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
        struct neon_type_el et = neon_check_type (3, rs,
         N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
@@ -16932,12 +18012,27 @@ do_neon_qdmulh (void)
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-      struct neon_type_el et = neon_check_type (3, rs,
-       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      enum neon_shape rs;
+      struct neon_type_el et;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       {
+         rs = neon_select_shape (NS_QQR, NS_QQQ, NS_NULL);
+         et = neon_check_type (3, rs,
+           N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+       }
+      else
+       {
+         rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+         et = neon_check_type (3, rs,
+           N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+       }
+
        NEON_ENCODE (INTEGER, inst);
        NEON_ENCODE (INTEGER, inst);
-      /* The U bit (rounding) comes from bit mask.  */
-      neon_three_same (neon_quad (rs), 0, et.size);
+      if (rs == NS_QQR)
+       mve_encode_qqr (et.size, 0, 0);
+      else
+       /* The U bit (rounding) comes from bit mask.  */
+       neon_three_same (neon_quad (rs), 0, et.size);
      }
  }
  
      }
  }
  
@@ -16961,6 +18056,59 @@ do_mve_vaddv (void)
    mve_encode_rq (et.type == NT_unsigned, et.size);
  }
  
    mve_encode_rq (et.type == NT_unsigned, et.size);
  }
  
+/* Encode MVE VHCADD.  Operands are matched against shape NS_QQQI with a key
+   type drawn from N_S8 | N_S16 | N_S32.  The rotation immediate must be 90
+   or 270; a rotation of 270 sets bit 12 of the encoding.  */
+static void
+do_mve_vhcadd (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQQI, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS are tagged INSIDE_VPT_INSN;
+     everything else is MVE_OUTSIDE_PRED_INSN.  */
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  /* The rotation is read from the expression of the first relocation.  */
+  unsigned rot = inst.relocs[0].exp.X_add_number;
+  constraint (rot != 90 && rot != 270, _("immediate out of range"));
+
+  /* Diagnose 32-bit elements where operand 0 and operand 2 name the same
+     register.  */
+  if (et.size == 32 && inst.operands[0].reg == inst.operands[2].reg)
+    as_tsktsk (_("Warning: 32-bit element size and same first and third "
+                "operand makes instruction UNPREDICTABLE"));
+
+  mve_encode_qqq (0, et.size);
+  /* The rotation bit is OR-ed in after the generic QQQ encoding.  */
+  inst.instruction |= (rot == 270) << 12;
+  inst.is_neon = 1;
+}
+
+/* Encode MVE VQDMULL.  Accepts shape NS_QQQ or NS_QQR with a key type drawn
+   from N_S16 | N_S32, and picks the QQQ or QQR encoder accordingly.  */
+static void
+do_mve_vqdmull (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_QQQ, NS_QQR, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (3, rs, N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+
+  /* With 32-bit elements, diagnose operand 0 naming the same register as
+     operand 1, or as operand 2 when the shape is NS_QQQ.  */
+  if (et.size == 32
+      && (inst.operands[0].reg == inst.operands[1].reg
+         || (rs == NS_QQQ && inst.operands[0].reg == inst.operands[2].reg)))
+    as_tsktsk (BAD_MVE_SRCDEST);
+
+  /* Conditions numerically above COND_ALWAYS are tagged INSIDE_VPT_INSN;
+     everything else is MVE_OUTSIDE_PRED_INSN.  */
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  if (rs == NS_QQQ)
+    {
+      mve_encode_qqq (et.size == 32, 64);
+      /* Bit 0 is set after the generic QQQ encoding.  */
+      inst.instruction |= 1;
+    }
+  else
+    {
+      mve_encode_qqr (64, et.size == 32, 0);
+      /* Bits 5 and 6 are set after the generic QQR encoding.  */
+      inst.instruction |= 0x3 << 5;
+    }
+}
+
  static void
  do_mve_vadc (void)
  {
  static void
  do_mve_vadc (void)
  {
@@ -16991,7 +18139,7 @@ do_mve_vbrsr (void)
    else
      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
  
    else
      inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
  
-  mve_encode_qqr (et.size, 0);
+  mve_encode_qqr (et.size, 0, 0);
  }
  
  static void
  }
  
  static void
@@ -17007,6 +18155,52 @@ do_mve_vsbc (void)
    mve_encode_qqq (1, 64);
  }
  
    mve_encode_qqq (1, 64);
  }
  
+/* Encode MVE VMULH: shape NS_QQQ, key type from N_SU_MVE.  The first
+   argument handed to mve_encode_qqq is whether the key type is unsigned,
+   the second is the element size.  */
+static void
+do_mve_vmulh (void)
+{
+  enum neon_shape shape = neon_select_shape (NS_QQQ, NS_NULL);
+  struct neon_type_el key
+    = neon_check_type (3, shape, N_EQK, N_EQK, N_SU_MVE | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS select INSIDE_VPT_INSN;
+     anything else is MVE_OUTSIDE_PRED_INSN.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+
+  mve_encode_qqq (key.type == NT_unsigned, key.size);
+}
+
+/* Encode MVE VQDMLAH: shape NS_QQR, key type from N_S_32.  The QQR encoder
+   receives the element size, whether the key type is unsigned, and a
+   trailing 0.  */
+static void
+do_mve_vqdmlah (void)
+{
+  enum neon_shape shape = neon_select_shape (NS_QQR, NS_NULL);
+  struct neon_type_el key
+    = neon_check_type (3, shape, N_EQK, N_EQK, N_S_32 | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS select INSIDE_VPT_INSN;
+     anything else is MVE_OUTSIDE_PRED_INSN.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+
+  mve_encode_qqr (key.size, key.type == NT_unsigned, 0);
+}
+
+/* Encode MVE VQDMLADH: shape NS_QQQ, key type from N_S8 | N_S16 | N_S32.
+   The QQQ encoder is called with a first argument of 0 and the element
+   size.  */
+static void
+do_mve_vqdmladh (void)
+{
+  enum neon_shape shape = neon_select_shape (NS_QQQ, NS_NULL);
+  struct neon_type_el key
+    = neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS select INSIDE_VPT_INSN;
+     anything else is MVE_OUTSIDE_PRED_INSN.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+
+  mve_encode_qqq (0, key.size);
+}
+
+
  static void
  do_mve_vmull (void)
  {
  static void
  do_mve_vmull (void)
  {
@@ -17122,34 +18316,159 @@ do_mve_vmladav (void)
  }
  
  static void
  }
  
  static void
-do_neon_qrdmlah (void)
+do_mve_vmlaldav (void)
+{
+  /* Operands are matched against shape NS_RRQQ; the key type comes from
+     N_S16 | N_S32 | N_U16 | N_U32.  */
+  enum neon_shape rs = neon_select_shape (NS_RRQQ, NS_NULL);
+  struct neon_type_el et
+    = neon_check_type (4, rs, N_EQK, N_EQK, N_EQK,
+                      N_S16 | N_S32 | N_U16 | N_U32 | N_KEY);
+
+  /* The four vmlsldav* mnemonics report BAD_SIMD_TYPE for an unsigned key
+     type.  */
+  if (et.type == NT_unsigned
+      && (inst.instruction == M_MNEM_vmlsldav
+         || inst.instruction == M_MNEM_vmlsldava
+         || inst.instruction == M_MNEM_vmlsldavx
+         || inst.instruction == M_MNEM_vmlsldavax))
+    first_error (BAD_SIMD_TYPE);
+
+  /* Conditions numerically above COND_ALWAYS are tagged INSIDE_VPT_INSN;
+     everything else is MVE_OUTSIDE_PRED_INSN.  */
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  mve_encode_rrqq (et.type == NT_unsigned, et.size);
+}
+
+static void
+do_mve_vrmlaldavh (void)
  {
  {
-  /* Check we're on the correct architecture.  */
-  if (!mark_feature_used (&fpu_neon_ext_armv8))
-    inst.error =
-      _("instruction form not available on this architecture.");
-  else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+  struct neon_type_el et;
+  if (inst.instruction == M_MNEM_vrmlsldavh
+     || inst.instruction == M_MNEM_vrmlsldavha
+     || inst.instruction == M_MNEM_vrmlsldavhx
+     || inst.instruction == M_MNEM_vrmlsldavhax)
      {
      {
-      as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
-      record_feature_use (&fpu_neon_ext_v8_1);
+      et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY);
+      if (inst.operands[1].reg == REG_SP)
+       as_tsktsk (MVE_BAD_SP);
      }
      }
+  else
+    {
+      if (inst.instruction == M_MNEM_vrmlaldavhx
+         || inst.instruction == M_MNEM_vrmlaldavhax)
+       et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK, N_S32 | N_KEY);
+      else
+       et = neon_check_type (4, NS_RRQQ, N_EQK, N_EQK, N_EQK,
+                             N_U32 | N_S32 | N_KEY);
+      /* vrmlaldavh's encoding with SP as the second, odd, GPR operand may alias
+        with vmax/min instructions, making the use of SP in assembly really
+        nonsensical, so instead of issuing a warning like we do for other uses
+        of SP for the odd register operand we error out.  */
+      constraint (inst.operands[1].reg == REG_SP, BAD_SP);
+    }
+
+  /* Make sure we still check the second operand is an odd one and that PC is
+     disallowed.  This because we are parsing for any GPR operand, to be able
+     to distinguish between giving a warning or an error for SP as described
+     above.  */
+  constraint ((inst.operands[1].reg % 2) != 1, BAD_EVEN);
+  constraint (inst.operands[1].reg == REG_PC, BAD_PC);
  
  
-  if (inst.operands[2].isscalar)
+  if (inst.cond > COND_ALWAYS)
+    inst.pred_insn_type = INSIDE_VPT_INSN;
+  else
+    inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN;
+
+  mve_encode_rrqq (et.type == NT_unsigned, 0);
+}
+
+
+/* Encode the MVE VMAXNMV handler: shape NS_RQ, key type from N_F_MVE.
+   Operand 0 being REG_SP or REG_PC produces the MVE_BAD_SP or MVE_BAD_PC
+   diagnostic.  The RQ encoder receives whether the element size is 16,
+   followed by the constant 64.  */
+static void
+do_mve_vmaxnmv (void)
+{
+  enum neon_shape shape = neon_select_shape (NS_RQ, NS_NULL);
+  struct neon_type_el key
+    = neon_check_type (2, shape, N_EQK, N_F_MVE | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS select INSIDE_VPT_INSN;
+     anything else is MVE_OUTSIDE_PRED_INSN.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+
+  if (inst.operands[0].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+  else if (inst.operands[0].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+
+  mve_encode_rq (key.size == 16, 64);
+}
+
+/* Encode the MVE handler shared by M_MNEM_vmaxv / M_MNEM_vminv and other
+   mnemonics routed here: shape NS_RQ.  The vmaxv and vminv mnemonics take
+   a key type from N_SU_MVE, the rest from N_S8 | N_S16 | N_S32.  Operand 0
+   being REG_SP or REG_PC produces the MVE_BAD_SP or MVE_BAD_PC
+   diagnostic.  */
+static void
+do_mve_vmaxv (void)
+{
+  enum neon_shape shape = neon_select_shape (NS_RQ, NS_NULL);
+  struct neon_type_el key;
+  const int takes_su_mve = (inst.instruction == M_MNEM_vmaxv
+                            || inst.instruction == M_MNEM_vminv);
+
+  if (takes_su_mve)
+    key = neon_check_type (2, shape, N_EQK, N_SU_MVE | N_KEY);
+  else
+    key = neon_check_type (2, shape, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+
+  /* Conditions numerically above COND_ALWAYS select INSIDE_VPT_INSN;
+     anything else is MVE_OUTSIDE_PRED_INSN.  */
+  inst.pred_insn_type = (inst.cond > COND_ALWAYS
+                         ? INSIDE_VPT_INSN
+                         : MVE_OUTSIDE_PRED_INSN);
+
+  if (inst.operands[0].reg == REG_SP)
+    as_tsktsk (MVE_BAD_SP);
+  else if (inst.operands[0].reg == REG_PC)
+    as_tsktsk (MVE_BAD_PC);
+
+  mve_encode_rq (key.type == NT_unsigned, key.size);
+}
+
+
+static void
+do_neon_qrdmlah (void)
+{
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
      {
      {
-      enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
-      struct neon_type_el et = neon_check_type (3, rs,
-       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
-      NEON_ENCODE (SCALAR, inst);
-      neon_mul_mac (et, neon_quad (rs));
+      /* Check we're on the correct architecture.  */
+      if (!mark_feature_used (&fpu_neon_ext_armv8))
+       inst.error
+         = _("instruction form not available on this architecture.");
+      else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+       {
+         as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
+         record_feature_use (&fpu_neon_ext_v8_1);
+       }
+       if (inst.operands[2].isscalar)
+         {
+           enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+           struct neon_type_el et = neon_check_type (3, rs,
+             N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+           NEON_ENCODE (SCALAR, inst);
+           neon_mul_mac (et, neon_quad (rs));
+         }
+       else
+         {
+           enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+           struct neon_type_el et = neon_check_type (3, rs,
+             N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+           NEON_ENCODE (INTEGER, inst);
+           /* The U bit (rounding) comes from bit mask.  */
+           neon_three_same (neon_quad (rs), 0, et.size);
+         }
      }
    else
      {
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-      struct neon_type_el et = neon_check_type (3, rs,
-       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      enum neon_shape rs = neon_select_shape (NS_QQR, NS_NULL);
+      struct neon_type_el et
+       = neon_check_type (3, rs, N_EQK, N_EQK, N_S_32 | N_KEY);
+
        NEON_ENCODE (INTEGER, inst);
        NEON_ENCODE (INTEGER, inst);
-      /* The U bit (rounding) comes from bit mask.  */
-      neon_three_same (neon_quad (rs), 0, et.size);
+      mve_encode_qqr (et.size, et.type == NT_unsigned, 0);
      }
  }
  
      }
  }
  
@@ -17191,8 +18510,8 @@ do_neon_abs_neg (void)
    rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
    et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
  
    rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
    et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
  
-  if (check_simd_pred_availability (et.type == NT_float,
-                                   NEON_CHECK_ARCH | NEON_CHECK_CC))
+  if (!check_simd_pred_availability (et.type == NT_float,
+                                    NEON_CHECK_ARCH | NEON_CHECK_CC))
      return;
  
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
      return;
  
    inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -17209,9 +18528,23 @@ do_neon_abs_neg (void)
  static void
  do_neon_sli (void)
  {
  static void
  do_neon_sli (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-  struct neon_type_el et = neon_check_type (2, rs,
-    N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+    return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+    }
+
+
    int imm = inst.operands[2].imm;
    constraint (imm < 0 || (unsigned)imm >= et.size,
               _("immediate out of range for insert"));
    int imm = inst.operands[2].imm;
    constraint (imm < 0 || (unsigned)imm >= et.size,
               _("immediate out of range for insert"));
@@ -17221,21 +18554,48 @@ do_neon_sli (void)
  static void
  do_neon_sri (void)
  {
  static void
  do_neon_sri (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-  struct neon_type_el et = neon_check_type (2, rs,
-    N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+    return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+    }
+
    int imm = inst.operands[2].imm;
    constraint (imm < 1 || (unsigned)imm > et.size,
               _("immediate out of range for insert"));
    neon_imm_shift (FALSE, 0, neon_quad (rs), et, et.size - imm);
  }
  
    int imm = inst.operands[2].imm;
    constraint (imm < 1 || (unsigned)imm > et.size,
               _("immediate out of range for insert"));
    neon_imm_shift (FALSE, 0, neon_quad (rs), et, et.size - imm);
  }
  
-static void
-do_neon_qshlu_imm (void)
-{
-  enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-  struct neon_type_el et = neon_check_type (2, rs,
-    N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+static void
+do_neon_qshlu_imm (void)
+{
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+    return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK | N_UNS,
+                           N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+    }
+
    int imm = inst.operands[2].imm;
    constraint (imm < 0 || (unsigned)imm >= et.size,
               _("immediate out of range for shift"));
    int imm = inst.operands[2].imm;
    constraint (imm < 0 || (unsigned)imm >= et.size,
               _("immediate out of range for shift"));
@@ -17415,6 +18775,7 @@ do_neon_shll (void)
    CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
    CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
    CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
    CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
    CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
    CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
+  CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg,   NULL, NULL, NULL)           \
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
@@ -17692,7 +19053,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
               || flavour == neon_cvt_flavour_s32_f32
               || flavour == neon_cvt_flavour_u32_f32))
         {
               || flavour == neon_cvt_flavour_s32_f32
               || flavour == neon_cvt_flavour_u32_f32))
         {
-         if (check_simd_pred_availability (1, NEON_CHECK_CC | NEON_CHECK_ARCH))
+         if (!check_simd_pred_availability (TRUE,
+                                            NEON_CHECK_CC | NEON_CHECK_ARCH))
             return;
         }
        else if (mode == neon_cvt_mode_n)
             return;
         }
        else if (mode == neon_cvt_mode_n)
@@ -17779,8 +19141,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
               || flavour == neon_cvt_flavour_s32_f32
               || flavour == neon_cvt_flavour_u32_f32))
         {
               || flavour == neon_cvt_flavour_s32_f32
               || flavour == neon_cvt_flavour_u32_f32))
         {
-         if (check_simd_pred_availability (1,
-                                           NEON_CHECK_CC | NEON_CHECK_ARCH8))
+         if (!check_simd_pred_availability (TRUE,
+                                            NEON_CHECK_CC | NEON_CHECK_ARCH8))
             return;
         }
        else if (mode == neon_cvt_mode_z
             return;
         }
        else if (mode == neon_cvt_mode_z
@@ -17793,8 +19155,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
                    || flavour == neon_cvt_flavour_s32_f32
                    || flavour == neon_cvt_flavour_u32_f32))
         {
                    || flavour == neon_cvt_flavour_s32_f32
                    || flavour == neon_cvt_flavour_u32_f32))
         {
-         if (check_simd_pred_availability (1,
-                                           NEON_CHECK_CC | NEON_CHECK_ARCH))
+         if (!check_simd_pred_availability (TRUE,
+                                            NEON_CHECK_CC | NEON_CHECK_ARCH))
             return;
         }
        /* fall through.  */
             return;
         }
        /* fall through.  */
@@ -17803,8 +19165,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
         {
  
           NEON_ENCODE (FLOAT, inst);
         {
  
           NEON_ENCODE (FLOAT, inst);
-         if (check_simd_pred_availability (1,
-                                           NEON_CHECK_CC | NEON_CHECK_ARCH8))
+         if (!check_simd_pred_availability (TRUE,
+                                            NEON_CHECK_CC | NEON_CHECK_ARCH8))
             return;
  
           inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
             return;
  
           inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -17881,8 +19243,21 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
           }
  
        if (rs == NS_DQ)
           }
  
        if (rs == NS_DQ)
-       inst.instruction = 0x3b60600;
+       {
+         if (flavour == neon_cvt_flavour_bf16_f32)
+           {
+             if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL)
+               return;
+             constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+             /* VCVT.bf16.f32.  */
+             inst.instruction = 0x11b60640;
+           }
+         else
+           /* VCVT.f16.f32.  */
+           inst.instruction = 0x3b60600;
+       }
        else
        else
+       /* VCVT.f32.f16.  */
         inst.instruction = 0x3b60700;
  
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
         inst.instruction = 0x3b60700;
  
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -17964,7 +19339,7 @@ do_neon_cvttb_1 (bfd_boolean t)
    else if (rs == NS_QQ || rs == NS_QQI)
      {
        int single_to_half = 0;
    else if (rs == NS_QQ || rs == NS_QQI)
      {
        int single_to_half = 0;
-      if (check_simd_pred_availability (1, NEON_CHECK_ARCH))
+      if (!check_simd_pred_availability (TRUE, NEON_CHECK_ARCH))
         return;
  
        enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
         return;
  
        enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
@@ -18032,6 +19407,14 @@ do_neon_cvttb_1 (bfd_boolean t)
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
      }
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
      }
+  else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype)
+    {
+      constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+      inst.error = NULL;
+      inst.instruction |= (1 << 8);
+      inst.instruction &= ~(1 << 9);
+      do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
+    }
    else
      return;
  }
    else
      return;
  }
@@ -18104,9 +19487,16 @@ neon_move_immediate (void)
  static void
  do_neon_mvn (void)
  {
  static void
  do_neon_mvn (void)
  {
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+    return;
+
    if (inst.operands[1].isreg)
      {
    if (inst.operands[1].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+      enum neon_shape rs;
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+       rs = neon_select_shape (NS_QQ, NS_NULL);
+      else
+       rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
  
        NEON_ENCODE (INTEGER, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
  
        NEON_ENCODE (INTEGER, inst);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -18122,6 +19512,11 @@ do_neon_mvn (void)
      }
  
    neon_dp_fixup (&inst);
      }
  
    neon_dp_fixup (&inst);
+
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      constraint (!inst.operands[1].isreg && !inst.operands[0].isquad, BAD_FPU);
+    }
  }
  
  /* Encode instructions of form:
  }
  
  /* Encode instructions of form:
@@ -18269,16 +19664,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
       0x2.  */
    int size = -1;
  
       0x2.  */
    int size = -1;
  
-  if (inst.cond != COND_ALWAYS)
-    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
-              "behaviour is UNPREDICTABLE"));
-
-  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
-             _(BAD_FP16));
-
-  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
-             _(BAD_FPU));
-
    /* vfmal/vfmsl are in three-same D/Q register format or the third operand can
       be a scalar index register.  */
    if (inst.operands[2].isscalar)
    /* vfmal/vfmsl are in three-same D/Q register format or the third operand can
       be a scalar index register.  */
    if (inst.operands[2].isscalar)
@@ -18297,7 +19682,16 @@ do_neon_fmac_maybe_scalar_long (int subtype)
        rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
      }
  
        rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
      }
  
-  neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
+
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
+              "behaviour is UNPREDICTABLE"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
+             _(BAD_FP16));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+             _(BAD_FPU));
  
    /* "opcode" from template has included "ubit", so simply pass 0 here.  Also,
       the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
  
    /* "opcode" from template has included "ubit", so simply pass 0 here.  Also,
       the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
@@ -18434,14 +19828,29 @@ do_neon_ext (void)
  static void
  do_neon_rev (void)
  {
  static void
  do_neon_rev (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
+  enum neon_shape rs;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    rs = neon_select_shape (NS_QQ, NS_NULL);
+  else
+    rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+
    struct neon_type_el et = neon_check_type (2, rs,
      N_EQK, N_8 | N_16 | N_32 | N_KEY);
    struct neon_type_el et = neon_check_type (2, rs,
      N_EQK, N_8 | N_16 | N_32 | N_KEY);
+
    unsigned op = (inst.instruction >> 7) & 3;
    /* N (width of reversed regions) is encoded as part of the bitmask. We
       extract it here to check the elements to be reversed are smaller.
       Otherwise we'd get a reserved instruction.  */
    unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0;
    unsigned op = (inst.instruction >> 7) & 3;
    /* N (width of reversed regions) is encoded as part of the bitmask. We
       extract it here to check the elements to be reversed are smaller.
       Otherwise we'd get a reserved instruction.  */
    unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0;
+
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) && elsize == 64
+      && inst.operands[0].reg == inst.operands[1].reg)
+    as_tsktsk (_("Warning: 64-bit element size and same destination and source"
+                " operands makes instruction UNPREDICTABLE"));
+
    gas_assert (elsize != 0);
    constraint (et.size >= elsize,
               _("elements must be smaller than reversal region"));
    gas_assert (elsize != 0);
    constraint (et.size >= elsize,
               _("elements must be smaller than reversal region"));
@@ -18484,7 +19893,7 @@ do_neon_dup (void)
         N_8 | N_16 | N_32 | N_KEY, N_EQK);
        if (rs == NS_QR)
         {
         N_8 | N_16 | N_32 | N_KEY, N_EQK);
        if (rs == NS_QR)
         {
-         if (check_simd_pred_availability (0, NEON_CHECK_ARCH))
+         if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH))
             return;
         }
        else
             return;
         }
        else
@@ -18641,7 +20050,13 @@ do_neon_mov (void)
        et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY);
        /* It is not an error here if no type is given.  */
        inst.error = NULL;
        et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY);
        /* It is not an error here if no type is given.  */
        inst.error = NULL;
-      if (et.type == NT_float && et.size == 64)
+
+      /* In MVE we interpret the following instructions as same, so ignoring
+        the following type (float) and size (64) checks.
+        a: VMOV<c><q> <Dd>, <Dm>
+        b: VMOV<c><q>.F64 <Dd>, <Dm>.  */
+      if ((et.type == NT_float && et.size == 64)
+         || (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)))
         {
           do_vfp_nsyn_opcode ("fcpyd");
           break;
         {
           do_vfp_nsyn_opcode ("fcpyd");
           break;
@@ -18650,7 +20065,8 @@ do_neon_mov (void)
  
      case NS_QQ:  /* case 0/1.  */
        {
  
      case NS_QQ:  /* case 0/1.  */
        {
-       if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH))
+       if (!check_simd_pred_availability (FALSE,
+                                          NEON_CHECK_CC | NEON_CHECK_ARCH))
           return;
         /* The architecture manual I have doesn't explicitly state which
            value the U bit should have for register->register moves, but
           return;
         /* The architecture manual I have doesn't explicitly state which
            value the U bit should have for register->register moves, but
@@ -18680,7 +20096,8 @@ do_neon_mov (void)
        /* fall through.  */
  
      case NS_QI:  /* case 2/3.  */
        /* fall through.  */
  
      case NS_QI:  /* case 2/3.  */
-      if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH))
+      if (!check_simd_pred_availability (FALSE,
+                                        NEON_CHECK_CC | NEON_CHECK_ARCH))
         return;
        inst.instruction = 0x0800010;
        neon_move_immediate ();
         return;
        inst.instruction = 0x0800010;
        neon_move_immediate ();
@@ -18985,8 +20402,22 @@ do_mve_movl (void)
  static void
  do_neon_rshift_round_imm (void)
  {
  static void
  do_neon_rshift_round_imm (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
-  struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
+   return;
+
+  enum neon_shape rs;
+  struct neon_type_el et;
+
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      rs = neon_select_shape (NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_SU_MVE | N_KEY);
+    }
+  else
+    {
+      rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+    }
    int imm = inst.operands[2].imm;
  
    /* imm == 0 case is encoded as VMOV for V{R}SHR.  */
    int imm = inst.operands[2].imm;
  
    /* imm == 0 case is encoded as VMOV for V{R}SHR.  */
@@ -19068,7 +20499,14 @@ do_neon_zip_uzp (void)
  static void
  do_neon_sat_abs_neg (void)
  {
  static void
  do_neon_sat_abs_neg (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_CC | NEON_CHECK_ARCH))
+    return;
+
+  enum neon_shape rs;
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    rs = neon_select_shape (NS_QQ, NS_NULL);
+  else
+    rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
    struct neon_type_el et = neon_check_type (2, rs,
      N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
    neon_two_same (neon_quad (rs), 1, et.size);
    struct neon_type_el et = neon_check_type (2, rs,
      N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
    neon_two_same (neon_quad (rs), 1, et.size);
@@ -19097,7 +20535,7 @@ do_neon_recip_est (void)
  static void
  do_neon_cls (void)
  {
  static void
  do_neon_cls (void)
  {
-  if (check_simd_pred_availability (0, NEON_CHECK_ARCH | NEON_CHECK_CC))
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
      return;
  
    enum neon_shape rs;
      return;
  
    enum neon_shape rs;
@@ -19114,7 +20552,7 @@ do_neon_cls (void)
  static void
  do_neon_clz (void)
  {
  static void
  do_neon_clz (void)
  {
-  if (check_simd_pred_availability (0, NEON_CHECK_ARCH | NEON_CHECK_CC))
+  if (!check_simd_pred_availability (FALSE, NEON_CHECK_ARCH | NEON_CHECK_CC))
      return;
  
    enum neon_shape rs;
      return;
  
    enum neon_shape rs;
@@ -19171,6 +20609,9 @@ do_neon_tbl_tbx (void)
  static void
  do_neon_ldm_stm (void)
  {
  static void
  do_neon_ldm_stm (void)
  {
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd)
+             && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext),
+             _(BAD_FPU));
    /* P, U and L bits are part of bitmask.  */
    int is_dbmode = (inst.instruction & (1 << 24)) != 0;
    unsigned offsetbits = inst.operands[1].imm * 2;
    /* P, U and L bits are part of bitmask.  */
    int is_dbmode = (inst.instruction & (1 << 24)) != 0;
    unsigned offsetbits = inst.operands[1].imm * 2;
@@ -19198,6 +20639,49 @@ do_neon_ldm_stm (void)
    do_vfp_cond_or_thumb ();
  }
  
    do_vfp_cond_or_thumb ();
  }
  
+/* Encoder for the "vpop" table entry.  nsyn_insert_sp () is always called
+   first.  When cpu_variant has mve_ext the instruction is dispatched through
+   do_vfp_nsyn_opcode ("vldm") and none of the checks below are applied.
+   Otherwise fpu_vfp_ext_v1xd is required (BAD_FPU if absent), operand 1's
+   imm must describe a list of 1 to 16 registers, and the instruction is
+   dispatched as "fldmias" or "fldmiad" depending on operand 1's issingle
+   flag.  */
+static void
+do_vfp_nsyn_pop (void)
+{
+  nsyn_insert_sp ();
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      /* Call, then return: a return statement with an expression is not
+         permitted in a function returning void (C11 6.8.6.4).  */
+      do_vfp_nsyn_opcode ("vldm");
+      return;
+    }
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+             _(BAD_FPU));
+
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+             _("register list must contain at least 1 and at most 16 "
+               "registers"));
+
+  if (inst.operands[1].issingle)
+    do_vfp_nsyn_opcode ("fldmias");
+  else
+    do_vfp_nsyn_opcode ("fldmiad");
+}
+
+/* Encoder for the "vpush" table entry.  nsyn_insert_sp () is always called
+   first.  When cpu_variant has mve_ext the instruction is dispatched through
+   do_vfp_nsyn_opcode ("vstmdb") and none of the checks below are applied.
+   Otherwise fpu_vfp_ext_v1xd is required (BAD_FPU if absent), operand 1's
+   imm must describe a list of 1 to 16 registers, and the instruction is
+   dispatched as "fstmdbs" or "fstmdbd" depending on operand 1's issingle
+   flag.  */
+static void
+do_vfp_nsyn_push (void)
+{
+  nsyn_insert_sp ();
+  if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    {
+      /* Call, then return: a return statement with an expression is not
+         permitted in a function returning void (C11 6.8.6.4).  */
+      do_vfp_nsyn_opcode ("vstmdb");
+      return;
+    }
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd),
+             _(BAD_FPU));
+
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+             _("register list must contain at least 1 and at most 16 "
+               "registers"));
+
+  if (inst.operands[1].issingle)
+    do_vfp_nsyn_opcode ("fstmdbs");
+  else
+    do_vfp_nsyn_opcode ("fstmdbd");
+}
+
+
  static void
  do_neon_ldr_str (void)
  {
  static void
  do_neon_ldr_str (void)
  {
@@ -19278,7 +20762,8 @@ do_vldr_vstr (void)
    /* VLDR/VSTR.  */
    else
      {
    /* VLDR/VSTR.  */
    else
      {
-      if (!mark_feature_used (&fpu_vfp_ext_v1xd))
+      if (!mark_feature_used (&fpu_vfp_ext_v1xd)
+         && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
         as_bad (_("Instruction not permitted on this architecture"));
        do_neon_ldr_str ();
      }
         as_bad (_("Instruction not permitted on this architecture"));
        do_neon_ldr_str ();
      }
@@ -19661,12 +21146,13 @@ do_vsel (void)
  static void
  do_vmaxnm (void)
  {
  static void
  do_vmaxnm (void)
  {
-  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))
+    set_pred_insn_type (OUTSIDE_PRED_INSN);
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS)
      return;
  
  
    if (try_vfp_nsyn (3, do_vfp_nsyn_fpv8) == SUCCESS)
      return;
  
-  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
+  if (!check_simd_pred_availability (TRUE, NEON_CHECK_CC | NEON_CHECK_ARCH8))
      return;
  
    neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
      return;
  
    neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
@@ -19730,12 +21216,12 @@ do_vrint_1 (enum neon_cvt_mode mode)
        if (et.type == NT_invtype)
         return;
  
        if (et.type == NT_invtype)
         return;
  
-      set_pred_insn_type (OUTSIDE_PRED_INSN);
-      NEON_ENCODE (FLOAT, inst);
-
-      if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
+      if (!check_simd_pred_availability (TRUE,
+                                        NEON_CHECK_CC | NEON_CHECK_ARCH8))
         return;
  
         return;
  
+      NEON_ENCODE (FLOAT, inst);
+
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
        inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
        inst.instruction |= HI1 (inst.operands[0].reg) << 22;
        inst.instruction |= LOW4 (inst.operands[1].reg);
@@ -19834,7 +21320,8 @@ do_vcmla (void)
               _("immediate out of range"));
    rot /= 90;
  
               _("immediate out of range"));
    rot /= 90;
  
-  if (check_simd_pred_availability (1, NEON_CHECK_ARCH8 | NEON_CHECK_CC))
+  if (!check_simd_pred_availability (TRUE,
+                                    NEON_CHECK_ARCH8 | NEON_CHECK_CC))
      return;
  
    if (inst.operands[2].isscalar)
      return;
  
    if (inst.operands[2].isscalar)
@@ -19911,8 +21398,8 @@ do_vcadd (void)
    if (et.type == NT_invtype)
      return;
  
    if (et.type == NT_invtype)
      return;
  
-  if (check_simd_pred_availability (et.type == NT_float, NEON_CHECK_ARCH8
-                                   | NEON_CHECK_CC))
+  if (!check_simd_pred_availability (et.type == NT_float,
+                                    NEON_CHECK_ARCH8 | NEON_CHECK_CC))
      return;
  
    if (et.type == NT_float)
      return;
  
    if (et.type == NT_float)
@@ -20012,6 +21499,79 @@ do_neon_dotproduct_u (void)
    return do_neon_dotproduct (1);
  }
  
    return do_neon_dotproduct (1);
  }
  
+/* Encoder for the "vusdot" entry.  The instruction is marked as not
+   allowed inside a predication block.  Two forms are handled:
+     - vector form (operand 2 is not a scalar): bit 21 is set and the
+       D/Q shape is chosen from NS_DDD / NS_QQQ;
+     - by-element form (operand 2 is a scalar): bit 25 is set, the low
+       four bits of operand 2's reg hold the element index (0 or 1), the
+       remaining bits hold the register number (below 16), and the index
+       is placed at bit 5 of the encoding.
+   The key operand type must be S8 in both forms.  */
+static void
+do_vusdot (void)
+{
+  enum neon_shape shape;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    {
+      /* Vector form.  */
+      inst.instruction |= 1 << 21;
+      shape = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+      neon_three_args (shape == NS_QQQ);
+      return;
+    }
+
+  /* By-element form.  */
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+  inst.instruction |= 1 << 25;
+
+  /* Split the packed scalar operand into element index and register.  */
+  int lane = inst.operands[2].reg & 0xf;
+  constraint (!(lane == 0 || lane == 1), _("index must be 0 or 1"));
+
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+             _("indexed register must be less than 16"));
+
+  neon_three_args (shape == NS_QQS);
+  inst.instruction |= lane << 5;
+}
+
+/* Encoder for the "vsudot" entry.  The instruction is marked as not
+   allowed inside a predication block.  Only the by-element form is
+   encoded here: when operand 2 is not a scalar nothing further is done.
+   For the by-element form bit 25 is set, the low four bits of operand
+   2's reg hold the element index (0 or 1), the remaining bits hold the
+   register number (below 16), and the index is placed at bit 5 of the
+   encoding.  The key operand type must be U8.  */
+static void
+do_vsudot (void)
+{
+  enum neon_shape shape;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    return;
+
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_U8 | N_KEY);
+  inst.instruction |= 1 << 25;
+
+  /* Split the packed scalar operand into element index and register.  */
+  int lane = inst.operands[2].reg & 0xf;
+  constraint (!(lane == 0 || lane == 1), _("index must be 0 or 1"));
+
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+             _("indexed register must be less than 16"));
+
+  neon_three_args (shape == NS_QQS);
+  inst.instruction |= lane << 5;
+}
+
+/* Encoder for the "vsmmla" entry: Q-register-only shape (NS_QQQ) with
+   an S8 key type.  The instruction is marked as not allowed inside a
+   predication block and the three register operands are encoded with
+   the quad flag set.  */
+static void
+do_vsmmla (void)
+{
+  enum neon_shape shape;
+
+  shape = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  neon_three_args (1);
+}
+
+/* Encoder for the "vummla" entry: Q-register-only shape (NS_QQQ) with
+   a U8 key type.  The instruction is marked as not allowed inside a
+   predication block and the three register operands are encoded with
+   the quad flag set.  */
+static void
+do_vummla (void)
+{
+  enum neon_shape shape;
+
+  shape = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_U8 | N_KEY);
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  neon_three_args (1);
+}
+
  /* Crypto v1 instructions.  */
  static void
  do_crypto_2op_1 (unsigned elttype, int op)
  /* Crypto v1 instructions.  */
  static void
  do_crypto_2op_1 (unsigned elttype, int op)
@@ -20201,6 +21761,46 @@ do_vjcvt (void)
    do_vfp_cond_or_thumb ();
  }
  
    do_vfp_cond_or_thumb ();
  }
  
+/* Encoder for the "vdot" entry with a BF16 key type.  Fails with
+   BAD_FPU unless mark_feature_used (&fpu_neon_ext_armv8) succeeds, then
+   marks the instruction as not allowed inside a predication block.
+     - vector form (operand 2 is not a scalar): the D/Q shape is chosen
+       from NS_DDD / NS_QQQ and the three registers are encoded;
+     - by-element form (operand 2 is a scalar): bit 25 is set, the low
+       four bits of operand 2's reg hold the element index (0 or 1), the
+       remaining bits hold the register number (below 16), and the index
+       is placed at bit 5 of the encoding.  */
+static void
+do_vdot (void)
+{
+  enum neon_shape shape;
+
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    {
+      /* Vector form.  */
+      shape = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, shape, N_EQK, N_EQK, N_BF16 | N_KEY);
+      neon_three_args (shape == NS_QQQ);
+      return;
+    }
+
+  /* By-element form.  */
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_BF16 | N_KEY);
+  inst.instruction |= 1 << 25;
+
+  /* Split the packed scalar operand into element index and register.  */
+  int lane = inst.operands[2].reg & 0xf;
+  constraint (!(lane == 0 || lane == 1), _("index must be 0 or 1"));
+
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+             _("indexed register must be less than 16"));
+
+  neon_three_args (shape == NS_QQS);
+  inst.instruction |= lane << 5;
+}
+
+/* Encoder for the "vmmla" entry: Q-register-only shape (NS_QQQ) with a
+   BF16 key type.  After the shape/type check, fails with BAD_FPU unless
+   mark_feature_used (&fpu_neon_ext_armv8) succeeds, marks the
+   instruction as not allowed inside a predication block, and encodes
+   the three register operands with the quad flag set.  */
+static void
+do_vmmla (void)
+{
+  enum neon_shape shape;
+
+  shape = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_BF16 | N_KEY);
+
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  neon_three_args (1);
+}
+
  \f
  /* Overall per-instruction processing. */
  
  \f
  /* Overall per-instruction processing. */
  
@@ -21189,6 +22789,7 @@ it_fsm_post_encode (void)
      handle_pred_state ();
  
    if (now_pred.insn_cond
      handle_pred_state ();
  
    if (now_pred.insn_cond
+      && warn_on_restrict_it
        && !now_pred.warn_deprecated
        && warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
        && !now_pred.warn_deprecated
        && warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8)
@@ -21589,7 +23190,7 @@ arm_frob_label (symbolS * sym)
       out of the jump table, and chaos would ensue.  */
    if (label_is_thumb_function_name
        && (S_GET_NAME (sym)[0] != '.' || S_GET_NAME (sym)[1] != 'L')
       out of the jump table, and chaos would ensue.  */
    if (label_is_thumb_function_name
        && (S_GET_NAME (sym)[0] != '.' || S_GET_NAME (sym)[1] != 'L')
-      && (bfd_get_section_flags (stdoutput, now_seg) & SEC_CODE) != 0)
+      && (bfd_section_flags (now_seg) & SEC_CODE) != 0)
      {
        /* When the address of a Thumb function is taken the bottom
          bit of that address should be set.  This will allow
      {
        /* When the address of a Thumb function is taken the bottom
          bit of that address should be set.  This will allow
@@ -21749,6 +23350,10 @@ static const struct reg_entry reg_names[] =
    REGDEF(mvfr0,7,VFC), REGDEF(mvfr1,6,VFC),
    REGDEF(MVFR0,7,VFC), REGDEF(MVFR1,6,VFC),
    REGDEF(mvfr2,5,VFC), REGDEF(MVFR2,5,VFC),
    REGDEF(mvfr0,7,VFC), REGDEF(mvfr1,6,VFC),
    REGDEF(MVFR0,7,VFC), REGDEF(MVFR1,6,VFC),
    REGDEF(mvfr2,5,VFC), REGDEF(MVFR2,5,VFC),
+  REGDEF(fpscr_nzcvqc,2,VFC), REGDEF(FPSCR_nzcvqc,2,VFC),
+  REGDEF(vpr,12,VFC), REGDEF(VPR,12,VFC),
+  REGDEF(fpcxt_ns,14,VFC), REGDEF(FPCXT_NS,14,VFC),
+  REGDEF(fpcxt_s,15,VFC), REGDEF(FPCXT_S,15,VFC),
  
    /* Maverick DSP coprocessor registers.  */
    REGSET(mvf,MVF),  REGSET(mvd,MVD),  REGSET(mvfx,MVFX),  REGSET(mvdx,MVDX),
  
    /* Maverick DSP coprocessor registers.  */
    REGSET(mvf,MVF),  REGSET(mvd,MVD),  REGSET(mvfx,MVFX),  REGSET(mvdx,MVDX),
@@ -22869,15 +24474,13 @@ static const struct asm_opcode insns[] =
    nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselge, _vselge, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselgt, _vselgt, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselge, _vselge, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselgt, _vselgt, 3, (RVSD, RVSD, RVSD),          vsel),
-  nUF(vmaxnm, _vmaxnm, 3, (RNSDQ, oRNSDQ, RNSDQ),      vmaxnm),
-  nUF(vminnm, _vminnm, 3, (RNSDQ, oRNSDQ, RNSDQ),      vmaxnm),
    nCE(vrintr, _vrintr, 2, (RNSDQ, oRNSDQ),             vrintr),
    nCE(vrintr, _vrintr, 2, (RNSDQ, oRNSDQ),             vrintr),
-  nCE(vrintz, _vrintr, 2, (RNSDQ, oRNSDQ),             vrintz),
-  nCE(vrintx, _vrintr, 2, (RNSDQ, oRNSDQ),             vrintx),
-  nUF(vrinta, _vrinta, 2, (RNSDQ, oRNSDQ),             vrinta),
-  nUF(vrintn, _vrinta, 2, (RNSDQ, oRNSDQ),             vrintn),
-  nUF(vrintp, _vrinta, 2, (RNSDQ, oRNSDQ),             vrintp),
-  nUF(vrintm, _vrinta, 2, (RNSDQ, oRNSDQ),             vrintm),
+  mnCE(vrintz, _vrintr, 2, (RNSDQMQ, oRNSDQMQ),                vrintz),
+  mnCE(vrintx, _vrintr, 2, (RNSDQMQ, oRNSDQMQ),                vrintx),
+  mnUF(vrinta, _vrinta, 2, (RNSDQMQ, oRNSDQMQ),                vrinta),
+  mnUF(vrintn, _vrinta, 2, (RNSDQMQ, oRNSDQMQ),                vrintn),
+  mnUF(vrintp, _vrinta, 2, (RNSDQMQ, oRNSDQMQ),                vrintp),
+  mnUF(vrintm, _vrinta, 2, (RNSDQMQ, oRNSDQMQ),                vrintm),
  
    /* Crypto v1 extensions.  */
  #undef  ARM_VARIANT
  
    /* Crypto v1 extensions.  */
  #undef  ARM_VARIANT
@@ -22901,9 +24504,9 @@ static const struct asm_opcode insns[] =
    nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
  
  #undef  ARM_VARIANT
    nUF(sha256su0, _sha2op, 2, (RNQ, RNQ), sha256su0),
  
  #undef  ARM_VARIANT
-#define ARM_VARIANT   & crc_ext_armv8
+#define ARM_VARIANT   & arm_ext_crc
  #undef  THUMB_VARIANT
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & crc_ext_armv8
+#define THUMB_VARIANT & arm_ext_crc
    TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
    TUEc("crc32h", 1200040, fac0f090, 3, (RR, oRR, RR), crc32h),
    TUEc("crc32w", 1400040, fac0f0a0, 3, (RR, oRR, RR), crc32w),
    TUEc("crc32b", 1000040, fac0f080, 3, (RR, oRR, RR), crc32b),
    TUEc("crc32h", 1200040, fac0f090, 3, (RR, oRR, RR), crc32h),
    TUEc("crc32w", 1400040, fac0f0a0, 3, (RR, oRR, RR), crc32w),
@@ -23378,11 +24981,18 @@ static const struct asm_opcode insns[] =
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_vfp_ext_v1xd  /* VFP V1xD (single precision).  */
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_vfp_ext_v1xd  /* VFP V1xD (single precision).  */
+#undef THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+ mcCE(vmrs,    ef00a10, 2, (APSR_RR, RVC),   vmrs),
+ mcCE(vmsr,    ee00a10, 2, (RVC, RR),        vmsr),
+ mcCE(fldd,    d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(fstd,    d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ mcCE(flds,    d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
+ mcCE(fsts,    d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
+#undef THUMB_VARIANT
  
    /* Moves and type conversions.  */
   cCE("fmstat", ef1fa10, 0, (),               noargs),
  
    /* Moves and type conversions.  */
   cCE("fmstat", ef1fa10, 0, (),               noargs),
- cCE("vmrs",   ef00a10, 2, (APSR_RR, RVC),   vmrs),
- cCE("vmsr",   ee00a10, 2, (RVC, RR),        vmsr),
   cCE("fsitos", eb80ac0, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("fuitos", eb80a40, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("ftosis", ebd0a40, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("fsitos", eb80ac0, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("fuitos", eb80a40, 2, (RVS, RVS),       vfp_sp_monadic),
   cCE("ftosis", ebd0a40, 2, (RVS, RVS),       vfp_sp_monadic),
@@ -23393,8 +25003,6 @@ static const struct asm_opcode insns[] =
   cCE("fmxr",   ee00a10, 2, (RVC, RR),        rn_rd),
  
    /* Memory operations.         */
   cCE("fmxr",   ee00a10, 2, (RVC, RR),        rn_rd),
  
    /* Memory operations.         */
- cCE("flds",   d100a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
- cCE("fsts",   d000a00, 2, (RVS, ADDRGLDC),  vfp_sp_ldst),
   cCE("fldmias",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fldmfds",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fldmdbs",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
   cCE("fldmias",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fldmfds",        c900a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmia),
   cCE("fldmdbs",        d300a00, 2, (RRnpctw, VRSLST),    vfp_sp_ldstmdb),
@@ -23436,8 +25044,6 @@ static const struct asm_opcode insns[] =
  
   /* Double precision load/store are still present on single precision
      implementations.  */
  
   /* Double precision load/store are still present on single precision
      implementations.  */
- cCE("fldd",   d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd",   d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
   cCE("fldmiad",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmfdd",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmdbd",        d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
   cCE("fldmiad",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmfdd",        c900b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmia),
   cCE("fldmdbd",        d300b00, 2, (RRnpctw, VRDLST),    vfp_dp_ldstmdb),
@@ -23490,6 +25096,19 @@ static const struct asm_opcode insns[] =
     Individual encoder functions perform additional architecture checks.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v1xd
     Individual encoder functions perform additional architecture checks.  */
  #undef  ARM_VARIANT
  #define ARM_VARIANT    & fpu_vfp_ext_v1xd
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_v6t2
+
+ NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+ NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
+
+ NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
+ NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_v1xd
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_v1xd
  
@@ -23499,22 +25118,11 @@ static const struct asm_opcode insns[] =
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
- NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
  
    /* Mnemonics shared by Neon and VFP.  */
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
  
    /* Mnemonics shared by Neon and VFP.  */
- nCEF(vmul,     _vmul,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mul),
- nCEF(vmla,     _vmla,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
   nCEF(vmls,     _vmls,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
  
   nCEF(vmls,     _vmls,    3, (RNSDQ, oRNSDQ, RNSDQ_RNSC), neon_mac_maybe_scalar),
  
- NCE(vldm,      c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmia,    c900b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vldmdb,    d100b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstm,      c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmia,    c800b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
- NCE(vstmdb,    d000b00, 2, (RRnpctw, VRSDLST), neon_ldm_stm),
-
   mnCEF(vcvt,     _vcvt,   3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
   nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
   MNCEF(vcvtb,  eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
   mnCEF(vcvt,     _vcvt,   3, (RNSDQMQ, RNSDQMQ, oI32z), neon_cvt),
   nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
   MNCEF(vcvtb,  eb20a40, 3, (RVSDMQ, RVSDMQ, oI32b), neon_cvtb),
@@ -23543,8 +25151,8 @@ static const struct asm_opcode insns[] =
   NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
  
   /* New backported fma/fms instructions optional in v8.2.  */
   NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
  
   /* New backported fma/fms instructions optional in v8.2.  */
- NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
- NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
@@ -23555,28 +25163,19 @@ static const struct asm_opcode insns[] =
    /* integer ops, valid types S8 S16 S32 U8 U16 U32.  */
   NUF(vaba,      0000710, 3, (RNDQ, RNDQ,  RNDQ), neon_dyadic_i_su),
   NUF(vabaq,     0000710, 3, (RNQ,  RNQ,   RNQ),  neon_dyadic_i_su),
    /* integer ops, valid types S8 S16 S32 U8 U16 U32.  */
   NUF(vaba,      0000710, 3, (RNDQ, RNDQ,  RNDQ), neon_dyadic_i_su),
   NUF(vabaq,     0000710, 3, (RNQ,  RNQ,   RNQ),  neon_dyadic_i_su),
- NUF(vhadd,     0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
   NUF(vhaddq,    0000000, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
   NUF(vhaddq,    0000000, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
- NUF(vrhadd,    0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
   NUF(vrhaddq,   0000100, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
   NUF(vrhaddq,   0000100, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
- NUF(vhsub,     0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
   NUF(vhsubq,    0000200, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
    /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64.  */
   NUF(vhsubq,    0000200, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
    /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64.  */
- NUF(vqadd,     0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
   NUF(vqaddq,    0000010, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
   NUF(vqaddq,    0000010, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
- NUF(vqsub,     0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
   NUF(vqsubq,    0000210, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
   NUF(vqsubq,    0000210, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
- NUF(vrshl,     0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl),
   NUF(vrshlq,    0000500, 3, (RNQ,  oRNQ,  RNQ),  neon_rshl),
   NUF(vrshlq,    0000500, 3, (RNQ,  oRNQ,  RNQ),  neon_rshl),
- NUF(vqrshl,    0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_rshl),
   NUF(vqrshlq,   0000510, 3, (RNQ,  oRNQ,  RNQ),  neon_rshl),
    /* If not immediate, fall back to neon_dyadic_i64_su.
   NUF(vqrshlq,   0000510, 3, (RNQ,  oRNQ,  RNQ),  neon_rshl),
    /* If not immediate, fall back to neon_dyadic_i64_su.
-     shl_imm should accept I8 I16 I32 I64,
-     qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64.  */
- nUF(vshl,      _vshl,    3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm),
- nUF(vshlq,     _vshl,    3, (RNQ,  oRNQ,  RNDQ_I63b), neon_shl_imm),
- nUF(vqshl,     _vqshl,   3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm),
- nUF(vqshlq,    _vqshl,   3, (RNQ,  oRNQ,  RNDQ_I63b), neon_qshl_imm),
+     shl should accept I8 I16 I32 I64,
+     qshl should accept S8 S16 S32 S64 U8 U16 U32 U64.  */
+ nUF(vshlq,     _vshl,    3, (RNQ,  oRNQ,  RNDQ_I63b), neon_shl),
+ nUF(vqshlq,    _vqshl,   3, (RNQ,  oRNQ,  RNDQ_I63b), neon_qshl),
    /* Logic ops, types optional & ignored.  */
   nUF(vandq,     _vand,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
   nUF(vbicq,     _vbic,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
    /* Logic ops, types optional & ignored.  */
   nUF(vandq,     _vand,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
   nUF(vbicq,     _vbic,    3, (RNQ,  oRNQ,  RNDQ_Ibig), neon_logic),
@@ -23592,9 +25191,7 @@ static const struct asm_opcode insns[] =
   NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
    /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32.  */
   nUF(vabdq,     _vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
   NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
    /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32.  */
   nUF(vabdq,     _vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
- nUF(vmax,      _vmax,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
   nUF(vmaxq,     _vmax,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
   nUF(vmaxq,     _vmax,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
- nUF(vmin,      _vmin,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
   nUF(vminq,     _vmin,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
    /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall
       back to neon_dyadic_if_su.  */
   nUF(vminq,     _vmin,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
    /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall
       back to neon_dyadic_if_su.  */
@@ -23625,9 +25222,7 @@ static const struct asm_opcode insns[] =
    /* VMUL takes I8 I16 I32 F32 P8.  */
   nUF(vmulq,     _vmul,     3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_mul),
    /* VQD{R}MULH takes S16 S32.  */
    /* VMUL takes I8 I16 I32 F32 P8.  */
   nUF(vmulq,     _vmul,     3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_mul),
    /* VQD{R}MULH takes S16 S32.  */
- nUF(vqdmulh,   _vqdmulh,  3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
   nUF(vqdmulhq,  _vqdmulh,  3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
   nUF(vqdmulhq,  _vqdmulh,  3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
- nUF(vqrdmulh,  _vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
   nUF(vqrdmulhq, _vqrdmulh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
   NUF(vacge,     0000e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
   NUF(vacgeq,    0000e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute),
   nUF(vqrdmulhq, _vqrdmulh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
   NUF(vacge,     0000e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
   NUF(vacgeq,    0000e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute),
@@ -23642,7 +25237,6 @@ static const struct asm_opcode insns[] =
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   /* ARM v8.1 extension.  */
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   /* ARM v8.1 extension.  */
- nUF (vqrdmlah,  _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
   nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
@@ -23654,21 +25248,16 @@ static const struct asm_opcode insns[] =
    /* Data processing with two registers and a shift amount.  */
    /* Right shifts, and variants with rounding.
       Types accepted S8 S16 S32 S64 U8 U16 U32 U64.  */
    /* Data processing with two registers and a shift amount.  */
    /* Right shifts, and variants with rounding.
       Types accepted S8 S16 S32 S64 U8 U16 U32 U64.  */
- NUF(vshr,      0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
   NUF(vshrq,     0800010, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
   NUF(vshrq,     0800010, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
- NUF(vrshr,     0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
   NUF(vrshrq,    0800210, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
   NUF(vsra,      0800110, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
   NUF(vsraq,     0800110, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
   NUF(vrsra,     0800310, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
   NUF(vrsraq,    0800310, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
    /* Shift and insert. Sizes accepted 8 16 32 64.  */
   NUF(vrshrq,    0800210, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
   NUF(vsra,      0800110, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
   NUF(vsraq,     0800110, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
   NUF(vrsra,     0800310, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
   NUF(vrsraq,    0800310, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
    /* Shift and insert. Sizes accepted 8 16 32 64.  */
- NUF(vsli,      1800510, 3, (RNDQ, oRNDQ, I63), neon_sli),
   NUF(vsliq,     1800510, 3, (RNQ,  oRNQ,  I63), neon_sli),
   NUF(vsliq,     1800510, 3, (RNQ,  oRNQ,  I63), neon_sli),
- NUF(vsri,      1800410, 3, (RNDQ, oRNDQ, I64), neon_sri),
   NUF(vsriq,     1800410, 3, (RNQ,  oRNQ,  I64), neon_sri),
    /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64.  */
   NUF(vsriq,     1800410, 3, (RNQ,  oRNQ,  I64), neon_sri),
    /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64.  */
- NUF(vqshlu,    1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm),
   NUF(vqshluq,   1800610, 3, (RNQ,  oRNQ,  I63), neon_qshlu_imm),
    /* Right shift immediate, saturating & narrowing, with rounding variants.
       Types accepted S16 S32 S64 U16 U32 U64.  */
   NUF(vqshluq,   1800610, 3, (RNQ,  oRNQ,  I63), neon_qshlu_imm),
    /* Right shift immediate, saturating & narrowing, with rounding variants.
       Types accepted S16 S32 S64 U16 U32 U64.  */
@@ -23685,7 +25274,6 @@ static const struct asm_opcode insns[] =
    /* CVT with optional immediate for fixed-point variant.  */
   nUF(vcvtq,     _vcvt,    3, (RNQ, RNQ, oI32b), neon_cvt),
  
    /* CVT with optional immediate for fixed-point variant.  */
   nUF(vcvtq,     _vcvt,    3, (RNQ, RNQ, oI32b), neon_cvt),
  
- nUF(vmvn,      _vmvn,    2, (RNDQ, RNDQ_Ibig), neon_mvn),
   nUF(vmvnq,     _vmvn,    2, (RNQ,  RNDQ_Ibig), neon_mvn),
  
    /* Data processing, three registers of different lengths.  */
   nUF(vmvnq,     _vmvn,    2, (RNQ,  RNDQ_Ibig), neon_mvn),
  
    /* Data processing, three registers of different lengths.  */
@@ -23717,11 +25305,8 @@ static const struct asm_opcode insns[] =
  
    /* Two registers, miscellaneous.  */
    /* Reverse. Sizes 8 16 32 (must be < size in opcode).  */
  
    /* Two registers, miscellaneous.  */
    /* Reverse. Sizes 8 16 32 (must be < size in opcode).  */
- NUF(vrev64,    1b00000, 2, (RNDQ, RNDQ),     neon_rev),
   NUF(vrev64q,   1b00000, 2, (RNQ,  RNQ),      neon_rev),
   NUF(vrev64q,   1b00000, 2, (RNQ,  RNQ),      neon_rev),
- NUF(vrev32,    1b00080, 2, (RNDQ, RNDQ),     neon_rev),
   NUF(vrev32q,   1b00080, 2, (RNQ,  RNQ),      neon_rev),
   NUF(vrev32q,   1b00080, 2, (RNQ,  RNQ),      neon_rev),
- NUF(vrev16,    1b00100, 2, (RNDQ, RNDQ),     neon_rev),
   NUF(vrev16q,   1b00100, 2, (RNQ,  RNQ),      neon_rev),
    /* Vector replicate. Sizes 8 16 32.  */
   nCE(vdupq,     _vdup,    2, (RNQ,  RR_RNSC),  neon_dup),
   NUF(vrev16q,   1b00100, 2, (RNQ,  RNQ),      neon_rev),
    /* Vector replicate. Sizes 8 16 32.  */
   nCE(vdupq,     _vdup,    2, (RNQ,  RR_RNSC),  neon_dup),
@@ -23739,9 +25324,7 @@ static const struct asm_opcode insns[] =
   NUF(vuzp,      1b20100, 2, (RNDQ, RNDQ),     neon_zip_uzp),
   NUF(vuzpq,     1b20100, 2, (RNQ,  RNQ),      neon_zip_uzp),
    /* VQABS / VQNEG. Types S8 S16 S32.  */
   NUF(vuzp,      1b20100, 2, (RNDQ, RNDQ),     neon_zip_uzp),
   NUF(vuzpq,     1b20100, 2, (RNQ,  RNQ),      neon_zip_uzp),
    /* VQABS / VQNEG. Types S8 S16 S32.  */
- NUF(vqabs,     1b00700, 2, (RNDQ, RNDQ),     neon_sat_abs_neg),
   NUF(vqabsq,    1b00700, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
   NUF(vqabsq,    1b00700, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
- NUF(vqneg,     1b00780, 2, (RNDQ, RNDQ),     neon_sat_abs_neg),
   NUF(vqnegq,    1b00780, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
    /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32.  */
   NUF(vpadal,    1b00600, 2, (RNDQ, RNDQ),     neon_pair_long),
   NUF(vqnegq,    1b00780, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
    /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32.  */
   NUF(vpadal,    1b00600, 2, (RNDQ, RNDQ),     neon_pair_long),
@@ -23819,11 +25402,13 @@ static const struct asm_opcode insns[] =
  #define ARM_VARIANT    & fpu_vfp_ext_fma
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_fma
  #define ARM_VARIANT    & fpu_vfp_ext_fma
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_vfp_ext_fma
- /* Mnemonics shared by Neon and VFP.  These are included in the
+ /* Mnemonics shared by Neon, VFP, MVE and BF16.  These are included in the
      VFP FMA variant; NEON and VFP FMA always includes the NEON
      FMA instructions.  */
      VFP FMA variant; NEON and VFP FMA always includes the NEON
      FMA instructions.  */
- nCEF(vfma,     _vfma,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
- nCEF(vfms,     _vfms,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ mnCEF(vfma,     _vfma,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac),
+ TUF ("vfmat",    c300850,    fc300850,  3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma),
+ mnCEF(vfms,     _vfms,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ),  neon_fmac),
+
   /* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
      the v form should always be used.  */
   cCE("ffmas",  ea00a00, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
   /* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
      the v form should always be used.  */
   cCE("ffmas",  ea00a00, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
@@ -24191,6 +25776,16 @@ static const struct asm_opcode insns[] =
   /* Armv8.1-M Mainline instructions.  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & arm_ext_v8_1m_main
   /* Armv8.1-M Mainline instructions.  */
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & arm_ext_v8_1m_main
+ toU("cinc",  _cinc,  3, (RRnpcsp, RR_ZR, COND),       t_cond),
+ toU("cinv",  _cinv,  3, (RRnpcsp, RR_ZR, COND),       t_cond),
+ toU("cneg",  _cneg,  3, (RRnpcsp, RR_ZR, COND),       t_cond),
+ toU("csel",  _csel,  4, (RRnpcsp, RR_ZR, RR_ZR, COND),        t_cond),
+ toU("csetm", _csetm, 2, (RRnpcsp, COND),              t_cond),
+ toU("cset",  _cset,  2, (RRnpcsp, COND),              t_cond),
+ toU("csinc", _csinc, 4, (RRnpcsp, RR_ZR, RR_ZR, COND),        t_cond),
+ toU("csinv", _csinv, 4, (RRnpcsp, RR_ZR, RR_ZR, COND),        t_cond),
+ toU("csneg", _csneg, 4, (RRnpcsp, RR_ZR, RR_ZR, COND),        t_cond),
+
   toC("bf",     _bf,    2, (EXPs, EXPs),             t_branch_future),
   toU("bfcsel", _bfcsel,        4, (EXPs, EXPs, EXPs, COND), t_branch_future),
   toC("bfx",    _bfx,   2, (EXPs, RRnpcsp),          t_branch_future),
   toC("bf",     _bf,    2, (EXPs, EXPs),             t_branch_future),
   toU("bfcsel", _bfcsel,        4, (EXPs, EXPs, EXPs, COND), t_branch_future),
   toC("bfx",    _bfx,   2, (EXPs, RRnpcsp),          t_branch_future),
@@ -24206,6 +25801,21 @@ static const struct asm_opcode insns[] =
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & mve_ext
  
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT & mve_ext
+ ToC("lsll",   ea50010d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift),
+ ToC("lsrl",   ea50011f, 3, (RRe, RRo, I32),         mve_scalar_shift),
+ ToC("asrl",   ea50012d, 3, (RRe, RRo, RRnpcsp_I32), mve_scalar_shift),
+ ToC("uqrshll",        ea51010d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1),
+ ToC("sqrshrl",        ea51012d, 4, (RRe, RRo, I48_I64, RRnpcsp), mve_scalar_shift1),
+ ToC("uqshll", ea51010f, 3, (RRe, RRo, I32),         mve_scalar_shift),
+ ToC("urshrl", ea51011f, 3, (RRe, RRo, I32),         mve_scalar_shift),
+ ToC("srshrl", ea51012f, 3, (RRe, RRo, I32),         mve_scalar_shift),
+ ToC("sqshll", ea51013f, 3, (RRe, RRo, I32),         mve_scalar_shift),
+ ToC("uqrshl", ea500f0d, 2, (RRnpcsp, RRnpcsp),      mve_scalar_shift),
+ ToC("sqrshr", ea500f2d, 2, (RRnpcsp, RRnpcsp),      mve_scalar_shift),
+ ToC("uqshl",  ea500f0f, 2, (RRnpcsp, I32),          mve_scalar_shift),
+ ToC("urshr",  ea500f1f, 2, (RRnpcsp, I32),          mve_scalar_shift),
+ ToC("srshr",  ea500f2f, 2, (RRnpcsp, I32),          mve_scalar_shift),
+ ToC("sqshl",  ea500f3f, 2, (RRnpcsp, I32),          mve_scalar_shift),
  
   ToC("vpt",    ee410f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
   ToC("vptt",   ee018f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
  
   ToC("vpt",    ee410f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
   ToC("vptt",   ee018f00, 3, (COND, RMQ, RMQRZ), mve_vpt),
@@ -24240,6 +25850,8 @@ static const struct asm_opcode insns[] =
   ToC("vpsteee",        fe712f4d, 0, (), mve_vpt),
  
   /* MVE and MVE FP only.  */
   ToC("vpsteee",        fe712f4d, 0, (), mve_vpt),
  
   /* MVE and MVE FP only.  */
+ mToC("vhcadd",        ee000f00,   4, (RMQ, RMQ, RMQ, EXPi),             mve_vhcadd),
+ mCEF(vctp,    _vctp,      1, (RRnpc),                           mve_vctp),
   mCEF(vadc,    _vadc,      3, (RMQ, RMQ, RMQ),                   mve_vadc),
   mCEF(vadci,   _vadci,     3, (RMQ, RMQ, RMQ),                   mve_vadc),
   mToC("vsbc",  fe300f00,   3, (RMQ, RMQ, RMQ),                   mve_vsbc),
   mCEF(vadc,    _vadc,      3, (RMQ, RMQ, RMQ),                   mve_vadc),
   mCEF(vadci,   _vadci,     3, (RMQ, RMQ, RMQ),                   mve_vadc),
   mToC("vsbc",  fe300f00,   3, (RMQ, RMQ, RMQ),                   mve_vsbc),
@@ -24289,16 +25901,97 @@ static const struct asm_opcode insns[] =
   mCEF(vdwdup,  _vdwdup,    4, (RMQ, RRe, RR, EXPi),              mve_viddup),
   mCEF(vidup,   _vidup,     3, (RMQ, RRe, EXPi),                  mve_viddup),
   mCEF(viwdup,  _viwdup,    4, (RMQ, RRe, RR, EXPi),              mve_viddup),
   mCEF(vdwdup,  _vdwdup,    4, (RMQ, RRe, RR, EXPi),              mve_viddup),
   mCEF(vidup,   _vidup,     3, (RMQ, RRe, EXPi),                  mve_viddup),
   mCEF(viwdup,  _viwdup,    4, (RMQ, RRe, RR, EXPi),              mve_viddup),
+ mToC("vmaxa", ee330e81,   2, (RMQ, RMQ),                        mve_vmaxa_vmina),
+ mToC("vmina", ee331e81,   2, (RMQ, RMQ),                        mve_vmaxa_vmina),
+ mCEF(vmaxv,   _vmaxv,   2, (RR, RMQ),                           mve_vmaxv),
+ mCEF(vmaxav,  _vmaxav,  2, (RR, RMQ),                           mve_vmaxv),
+ mCEF(vminv,   _vminv,   2, (RR, RMQ),                           mve_vmaxv),
+ mCEF(vminav,  _vminav,  2, (RR, RMQ),                           mve_vmaxv),
+
+ mCEF(vmlaldav,          _vmlaldav,    4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlaldava,  _vmlaldava,  4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlaldavx,  _vmlaldavx,  4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlaldavax, _vmlaldavax, 4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlalv,    _vmlaldav,    4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlalva,   _vmlaldava,   4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlsldav,          _vmlsldav,    4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlsldava,  _vmlsldava,  4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlsldavx,  _vmlsldavx,  4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mCEF(vmlsldavax, _vmlsldavax, 4, (RRe, RRo, RMQ, RMQ),        mve_vmlaldav),
+ mToC("vrmlaldavh", ee800f00,     4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mToC("vrmlaldavha",ee800f20,     4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlaldavhx,  _vrmlaldavhx,  4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlaldavhax, _vrmlaldavhax, 4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mToC("vrmlalvh",   ee800f00,     4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mToC("vrmlalvha",  ee800f20,     4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlsldavh,   _vrmlsldavh,   4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlsldavha,  _vrmlsldavha,  4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlsldavhx,  _vrmlsldavhx,  4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+ mCEF(vrmlsldavhax, _vrmlsldavhax, 4, (RRe, RR, RMQ, RMQ),  mve_vrmlaldavh),
+
+ mToC("vmlas",   ee011e40,     3, (RMQ, RMQ, RR),              mve_vmlas),
+ mToC("vmulh",   ee010e01,     3, (RMQ, RMQ, RMQ),             mve_vmulh),
+ mToC("vrmulh",          ee011e01,     3, (RMQ, RMQ, RMQ),             mve_vmulh),
+ mToC("vpnot",   fe310f4d,     0, (),                          mve_vpnot),
+ mToC("vpsel",   fe310f01,     3, (RMQ, RMQ, RMQ),             mve_vpsel),
+
+ mToC("vqdmladh",  ee000e00,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqdmladhx", ee001e00,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqrdmladh", ee000e01,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqrdmladhx",ee001e01,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqdmlsdh",  fe000e00,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqdmlsdhx", fe001e00,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqrdmlsdh", fe000e01,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqrdmlsdhx",fe001e01,   3, (RMQ, RMQ, RMQ),             mve_vqdmladh),
+ mToC("vqdmlah",   ee000e60,   3, (RMQ, RMQ, RR),              mve_vqdmlah),
+ mToC("vqdmlash",  ee001e60,   3, (RMQ, RMQ, RR),              mve_vqdmlah),
+ mToC("vqrdmlash", ee001e40,   3, (RMQ, RMQ, RR),              mve_vqdmlah),
+ mToC("vqdmullt",  ee301f00,   3, (RMQ, RMQ, RMQRR),           mve_vqdmull),
+ mToC("vqdmullb",  ee300f00,   3, (RMQ, RMQ, RMQRR),           mve_vqdmull),
+ mCEF(vqmovnt,   _vqmovnt,     2, (RMQ, RMQ),                  mve_vqmovn),
+ mCEF(vqmovnb,   _vqmovnb,     2, (RMQ, RMQ),                  mve_vqmovn),
+ mCEF(vqmovunt,          _vqmovunt,    2, (RMQ, RMQ),                  mve_vqmovn),
+ mCEF(vqmovunb,          _vqmovunb,    2, (RMQ, RMQ),                  mve_vqmovn),
+
+ mCEF(vshrnt,    _vshrnt,      3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vshrnb,    _vshrnb,      3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vrshrnt,   _vrshrnt,     3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vrshrnb,   _vrshrnb,     3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqshrnt,   _vqrshrnt,    3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqshrnb,   _vqrshrnb,    3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqshrunt,          _vqrshrunt,   3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqshrunb,          _vqrshrunb,   3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqrshrnt,          _vqrshrnt,    3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqrshrnb,          _vqrshrnb,    3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqrshrunt,  _vqrshrunt,  3, (RMQ, RMQ, I32z),    mve_vshrn),
+ mCEF(vqrshrunb,  _vqrshrunb,  3, (RMQ, RMQ, I32z),    mve_vshrn),
+
+ mToC("vshlc",     eea00fc0,      3, (RMQ, RR, I32z),      mve_vshlc),
+ mToC("vshllt",            ee201e00,      3, (RMQ, RMQ, I32),      mve_vshll),
+ mToC("vshllb",            ee200e00,      3, (RMQ, RMQ, I32),      mve_vshll),
+
+ toU("dlstp",  _dlstp, 2, (LR, RR),      t_loloop),
+ toU("wlstp",  _wlstp, 3, (LR, RR, EXP), t_loloop),
+ toU("letp",   _letp,  2, (LR, EXP),     t_loloop),
+ toU("lctp",   _lctp,  0, (),            t_loloop),
  
  #undef THUMB_VARIANT
  #define THUMB_VARIANT & mve_fp_ext
   mToC("vcmul", ee300e00,   4, (RMQ, RMQ, RMQ, EXPi),             mve_vcmul),
   mToC("vfmas", ee311e40,   3, (RMQ, RMQ, RR),                    mve_vfmas),
  
  #undef THUMB_VARIANT
  #define THUMB_VARIANT & mve_fp_ext
   mToC("vcmul", ee300e00,   4, (RMQ, RMQ, RMQ, EXPi),             mve_vcmul),
   mToC("vfmas", ee311e40,   3, (RMQ, RMQ, RR),                    mve_vfmas),
+ mToC("vmaxnma", ee3f0e81, 2, (RMQ, RMQ),                        mve_vmaxnma_vminnma),
+ mToC("vminnma", ee3f1e81, 2, (RMQ, RMQ),                        mve_vmaxnma_vminnma),
+ mToC("vmaxnmv", eeee0f00, 2, (RR, RMQ),                         mve_vmaxnmv),
+ mToC("vmaxnmav",eeec0f00, 2, (RR, RMQ),                         mve_vmaxnmv),
+ mToC("vminnmv", eeee0f80, 2, (RR, RMQ),                         mve_vmaxnmv),
+ mToC("vminnmav",eeec0f80, 2, (RR, RMQ),                         mve_vmaxnmv),
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_vfp_ext_v1
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
  
  #undef  ARM_VARIANT
  #define ARM_VARIANT  & fpu_vfp_ext_v1
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & arm_ext_v6t2
+ mnCEF(vmla,     _vmla,    3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mac_maybe_scalar),
+ mnCEF(vmul,     _vmul,    3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), neon_mul),
  
   mcCE(fcpyd,   eb00b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
  
  
   mcCE(fcpyd,   eb00b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
  
@@ -24337,6 +26030,8 @@ static const struct asm_opcode insns[] =
   mnUF(vcvtp,  _vcvta,  2, (RNSDQMQ, oRNSDQMQ),         neon_cvtp),
   mnUF(vcvtn,  _vcvta,  3, (RNSDQMQ, oRNSDQMQ, oI32z),  neon_cvtn),
   mnUF(vcvtm,  _vcvta,  2, (RNSDQMQ, oRNSDQMQ),         neon_cvtm),
   mnUF(vcvtp,  _vcvta,  2, (RNSDQMQ, oRNSDQMQ),         neon_cvtp),
   mnUF(vcvtn,  _vcvta,  3, (RNSDQMQ, oRNSDQMQ, oI32z),  neon_cvtn),
   mnUF(vcvtm,  _vcvta,  2, (RNSDQMQ, oRNSDQMQ),         neon_cvtm),
+ mnUF(vmaxnm, _vmaxnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ),        vmaxnm),
+ mnUF(vminnm, _vminnm, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ),        vmaxnm),
  
  #undef ARM_VARIANT
  #define ARM_VARIANT & fpu_neon_ext_v1
  
  #undef ARM_VARIANT
  #define ARM_VARIANT & fpu_neon_ext_v1
@@ -24352,6 +26047,31 @@ static const struct asm_opcode insns[] =
   MNUF(vcls,      1b00400,        2, (RNDQMQ, RNDQMQ),               neon_cls),
   MNUF(vclz,      1b00480,        2, (RNDQMQ, RNDQMQ),               neon_clz),
   mnCE(vdup,      _vdup,                  2, (RNDQMQ, RR_RNSC),              neon_dup),
   MNUF(vcls,      1b00400,        2, (RNDQMQ, RNDQMQ),               neon_cls),
   MNUF(vclz,      1b00480,        2, (RNDQMQ, RNDQMQ),               neon_clz),
   mnCE(vdup,      _vdup,                  2, (RNDQMQ, RR_RNSC),              neon_dup),
+ MNUF(vhadd,     00000000,       3, (RNDQMQ, oRNDQMQ, RNDQMQR),  neon_dyadic_i_su),
+ MNUF(vrhadd,    00000100,       3, (RNDQMQ, oRNDQMQ, RNDQMQ),   neon_dyadic_i_su),
+ MNUF(vhsub,     00000200,       3, (RNDQMQ, oRNDQMQ, RNDQMQR),  neon_dyadic_i_su),
+ mnUF(vmin,      _vmin,    3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ mnUF(vmax,      _vmax,    3, (RNDQMQ, oRNDQMQ, RNDQMQ), neon_dyadic_if_su),
+ MNUF(vqadd,     0000010,  3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ MNUF(vqsub,     0000210,  3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_dyadic_i64_su),
+ mnUF(vmvn,      _vmvn,    2, (RNDQMQ, RNDQMQ_Ibig), neon_mvn),
+ MNUF(vqabs,     1b00700,  2, (RNDQMQ, RNDQMQ),     neon_sat_abs_neg),
+ MNUF(vqneg,     1b00780,  2, (RNDQMQ, RNDQMQ),     neon_sat_abs_neg),
+ mnUF(vqrdmlah,  _vqrdmlah,3, (RNDQMQ, oRNDQMQ, RNDQ_RNSC_RR), neon_qrdmlah),
+ mnUF(vqdmulh,   _vqdmulh, 3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ mnUF(vqrdmulh,  _vqrdmulh,3, (RNDQMQ, oRNDQMQ, RNDQMQ_RNSC_RR), neon_qdmulh),
+ MNUF(vqrshl,    0000510,  3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vrshl,     0000500,  3, (RNDQMQ, oRNDQMQ, RNDQMQR), neon_rshl),
+ MNUF(vshr,      0800010,  3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vrshr,     0800210,  3, (RNDQMQ, oRNDQMQ, I64z), neon_rshift_round_imm),
+ MNUF(vsli,      1800510,  3, (RNDQMQ, oRNDQMQ, I63),  neon_sli),
+ MNUF(vsri,      1800410,  3, (RNDQMQ, oRNDQMQ, I64z), neon_sri),
+ MNUF(vrev64,    1b00000,  2, (RNDQMQ, RNDQMQ),     neon_rev),
+ MNUF(vrev32,    1b00080,  2, (RNDQMQ, RNDQMQ),     neon_rev),
+ MNUF(vrev16,    1b00100,  2, (RNDQMQ, RNDQMQ),     neon_rev),
+ mnUF(vshl,     _vshl,    3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_shl),
+ mnUF(vqshl,     _vqshl,   3, (RNDQMQ, oRNDQMQ, RNDQMQ_I63b_RR), neon_qshl),
+ MNUF(vqshlu,    1800610,  3, (RNDQMQ, oRNDQMQ, I63),           neon_qshlu_imm),
  
  #undef ARM_VARIANT
  #define ARM_VARIANT & arm_ext_v8_3
  
  #undef ARM_VARIANT
  #define ARM_VARIANT & arm_ext_v8_3
@@ -24359,6 +26079,24 @@ static const struct asm_opcode insns[] =
  #define        THUMB_VARIANT & arm_ext_v6t2_v8m
   MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
   MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
  #define        THUMB_VARIANT & arm_ext_v6t2_v8m
   MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
   MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT &arm_ext_bf16
+#undef THUMB_VARIANT
+#define        THUMB_VARIANT &arm_ext_bf16
+ TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
+ TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
+ TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
+
+#undef ARM_VARIANT
+#define ARM_VARIANT &arm_ext_i8mm
+#undef THUMB_VARIANT
+#define        THUMB_VARIANT &arm_ext_i8mm
+ TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
+ TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
  };
  #undef ARM_VARIANT
  #undef THUMB_VARIANT
@@ -25088,7 +26826,7 @@ arm_init_frag (fragS * fragP, int max_chars)
  
    /* PR 21809: Do not set a mapping state for debug sections
       - it just confuses other tools.  */
  
    /* PR 21809: Do not set a mapping state for debug sections
       - it just confuses other tools.  */
-  if (bfd_get_section_flags (NULL, now_seg) & SEC_DEBUGGING)
+  if (bfd_section_flags (now_seg) & SEC_DEBUGGING)
      return;
  
    frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
      return;
  
    frag_thumb_mode = fragP->tc_frag_data.thumb_mode ^ MODE_RECORDED;
@@ -26511,11 +28249,12 @@ md_apply_fix (fixS *  fixP,
        break;
  
      case BFD_RELOC_ARM_SMC:
        break;
  
      case BFD_RELOC_ARM_SMC:
-      if (((unsigned long) value) > 0xffff)
+      if (((unsigned long) value) > 0xf)
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("invalid smc expression"));
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("invalid smc expression"));
+
        newval = md_chars_to_number (buf, INSN_SIZE);
        newval = md_chars_to_number (buf, INSN_SIZE);
-      newval |= (value & 0xf) | ((value & 0xfff0) << 4);
+      newval |= (value & 0xf);
        md_number_to_chars (buf, newval, INSN_SIZE);
        break;
  
        md_number_to_chars (buf, newval, INSN_SIZE);
        break;
  
@@ -26684,7 +28423,7 @@ md_apply_fix (fixS *    fixP,
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH9: /* Conditional branch. */
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH9: /* Conditional branch. */
-      if ((value & ~0xff) && ((value & ~0xff) != ~0xff))
+      if (out_of_range_p (value, 8))
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
@@ -26696,7 +28435,7 @@ md_apply_fix (fixS *    fixP,
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH12: /* Unconditional branch.  */
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH12: /* Unconditional branch.  */
-      if ((value & ~0x7ff) && ((value & ~0x7ff) != ~0x7ff))
+      if (out_of_range_p (value, 11))
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
@@ -26707,6 +28446,7 @@ md_apply_fix (fixS *    fixP,
         }
        break;
  
         }
        break;
  
+    /* This relocation is misnamed, it should be BRANCH21.  */
      case BFD_RELOC_THUMB_PCREL_BRANCH20:
        if (fixP->fx_addsy
           && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
      case BFD_RELOC_THUMB_PCREL_BRANCH20:
        if (fixP->fx_addsy
           && (S_GET_SEGMENT (fixP->fx_addsy) == seg)
@@ -26717,7 +28457,7 @@ md_apply_fix (fixS *    fixP,
           /* Force a relocation for a branch 20 bits wide.  */
           fixP->fx_done = 0;
         }
           /* Force a relocation for a branch 20 bits wide.  */
           fixP->fx_done = 0;
         }
-      if ((value & ~0x1fffff) && ((value & ~0x0fffff) != ~0x0fffff))
+      if (out_of_range_p (value, 20))
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("conditional branch out of range"));
  
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("conditional branch out of range"));
  
@@ -26796,12 +28536,11 @@ md_apply_fix (fixS *  fixP,
          fixP->fx_r_type = BFD_RELOC_THUMB_PCREL_BRANCH23;
  #endif
  
          fixP->fx_r_type = BFD_RELOC_THUMB_PCREL_BRANCH23;
  #endif
  
-      if ((value & ~0x3fffff) && ((value & ~0x3fffff) != ~0x3fffff))
+      if (out_of_range_p (value, 22))
         {
           if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
         {
           if (!(ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6t2)))
             as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
-         else if ((value & ~0x1ffffff)
-                  && ((value & ~0x1ffffff) != ~0x1ffffff))
+         else if (out_of_range_p (value, 24))
             as_bad_where (fixP->fx_file, fixP->fx_line,
                           _("Thumb2 branch out of range"));
         }
             as_bad_where (fixP->fx_file, fixP->fx_line,
                           _("Thumb2 branch out of range"));
         }
@@ -26812,7 +28551,7 @@ md_apply_fix (fixS *    fixP,
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH25:
        break;
  
      case BFD_RELOC_THUMB_PCREL_BRANCH25:
-      if ((value & ~0x0ffffff) && ((value & ~0x0ffffff) != ~0x0ffffff))
+      if (out_of_range_p (value, 24))
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
         as_bad_where (fixP->fx_file, fixP->fx_line, BAD_RANGE);
  
        if (fixP->fx_done || !seg->use_rela_p)
@@ -27656,9 +29395,10 @@ md_apply_fix (fixS *   fixP,
         }
  
        bfd_vma insn = get_thumb32_insn (buf);
         }
  
        bfd_vma insn = get_thumb32_insn (buf);
-      /* le lr, <label> or le <label> */
+      /* le lr, <label>, le <label> or letp lr, <label> */
        if (((insn & 0xffffffff) == 0xf00fc001)
        if (((insn & 0xffffffff) == 0xf00fc001)
-         || ((insn & 0xffffffff) == 0xf02fc001))
+         || ((insn & 0xffffffff) == 0xf02fc001)
+         || ((insn & 0xffffffff) == 0xf01fc001))
         value = -value;
  
        if (v8_1_branch_value_check (value, 12, FALSE) == FAIL)
         value = -value;
  
        if (v8_1_branch_value_check (value, 12, FALSE) == FAIL)
@@ -28614,9 +30354,8 @@ md_begin (void)
  
         if (sec != NULL)
           {
  
         if (sec != NULL)
           {
-           bfd_set_section_flags
-             (stdoutput, sec, SEC_READONLY | SEC_DEBUGGING /* | SEC_HAS_CONTENTS */);
-           bfd_set_section_size (stdoutput, sec, 0);
+           bfd_set_section_flags (sec, SEC_READONLY | SEC_DEBUGGING);
+           bfd_set_section_size (sec, 0);
             bfd_set_section_contents (stdoutput, sec, NULL, 0, 0);
           }
        }
             bfd_set_section_contents (stdoutput, sec, NULL, 0, 0);
           }
        }
@@ -28792,6 +30531,11 @@ struct arm_option_table arm_opts[] =
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
    {"mwarn-deprecated", NULL, &warn_on_deprecated, 1, NULL},
    {"mno-warn-deprecated", N_("do not warn on use of deprecated feature"),
     &warn_on_deprecated, 0, NULL},
+
+  {"mwarn-restrict-it", N_("warn about performance deprecated IT instructions"
+   " in ARMv8-A and ARMv8-R"), &warn_on_restrict_it, 1, NULL},
+  {"mno-warn-restrict-it", NULL, &warn_on_restrict_it, 0, NULL},
+
    {"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
    {"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
    {NULL, NULL, NULL, 0, NULL}
    {"mwarn-syms", N_("warn about symbols that match instruction names [default]"), (int *) (& flag_warn_syms), TRUE, NULL},
    {"mno-warn-syms", N_("disable warnings about symobls that match instructions"), (int *) (& flag_warn_syms), FALSE, NULL},
    {NULL, NULL, NULL, 0, NULL}
@@ -29209,25 +30953,25 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_ARCH_NEON_VFP_V4),
    ARM_CPU_OPT ("cortex-a32",     "Cortex-A32",        ARM_ARCH_V8A,
                ARM_ARCH_NONE,
                FPU_ARCH_NEON_VFP_V4),
    ARM_CPU_OPT ("cortex-a32",     "Cortex-A32",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a35",     "Cortex-A35",        ARM_ARCH_V8A,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a35",     "Cortex-A35",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a53",     "Cortex-A53",        ARM_ARCH_V8A,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a53",     "Cortex-A53",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a55",    "Cortex-A55",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-a57",     "Cortex-A57",        ARM_ARCH_V8A,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a55",    "Cortex-A55",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-a57",     "Cortex-A57",        ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a72",     "Cortex-A72",        ARM_ARCH_V8A,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a72",     "Cortex-A72",        ARM_ARCH_V8A,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a73",     "Cortex-A73",        ARM_ARCH_V8A,
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a73",     "Cortex-A73",        ARM_ARCH_V8A,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a75",    "Cortex-A75",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
               FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("cortex-a75",    "Cortex-A75",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
@@ -29235,6 +30979,12 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("cortex-a76",    "Cortex-A76",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("cortex-a76",    "Cortex-A76",         ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+  ARM_CPU_OPT ("cortex-a76ae",    "Cortex-A76AE",      ARM_ARCH_V8_2A,
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
+  ARM_CPU_OPT ("cortex-a77",    "Cortex-A77",         ARM_ARCH_V8_2A,
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+              FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("ares",    "Ares",             ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
    ARM_CPU_OPT ("ares",    "Ares",             ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_DOTPROD),
@@ -29254,8 +31004,11 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV),
                FPU_ARCH_VFP_V3D16),
    ARM_CPU_OPT ("cortex-r52",     "Cortex-R52",        ARM_ARCH_V8R,
                ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV),
                FPU_ARCH_VFP_V3D16),
    ARM_CPU_OPT ("cortex-r52",     "Cortex-R52",        ARM_ARCH_V8R,
-             ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
               FPU_ARCH_NEON_VFP_ARMV8),
               FPU_ARCH_NEON_VFP_ARMV8),
+  ARM_CPU_OPT ("cortex-m35p",    "Cortex-M35P",       ARM_ARCH_V8M_MAIN,
+              ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+              FPU_NONE),
    ARM_CPU_OPT ("cortex-m33",     "Cortex-M33",        ARM_ARCH_V8M_MAIN,
                ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
                FPU_NONE),
    ARM_CPU_OPT ("cortex-m33",     "Cortex-M33",        ARM_ARCH_V8M_MAIN,
                ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
                FPU_NONE),
@@ -29281,7 +31034,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_NONE),
    ARM_CPU_OPT ("exynos-m1",      "Samsung Exynos M1", ARM_ARCH_V8A,
                ARM_ARCH_NONE,
                FPU_NONE),
    ARM_CPU_OPT ("exynos-m1",      "Samsung Exynos M1", ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("neoverse-n1",    "Neoverse N1",               ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("neoverse-n1",    "Neoverse N1",               ARM_ARCH_V8_2A,
                ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
@@ -29320,7 +31073,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
                ARM_ARCH_NONE,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("xgene2",         "APM X-Gene 2",      ARM_ARCH_V8A,
                ARM_ARCH_NONE,
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
    ARM_CPU_OPT ("xgene2",         "APM X-Gene 2",      ARM_ARCH_V8A,
-              ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+              ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC),
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
                FPU_ARCH_CRYPTO_NEON_VFP_ARMV8),
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
@@ -29440,7 +31193,7 @@ static const struct arm_ext_table armv7em_ext_table[] =
  
  static const struct arm_ext_table armv8a_ext_table[] =
  {
  
  static const struct arm_ext_table armv8a_ext_table[] =
  {
-  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@@ -29473,6 +31226,8 @@ static const struct arm_ext_table armv82a_ext_table[] =
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
    ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
    ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
    ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
    ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
@@ -29489,6 +31244,8 @@ static const struct arm_ext_table armv84a_ext_table[] =
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
@@ -29504,6 +31261,8 @@ static const struct arm_ext_table armv85a_ext_table[] =
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
  {
    ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
    ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+  ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
  
@@ -29513,6 +31272,12 @@ static const struct arm_ext_table armv85a_ext_table[] =
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
+static const struct arm_ext_table armv86a_ext_table[] = /* Presumably the table used by the "armv8.6-a" arm_archs entry (armv86a) - confirm against ARM_ARCH_OPT2.  */
+{
+  ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), /* The only named extension in this table.  */
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE } /* Terminator: table walkers stop at the first NULL name.  */
+};
+
  static const struct arm_ext_table armv8m_main_ext_table[] =
  {
    ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
  static const struct arm_ext_table armv8m_main_ext_table[] =
  {
    ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
@@ -29533,18 +31298,17 @@ static const struct arm_ext_table armv8_1m_main_ext_table[] =
    ARM_ADD ("fp.dp",
            ARM_FEATURE (0, ARM_EXT2_FP16_INST,
                         FPU_VFP_V5D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
    ARM_ADD ("fp.dp",
            ARM_FEATURE (0, ARM_EXT2_FP16_INST,
                         FPU_VFP_V5D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
-  ARM_EXT ("mve", ARM_FEATURE_COPROC (FPU_MVE),
-          ARM_FEATURE_COPROC (FPU_MVE | FPU_MVE_FP)),
+  ARM_EXT ("mve", ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE),
+          ARM_FEATURE_CORE_HIGH (ARM_EXT2_MVE | ARM_EXT2_MVE_FP)),
    ARM_ADD ("mve.fp",
    ARM_ADD ("mve.fp",
-          ARM_FEATURE (0, ARM_EXT2_FP16_INST,
-                       FPU_MVE | FPU_MVE_FP | FPU_VFP_V5_SP_D16 |
-                       FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
+          ARM_FEATURE (0, ARM_EXT2_FP16_INST | ARM_EXT2_MVE | ARM_EXT2_MVE_FP,
+                       FPU_VFP_V5_SP_D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)),
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
  static const struct arm_ext_table armv8r_ext_table[] =
  {
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  
  static const struct arm_ext_table armv8r_ext_table[] =
  {
-  ARM_ADD ("crc", ARCH_CRC_ARMV8),
+  ARM_ADD ("crc", ARM_FEATURE_CORE_HIGH (ARM_EXT2_CRC)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
    ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8),
    ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
            ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@@ -29618,6 +31382,7 @@ static const struct arm_arch_option_table arm_archs[] =
    ARM_ARCH_OPT2 ("armv8-r",      ARM_ARCH_V8R,         FPU_ARCH_VFP, armv8r),
    ARM_ARCH_OPT2 ("armv8.4-a",    ARM_ARCH_V8_4A,       FPU_ARCH_VFP, armv84a),
    ARM_ARCH_OPT2 ("armv8.5-a",    ARM_ARCH_V8_5A,       FPU_ARCH_VFP, armv85a),
    ARM_ARCH_OPT2 ("armv8-r",      ARM_ARCH_V8R,         FPU_ARCH_VFP, armv8r),
    ARM_ARCH_OPT2 ("armv8.4-a",    ARM_ARCH_V8_4A,       FPU_ARCH_VFP, armv84a),
    ARM_ARCH_OPT2 ("armv8.5-a",    ARM_ARCH_V8_5A,       FPU_ARCH_VFP, armv85a),
+  ARM_ARCH_OPT2 ("armv8.6-a",    ARM_ARCH_V8_6A,       FPU_ARCH_VFP, armv86a),
    ARM_ARCH_OPT ("xscale",        ARM_ARCH_XSCALE,      FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",        ARM_ARCH_IWMMXT,      FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",       ARM_ARCH_IWMMXT2,     FPU_ARCH_VFP),
    ARM_ARCH_OPT ("xscale",        ARM_ARCH_XSCALE,      FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt",        ARM_ARCH_IWMMXT,      FPU_ARCH_VFP),
    ARM_ARCH_OPT ("iwmmxt2",       ARM_ARCH_IWMMXT2,     FPU_ARCH_VFP),
@@ -29648,7 +31413,8 @@ struct arm_option_extension_value_table
     use the context sensitive approach using arm_ext_table's.  */
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
     use the context sensitive approach using arm_ext_table's.  */
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
-  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
+  ARM_EXT_OPT ("crc",   ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
+                        ARM_FEATURE_CORE_HIGH(ARM_EXT2_CRC),
                          ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
                          ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                          ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
@@ -29984,6 +31750,22 @@ arm_parse_extension (const char *str, const arm_feature_set *opt_set,
    return TRUE;
  }
  
    return TRUE;
  }
  
+static bfd_boolean /* TRUE if STR named a known format, FALSE otherwise.  */
+arm_parse_fp16_opt (const char *str) /* STR is the text following "mfp16-format=" (see arm_long_opts).  */
+{
+  if (strcasecmp (str, "ieee") == 0) /* Format names are matched ignoring case.  */
+    fp16_format = ARM_FP16_FORMAT_IEEE;
+  else if (strcasecmp (str, "alternative") == 0)
+    fp16_format = ARM_FP16_FORMAT_ALTERNATIVE;
+  else
+    {
+      as_bad (_("unrecognised float16 format \"%s\""), str); /* fp16_format is left untouched on this path.  */
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
  static bfd_boolean
  arm_parse_cpu (const char *str)
  {
  static bfd_boolean
  arm_parse_cpu (const char *str)
  {
@@ -30063,6 +31845,7 @@ arm_parse_arch (const char *str)
           march_ext_opt = XNEW (arm_feature_set);
         *march_ext_opt = arm_arch_none;
         march_fpu_opt = &opt->default_fpu;
           march_ext_opt = XNEW (arm_feature_set);
         *march_ext_opt = arm_arch_none;
         march_fpu_opt = &opt->default_fpu;
+       selected_ctx_ext_table = opt->ext_table;
         strcpy (selected_cpu_name, opt->name);
  
         if (ext != NULL)
         strcpy (selected_cpu_name, opt->name);
  
         if (ext != NULL)
@@ -30175,6 +31958,12 @@ struct arm_long_option_table arm_long_opts[] =
     arm_parse_it_mode, NULL},
    {"mccs", N_("\t\t\t  TI CodeComposer Studio syntax compatibility mode"),
     arm_ccs_mode, NULL},
     arm_parse_it_mode, NULL},
    {"mccs", N_("\t\t\t  TI CodeComposer Studio syntax compatibility mode"),
     arm_ccs_mode, NULL},
+  {"mfp16-format=",
+   N_("[ieee|alternative]\n\
+                          set the encoding for half precision floating point "
+                         "numbers to IEEE\n\
+                          or Arm alternative format."),
+   arm_parse_fp16_opt, NULL },
    {NULL, NULL, 0, NULL}
  };
  
    {NULL, NULL, 0, NULL}
  };
  
@@ -30376,7 +32165,8 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
      {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_4A},
      {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_5A},
      {TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
      {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_4A},
      {TAG_CPU_ARCH_V8,        ARM_ARCH_V8_5A},
      {TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
-    {-1,                     ARM_ARCH_NONE}
+    {TAG_CPU_ARCH_V8,      ARM_ARCH_V8_6A},
+    {-1,                   ARM_ARCH_NONE}
  };
  
  /* Set an attribute if it has not already been set by the user.  */
  };
  
  /* Set an attribute if it has not already been set by the user.  */
@@ -30756,6 +32546,9 @@ aeabi_set_public_attributes (void)
      virt_sec |= 2;
    if (virt_sec != 0)
      aeabi_set_attribute_int (Tag_Virtualization_use, virt_sec);
      virt_sec |= 2;
    if (virt_sec != 0)
      aeabi_set_attribute_int (Tag_Virtualization_use, virt_sec);
+
+  if (fp16_format != ARM_FP16_FORMAT_DEFAULT)
+    aeabi_set_attribute_int (Tag_ABI_FP_16bit_format, fp16_format);
  }
  
  /* Post relaxation hook.  Recompute ARM attributes now that relaxation is
  }
  
  /* Post relaxation hook.  Recompute ARM attributes now that relaxation is
@@ -30846,6 +32639,7 @@ s_arm_arch (int ignored ATTRIBUTE_UNUSED)
      if (streq (opt->name, name))
        {
         selected_arch = opt->value;
      if (streq (opt->name, name))
        {
         selected_arch = opt->value;
+       selected_ctx_ext_table = opt->ext_table;
         selected_ext = arm_arch_none;
         selected_cpu = selected_arch;
         strcpy (selected_cpu_name, opt->name);
         selected_ext = arm_arch_none;
         selected_cpu = selected_arch;
         strcpy (selected_cpu_name, opt->name);
@@ -30913,6 +32707,35 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
        name += 2;
      }
  
        name += 2;
      }
  
+  /* Check the context-specific extension table.  */
+  if (selected_ctx_ext_table)
+    {
+      const struct arm_ext_table * ext_opt;
+      for (ext_opt = selected_ctx_ext_table; ext_opt->name != NULL; ext_opt++)
+        {
+          if (streq (ext_opt->name, name))
+           {
+             if (adding_value)
+               {
+                 if (ARM_FEATURE_ZERO (ext_opt->merge))
+                   /* TODO: Option not supported.  When we remove the
+                   legacy table this case should error out.  */
+                   continue;
+                 ARM_MERGE_FEATURE_SETS (selected_ext, selected_ext,
+                                         ext_opt->merge);
+               }
+             else
+               ARM_CLEAR_FEATURE (selected_ext, selected_ext, ext_opt->clear);
+
+             ARM_MERGE_FEATURE_SETS (selected_cpu, selected_arch, selected_ext);
+             ARM_MERGE_FEATURE_SETS (cpu_variant, selected_cpu, selected_fpu);
+             *input_line_pointer = saved_char;
+             demand_empty_rest_of_line ();
+             return;
+           }
+       }
+    }
+
    for (opt = arm_extensions; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
    for (opt = arm_extensions; opt->name != NULL; opt++)
      if (streq (opt->name, name))
        {
@@ -30977,6 +32800,7 @@ s_arm_fpu (int ignored ATTRIBUTE_UNUSED)
      if (streq (opt->name, name))
        {
         selected_fpu = opt->value;
      if (streq (opt->name, name))
        {
         selected_fpu = opt->value;
+       ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, fpu_any);
  #ifndef CPU_DEFAULT
         if (no_cpu_selected ())
           ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);
  #ifndef CPU_DEFAULT
         if (no_cpu_selected ())
           ARM_MERGE_FEATURE_SETS (cpu_variant, arm_arch_any, selected_fpu);