[ARM] Add feature check for ARMv8.1 AdvSIMD instructions.

[deliverable/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index 80230e45d82c183d4a8971eea69509ebf5f714e1..e725f41abb4404cc37f0fbc186f6690510891c64 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -271,7 +271,7 @@ static const arm_feature_set fpu_crypto_ext_armv8 =
  static const arm_feature_set crc_ext_armv8 =
    ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
  static const arm_feature_set fpu_neon_ext_v8_1 =
-  ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8 | FPU_NEON_EXT_RDMA);
+  ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA);
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
@@ -516,7 +516,7 @@ struct asm_barrier_opt
  
  struct reloc_entry
  {
-  char *                    name;
+  const char *                    name;
    bfd_reloc_code_real_type  reloc;
  };
  
@@ -784,7 +784,8 @@ struct asm_opcode
         _("cannot use register index with PC-relative addressing")
  #define BAD_PC_WRITEBACK \
         _("cannot use writeback with PC-relative addressing")
-#define BAD_RANGE     _("branch out of range")
+#define BAD_RANGE      _("branch out of range")
+#define BAD_FP16       _("selected processor does not support fp16 instruction")
  #define UNPRED_REG(R)  _("using " R " results in unpredictable behaviour")
  
  static struct hash_control * arm_ops_hsh;
@@ -6084,6 +6085,16 @@ parse_cond (char **str)
    return c->value;
  }
  
+/* Record a use of the given feature.  */
+static void
+record_feature_use (const arm_feature_set *feature)
+{
+  if (thumb_mode)
+    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
+  else
+    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+}
+
  /* If the given feature available in the selected CPU, mark it as used.
     Returns TRUE iff feature is available.  */
  static bfd_boolean
@@ -6095,10 +6106,7 @@ mark_feature_used (const arm_feature_set *feature)
  
    /* Add the appropriate architecture feature for the barrier option used.
       */
-  if (thumb_mode)
-    ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature);
-  else
-    ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature);
+  record_feature_use (feature);
  
    return TRUE;
  }
@@ -7263,6 +7271,26 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  
  #define rotate_left(v, n) (v << (n & 31) | v >> ((32 - n) & 31))
  
+/* Re-encode the current instruction as a scalar ARMv8.2 fp16 instruction.
+
+   The only binary encoding difference is the coprocessor number.  Coprocessor
+   9 is used for half-precision calculations or conversions.  The format of the
+   instruction is the same as the equivalent coprocessor 10 instruction that
+   exists for single-precision operation.  */
+
+static void
+do_scalar_fp16_v82_encode (void)
+{
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional,"
+              " the behaviour is UNPREDICTABLE"));
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+             _(BAD_FP16));
+
+  inst.instruction = (inst.instruction & 0xfffff0ff) | 0x900;
+  mark_feature_used (&arm_ext_fp16);
+}
+
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
  
@@ -13252,7 +13280,19 @@ NEON_ENC_TAB
    X(2, (S, R), SINGLE),                        \
    X(2, (R, S), SINGLE),                        \
    X(2, (F, R), SINGLE),                        \
-  X(2, (R, F), SINGLE)
+  X(2, (R, F), SINGLE),                        \
+/* Half-float shapes supported so far.  */\
+  X (2, (H, D), MIXED),                        \
+  X (2, (D, H), MIXED),                        \
+  X (2, (H, F), MIXED),                        \
+  X (2, (F, H), MIXED),                        \
+  X (2, (H, H), HALF),                 \
+  X (2, (H, R), HALF),                 \
+  X (2, (R, H), HALF),                 \
+  X (2, (H, I), HALF),                 \
+  X (3, (H, H, H), HALF),              \
+  X (3, (H, F, I), MIXED),             \
+  X (3, (F, H, I), MIXED)
  
  #define S2(A,B)                NS_##A##B
  #define S3(A,B,C)      NS_##A##B##C
@@ -13273,6 +13313,7 @@ enum neon_shape
  
  enum neon_shape_class
  {
+  SC_HALF,
    SC_SINGLE,
    SC_DOUBLE,
    SC_QUAD,
@@ -13290,6 +13331,7 @@ static enum neon_shape_class neon_shape_class[] =
  
  enum neon_shape_el
  {
+  SE_H,
    SE_F,
    SE_D,
    SE_Q,
@@ -13302,6 +13344,7 @@ enum neon_shape_el
  /* Register widths of above.  */
  static unsigned neon_shape_el_size[] =
  {
+  16,
    32,
    64,
    128,
@@ -13386,6 +13429,7 @@ enum neon_type_mask
  #define N_SUF_32   (N_SU_32 | N_F32)
  #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
  #define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_F_ALL    (N_F16 | N_F32 | N_F64)
  
  /* Pass this as the first type argument to neon_check_type to ignore types
     altogether.  */
@@ -13427,11 +13471,56 @@ neon_select_shape (enum neon_shape shape, ...)
  
           switch (neon_shape_tab[shape].el[j])
             {
+             /* If a .f16, .16, .u16 or .s16 type specifier is given on
+                a VFP single precision register operand, it essentially
+                means that only half of the register is used.
+
+                If the type specifier is given after the mnemonics, the
+                information is stored in inst.vectype.  If the type specifier
+                is given after register operand, the information is stored
+                in inst.operands[].vectype.
+
+                When there is only one type specifier, and all the register
+                operands are the same type of hardware register, the type
+                specifier applies to all register operands.
+
+                If no type specifier is given, the shape is inferred from
+                operand information.
+
+                for example:
+                vadd.f16 s0, s1, s2:           NS_HHH
+                vabs.f16 s0, s1:               NS_HH
+                vmov.f16 s0, r1:               NS_HR
+                vmov.f16 r0, s1:               NS_RH
+                vcvt.f16 r0, s1:               NS_RH
+                vcvt.f16.s32   s2, s2, #29:    NS_HFI
+                vcvt.f16.s32   s2, s2:         NS_HF
+             */
+           case SE_H:
+             if (!(inst.operands[j].isreg
+                   && inst.operands[j].isvec
+                   && inst.operands[j].issingle
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1
+                        && inst.vectype.el[0].size == 16)
+                       || (inst.vectype.elems > 1
+                           && inst.vectype.el[j].size == 16)
+                       || (inst.vectype.elems == 0
+                           && inst.operands[j].vectype.type != NT_invtype
+                           && inst.operands[j].vectype.size == 16))))
+               matches = 0;
+             break;
+
             case SE_F:
               if (!(inst.operands[j].isreg
                     && inst.operands[j].isvec
                     && inst.operands[j].issingle
-                   && !inst.operands[j].isquad))
+                   && !inst.operands[j].isquad
+                   && ((inst.vectype.elems == 1 && inst.vectype.el[0].size == 32)
+                       || (inst.vectype.elems > 1 && inst.vectype.el[j].size == 32)
+                       || (inst.vectype.elems == 0
+                           && (inst.operands[j].vectype.size == 32
+                               || inst.operands[j].vectype.type == NT_invtype)))))
                 matches = 0;
               break;
  
@@ -13647,7 +13736,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
      *type = NT_untyped;
    else if ((mask & (N_P8 | N_P16 | N_P64)) != 0)
      *type = NT_poly;
-  else if ((mask & (N_F16 | N_F32 | N_F64)) != 0)
+  else if ((mask & (N_F_ALL)) != 0)
      *type = NT_float;
    else
      return FAIL;
@@ -13835,6 +13924,18 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
                   else
                     match = g_size;
  
+                 /* FP16 will use a single precision register.  */
+                 if (regwidth == 32 && match == 16)
+                   {
+                     if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+                       match = regwidth;
+                     else
+                       {
+                         inst.error = _(BAD_FP16);
+                         return badtype;
+                       }
+                   }
+
                   if (regwidth != match)
                     {
                       first_error (_("operand size must match register width"));
@@ -13926,12 +14027,16 @@ do_vfp_nsyn_add_sub (enum neon_shape rs)
  {
    int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_add)
         do_vfp_nsyn_opcode ("fadds");
        else
         do_vfp_nsyn_opcode ("fsubs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -13954,15 +14059,14 @@ try_vfp_nsyn (int args, void (*pfn) (enum neon_shape))
    switch (args)
      {
      case 2:
-      rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      et = neon_check_type (2, rs,
-       N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
        break;
  
      case 3:
-      rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
-      et = neon_check_type (3, rs,
-       N_EQK | N_VFP, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
+      et = neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
+                           N_F_ALL | N_KEY | N_VFP);
        break;
  
      default:
@@ -13984,12 +14088,16 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
  {
    int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_mla)
         do_vfp_nsyn_opcode ("fmacs");
        else
         do_vfp_nsyn_opcode ("fnmacs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -14005,12 +14113,16 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  {
    int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        if (is_fma)
         do_vfp_nsyn_opcode ("ffmas");
        else
         do_vfp_nsyn_opcode ("ffnmas");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -14024,8 +14136,14 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs)
  static void
  do_vfp_nsyn_mul (enum neon_shape rs)
  {
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fmuls");
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fmuls");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fmuld");
  }
@@ -14034,14 +14152,18 @@ static void
  do_vfp_nsyn_abs_neg (enum neon_shape rs)
  {
    int is_neg = (inst.instruction & 0x80) != 0;
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_VFP | N_KEY);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_VFP | N_KEY);
  
-  if (rs == NS_FF)
+  if (rs == NS_FF || rs == NS_HH)
      {
        if (is_neg)
         do_vfp_nsyn_opcode ("fnegs");
        else
         do_vfp_nsyn_opcode ("fabss");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -14078,11 +14200,17 @@ do_vfp_nsyn_ldm_stm (int is_dbmode)
  static void
  do_vfp_nsyn_sqrt (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-  neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+  neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
+
+  if (rs == NS_FF || rs == NS_HH)
+    {
+      do_vfp_nsyn_opcode ("fsqrts");
  
-  if (rs == NS_FF)
-    do_vfp_nsyn_opcode ("fsqrts");
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fsqrtd");
  }
@@ -14090,12 +14218,18 @@ do_vfp_nsyn_sqrt (void)
  static void
  do_vfp_nsyn_div (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
  
-  if (rs == NS_FFF)
-    do_vfp_nsyn_opcode ("fdivs");
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_nsyn_opcode ("fdivs");
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_nsyn_opcode ("fdivd");
  }
@@ -14103,14 +14237,18 @@ do_vfp_nsyn_div (void)
  static void
  do_vfp_nsyn_nmul (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL);
    neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
-    N_F32 | N_F64 | N_KEY | N_VFP);
+                  N_F_ALL | N_KEY | N_VFP);
  
-  if (rs == NS_FFF)
+  if (rs == NS_FFF || rs == NS_HHH)
      {
        NEON_ENCODE (SINGLE, inst);
        do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -14118,17 +14256,19 @@ do_vfp_nsyn_nmul (void)
        do_vfp_dp_rd_rn_rm ();
      }
    do_vfp_cond_or_thumb ();
+
  }
  
  static void
  do_vfp_nsyn_cmp (void)
  {
+  enum neon_shape rs;
    if (inst.operands[1].isreg)
      {
-      enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
-      neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+      rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL);
+      neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP);
  
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_monadic ();
@@ -14141,8 +14281,8 @@ do_vfp_nsyn_cmp (void)
      }
    else
      {
-      enum neon_shape rs = neon_select_shape (NS_FI, NS_DI, NS_NULL);
-      neon_check_type (2, rs, N_F32 | N_F64 | N_KEY | N_VFP, N_EQK);
+      rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL);
+      neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK);
  
        switch (inst.instruction & 0x0fffffff)
         {
@@ -14156,7 +14296,7 @@ do_vfp_nsyn_cmp (void)
           abort ();
         }
  
-      if (rs == NS_FI)
+      if (rs == NS_FI || rs == NS_HI)
         {
           NEON_ENCODE (SINGLE, inst);
           do_vfp_sp_compare_z ();
@@ -14168,6 +14308,10 @@ do_vfp_nsyn_cmp (void)
         }
      }
    do_vfp_cond_or_thumb ();
+
+  /* ARMv8.2 fp16 instruction.  */
+  if (rs == NS_HI || rs == NS_HH)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
@@ -14885,6 +15029,38 @@ do_neon_qdmulh (void)
      }
  }
  
+static void
+do_neon_qrdmlah (void)
+{
+  /* Check we're on the correct architecture.  */
+  if (!mark_feature_used (&fpu_neon_ext_armv8))
+    inst.error =
+      _("instruction form not available on this architecture.");
+  else if (!mark_feature_used (&fpu_neon_ext_v8_1))
+    {
+      as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD."));
+      record_feature_use (&fpu_neon_ext_v8_1);
+    }
+
+  if (inst.operands[2].isscalar)
+    {
+      enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+      struct neon_type_el et = neon_check_type (3, rs,
+       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      NEON_ENCODE (SCALAR, inst);
+      neon_mul_mac (et, neon_quad (rs));
+    }
+  else
+    {
+      enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      struct neon_type_el et = neon_check_type (3, rs,
+       N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      NEON_ENCODE (INTEGER, inst);
+      /* The U bit (rounding) comes from bit mask.  */
+      neon_three_same (neon_quad (rs), 0, et.size);
+    }
+}
+
  static void
  do_neon_fcmp_absolute (void)
  {
@@ -15133,6 +15309,13 @@ do_neon_shll (void)
    /* Half-precision conversions.  */                                         \
    CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
    CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
+  /* New VCVT instructions introduced by ARMv8.2 fp16 extension.             \
+     Compared with single/double precision variants, only the co-processor    \
+     field is different, so the encoding flow is reused here.  */            \
+  CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL)    \
+  CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
+  CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
+  CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
    /* VFP instructions.  */                                                   \
    CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
    CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
@@ -15207,7 +15390,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  {
    const char *opname = 0;
  
-  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI)
+  if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI
+      || rs == NS_FHI || rs == NS_HFI)
      {
        /* Conversions with immediate bitshift.  */
        const char *enc[] =
@@ -15244,12 +15428,19 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour)
  
    if (opname)
      do_vfp_nsyn_opcode (opname);
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16
+      || flavour == neon_cvt_flavour_f16_u32
+      || flavour == neon_cvt_flavour_f16_s32)
+    do_scalar_fp16_v82_encode ();
  }
  
  static void
  do_vfp_nsyn_cvtz (void)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
    const char *enc[] =
      {
@@ -15277,6 +15468,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
+  if (flavour == neon_cvt_flavour_s32_f16
+      || flavour == neon_cvt_flavour_u32_f16)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16),
+               _(BAD_FP16));
+
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
@@ -15289,6 +15485,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
        sz = 0;
        op = 1;
        break;
+    case neon_cvt_flavour_s32_f16:
+      sz = 0;
+      op = 1;
+      break;
      case neon_cvt_flavour_u32_f64:
        sz = 1;
        op = 0;
@@ -15297,6 +15497,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
        sz = 0;
        op = 0;
        break;
+    case neon_cvt_flavour_u32_f16:
+      sz = 0;
+      op = 0;
+      break;
      default:
        first_error (_("invalid instruction shape"));
        return;
@@ -15315,6 +15519,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
    encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm);
    inst.instruction |= sz << 8;
+
+  /* ARMv8.2 fp16 VCVT instruction.  */
+  if (flavour == neon_cvt_flavour_s32_f16
+      ||flavour == neon_cvt_flavour_u32_f16)
+    do_scalar_fp16_v82_encode ();
    inst.instruction |= op << 7;
    inst.instruction |= rm << 16;
    inst.instruction |= 0xf0000000;
@@ -15325,7 +15534,9 @@ static void
  do_neon_cvt_1 (enum neon_cvt_mode mode)
  {
    enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
-    NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL);
+                                         NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ,
+                                         NS_FH, NS_HF, NS_FHI, NS_HFI,
+                                         NS_NULL);
    enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
  
    /* PR11109: Handle round-to-zero for VCVT conversions.  */
@@ -15341,6 +15552,18 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
        return;
      }
  
+  /* ARMv8.2 fp16 VCVT conversions.  */
+  if (mode == neon_cvt_mode_z
+      && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)
+      && (flavour == neon_cvt_flavour_s32_f16
+         || flavour == neon_cvt_flavour_u32_f16)
+      && (rs == NS_FH))
+    {
+      do_vfp_nsyn_cvtz ();
+      do_scalar_fp16_v82_encode ();
+      return;
+    }
+
    /* VFP rather than Neon conversions.  */
    if (flavour >= neon_cvt_flavour_first_fp)
      {
@@ -15525,7 +15748,8 @@ do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double)
  static void
  do_neon_cvttb_1 (bfd_boolean t)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_DF, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD,
+                                         NS_DF, NS_DH, NS_NULL);
  
    if (rs == NS_NULL)
      return;
@@ -15905,8 +16129,9 @@ static void
  do_neon_mov (void)
  {
    enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
-    NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
-    NS_NULL);
+                                         NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR,
+                                         NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
+                                         NS_HR, NS_RH, NS_HI, NS_NULL);
    struct neon_type_el et;
    const char *ldconst = 0;
  
@@ -16084,6 +16309,7 @@ do_neon_mov (void)
        do_vfp_nsyn_opcode ("fcpys");
        break;
  
+    case NS_HI:
      case NS_FI:  /* case 10 (fconsts).  */
        ldconst = "fconsts";
        encode_fconstd:
@@ -16091,17 +16317,29 @@ do_neon_mov (void)
         {
           inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
           do_vfp_nsyn_opcode (ldconst);
+
+         /* ARMv8.2 fp16 vmov.f16 instruction.  */
+         if (rs == NS_HI)
+           do_scalar_fp16_v82_encode ();
         }
        else
         first_error (_("immediate out of range"));
        break;
  
+    case NS_RH:
      case NS_RF:  /* case 12 (fmrs).  */
        do_vfp_nsyn_opcode ("fmrs");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_RH)
+       do_scalar_fp16_v82_encode ();
        break;
  
+    case NS_HR:
      case NS_FR:  /* case 13 (fmsr).  */
        do_vfp_nsyn_opcode ("fmsr");
+      /* ARMv8.2 fp16 vmov.f16 instruction.  */
+      if (rs == NS_HR)
+       do_scalar_fp16_v82_encode ();
        break;
  
      /* The encoders for the fmrrs and fmsrr instructions expect three operands
@@ -16157,6 +16395,21 @@ do_neon_rshift_round_imm (void)
                   et.size - imm);
  }
  
+static void
+do_neon_movhf (void)
+{
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL);
+  constraint (rs != NS_HH, _("invalid suffix"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+             _(BAD_FPU));
+
+  do_vfp_sp_monadic ();
+
+  inst.is_neon = 1;
+  inst.instruction |= 0xf0000000;
+}
+
  static void
  do_neon_movl (void)
  {
@@ -16333,6 +16586,10 @@ do_neon_ldr_str (void)
         do_vfp_nsyn_opcode ("flds");
        else
         do_vfp_nsyn_opcode ("fsts");
+
+      /* ARMv8.2 vldr.16/vstr.16 instruction.  */
+      if (inst.vectype.el[0].size == 16)
+       do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -16690,8 +16947,14 @@ do_vfp_nsyn_fpv8 (enum neon_shape rs)
  
    NEON_ENCODE (FPV8, inst);
  
-  if (rs == NS_FFF)
-    do_vfp_sp_dyadic ();
+  if (rs == NS_FFF || rs == NS_HHH)
+    {
+      do_vfp_sp_dyadic ();
+
+      /* ARMv8.2 fp16 instruction.  */
+      if (rs == NS_HHH)
+       do_scalar_fp16_v82_encode ();
+    }
    else
      do_vfp_dp_rd_rn_rm ();
  
@@ -16727,7 +16990,7 @@ do_vmaxnm (void)
  static void
  do_vrint_1 (enum neon_cvt_mode mode)
  {
-  enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_QQ, NS_NULL);
+  enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL);
    struct neon_type_el et;
  
    if (rs == NS_NULL)
@@ -16739,7 +17002,8 @@ do_vrint_1 (enum neon_cvt_mode mode)
      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
                 _(BAD_FPU));
  
-  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+  et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY
+                       | N_VFP);
    if (et.type != NT_invtype)
      {
        /* VFP encodings.  */
@@ -16748,7 +17012,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
         set_it_insn_type (OUTSIDE_IT_INSN);
  
        NEON_ENCODE (FPV8, inst);
-      if (rs == NS_FF)
+      if (rs == NS_FF || rs == NS_HH)
         do_vfp_sp_monadic ();
        else
         do_vfp_dp_rd_rm ();
@@ -16767,6 +17031,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
  
        inst.instruction |= (rs == NS_DD) << 8;
        do_vfp_cond_or_thumb ();
+
+      /* ARMv8.2 fp16 vrint instruction.  */
+      if (rs == NS_HH)
+      do_scalar_fp16_v82_encode ();
      }
    else
      {
@@ -19971,6 +20239,15 @@ static const struct asm_opcode insns[] =
   NCE(vmov,      0,       1, (VMOV), neon_mov),
   NCE(vmovq,     0,       1, (VMOV), neon_mov),
  
+#undef  ARM_VARIANT
+#define ARM_VARIANT    & arm_ext_fp16
+#undef  THUMB_VARIANT
+#define THUMB_VARIANT  & arm_ext_fp16
+ /* New instructions added from v8.2, allowing the extraction and insertion of
+    the upper 16 bits of a 32-bit vector register.  */
+ NCE (vmovx,     eb00a40,       2, (RVS, RVS), neon_movhf),
+ NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
+
  #undef  THUMB_VARIANT
  #define THUMB_VARIANT  & fpu_neon_ext_v1
  #undef  ARM_VARIANT
@@ -20073,10 +20350,10 @@ static const struct asm_opcode insns[] =
   NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
   NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
   /* ARM v8.1 extension.  */
- nUF(vqrdmlah,  _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
- nUF(vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
- nUF(vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
- nUF(vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
+ nUF (vqrdmlah,  _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlsh,  _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah),
+ nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qrdmlah),
  
    /* Two address, int/float. Types S8 S16 S32 F32.  */
   NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
@@ -23091,7 +23368,20 @@ md_apply_fix (fixS *   fixP,
  
      case BFD_RELOC_ARM_CP_OFF_IMM:
      case BFD_RELOC_ARM_T32_CP_OFF_IMM:
-      if (value < -1023 || value > 1023 || (value & 3))
+      if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM)
+       newval = md_chars_to_number (buf, INSN_SIZE);
+      else
+       newval = get_thumb32_insn (buf);
+      if ((newval & 0x0f200f00) == 0x0d000900)
+       {
+         /* This is an fp16 vstr/vldr.  The immediate offset in the
+            mnemonic must be a multiple of 2 whose magnitude is in the
+            range 0 to 510.  */
+         if (value < -510 || value > 510 || (value & 1))
+           as_bad_where (fixP->fx_file, fixP->fx_line,
+                         _("co-processor offset out of range"));
+       }
+      else if (value < -1023 || value > 1023 || (value & 3))
         as_bad_where (fixP->fx_file, fixP->fx_line,
                       _("co-processor offset out of range"));
      cp_off_common:
@@ -23108,6 +23398,17 @@ md_apply_fix (fixS *   fixP,
        else
         {
           newval &= 0xff7fff00;
+         if ((newval & 0x0f200f00) == 0x0d000900)
+           {
+             /* This is an fp16 vstr/vldr.
+
+                The immediate field of the instruction holds a half-word
+                offset, i.e. the byte offset shifted right by 1.
+
+                Shifting left by 1 here and right by 2 below yields that
+                net right shift by 1.  */
+             value <<= 1;
+           }
           newval |= (value >> 2) | (sign ? INDEX_UP : 0);
         }
        if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM
@@ -23833,7 +24134,7 @@ tc_gen_reloc (asection *section, fixS *fixp)
  
      default:
        {
-       char * type;
+       const char * type;
  
         switch (fixp->fx_r_type)
           {
@@ -24644,11 +24945,11 @@ size_t md_longopts_size = sizeof (md_longopts);
  
  struct arm_option_table
  {
-  char *option;                /* Option name to match.  */
-  char *help;          /* Help information.  */
+  const char *option;          /* Option name to match.  */
+  const char *help;            /* Help information.  */
    int  *var;           /* Variable to change.  */
    int  value;          /* What to change it to.  */
-  char *deprecated;    /* If non-null, print this message.  */
+  const char *deprecated;      /* If non-null, print this message.  */
  };
  
  struct arm_option_table arm_opts[] =
@@ -24681,10 +24982,10 @@ struct arm_option_table arm_opts[] =
  
  struct arm_legacy_option_table
  {
-  char *option;                                /* Option name to match.  */
+  const char *option;                          /* Option name to match.  */
    const arm_feature_set        **var;          /* Variable to change.  */
    const arm_feature_set        value;          /* What to change it to.  */
-  char *deprecated;                    /* If non-null, print this message.  */
+  const char *deprecated;                      /* If non-null, print this message.  */
  };
  
  const struct arm_legacy_option_table arm_legacy_opts[] =
@@ -24802,7 +25103,7 @@ const struct arm_legacy_option_table arm_legacy_opts[] =
  
  struct arm_cpu_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    /* For some CPUs we assume an FPU unless the user explicitly sets
@@ -24983,7 +25284,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
  
  struct arm_arch_option_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set        value;
    const arm_feature_set        default_fpu;
@@ -25054,7 +25355,7 @@ static const struct arm_arch_option_table arm_archs[] =
  /* ISA extensions in the co-processor and main instruction set space.  */
  struct arm_option_extension_value_table
  {
-  char *name;
+  const char *name;
    size_t name_len;
    const arm_feature_set merge_value;
    const arm_feature_set clear_value;
@@ -25088,25 +25389,25 @@ static const struct arm_option_extension_value_table arm_extensions[] =
    ARM_EXT_OPT ("mp",   ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
-  ARM_EXT_OPT ("simd",   FPU_ARCH_NEON_VFP_ARMV8,
-                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
-                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("os",   ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)),
    ARM_EXT_OPT ("pan",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN),
                         ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8_1,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("sec",  ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)),
+  ARM_EXT_OPT ("simd",  FPU_ARCH_NEON_VFP_ARMV8,
+                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
+                       ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("virt", ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT | ARM_EXT_ADIV
                                      | ARM_EXT_DIV),
                         ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
                                    ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)),
-  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8,
-                       ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
-                                  ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
    ARM_EXT_OPT ("xscale",ARM_FEATURE_COPROC (ARM_CEXT_XSCALE),
                         ARM_FEATURE_COPROC (ARM_CEXT_XSCALE), ARM_ANY),
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE }
@@ -25116,7 +25417,7 @@ static const struct arm_option_extension_value_table arm_extensions[] =
  /* ISA floating-point and Advanced SIMD extensions.  */
  struct arm_option_fpu_value_table
  {
-  char *name;
+  const char *name;
    const arm_feature_set value;
  };
  
@@ -25172,7 +25473,7 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
  
  struct arm_option_value_table
  {
-  char *name;
+  const char *name;
    long value;
  };
  
@@ -25197,10 +25498,10 @@ static const struct arm_option_value_table arm_eabis[] =
  
  struct arm_long_option_table
  {
-  char * option;               /* Substring to match.  */
-  char * help;                 /* Help information.  */
+  const char * option;         /* Substring to match.  */
+  const char * help;                   /* Help information.  */
    int (* func) (char * subopt);        /* Function to decode sub-option.  */
-  char * deprecated;           /* If non-null, print this message.  */
+  const char * deprecated;             /* If non-null, print this message.  */
  };
  
  static bfd_boolean