/* tc-i386.c -- Assemble code for the Intel 80386
- Copyright (C) 1989-2018 Free Software Foundation, Inc.
+ Copyright (C) 1989-2019 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifndef INT_MAX
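+/* The all-ones unsigned value shifted right once, i.e. the largest
+   positive int on two's-complement hosts.  */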
+#define INT_MAX (int) (((unsigned) (-1)) >> 1)
+#endif
+#endif
+
#ifndef REGISTER_WARNINGS
#define REGISTER_WARNINGS 1
#endif
CPU_MOVDIRI_FLAGS, 0 },
{ STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
CPU_MOVDIR64B_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_BF16_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+ CPU_ENQCMD_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
{ STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
{ STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
+ { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
};
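+/* Usage sketch (the extension-name syntax is assumed here, not shown in
+   this file): the new table entries should make forms like
+	as -march=generic64+avx512_bf16 foo.s
+   and the ".arch .avx512_bf16" directive available, with the "no..."
+   spellings switching an extension off again.  */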
#ifdef I386COFF
/* Place the longer NOP first. */
int last;
int offset;
- const unsigned char *nops = patt[max_single_nop_size - 1];
+ const unsigned char *nops;
+
+ if (max_single_nop_size < 1)
+ {
+ as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+ max_single_nop_size);
+ return;
+ }
+
+ nops = patt[max_single_nop_size - 1];
/* Use the smaller one if the requested one isn't available. */
if (nops == NULL)
return x;
}
-static const i386_operand_type acc32 = OPERAND_TYPE_ACC32;
-static const i386_operand_type acc64 = OPERAND_TYPE_ACC64;
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static void ps (symbolS *);
static void
-pi (char *line, i386_insn *x)
+pi (const char *line, i386_insn *x)
{
unsigned int j;
{ OPERAND_TYPE_REG16, "r16" },
{ OPERAND_TYPE_REG32, "r32" },
{ OPERAND_TYPE_REG64, "r64" },
+ { OPERAND_TYPE_ACC8, "acc8" },
+ { OPERAND_TYPE_ACC16, "acc16" },
+ { OPERAND_TYPE_ACC32, "acc32" },
+ { OPERAND_TYPE_ACC64, "acc64" },
{ OPERAND_TYPE_IMM8, "i8" },
{ OPERAND_TYPE_IMM8, "i8s" },
{ OPERAND_TYPE_IMM16, "i16" },
{ OPERAND_TYPE_FLOATACC, "FAcc" },
{ OPERAND_TYPE_SREG2, "SReg2" },
{ OPERAND_TYPE_SREG3, "SReg3" },
- { OPERAND_TYPE_ACC, "Acc" },
{ OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" },
{ OPERAND_TYPE_REGMMX, "rMMX" },
{ OPERAND_TYPE_REGXMM, "rXMM" },
for (j = 0; j < ARRAY_SIZE (type_names); j++)
{
a = operand_type_and (t, type_names[j].mask);
- if (!operand_type_all_zero (&a))
+ if (operand_type_equal (&a, &type_names[j].mask))
fprintf (stdout, "%s, ", type_names[j].name);
}
fflush (stdout);
else if (i.tm.opcode_modifier.vexw)
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
- w = (i.rex & REX_W) ? 1 : 0;
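+ /* Outside of 64-bit mode no REX prefix exists, so W is "ignored"
+ (WIG) and follows the -mvexwig command-line setting.  */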
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
/* Use 2-byte VEX prefix if possible. */
if (w == 0
else if (i.tm.opcode_modifier.vexw)
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
- w = (i.rex & REX_W) ? 1 : 0;
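+ /* Likewise for EVEX: outside of 64-bit mode the W bit is ignored
+ and follows the -mevexwig setting.  */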
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
/* Encode the U bit. */
implied_prefix |= 0x4;
&& i.tm.extension_opcode == 0x4)
|| ((i.tm.base_opcode == 0xf6
|| i.tm.base_opcode == 0xc6)
- && i.tm.extension_opcode == 0x0)))))
+ && i.tm.extension_opcode == 0x0)))
+ || (fits_in_imm7 (i.op[0].imms->X_add_number)
+ && i.tm.base_opcode == 0x83
+ && i.tm.extension_opcode == 0x4)))
|| (i.types[0].bitfield.qword
&& ((i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
{
/* Optimize: -O:
andq $imm31, %r64 -> andl $imm31, %r32
+ andq $imm7, %r64 -> andl $imm7, %r32
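+ (imm7, i.e. a non-negative imm8, because opcode 0x83 sign-extends
+ its byte immediate: a negative byte would clear the upper 32 bits
+ under andl but preserve them under andq)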
testq $imm31, %r64 -> testl $imm31, %r32
xorq %r64, %r64 -> xorl %r32, %r32
subq %r64, %r64 -> subl %r32, %r32
}
}
}
- else if (optimize > 1
- && i.reg_operands == 3
+ else if (i.reg_operands == 3
&& i.op[0].regs == i.op[1].regs
&& !i.types[2].bitfield.xmmword
&& (i.tm.opcode_modifier.vex
&& !i.rounding
&& is_evex_encoding (&i.tm)
&& (i.vec_encoding != vex_encoding_evex
+ || cpu_arch_isa_flags.bitfield.cpuavx512vl
|| i.tm.cpu_flags.bitfield.cpuavx512vl
|| (i.tm.operand_types[2].bitfield.zmmword
- && i.types[2].bitfield.ymmword)
- || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+ && i.types[2].bitfield.ymmword))))
&& ((i.tm.base_opcode == 0x55
|| i.tm.base_opcode == 0x6655
|| i.tm.base_opcode == 0x66df
|| i.tm.base_opcode == 0x6647)
&& i.tm.extension_opcode == None))
{
- /* Optimize: -O2:
+ /* Optimize: -O1:
VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
vpsubq and vpsubw:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN
VOP, one of vpandn and vpxor:
VOP, one of vpandnd and vpandnq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of vpxord and vpxorq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of kxord and kxorq:
VEX VOP %kM, %kM, %kN
-> VEX kxorw %kM, %kM, %kN
*/
if (is_evex_encoding (&i.tm))
{
- if (i.vec_encoding == vex_encoding_evex)
- i.tm.opcode_modifier.evex = EVEX128;
- else
+ if (i.vec_encoding != vex_encoding_evex)
{
i.tm.opcode_modifier.vex = VEX128;
i.tm.opcode_modifier.vexw = VEXW0;
i.tm.opcode_modifier.evex = 0;
}
+ else if (optimize > 1)
+ i.tm.opcode_modifier.evex = EVEX128;
+ else
+ return;
}
else if (i.tm.operand_types[0].bitfield.regmask)
{
i.types[j].bitfield.ymmword = 0;
}
}
+ else if (i.vec_encoding != vex_encoding_evex
+ && !i.types[0].bitfield.zmmword
+ && !i.types[1].bitfield.zmmword
+ && !i.mask
+ && is_evex_encoding (&i.tm)
+ && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+ && i.tm.extension_opcode == None)
+ {
+ /* Optimize: -O1:
+ VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+ vmovdqu32 and vmovdqu64:
+ EVEX VOP %xmmM, %xmmN
+ -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+ EVEX VOP %ymmM, %ymmN
+ -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+ EVEX VOP %xmmM, mem
+ -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+ EVEX VOP %ymmM, mem
+ -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+ EVEX VOP mem, %xmmN
+ -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
+ EVEX VOP mem, %ymmN
+ -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+ */
+ for (j = 0; j < 2; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ /* Since the VEX prefix is 2 or 3 bytes while the EVEX prefix is
+ 4 bytes, and EVEX Disp8 is 1 byte while VEX Disp32 is 4 bytes,
+ we prefer EVEX Disp8 over VEX Disp32. */
+ int evex_disp8, vex_disp8;
+ unsigned int memshift = i.memshift;
+ offsetT n = i.op[j].disps->X_add_number;
+
+ evex_disp8 = fits_in_disp8 (n);
+ i.memshift = 0;
+ vex_disp8 = fits_in_disp8 (n);
+ if (evex_disp8 != vex_disp8)
+ {
+ i.memshift = memshift;
+ return;
+ }
+
+ i.types[j].bitfield.disp8 = vex_disp8;
+ break;
+ }
+ if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+ i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+ i.tm.opcode_modifier.vex
+ = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+ i.tm.opcode_modifier.vexw = VEXW0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.masking = 0;
+ i.tm.opcode_modifier.disp8memshift = 0;
+ i.memshift = 0;
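+ /* With the EVEX compressed-disp8 scaling gone, re-check which
+ displacements still fit in a single byte.  */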
+ for (j = 0; j < 2; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ i.types[j].bitfield.disp8
+ = fits_in_disp8 (i.op[j].disps->X_add_number);
+ break;
+ }
+ }
}
/* This is the guts of the machine-dependent assembler. LINE points to a
if (is_any_vex_encoding (&i.tm))
{
- if (flag_code == CODE_16BIT)
+ if (!cpu_arch_flags.bitfield.cpui286)
{
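+ /* VEX/EVEX-encoded insns raise #UD in real and VM86 modes, so
+ require an ISA that has protected mode at all (i286 and up).  */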
- as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+ as_bad (_("instruction `%s' isn't supported outside of protected mode."),
i.tm.name);
return;
}
}
/* If we are in 16-bit mode, do not allow addr16 or data16.
Similarly, in 32-bit mode, do not allow addr32 or data32. */
- if ((current_templates->start->opcode_modifier.size16
- || current_templates->start->opcode_modifier.size32)
+ if ((current_templates->start->opcode_modifier.size == SIZE16
+ || current_templates->start->opcode_modifier.size == SIZE32)
&& flag_code != CODE_64BIT
- && (current_templates->start->opcode_modifier.size32
+ && ((current_templates->start->opcode_modifier.size == SIZE32)
^ (flag_code == CODE_16BIT)))
{
as_bad (_("redundant %s prefix"),
if (!current_templates)
{
check_suffix:
- /* See if we can get a match by trimming off a suffix. */
- switch (mnem_p[-1])
+ if (mnem_p > mnemonic)
{
- case WORD_MNEM_SUFFIX:
- if (intel_syntax && (intel_float_operand (mnemonic) & 2))
- i.suffix = SHORT_MNEM_SUFFIX;
- else
- /* Fall through. */
- case BYTE_MNEM_SUFFIX:
- case QWORD_MNEM_SUFFIX:
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- break;
- case SHORT_MNEM_SUFFIX:
- case LONG_MNEM_SUFFIX:
- if (!intel_syntax)
- {
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- }
- break;
-
- /* Intel Syntax. */
- case 'd':
- if (intel_syntax)
+ /* See if we can get a match by trimming off a suffix. */
+ switch (mnem_p[-1])
{
- if (intel_float_operand (mnemonic) == 1)
+ case WORD_MNEM_SUFFIX:
+ if (intel_syntax && (intel_float_operand (mnemonic) & 2))
i.suffix = SHORT_MNEM_SUFFIX;
else
- i.suffix = LONG_MNEM_SUFFIX;
+ /* Fall through. */
+ case BYTE_MNEM_SUFFIX:
+ case QWORD_MNEM_SUFFIX:
+ i.suffix = mnem_p[-1];
mnem_p[-1] = '\0';
current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ mnemonic);
+ break;
+ case SHORT_MNEM_SUFFIX:
+ case LONG_MNEM_SUFFIX:
+ if (!intel_syntax)
+ {
+ i.suffix = mnem_p[-1];
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
+
+ /* Intel Syntax. */
+ case 'd':
+ if (intel_syntax)
+ {
+ if (intel_float_operand (mnemonic) == 1)
+ i.suffix = SHORT_MNEM_SUFFIX;
+ else
+ i.suffix = LONG_MNEM_SUFFIX;
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
}
- break;
}
+
if (!current_templates)
{
as_bad (_("no such instruction: `%s'"), token_start);
zero-extend %eax to %rax. */
if (flag_code == CODE_64BIT
&& t->base_opcode == 0x90
- && operand_type_equal (&i.types [0], &acc32)
- && operand_type_equal (&i.types [1], &acc32))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.dword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.dword)
continue;
/* xrelease mov %eax, <disp> is another special case. It must not
match the accumulator-only encoding of mov. */
{
/* If matched instruction specifies an explicit instruction mnemonic
suffix, use it. */
- if (i.tm.opcode_modifier.size16)
+ if (i.tm.opcode_modifier.size == SIZE16)
i.suffix = WORD_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size32)
+ else if (i.tm.opcode_modifier.size == SIZE32)
i.suffix = LONG_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size64)
+ else if (i.tm.opcode_modifier.size == SIZE64)
i.suffix = QWORD_MNEM_SUFFIX;
else if (i.reg_operands)
{
/* exclude fldenv/frstor/fsave/fstenv */
&& i.tm.opcode_modifier.no_ssuf)
{
- i.suffix = stackop_size;
+ if (stackop_size == LONG_MNEM_SUFFIX
+ && i.tm.base_opcode == 0xcf)
+ {
+ /* stackop_size is set to LONG_MNEM_SUFFIX for the
+ .code16gcc directive to support 16-bit mode with
+ 32-bit addressing. For IRET without a suffix, generate
+ a 16-bit IRET (opcode 0xcf) to return from an interrupt
+ handler. */
+ i.suffix = WORD_MNEM_SUFFIX;
+ as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+ }
+ else
+ i.suffix = stackop_size;
}
else if (intel_syntax
&& !i.suffix
else if (i.suffix != QWORD_MNEM_SUFFIX
&& !i.tm.opcode_modifier.ignoresize
&& !i.tm.opcode_modifier.floatmf
- && !i.tm.opcode_modifier.vex
- && !i.tm.opcode_modifier.vexopcode
- && !is_evex_encoding (&i.tm)
+ && !is_any_vex_encoding (&i.tm)
&& ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
|| (flag_code == CODE_64BIT
&& i.tm.opcode_modifier.jumpbyte)))
&& ! (i.operands == 2
&& i.tm.base_opcode == 0x90
&& i.tm.extension_opcode == None
- && operand_type_equal (&i.types [0], &acc64)
- && operand_type_equal (&i.types [1], &acc64)))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.qword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.qword))
i.rex |= REX_W;
break;
if (!IS_ELF)
return FALSE;
+#ifdef TE_SOLARIS
+ /* Don't emit a PLT32 relocation on Solaris: neither the native
+ linker nor krtld supports it. */
+ return FALSE;
+#endif
+
/* Since there is no need to prepare for PLT branch on x86-64, we
can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
be used as a marker for 32-bit PC-relative branches. */
if (!IS_ELF || !x86_used_note)
return;
- x86_isa_1_used |= GNU_PROPERTY_X86_UINT32_VALID;
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
/* The .note.gnu.property section layout:
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+ if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+ x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
if (i.tm.cpu_flags.bitfield.cpu8087
|| i.tm.cpu_flags.bitfield.cpu287
/* Check for "call/jmp *mem", "mov mem, %reg",
"test %reg, mem" and "binop mem, %reg" where binop
is one of adc, add, and, cmp, or, sbb, sub, xor
- instructions. Always generate R_386_GOT32X for
- "sym*GOT" operand in 32-bit mode. */
- if ((generate_relax_relocations
- || (!object_64bit
- && i.rm.mode == 0
- && i.rm.regmem == 5))
+ instructions without data prefix. Always generate
+ R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
+ if (i.prefix[DATA_PREFIX] == 0
+ && (generate_relax_relocations
+ || (!object_64bit
+ && i.rm.mode == 0
+ && i.rm.regmem == 5))
&& (i.rm.mode == 2
|| (i.rm.mode == 0 && i.rm.regmem == 5))
&& ((i.operands == 1
as_bad (_("missing or invalid expression `%s'"), save);
*input_line_pointer = c;
}
+ else if ((got_reloc == BFD_RELOC_386_PLT32
+ || got_reloc == BFD_RELOC_X86_64_PLT32)
+ && exp->X_op != O_symbol)
+ {
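+ /* A PLT relocation needs a bare symbol to resolve against;
+ whatever was parsed here is not one, so reject it.  */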
+ char c = *input_line_pointer;
+ *input_line_pointer = 0;
+ as_bad (_("invalid PLT expression `%s'"), save);
+ *input_line_pointer = c;
+ }
}
}
else
{
case BFD_RELOC_386_PLT32:
case BFD_RELOC_X86_64_PLT32:
- /* Make the jump instruction point to the address of the operand. At
- runtime we merely add the offset to the actual PLT entry. */
- value = -4;
+ /* Make the jump instruction point to the address of the operand.
+ At runtime we merely add the offset to the actual PLT entry.
+ NB: Subtract the offset size only for jump instructions. */
+ if (fixP->fx_pcrel)
+ value = -4;
break;
case BFD_RELOC_386_TLS_GD:
{
optimize_for_space = 1;
/* Turn on all encoding optimizations. */
- optimize = -1;
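+ /* INT_MAX rather than -1, so that the "optimize > 1" (-O2) paths
+ above are also enabled.  */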
+ optimize = INT_MAX;
}
else
{