x86: fold to-scalar-int conversion insns

[deliverable/binutils-gdb.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index 4cf7b9daf5355b571342caee0517b0ed92e3d3cb..f201d1ae9dde38c4a4c9b8ad87dd002d9f328db1 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -81,9 +81,6 @@
  #define SHORT_MNEM_SUFFIX 's'
  #define LONG_MNEM_SUFFIX  'l'
  #define QWORD_MNEM_SUFFIX  'q'
-#define XMMWORD_MNEM_SUFFIX  'x'
-#define YMMWORD_MNEM_SUFFIX 'y'
-#define ZMMWORD_MNEM_SUFFIX 'z'
  /* Intel Syntax.  Use a non-ascii letter since since it never appears
     in instructions.  */
  #define LONG_DOUBLE_MNEM_SUFFIX '\1'
@@ -265,7 +262,6 @@ enum i386_error
      number_of_operands_mismatch,
      invalid_instruction_suffix,
      bad_imm4,
-    old_gcc_only,
      unsupported_with_intel_mnemonic,
      unsupported_syntax,
      unsupported,
@@ -565,9 +561,6 @@ static int intel64;
     0 if att mnemonic.  */
  static int intel_mnemonic = !SYSV386_COMPAT;
  
-/* 1 if support old (<= 2.8.1) versions of gcc.  */
-static int old_gcc = OLDGCC_COMPAT;
-
  /* 1 if pseudo registers are permitted.  */
  static int allow_pseudo_reg = 0;
  
@@ -1687,15 +1680,9 @@ cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
  
  #define CPU_FLAGS_ARCH_MATCH           0x1
  #define CPU_FLAGS_64BIT_MATCH          0x2
-#define CPU_FLAGS_AES_MATCH            0x4
-#define CPU_FLAGS_PCLMUL_MATCH         0x8
-#define CPU_FLAGS_AVX_MATCH           0x10
  
-#define CPU_FLAGS_32BIT_MATCH \
-  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_AES_MATCH \
-   | CPU_FLAGS_PCLMUL_MATCH | CPU_FLAGS_AVX_MATCH)
  #define CPU_FLAGS_PERFECT_MATCH \
-  (CPU_FLAGS_32BIT_MATCH | CPU_FLAGS_64BIT_MATCH)
+  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
  
  /* Return CPU flags match bits. */
  
@@ -1711,55 +1698,42 @@ cpu_flags_match (const insn_template *t)
    if (cpu_flags_all_zero (&x))
      {
        /* This instruction is available on all archs.  */
-      match |= CPU_FLAGS_32BIT_MATCH;
+      match |= CPU_FLAGS_ARCH_MATCH;
      }
    else
      {
        /* This instruction is available only on some archs.  */
        i386_cpu_flags cpu = cpu_arch_flags;
  
+      /* AVX512VL is no standalone feature - match it and then strip it.  */
+      if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
+       return match;
+      x.bitfield.cpuavx512vl = 0;
+
        cpu = cpu_flags_and (x, cpu);
        if (!cpu_flags_all_zero (&cpu))
         {
           if (x.bitfield.cpuavx)
             {
-             /* We only need to check AES/PCLMUL/SSE2AVX with AVX.  */
-             if (cpu.bitfield.cpuavx)
-               {
-                 /* Check SSE2AVX.  */
-                 if (!t->opcode_modifier.sse2avx|| sse2avx)
-                   {
-                     match |= (CPU_FLAGS_ARCH_MATCH
-                               | CPU_FLAGS_AVX_MATCH);
-                     /* Check AES.  */
-                     if (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
-                       match |= CPU_FLAGS_AES_MATCH;
-                     /* Check PCLMUL.  */
-                     if (!x.bitfield.cpupclmul
-                         || cpu.bitfield.cpupclmul)
-                       match |= CPU_FLAGS_PCLMUL_MATCH;
-                   }
-               }
-             else
+             /* We need to check a few extra flags with AVX.  */
+             if (cpu.bitfield.cpuavx
+                 && (!t->opcode_modifier.sse2avx || sse2avx)
+                 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
+                 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
+                 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
                 match |= CPU_FLAGS_ARCH_MATCH;
             }
-         else if (x.bitfield.cpuavx512vl)
+         else if (x.bitfield.cpuavx512f)
             {
-             /* Match AVX512VL.  */
-             if (cpu.bitfield.cpuavx512vl)
-               {
-                 /* Need another match.  */
-                 cpu.bitfield.cpuavx512vl = 0;
-                 if (!cpu_flags_all_zero (&cpu))
-                   match |= CPU_FLAGS_32BIT_MATCH;
-                 else
-                   match |= CPU_FLAGS_ARCH_MATCH;
-               }
-             else
+             /* We need to check a few extra flags with AVX512F.  */
+             if (cpu.bitfield.cpuavx512f
+                 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
+                 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
+                 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
                 match |= CPU_FLAGS_ARCH_MATCH;
             }
           else
-           match |= CPU_FLAGS_32BIT_MATCH;
+           match |= CPU_FLAGS_ARCH_MATCH;
         }
      }
    return match;
@@ -1963,10 +1937,13 @@ match_mem_size (const insn_template *t, unsigned int j)
                    && !t->operand_types[j].bitfield.fword)
                /* For scalar opcode templates to allow register and memory
                   operands at the same time, some special casing is needed
-                 here.  */
+                 here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
+                 down-conversion vpmov*.  */
                || ((t->operand_types[j].bitfield.regsimd
                     && !t->opcode_modifier.broadcast
-                   && (t->operand_types[j].bitfield.dword
+                   && (t->operand_types[j].bitfield.byte
+                       || t->operand_types[j].bitfield.word
+                       || t->operand_types[j].bitfield.dword
                         || t->operand_types[j].bitfield.qword))
                    ? (i.types[j].bitfield.xmmword
                       || i.types[j].bitfield.ymmword
@@ -2648,6 +2625,10 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
                   cpu_arch_flags = flags;
                   cpu_arch_isa_flags = flags;
                 }
+             else
+               cpu_arch_isa_flags
+                 = cpu_flags_or (cpu_arch_isa_flags,
+                                 cpu_arch[j].flags);
               (void) restore_line_pointer (e);
               demand_empty_rest_of_line ();
               return;
@@ -3473,6 +3454,14 @@ build_vex_prefix (const insn_template *t)
      }
  }
  
+static INLINE bfd_boolean
+is_evex_encoding (const insn_template *t)
+{
+  return t->opcode_modifier.evex
+        || t->opcode_modifier.broadcast || t->opcode_modifier.masking
+        || t->opcode_modifier.staticrounding || t->opcode_modifier.sae;
+}
+
  /* Build the EVEX prefix.  */
  
  static void
@@ -3607,6 +3596,29 @@ build_evex_prefix (void)
        /* Encode the vector length.  */
        unsigned int vec_length;
  
+      if (!i.tm.opcode_modifier.evex
+         || i.tm.opcode_modifier.evex == EVEXDYN)
+       {
+         unsigned int op;
+
+         vec_length = 0;
+         for (op = 0; op < i.tm.operands; ++op)
+           if (i.tm.operand_types[op].bitfield.xmmword
+               + i.tm.operand_types[op].bitfield.ymmword
+               + i.tm.operand_types[op].bitfield.zmmword > 1)
+             {
+               if (i.types[op].bitfield.zmmword)
+                 i.tm.opcode_modifier.evex = EVEX512;
+               else if (i.types[op].bitfield.ymmword)
+                 i.tm.opcode_modifier.evex = EVEX256;
+               else if (i.types[op].bitfield.xmmword)
+                 i.tm.opcode_modifier.evex = EVEX128;
+               else
+                 continue;
+               break;
+             }
+       }
+
        switch (i.tm.opcode_modifier.evex)
         {
         case EVEXLIG: /* LL' is ignored */
@@ -3704,7 +3716,8 @@ bad_register_operand:
    gas_assert (i.imm_operands <= 1
               && (i.operands <= 2
                   || ((i.tm.opcode_modifier.vex
-                      || i.tm.opcode_modifier.evex)
+                      || i.tm.opcode_modifier.vexopcode
+                      || is_evex_encoding (&i.tm))
                       && i.operands <= 4)));
  
    exp = &im_expressions[i.imm_operands++];
@@ -3795,7 +3808,8 @@ optimize_encoding (void)
         }
      }
    else if (flag_code == CODE_64BIT
-          && ((i.reg_operands == 1
+          && ((i.types[1].bitfield.qword
+               && i.reg_operands == 1
                 && i.imm_operands == 1
                 && i.op[0].imms->X_op == O_constant
                 && ((i.tm.base_opcode == 0xb0
@@ -3810,12 +3824,16 @@ optimize_encoding (void)
                             || ((i.tm.base_opcode == 0xf6
                                  || i.tm.base_opcode == 0xc6)
                                 && i.tm.extension_opcode == 0x0)))))
-              || (i.reg_operands == 2
-                  && i.op[0].regs == i.op[1].regs
-                  && ((i.tm.base_opcode == 0x30
-                       || i.tm.base_opcode == 0x28)
-                      && i.tm.extension_opcode == None)))
-          && i.types[1].bitfield.qword)
+              || (i.types[0].bitfield.qword
+                  && ((i.reg_operands == 2
+                       && i.op[0].regs == i.op[1].regs
+                       && ((i.tm.base_opcode == 0x30
+                            || i.tm.base_opcode == 0x28)
+                           && i.tm.extension_opcode == None))
+                      || (i.reg_operands == 1
+                          && i.operands == 1
+                          && i.tm.base_opcode == 0x30
+                          && i.tm.extension_opcode == None)))))
      {
        /* Optimize: -O:
            andq $imm31, %r64   -> andl $imm31, %r32
@@ -3859,8 +3877,10 @@ optimize_encoding (void)
            && (i.tm.opcode_modifier.vex
                || (!i.mask
                    && !i.rounding
-                  && i.tm.opcode_modifier.evex
-                  && cpu_arch_flags.bitfield.cpuavx512vl))
+                  && is_evex_encoding (&i.tm)
+                  && (i.vec_encoding != vex_encoding_evex
+                      || i.tm.cpu_flags.bitfield.cpuavx512vl
+                      || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
            && ((i.tm.base_opcode == 0x55
                 || i.tm.base_opcode == 0x6655
                 || i.tm.base_opcode == 0x66df
@@ -3902,15 +3922,9 @@ optimize_encoding (void)
                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
         */
-      if (i.tm.opcode_modifier.evex)
+      if (is_evex_encoding (&i.tm))
         {
-         /* If only lower 16 vector registers are used, we can use
-            VEX encoding.  */
-         for (j = 0; j < 3; j++)
-           if (register_number (i.op[j].regs) > 15)
-             break;
-
-         if (j < 3)
+         if (i.vec_encoding == vex_encoding_evex)
             i.tm.opcode_modifier.evex = EVEX128;
           else
             {
@@ -4144,7 +4158,8 @@ md_assemble (char *line)
        as_warn (_("translating to `%sp'"), i.tm.name);
      }
  
-  if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
+  if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.vexopcode
+      || is_evex_encoding (&i.tm))
      {
        if (flag_code == CODE_16BIT)
         {
@@ -4506,34 +4521,26 @@ check_suffix:
      {
        supported |= cpu_flags_match (t);
        if (supported == CPU_FLAGS_PERFECT_MATCH)
-       goto skip;
-    }
+       {
+         if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
+           as_warn (_("use .code16 to ensure correct addressing mode"));
  
-  if (!(supported & CPU_FLAGS_64BIT_MATCH))
-    {
-      as_bad (flag_code == CODE_64BIT
-             ? _("`%s' is not supported in 64-bit mode")
-             : _("`%s' is only supported in 64-bit mode"),
-             current_templates->start->name);
-      return NULL;
-    }
-  if (supported != CPU_FLAGS_PERFECT_MATCH)
-    {
-      as_bad (_("`%s' is not supported on `%s%s'"),
-             current_templates->start->name,
-             cpu_arch_name ? cpu_arch_name : default_arch,
-             cpu_sub_arch_name ? cpu_sub_arch_name : "");
-      return NULL;
+         return l;
+       }
      }
  
-skip:
-  if (!cpu_arch_flags.bitfield.cpui386
-          && (flag_code != CODE_16BIT))
-    {
-      as_warn (_("use .code16 to ensure correct addressing mode"));
-    }
+  if (!(supported & CPU_FLAGS_64BIT_MATCH))
+    as_bad (flag_code == CODE_64BIT
+           ? _("`%s' is not supported in 64-bit mode")
+           : _("`%s' is only supported in 64-bit mode"),
+           current_templates->start->name);
+  else
+    as_bad (_("`%s' is not supported on `%s%s'"),
+           current_templates->start->name,
+           cpu_arch_name ? cpu_arch_name : default_arch,
+           cpu_sub_arch_name ? cpu_sub_arch_name : "");
  
-  return l;
+  return NULL;
  }
  
  static char *
@@ -5046,14 +5053,15 @@ check_VecOperands (const insn_template *t)
        /* Check if specified broadcast is supported in this instruction,
          and it's applied to memory operand of DWORD or QWORD type,
          depending on VecESize.  */
+      op = i.broadcast->operand;
        if (i.broadcast->type != t->opcode_modifier.broadcast
-         || !i.types[i.broadcast->operand].bitfield.mem
+         || !i.types[op].bitfield.mem
           || (t->opcode_modifier.vecesize == 0
-             && !i.types[i.broadcast->operand].bitfield.dword
-             && !i.types[i.broadcast->operand].bitfield.unspecified)
+             && !i.types[op].bitfield.dword
+             && !i.types[op].bitfield.unspecified)
           || (t->opcode_modifier.vecesize == 1
-             && !i.types[i.broadcast->operand].bitfield.qword
-             && !i.types[i.broadcast->operand].bitfield.unspecified))
+             && !i.types[op].bitfield.qword
+             && !i.types[op].bitfield.unspecified))
         goto bad_broadcast;
  
        broadcasted_opnd_size = t->opcode_modifier.vecesize ? 64 : 32;
@@ -5069,9 +5077,9 @@ check_VecOperands (const insn_template *t)
         goto bad_broadcast;
  
        if ((broadcasted_opnd_size == 256
-          && !t->operand_types[i.broadcast->operand].bitfield.ymmword)
+          && !t->operand_types[op].bitfield.ymmword)
           || (broadcasted_opnd_size == 512
-             && !t->operand_types[i.broadcast->operand].bitfield.zmmword))
+             && !t->operand_types[op].bitfield.zmmword))
         {
         bad_broadcast:
           i.error = unsupported_broadcast;
@@ -5174,7 +5182,7 @@ VEX_check_operands (const insn_template *t)
    if (i.vec_encoding == vex_encoding_evex)
      {
        /* This instruction must be encoded with EVEX prefix.  */
-      if (!t->opcode_modifier.evex)
+      if (!is_evex_encoding (t))
         {
           i.error = unsupported;
           return 1;
@@ -5277,11 +5285,6 @@ match_template (char mnem_suffix)
        if (!found_cpu_match)
         continue;
  
-      /* Check old gcc support. */
-      i.error = old_gcc_only;
-      if (!old_gcc && t->opcode_modifier.oldgcc)
-       continue;
-
        /* Check AT&T mnemonic.   */
        i.error = unsupported_with_intel_mnemonic;
        if (intel_mnemonic && t->opcode_modifier.attmnemonic)
@@ -5425,6 +5428,14 @@ match_template (char mnem_suffix)
               && operand_type_equal (&i.types [0], &acc32)
               && operand_type_equal (&i.types [1], &acc32))
             continue;
+         /* xrelease mov %eax, <disp> is another special case.  It must not
+            match the accumulator-only encoding of mov.  */
+         if (flag_code != CODE_64BIT
+             && i.hle_prefix
+             && t->base_opcode == 0xa0
+             && i.types[0].bitfield.acc
+             && operand_type_check (i.types[1], anymem))
+           continue;
           /* If we want store form, we reverse direction of operands.  */
           if (i.dir_encoding == dir_encoding_store
               && t->opcode_modifier.d)
@@ -5511,23 +5522,29 @@ check_reverse:
                 case 4:
                   if (!operand_type_match (overlap3, i.types[3])
                       || (check_register
-                         && !operand_type_register_match (i.types[2],
-                                                          operand_types[2],
-                                                          i.types[3],
-                                                          operand_types[3])))
+                         && (!operand_type_register_match (i.types[1],
+                                                           operand_types[1],
+                                                           i.types[3],
+                                                           operand_types[3])
+                             || !operand_type_register_match (i.types[2],
+                                                              operand_types[2],
+                                                              i.types[3],
+                                                              operand_types[3]))))
                     continue;
                   /* Fall through.  */
                 case 3:
                   /* Here we make use of the fact that there are no
-                    reverse match 3 operand instructions, and all 3
-                    operand instructions only need to be checked for
-                    register consistency between operands 2 and 3.  */
+                    reverse match 3 operand instructions.  */
                   if (!operand_type_match (overlap2, i.types[2])
                       || (check_register
-                         && !operand_type_register_match (i.types[1],
-                                                          operand_types[1],
-                                                          i.types[2],
-                                                          operand_types[2])))
+                         && (!operand_type_register_match (i.types[0],
+                                                           operand_types[0],
+                                                           i.types[2],
+                                                           operand_types[2])
+                             || !operand_type_register_match (i.types[1],
+                                                              operand_types[1],
+                                                              i.types[2],
+                                                              operand_types[2]))))
                     continue;
                   break;
                 }
@@ -5578,9 +5595,6 @@ check_reverse:
         case bad_imm4:
           err_msg = _("constant doesn't fit in 4 bits");
           break;
-       case old_gcc_only:
-         err_msg = _("only supported with old gcc");
-         break;
         case unsupported_with_intel_mnemonic:
           err_msg = _("unsupported with Intel mnemonic");
           break;
@@ -5763,26 +5777,19 @@ process_suffix (void)
                 if (!i.tm.operand_types[op].bitfield.inoutportreg
                     && !i.tm.operand_types[op].bitfield.shiftcount)
                   {
-                   if (i.types[op].bitfield.reg && i.types[op].bitfield.byte)
-                     {
-                       i.suffix = BYTE_MNEM_SUFFIX;
-                       break;
-                     }
-                   if (i.types[op].bitfield.reg && i.types[op].bitfield.word)
-                     {
-                       i.suffix = WORD_MNEM_SUFFIX;
-                       break;
-                     }
-                   if (i.types[op].bitfield.reg && i.types[op].bitfield.dword)
-                     {
-                       i.suffix = LONG_MNEM_SUFFIX;
-                       break;
-                     }
-                   if (i.types[op].bitfield.reg && i.types[op].bitfield.qword)
-                     {
-                       i.suffix = QWORD_MNEM_SUFFIX;
-                       break;
-                     }
+                   if (!i.types[op].bitfield.reg)
+                     continue;
+                   if (i.types[op].bitfield.byte)
+                     i.suffix = BYTE_MNEM_SUFFIX;
+                   else if (i.types[op].bitfield.word)
+                     i.suffix = WORD_MNEM_SUFFIX;
+                   else if (i.types[op].bitfield.dword)
+                     i.suffix = LONG_MNEM_SUFFIX;
+                   else if (i.types[op].bitfield.qword)
+                     i.suffix = QWORD_MNEM_SUFFIX;
+                   else
+                     continue;
+                   break;
                   }
             }
         }
@@ -5799,7 +5806,9 @@ process_suffix (void)
         {
           if (intel_syntax
               && i.tm.opcode_modifier.ignoresize
-             && i.tm.opcode_modifier.no_lsuf)
+             && i.tm.opcode_modifier.no_lsuf
+             && !i.tm.opcode_modifier.todword
+             && !i.tm.opcode_modifier.toqword)
             i.suffix = 0;
           else if (!check_long_reg ())
             return 0;
@@ -5808,7 +5817,9 @@ process_suffix (void)
         {
           if (intel_syntax
               && i.tm.opcode_modifier.ignoresize
-             && i.tm.opcode_modifier.no_qsuf)
+             && i.tm.opcode_modifier.no_qsuf
+             && !i.tm.opcode_modifier.todword
+             && !i.tm.opcode_modifier.toqword)
             i.suffix = 0;
           else if (!check_qword_reg ())
             return 0;
@@ -5822,13 +5833,6 @@ process_suffix (void)
           else if (!check_word_reg ())
             return 0;
         }
-      else if (i.suffix == XMMWORD_MNEM_SUFFIX
-              || i.suffix == YMMWORD_MNEM_SUFFIX
-              || i.suffix == ZMMWORD_MNEM_SUFFIX)
-       {
-         /* Skip if the instruction has x/y/z suffix.  match_template
-            should check if it is a valid suffix.  */
-       }
        else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
         /* Do nothing if the instruction is going to ignore the prefix.  */
         ;
@@ -5909,15 +5913,19 @@ process_suffix (void)
         }
      }
  
-  /* Change the opcode based on the operand size given by i.suffix;
-     We don't need to change things for byte insns.  */
-
-  if (i.suffix
-      && i.suffix != BYTE_MNEM_SUFFIX
-      && i.suffix != XMMWORD_MNEM_SUFFIX
-      && i.suffix != YMMWORD_MNEM_SUFFIX
-      && i.suffix != ZMMWORD_MNEM_SUFFIX)
+  /* Change the opcode based on the operand size given by i.suffix.  */
+  switch (i.suffix)
      {
+    /* Size floating point instruction.  */
+    case LONG_MNEM_SUFFIX:
+      if (i.tm.opcode_modifier.floatmf)
+       {
+         i.tm.base_opcode ^= 4;
+         break;
+       }
+    /* fall through */
+    case WORD_MNEM_SUFFIX:
+    case QWORD_MNEM_SUFFIX:
        /* It's not a byte, select word/dword operation.  */
        if (i.tm.opcode_modifier.w)
         {
@@ -5926,7 +5934,8 @@ process_suffix (void)
           else
             i.tm.base_opcode |= 1;
         }
-
+    /* fall through */
+    case SHORT_MNEM_SUFFIX:
        /* Now select between word & dword operations via the operand
          size prefix, except for instructions that will ignore this
          prefix anyway.  */
@@ -5942,7 +5951,6 @@ process_suffix (void)
               return 0;
         }
        else if (i.suffix != QWORD_MNEM_SUFFIX
-              && i.suffix != LONG_DOUBLE_MNEM_SUFFIX
                && !i.tm.opcode_modifier.ignoresize
                && !i.tm.opcode_modifier.floatmf
                && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
@@ -5961,27 +5969,17 @@ process_suffix (void)
        /* Set mode64 for an operand.  */
        if (i.suffix == QWORD_MNEM_SUFFIX
           && flag_code == CODE_64BIT
-         && !i.tm.opcode_modifier.norex64)
-       {
+         && !i.tm.opcode_modifier.norex64
           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
-            need rex64.  cmpxchg8b is also a special case. */
-         if (! (i.operands == 2
-                && i.tm.base_opcode == 0x90
-                && i.tm.extension_opcode == None
-                && operand_type_equal (&i.types [0], &acc64)
-                && operand_type_equal (&i.types [1], &acc64))
-             && ! (i.operands == 1
-                   && i.tm.base_opcode == 0xfc7
-                   && i.tm.extension_opcode == 1
-                   && !operand_type_check (i.types [0], reg)
-                   && operand_type_check (i.types [0], anymem)))
-           i.rex |= REX_W;
-       }
-
-      /* Size floating point instruction.  */
-      if (i.suffix == LONG_MNEM_SUFFIX)
-       if (i.tm.opcode_modifier.floatmf)
-         i.tm.base_opcode ^= 4;
+            need rex64.  */
+         && ! (i.operands == 2
+               && i.tm.base_opcode == 0x90
+               && i.tm.extension_opcode == None
+               && operand_type_equal (&i.types [0], &acc64)
+               && operand_type_equal (&i.types [1], &acc64)))
+       i.rex |= REX_W;
+
+      break;
      }
  
    return 1;
@@ -6727,7 +6725,7 @@ build_modrm_byte (void)
             }
           break;
         case 5:
-         if (i.tm.opcode_modifier.evex)
+         if (is_evex_encoding (&i.tm))
             {
               /* For EVEX instructions, when there are 5 operands, the
                  first one must be immediate operand.  If the second one
@@ -7205,9 +7203,10 @@ build_modrm_byte (void)
                 }
               else
                 {
-                 /* There are only 2 operands.  */
-                 gas_assert (op < 2 && i.operands == 2);
-                 vex_reg = 1;
+                 /* There are only 2 non-immediate operands.  */
+                 gas_assert (op < i.imm_operands + 2
+                             && i.operands == i.imm_operands + 2);
+                 vex_reg = i.imm_operands + 1;
                 }
             }
           else
@@ -8546,6 +8545,12 @@ check_VecOperations (char *op_string, char *op_end)
               return NULL;
             }
           op_string++;
+
+         /* Strip whitespace since the addition of pseudo prefixes
+            changed how the scrubber treats '{'.  */
+         if (is_space_char (*op_string))
+           ++op_string;
+
           continue;
         }
      unknown_vec_op:
@@ -10326,7 +10331,7 @@ const char *md_shortopts = "qnO::";
  #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
  #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
  #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
-#define OPTION_MOLD_GCC (OPTION_MD_BASE + 9)
+#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
  #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
  #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
  #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
@@ -10342,7 +10347,6 @@ const char *md_shortopts = "qnO::";
  #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
  #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
  #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
-#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 25)
  
  struct option md_longopts[] =
  {
@@ -10362,7 +10366,6 @@ struct option md_longopts[] =
    {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
    {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
    {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
-  {"mold-gcc", no_argument, NULL, OPTION_MOLD_GCC},
    {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
    {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
    {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
@@ -10547,6 +10550,10 @@ md_parse_option (int c, const char *arg)
                       cpu_arch_flags = flags;
                       cpu_arch_isa_flags = flags;
                     }
+                 else
+                   cpu_arch_isa_flags
+                     = cpu_flags_or (cpu_arch_isa_flags,
+                                     cpu_arch[j].flags);
                   break;
                 }
             }
@@ -10634,10 +10641,6 @@ md_parse_option (int c, const char *arg)
        allow_naked_reg = 1;
        break;
  
-    case OPTION_MOLD_GCC:
-      old_gcc = 1;
-      break;
-
      case OPTION_MSSE2AVX:
        sse2avx = 1;
        break;
@@ -10948,8 +10951,6 @@ md_show_usage (FILE *stream)
    fprintf (stream, _("\
    -mnaked-reg             don't require `%%' prefix for registers\n"));
    fprintf (stream, _("\
-  -mold-gcc               support old (<= 2.8.1) versions of gcc\n"));
-  fprintf (stream, _("\
    -madd-bnd-prefix        add BND prefix for all valid branches\n"));
    fprintf (stream, _("\
    -mshared                disable branch optimization for shared code\n"));