X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-i386.c;h=c4c34a1c3f1396336ea856935657cdc5e78a6867;hb=2dde194857729c4f436395683e233c203ae768ab;hp=7d170f491dace71f8d60f4506a874f4ed3f3276f;hpb=922d8de8c1bd5cbaa9eadd77437daea021864c1d;p=deliverable%2Fbinutils-gdb.git

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 7d170f491d..c4c34a1c3f 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1,6 +1,6 @@
 /* tc-i386.c -- Assemble code for the Intel 80386
    Copyright 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
    Free Software Foundation, Inc.
 
    This file is part of GAS, the GNU Assembler.
@@ -59,14 +59,15 @@
    WAIT_PREFIX must be the first prefix since FWAIT is really is an
    instruction, and so must come before any prefixes.
    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
-   LOCKREP_PREFIX.  */
+   REP_PREFIX, LOCK_PREFIX.  */
 #define WAIT_PREFIX	0
 #define SEG_PREFIX	1
 #define ADDR_PREFIX	2
 #define DATA_PREFIX	3
-#define LOCKREP_PREFIX	4
-#define REX_PREFIX	5       /* must come last.  */
-#define MAX_PREFIXES	6	/* max prefixes per opcode */
+#define REP_PREFIX	4
+#define LOCK_PREFIX	5
+#define REX_PREFIX	6       /* must come last.  */
+#define MAX_PREFIXES	7	/* max prefixes per opcode */
 
 /* we define the syntax here (modulo base,index,scale syntax) */
 #define REGISTER_PREFIX '%'
@@ -97,8 +98,8 @@
   */
 typedef struct
 {
-  const template *start;
-  const template *end;
+  const insn_template *start;
+  const insn_template *end;
 }
 templates;
 
@@ -127,11 +128,15 @@ sib_byte;
 typedef struct
 {
   const char *name;		/* arch name */
+  unsigned int len;		/* arch string length */
   enum processor_type type;	/* arch type */
   i386_cpu_flags flags;		/* cpu feature flags */
+  unsigned int skip;		/* show_arch should skip this. */
+  unsigned int negated;		/* turn off indicated flags.  */
 }
 arch_entry;
 
+static void update_code_flag (int, int);
 static void set_code_flag (int);
 static void set_16bit_gcc_code_flag (int);
 static void set_intel_syntax (int);
@@ -159,7 +164,7 @@ static void swap_operands (void);
 static void swap_2_operands (int, int);
 static void optimize_imm (void);
 static void optimize_disp (void);
-static const template *match_template (void);
+static const insn_template *match_template (void);
 static int check_string (void);
 static int process_suffix (void);
 static int check_byte_reg (void);
@@ -201,10 +206,24 @@ union i386_op
     const reg_entry *regs;
   };
 
+enum i386_error
+  {
+    operand_size_mismatch,
+    operand_type_mismatch,
+    register_type_mismatch,
+    number_of_operands_mismatch,
+    invalid_instruction_suffix,
+    bad_imm4,
+    old_gcc_only,
+    unsupported_with_intel_mnemonic,
+    unsupported_syntax,
+    unsupported
+  };
+
 struct _i386_insn
   {
     /* TM holds the template for the insn were currently assembling.  */
-    template tm;
+    insn_template tm;
 
     /* SUFFIX holds the instruction size suffix for byte, word, dword
        or qword, if given.  */
@@ -256,7 +275,13 @@ struct _i386_insn
     vex_prefix vex;
 
     /* Swap operand in encoding.  */
-    unsigned int swap_operand : 1;
+    unsigned int swap_operand;
+
+    /* Force 32bit displacement in encoding.  */
+    unsigned int disp32_encoding;
+
+    /* Error message.  */
+    enum i386_error error;
   };
 
 typedef struct _i386_insn i386_insn;
@@ -364,8 +389,24 @@ enum flag_code {
 
 static enum flag_code flag_code;
 static unsigned int object_64bit;
+static unsigned int disallow_64bit_reloc;
 static int use_rela_relocations = 0;
 
+#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
+     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
+     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
+
+/* The ELF ABI to use.  */
+enum x86_elf_abi
+{
+  I386_ABI,
+  X86_64_ABI,
+  X86_64_X32_ABI
+};
+
+static enum x86_elf_abi x86_elf_abi = I386_ABI;
+#endif
+
 /* The names used to print error messages.  */
 static const char *flag_code_names[] =
   {
@@ -445,6 +486,13 @@ static unsigned int no_cond_jump_promotion = 0;
 /* Encode SSE instructions with VEX prefix.  */
 static unsigned int sse2avx;
 
+/* Encode scalar AVX instructions with specific vector length.  */
+static enum
+  {
+    vex128 = 0,
+    vex256
+  } avxscalar;
+
 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 static symbolS *GOT_symbol;
 
@@ -535,120 +583,158 @@ const relax_typeS md_relax_table[] =
 
 static const arch_entry cpu_arch[] =
 {
-  { "generic32", PROCESSOR_GENERIC32,
-    CPU_GENERIC32_FLAGS },
-  { "generic64", PROCESSOR_GENERIC64,
-    CPU_GENERIC64_FLAGS },
-  { "i8086", PROCESSOR_UNKNOWN,
-    CPU_NONE_FLAGS },
-  { "i186", PROCESSOR_UNKNOWN,
-    CPU_I186_FLAGS },
-  { "i286", PROCESSOR_UNKNOWN,
-    CPU_I286_FLAGS },
-  { "i386", PROCESSOR_I386,
-    CPU_I386_FLAGS },
-  { "i486", PROCESSOR_I486,
-    CPU_I486_FLAGS },
-  { "i586", PROCESSOR_PENTIUM,
-    CPU_I586_FLAGS },
-  { "i686", PROCESSOR_PENTIUMPRO,
-    CPU_I686_FLAGS },
-  { "pentium", PROCESSOR_PENTIUM,
-    CPU_I586_FLAGS },
-  { "pentiumpro", PROCESSOR_PENTIUMPRO,
-    CPU_I686_FLAGS },
-  { "pentiumii", PROCESSOR_PENTIUMPRO,
-    CPU_P2_FLAGS },
-  { "pentiumiii",PROCESSOR_PENTIUMPRO,
-    CPU_P3_FLAGS },
-  { "pentium4", PROCESSOR_PENTIUM4,
-    CPU_P4_FLAGS },
-  { "prescott", PROCESSOR_NOCONA,
-    CPU_CORE_FLAGS },
-  { "nocona", PROCESSOR_NOCONA,
-    CPU_NOCONA_FLAGS },
-  { "yonah", PROCESSOR_CORE,
-    CPU_CORE_FLAGS },
-  { "core", PROCESSOR_CORE,
-    CPU_CORE_FLAGS },
-  { "merom", PROCESSOR_CORE2,
-    CPU_CORE2_FLAGS },
-  { "core2", PROCESSOR_CORE2,
-    CPU_CORE2_FLAGS },
-  { "corei7", PROCESSOR_COREI7,
-    CPU_COREI7_FLAGS },
-  { "k6", PROCESSOR_K6,
-    CPU_K6_FLAGS },
-  { "k6_2", PROCESSOR_K6,
-    CPU_K6_2_FLAGS },
-  { "athlon", PROCESSOR_ATHLON,
-    CPU_ATHLON_FLAGS },
-  { "sledgehammer", PROCESSOR_K8,
-    CPU_K8_FLAGS },
-  { "opteron", PROCESSOR_K8,
-    CPU_K8_FLAGS },
-  { "k8", PROCESSOR_K8,
-    CPU_K8_FLAGS },
-  { "amdfam10", PROCESSOR_AMDFAM10,
-    CPU_AMDFAM10_FLAGS },
-  { ".mmx", PROCESSOR_UNKNOWN,
-    CPU_MMX_FLAGS },
-  { ".sse", PROCESSOR_UNKNOWN,
-    CPU_SSE_FLAGS },
-  { ".sse2", PROCESSOR_UNKNOWN,
-    CPU_SSE2_FLAGS },
-  { ".sse3", PROCESSOR_UNKNOWN,
-    CPU_SSE3_FLAGS },
-  { ".ssse3", PROCESSOR_UNKNOWN,
-    CPU_SSSE3_FLAGS },
-  { ".sse4.1", PROCESSOR_UNKNOWN,
-    CPU_SSE4_1_FLAGS },
-  { ".sse4.2", PROCESSOR_UNKNOWN,
-    CPU_SSE4_2_FLAGS },
-  { ".sse4", PROCESSOR_UNKNOWN,
-    CPU_SSE4_2_FLAGS },
-  { ".avx", PROCESSOR_UNKNOWN,
-    CPU_AVX_FLAGS },
-  { ".vmx", PROCESSOR_UNKNOWN,
-    CPU_VMX_FLAGS },
-  { ".smx", PROCESSOR_UNKNOWN,
-    CPU_SMX_FLAGS },
-  { ".xsave", PROCESSOR_UNKNOWN,
-    CPU_XSAVE_FLAGS },
-  { ".aes", PROCESSOR_UNKNOWN,
-    CPU_AES_FLAGS },
-  { ".pclmul", PROCESSOR_UNKNOWN,
-    CPU_PCLMUL_FLAGS },
-  { ".clmul", PROCESSOR_UNKNOWN,
-    CPU_PCLMUL_FLAGS },
-  { ".fma", PROCESSOR_UNKNOWN,
-    CPU_FMA_FLAGS },
-  { ".fma4", PROCESSOR_UNKNOWN,
-    CPU_FMA4_FLAGS },
-  { ".movbe", PROCESSOR_UNKNOWN,
-    CPU_MOVBE_FLAGS },
-  { ".ept", PROCESSOR_UNKNOWN,
-    CPU_EPT_FLAGS },
-  { ".clflush", PROCESSOR_UNKNOWN,
-    CPU_CLFLUSH_FLAGS },
-  { ".syscall", PROCESSOR_UNKNOWN,
-    CPU_SYSCALL_FLAGS },
-  { ".rdtscp", PROCESSOR_UNKNOWN,
-    CPU_RDTSCP_FLAGS },
-  { ".3dnow", PROCESSOR_UNKNOWN,
-    CPU_3DNOW_FLAGS },
-  { ".3dnowa", PROCESSOR_UNKNOWN,
-    CPU_3DNOWA_FLAGS },
-  { ".padlock", PROCESSOR_UNKNOWN,
-    CPU_PADLOCK_FLAGS },
-  { ".pacifica", PROCESSOR_UNKNOWN,
-    CPU_SVME_FLAGS },
-  { ".svme", PROCESSOR_UNKNOWN,
-    CPU_SVME_FLAGS },
-  { ".sse4a", PROCESSOR_UNKNOWN,
-    CPU_SSE4A_FLAGS },
-  { ".abm", PROCESSOR_UNKNOWN,
-    CPU_ABM_FLAGS },
+  /* Do not replace the first two entries - i386_target_format()
+     relies on them being there in this order.  */
+  { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
+    CPU_GENERIC32_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
+    CPU_GENERIC64_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
+    CPU_NONE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
+    CPU_I186_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
+    CPU_I286_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
+    CPU_I386_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
+    CPU_I486_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
+    CPU_I586_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
+    CPU_I686_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
+    CPU_I586_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
+    CPU_PENTIUMPRO_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
+    CPU_P2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
+    CPU_P3_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
+    CPU_P4_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
+    CPU_CORE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
+    CPU_NOCONA_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
+    CPU_CORE_FLAGS, 1, 0 },
+  { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
+    CPU_CORE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
+    CPU_CORE2_FLAGS, 1, 0 },
+  { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
+    CPU_CORE2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
+    CPU_COREI7_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
+    CPU_L1OM_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
+    CPU_K6_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
+    CPU_K6_2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
+    CPU_ATHLON_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
+    CPU_K8_FLAGS, 1, 0 },
+  { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
+    CPU_K8_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
+    CPU_K8_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
+    CPU_AMDFAM10_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BDVER1,
+    CPU_BDVER1_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
+    CPU_8087_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
+    CPU_287_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
+    CPU_387_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".no87"), PROCESSOR_UNKNOWN,
+    CPU_ANY87_FLAGS, 0, 1 },
+  { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
+    CPU_MMX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".nommx"), PROCESSOR_UNKNOWN,
+    CPU_3DNOWA_FLAGS, 0, 1 },
+  { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
+    CPU_SSE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
+    CPU_SSE2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
+    CPU_SSE3_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
+    CPU_SSSE3_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
+    CPU_SSE4_1_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
+    CPU_SSE4_2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
+    CPU_SSE4_2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".nosse"), PROCESSOR_UNKNOWN,
+    CPU_ANY_SSE_FLAGS, 0, 1 },
+  { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
+    CPU_AVX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".noavx"), PROCESSOR_UNKNOWN,
+    CPU_ANY_AVX_FLAGS, 0, 1 },
+  { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
+    CPU_VMX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
+    CPU_SMX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
+    CPU_XSAVE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
+    CPU_XSAVEOPT_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
+    CPU_AES_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
+    CPU_PCLMUL_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
+    CPU_PCLMUL_FLAGS, 1, 0 },
+  { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
+    CPU_FSGSBASE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
+    CPU_RDRND_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
+    CPU_F16C_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
+    CPU_FMA_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
+    CPU_FMA4_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
+    CPU_XOP_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
+    CPU_LWP_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
+    CPU_MOVBE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
+    CPU_EPT_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
+    CPU_CLFLUSH_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
+    CPU_NOP_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
+    CPU_SYSCALL_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
+    CPU_RDTSCP_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
+    CPU_3DNOW_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
+    CPU_3DNOWA_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
+    CPU_PADLOCK_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
+    CPU_SVME_FLAGS, 1, 0 },
+  { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
+    CPU_SVME_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
+    CPU_SSE4A_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
+    CPU_ABM_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
+    CPU_BMI_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
+    CPU_TBM_FLAGS, 0, 0 },
 };
 
 #ifdef I386COFF
@@ -934,10 +1020,11 @@ i386_align_code (fragS *fragP, int count)
      PROCESSOR_CORE, PROCESSOR_CORE2, PROCESSOR_COREI7, and
      PROCESSOR_GENERIC64, alt_long_patt will be used.
      3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and
-     PROCESSOR_AMDFAM10, alt_short_patt will be used.
+     PROCESSOR_AMDFAM10, and PROCESSOR_BDVER1, alt_short_patt
+     will be used.
 
      When -mtune= isn't used, alt_long_patt will be used if
-     cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will
+     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
      be used.
 
      When -march= or .arch is used, we can't use anything beyond
@@ -967,18 +1054,18 @@ i386_align_code (fragS *fragP, int count)
 	    {
 	    case PROCESSOR_UNKNOWN:
 	      /* We use cpu_arch_isa_flags to check if we SHOULD
-		 optimize for Cpu686.  */
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+		 optimize with nops.  */
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_long_patt;
 	      else
 		patt = f32_patt;
 	      break;
-	    case PROCESSOR_PENTIUMPRO:
 	    case PROCESSOR_PENTIUM4:
 	    case PROCESSOR_NOCONA:
 	    case PROCESSOR_CORE:
 	    case PROCESSOR_CORE2:
 	    case PROCESSOR_COREI7:
+	    case PROCESSOR_L1OM:
 	    case PROCESSOR_GENERIC64:
 	      patt = alt_long_patt;
 	      break;
@@ -986,11 +1073,13 @@ i386_align_code (fragS *fragP, int count)
 	    case PROCESSOR_ATHLON:
 	    case PROCESSOR_K8:
 	    case PROCESSOR_AMDFAM10:
+	    case PROCESSOR_BDVER1:
 	      patt = alt_short_patt;
 	      break;
 	    case PROCESSOR_I386:
 	    case PROCESSOR_I486:
 	    case PROCESSOR_PENTIUM:
+	    case PROCESSOR_PENTIUMPRO:
 	    case PROCESSOR_GENERIC32:
 	      patt = f32_patt;
 	      break;
@@ -1013,10 +1102,11 @@ i386_align_code (fragS *fragP, int count)
 	    case PROCESSOR_ATHLON:
 	    case PROCESSOR_K8:
 	    case PROCESSOR_AMDFAM10:
+	    case PROCESSOR_BDVER1:
 	    case PROCESSOR_GENERIC32:
 	      /* We use cpu_arch_isa_flags to check if we CAN optimize
-		 for Cpu686.  */
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+		 with nops.  */
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_short_patt;
 	      else
 		patt = f32_patt;
@@ -1027,7 +1117,8 @@ i386_align_code (fragS *fragP, int count)
 	    case PROCESSOR_CORE:
 	    case PROCESSOR_CORE2:
 	    case PROCESSOR_COREI7:
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+	    case PROCESSOR_L1OM:
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_long_patt;
 	      else
 		patt = f32_patt;
@@ -1042,8 +1133,16 @@ i386_align_code (fragS *fragP, int count)
 	{
 	  /* If the padding is less than 15 bytes, we use the normal
 	     ones.  Otherwise, we use a jump instruction and adjust
-	     its offset.  */
-	  if (count < 15)
+	     its offset.   */
+	  int limit;
+
+	  /* For 64bit, the limit is 3 bytes.  */
+	  if (flag_code == CODE_64BIT
+	      && fragP->tc_frag_data.isa_flags.bitfield.cpulm)
+	    limit = 3;
+	  else
+	    limit = 15;
+	  if (count < limit)
 	    memcpy (fragP->fr_literal + fragP->fr_fix,
 		    patt[count - 1], count);
 	  else
@@ -1228,6 +1327,24 @@ cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
   return x;
 }
 
+static INLINE i386_cpu_flags
+cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
+{
+  switch (ARRAY_SIZE (x.array))
+    {
+    case 3:
+      x.array [2] &= ~y.array [2];
+    case 2:
+      x.array [1] &= ~y.array [1];
+    case 1:
+      x.array [0] &= ~y.array [0];
+      break;
+    default:
+      abort ();
+    }
+  return x;
+}
+
 #define CPU_FLAGS_ARCH_MATCH		0x1
 #define CPU_FLAGS_64BIT_MATCH		0x2
 #define CPU_FLAGS_AES_MATCH		0x4
@@ -1243,7 +1360,7 @@ cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
 /* Return CPU flags match bits. */
 
 static int
-cpu_flags_match (const template *t)
+cpu_flags_match (const insn_template *t)
 {
   i386_cpu_flags x = t->cpu_flags;
   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
@@ -1373,6 +1490,7 @@ static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
+static const i386_operand_type vec_imm4 = OPERAND_TYPE_VEC_IMM4;
 
 enum operand_type
 {
@@ -1427,7 +1545,7 @@ operand_type_check (i386_operand_type t, enum operand_type c)
    operand J for instruction template T.  */
 
 static INLINE int
-match_reg_size (const template *t, unsigned int j)
+match_reg_size (const insn_template *t, unsigned int j)
 {
   return !((i.types[j].bitfield.byte
 	    && !t->operand_types[j].bitfield.byte)
@@ -1443,7 +1561,7 @@ match_reg_size (const template *t, unsigned int j)
    instruction template T.  */
 
 static INLINE int
-match_mem_size (const template *t, unsigned int j)
+match_mem_size (const insn_template *t, unsigned int j)
 {
   return (match_reg_size (t, j)
 	  && !((i.types[j].bitfield.unspecified
@@ -1462,7 +1580,7 @@ match_mem_size (const template *t, unsigned int j)
    instruction template T.  */
 
 static INLINE int
-operand_size_match (const template *t)
+operand_size_match (const insn_template *t)
 {
   unsigned int j;
   int match = 1;
@@ -1493,9 +1611,14 @@ operand_size_match (const template *t)
 	}
     }
 
-  if (match
-      || (!t->opcode_modifier.d && !t->opcode_modifier.floatd))
+  if (match)
     return match;
+  else if (!t->opcode_modifier.d && !t->opcode_modifier.floatd)
+    {
+mismatch:
+      i.error = operand_size_mismatch;
+      return 0;
+    }
 
   /* Check reverse.  */
   gas_assert (i.operands == 2);
@@ -1505,17 +1628,11 @@ operand_size_match (const template *t)
     {
       if (t->operand_types[j].bitfield.acc
 	  && !match_reg_size (t, j ? 0 : 1))
-	{
-	  match = 0;
-	  break;
-	}
+	goto mismatch;
 
       if (i.types[j].bitfield.mem
 	  && !match_mem_size (t, j ? 0 : 1))
-	{
-	  match = 0;
-	  break;
-	}
+	goto mismatch;
     }
 
   return match;
@@ -1538,10 +1655,15 @@ operand_type_match (i386_operand_type overlap,
   temp.bitfield.xmmword = 0;
   temp.bitfield.ymmword = 0;
   if (operand_type_all_zero (&temp))
-    return 0;
+    goto mismatch;
+
+  if (given.bitfield.baseindex == overlap.bitfield.baseindex
+      && given.bitfield.jumpabsolute == overlap.bitfield.jumpabsolute)
+    return 1;
 
-  return (given.bitfield.baseindex == overlap.bitfield.baseindex
-	  && given.bitfield.jumpabsolute == overlap.bitfield.jumpabsolute);
+mismatch:
+  i.error = operand_type_mismatch;
+  return 0;
 }
 
 /* If given types g0 and g1 are registers they must be of the same type
@@ -1584,10 +1706,15 @@ operand_type_register_match (i386_operand_type m0,
       t1.bitfield.reg64 = 1;
     }
 
-  return (!(t0.bitfield.reg8 & t1.bitfield.reg8)
-	  && !(t0.bitfield.reg16 & t1.bitfield.reg16)
-	  && !(t0.bitfield.reg32 & t1.bitfield.reg32)
-	  && !(t0.bitfield.reg64 & t1.bitfield.reg64));
+  if (!(t0.bitfield.reg8 & t1.bitfield.reg8)
+      && !(t0.bitfield.reg16 & t1.bitfield.reg16)
+      && !(t0.bitfield.reg32 & t1.bitfield.reg32)
+      && !(t0.bitfield.reg64 & t1.bitfield.reg64))
+    return 1;
+
+  i.error = register_type_mismatch;
+
+  return 0;
 }
 
 static INLINE unsigned int
@@ -1648,6 +1775,12 @@ fits_in_unsigned_long (offsetT num ATTRIBUTE_UNUSED)
 #endif
 }				/* fits_in_unsigned_long() */
 
+static INLINE int
+fits_in_imm4 (offsetT num)
+{
+  return (num & 0xf) == num;
+}
+
 static i386_operand_type
 smallest_imm_type (offsetT num)
 {
@@ -1718,10 +1851,13 @@ offset_in_range (offsetT val, int size)
     default: abort ();
     }
 
-  /* If BFD64, sign extend val.  */
-  if (!use_rela_relocations)
+#ifdef BFD64
+  /* If BFD64, sign extend val for 32bit address mode.  */
+  if (flag_code != CODE_64BIT
+      || i.prefix[ADDR_PREFIX])
     if ((val & ~(((addressT) 2 << 31) - 1)) == 0)
       val = (val ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
+#endif
 
   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
     {
@@ -1734,13 +1870,26 @@ offset_in_range (offsetT val, int size)
   return val & mask;
 }
 
-/* Returns 0 if attempting to add a prefix where one from the same
-   class already exists, 1 if non rep/repne added, 2 if rep/repne
-   added.  */
-static int
+enum PREFIX_GROUP
+{
+  PREFIX_EXIST = 0,
+  PREFIX_LOCK,
+  PREFIX_REP,
+  PREFIX_OTHER
+};
+
+/* Returns
+   a. PREFIX_EXIST if attempting to add a prefix where one from the
+   same class already exists.
+   b. PREFIX_LOCK if lock prefix is added.
+   c. PREFIX_REP if rep/repne prefix is added.
+   d. PREFIX_OTHER if other prefix is added.
+ */
+
+static enum PREFIX_GROUP
 add_prefix (unsigned int prefix)
 {
-  int ret = 1;
+  enum PREFIX_GROUP ret = PREFIX_OTHER;
   unsigned int q;
 
   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
@@ -1749,7 +1898,7 @@ add_prefix (unsigned int prefix)
       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
 	  || ((i.prefix[REX_PREFIX] & (REX_R | REX_X | REX_B))
 	      && (prefix & (REX_R | REX_X | REX_B))))
-	ret = 0;
+	ret = PREFIX_EXIST;
       q = REX_PREFIX;
     }
   else
@@ -1770,10 +1919,13 @@ add_prefix (unsigned int prefix)
 
 	case REPNE_PREFIX_OPCODE:
 	case REPE_PREFIX_OPCODE:
-	  ret = 2;
-	  /* fall thru */
+	  q = REP_PREFIX;
+	  ret = PREFIX_REP;
+	  break;
+
 	case LOCK_PREFIX_OPCODE:
-	  q = LOCKREP_PREFIX;
+	  q = LOCK_PREFIX;
+	  ret = PREFIX_LOCK;
 	  break;
 
 	case FWAIT_OPCODE:
@@ -1789,7 +1941,7 @@ add_prefix (unsigned int prefix)
 	  break;
 	}
       if (i.prefix[q] != 0)
-	ret = 0;
+	ret = PREFIX_EXIST;
     }
 
   if (ret)
@@ -1805,9 +1957,11 @@ add_prefix (unsigned int prefix)
 }
 
 static void
-set_code_flag (int value)
+update_code_flag (int value, int check)
 {
-  flag_code = value;
+  PRINTF_LIKE ((*as_error));
+
+  flag_code = (enum flag_code) value;
   if (flag_code == CODE_64BIT)
     {
       cpu_arch_flags.bitfield.cpu64 = 1;
@@ -1820,19 +1974,35 @@ set_code_flag (int value)
     }
   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
     {
-      as_bad (_("64bit mode not supported on this CPU."));
+      if (check)
+	as_error = as_fatal;
+      else
+	as_error = as_bad;
+      (*as_error) (_("64bit mode not supported on `%s'."),
+		   cpu_arch_name ? cpu_arch_name : default_arch);
     }
   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
     {
-      as_bad (_("32bit mode not supported on this CPU."));
+      if (check)
+	as_error = as_fatal;
+      else
+	as_error = as_bad;
+      (*as_error) (_("32bit mode not supported on `%s'."),
+		   cpu_arch_name ? cpu_arch_name : default_arch);
     }
   stackop_size = '\0';
 }
 
+static void
+set_code_flag (int value)
+{
+  update_code_flag (value, 0);
+}
+
 static void
 set_16bit_gcc_code_flag (int new_code_flag)
 {
-  flag_code = new_code_flag;
+  flag_code = (enum flag_code) new_code_flag;
   if (flag_code != CODE_16BIT)
     abort ();
   cpu_arch_flags.bitfield.cpu64 = 0;
@@ -1915,6 +2085,35 @@ set_sse_check (int dummy ATTRIBUTE_UNUSED)
   demand_empty_rest_of_line ();
 }
 
+static void
+check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
+			   i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
+{
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+  static const char *arch;
+
+  /* Intel L1OM is only supported on ELF.  */
+  if (!IS_ELF)
+    return;
+
+  if (!arch)
+    {
+      /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
+	 use default_arch.  */
+      arch = cpu_arch_name;
+      if (!arch)
+	arch = default_arch;
+    }
+
+  /* If we are targeting Intel L1OM, we must enable it.  */
+  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
+      || new_flag.bitfield.cpul1om)
+    return;
+
+  as_bad (_("`%s' is not supported on `%s'"), name, arch);
+#endif
+}
+
 static void
 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
 {
@@ -1924,18 +2123,20 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
     {
       char *string = input_line_pointer;
       int e = get_symbol_end ();
-      unsigned int i;
+      unsigned int j;
       i386_cpu_flags flags;
 
-      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
 	{
-	  if (strcmp (string, cpu_arch[i].name) == 0)
+	  if (strcmp (string, cpu_arch[j].name) == 0)
 	    {
+	      check_cpu_arch_compatible (string, cpu_arch[j].flags);
+
 	      if (*string != '.')
 		{
-		  cpu_arch_name = cpu_arch[i].name;
+		  cpu_arch_name = cpu_arch[j].name;
 		  cpu_sub_arch_name = NULL;
-		  cpu_arch_flags = cpu_arch[i].flags;
+		  cpu_arch_flags = cpu_arch[j].flags;
 		  if (flag_code == CODE_64BIT)
 		    {
 		      cpu_arch_flags.bitfield.cpu64 = 1;
@@ -1946,8 +2147,8 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
 		      cpu_arch_flags.bitfield.cpu64 = 0;
 		      cpu_arch_flags.bitfield.cpuno64 = 1;
 		    }
-		  cpu_arch_isa = cpu_arch[i].type;
-		  cpu_arch_isa_flags = cpu_arch[i].flags;
+		  cpu_arch_isa = cpu_arch[j].type;
+		  cpu_arch_isa_flags = cpu_arch[j].flags;
 		  if (!cpu_arch_tune_set)
 		    {
 		      cpu_arch_tune = cpu_arch_isa;
@@ -1956,28 +2157,33 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
 		  break;
 		}
 
-	      flags = cpu_flags_or (cpu_arch_flags,
-				    cpu_arch[i].flags);
+	      if (!cpu_arch[j].negated)
+		flags = cpu_flags_or (cpu_arch_flags,
+				      cpu_arch[j].flags);
+	      else
+		flags = cpu_flags_and_not (cpu_arch_flags,
+					   cpu_arch[j].flags);
 	      if (!cpu_flags_equal (&flags, &cpu_arch_flags))
 		{
 		  if (cpu_sub_arch_name)
 		    {
 		      char *name = cpu_sub_arch_name;
 		      cpu_sub_arch_name = concat (name,
-						  cpu_arch[i].name,
+						  cpu_arch[j].name,
 						  (const char *) NULL);
 		      free (name);
 		    }
 		  else
-		    cpu_sub_arch_name = xstrdup (cpu_arch[i].name);
+		    cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
 		  cpu_arch_flags = flags;
+		  cpu_arch_isa_flags = flags;
 		}
 	      *input_line_pointer = e;
 	      demand_empty_rest_of_line ();
 	      return;
 	    }
 	}
-      if (i >= ARRAY_SIZE (cpu_arch))
+      if (j >= ARRAY_SIZE (cpu_arch))
 	as_bad (_("no such architecture: `%s'"), string);
 
       *input_line_pointer = e;
@@ -2005,11 +2211,37 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
   demand_empty_rest_of_line ();
 }
 
+enum bfd_architecture
+i386_arch (void)
+{
+  if (cpu_arch_isa == PROCESSOR_L1OM)
+    {
+      if (OUTPUT_FLAVOR != bfd_target_elf_flavour
+	  || flag_code != CODE_64BIT)
+	as_fatal (_("Intel L1OM is 64bit ELF only"));
+      return bfd_arch_l1om;
+    }
+  else
+    return bfd_arch_i386;
+}
+
 unsigned long
 i386_mach ()
 {
-  if (!strcmp (default_arch, "x86_64"))
-    return bfd_mach_x86_64;
+  if (!strncmp (default_arch, "x86_64", 6))
+    {
+      if (cpu_arch_isa == PROCESSOR_L1OM)
+	{
+	  if (OUTPUT_FLAVOR != bfd_target_elf_flavour
+	      || default_arch[6] != '\0')
+	    as_fatal (_("Intel L1OM is 64bit ELF only"));
+	  return bfd_mach_l1om;
+	}
+      else if (default_arch[6] == '\0')
+	return bfd_mach_x86_64;
+      else
+	return bfd_mach_x64_32;
+    }
   else if (!strcmp (default_arch, "i386"))
     return bfd_mach_i386_i386;
   else
@@ -2025,7 +2257,7 @@ md_begin ()
   op_hash = hash_new ();
 
   {
-    const template *optab;
+    const insn_template *optab;
     templates *core_optab;
 
     /* Setup for loop.  */
@@ -2140,7 +2372,12 @@ md_begin ()
 
   if (flag_code == CODE_64BIT)
     {
+#if defined (OBJ_COFF) && defined (TE_PE)
+      x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
+				  ? 32 : 16);
+#else
       x86_dwarf2_return_column = 16;
+#endif
       x86_cie_data_alignment = -8;
     }
   else
@@ -2160,7 +2397,7 @@ i386_print_statistics (FILE *file)
 #ifdef DEBUG386
 
 /* Debugging routines for md_assemble.  */
-static void pte (template *);
+static void pte (insn_template *);
 static void pt (i386_operand_type);
 static void pe (expressionS *);
 static void ps (symbolS *);
@@ -2168,7 +2405,7 @@ static void ps (symbolS *);
 static void
 pi (char *line, i386_insn *x)
 {
-  unsigned int i;
+  unsigned int j;
 
   fprintf (stdout, "%s: template ", line);
   pte (&x->tm);
@@ -2185,35 +2422,35 @@ pi (char *line, i386_insn *x)
 	   (x->rex & REX_R) != 0,
 	   (x->rex & REX_X) != 0,
 	   (x->rex & REX_B) != 0);
-  for (i = 0; i < x->operands; i++)
+  for (j = 0; j < x->operands; j++)
     {
-      fprintf (stdout, "    #%d:  ", i + 1);
-      pt (x->types[i]);
+      fprintf (stdout, "    #%d:  ", j + 1);
+      pt (x->types[j]);
       fprintf (stdout, "\n");
-      if (x->types[i].bitfield.reg8
-	  || x->types[i].bitfield.reg16
-	  || x->types[i].bitfield.reg32
-	  || x->types[i].bitfield.reg64
-	  || x->types[i].bitfield.regmmx
-	  || x->types[i].bitfield.regxmm
-	  || x->types[i].bitfield.regymm
-	  || x->types[i].bitfield.sreg2
-	  || x->types[i].bitfield.sreg3
-	  || x->types[i].bitfield.control
-	  || x->types[i].bitfield.debug
-	  || x->types[i].bitfield.test)
-	fprintf (stdout, "%s\n", x->op[i].regs->reg_name);
-      if (operand_type_check (x->types[i], imm))
-	pe (x->op[i].imms);
-      if (operand_type_check (x->types[i], disp))
-	pe (x->op[i].disps);
+      if (x->types[j].bitfield.reg8
+	  || x->types[j].bitfield.reg16
+	  || x->types[j].bitfield.reg32
+	  || x->types[j].bitfield.reg64
+	  || x->types[j].bitfield.regmmx
+	  || x->types[j].bitfield.regxmm
+	  || x->types[j].bitfield.regymm
+	  || x->types[j].bitfield.sreg2
+	  || x->types[j].bitfield.sreg3
+	  || x->types[j].bitfield.control
+	  || x->types[j].bitfield.debug
+	  || x->types[j].bitfield.test)
+	fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
+      if (operand_type_check (x->types[j], imm))
+	pe (x->op[j].imms);
+      if (operand_type_check (x->types[j], disp))
+	pe (x->op[j].disps);
     }
 }
 
 static void
-pte (template *t)
+pte (insn_template *t)
 {
-  unsigned int i;
+  unsigned int j;
   fprintf (stdout, " %d operands ", t->operands);
   fprintf (stdout, "opcode %x ", t->base_opcode);
   if (t->extension_opcode != None)
@@ -2223,10 +2460,10 @@ pte (template *t)
   if (t->opcode_modifier.w)
     fprintf (stdout, "W");
   fprintf (stdout, "\n");
-  for (i = 0; i < t->operands; i++)
+  for (j = 0; j < t->operands; j++)
     {
-      fprintf (stdout, "    #%d type ", i + 1);
-      pt (t->operand_types[i]);
+      fprintf (stdout, "    #%d type ", j + 1);
+      pt (t->operand_types[j]);
       fprintf (stdout, "\n");
     }
 }
@@ -2326,7 +2563,7 @@ reloc (unsigned int size,
 {
   if (other != NO_RELOC)
     {
-      reloc_howto_type *reloc;
+      reloc_howto_type *rel;
 
       if (size == 8)
 	switch (other)
@@ -2357,18 +2594,18 @@ reloc (unsigned int size,
       if (size == 4 && flag_code != CODE_64BIT)
 	sign = -1;
 
-      reloc = bfd_reloc_type_lookup (stdoutput, other);
-      if (!reloc)
+      rel = bfd_reloc_type_lookup (stdoutput, other);
+      if (!rel)
 	as_bad (_("unknown relocation (%u)"), other);
-      else if (size != bfd_get_reloc_size (reloc))
+      else if (size != bfd_get_reloc_size (rel))
 	as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
-		bfd_get_reloc_size (reloc),
+		bfd_get_reloc_size (rel),
 		size);
-      else if (pcrel && !reloc->pc_relative)
+      else if (pcrel && !rel->pc_relative)
 	as_bad (_("non-pc-relative relocation for pc-relative field"));
-      else if ((reloc->complain_on_overflow == complain_overflow_signed
+      else if ((rel->complain_on_overflow == complain_overflow_signed
 		&& !sign)
-	       || (reloc->complain_on_overflow == complain_overflow_unsigned
+	       || (rel->complain_on_overflow == complain_overflow_unsigned
 		   && sign > 0))
 	as_bad (_("relocated field and relocation type differ in signedness"));
       else
@@ -2466,10 +2703,6 @@ tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
     return 0;
-
-  if (fixP->fx_addsy != NULL
-      && symbol_get_bfdsym (fixP->fx_addsy)->flags & BSF_GNU_INDIRECT_FUNCTION)
-    return 0;
 #endif
   return 1;
 }
@@ -2530,7 +2763,7 @@ intel_float_operand (const char *mnemonic)
 /* Build the VEX prefix.  */
 
 static void
-build_vex_prefix (const template *t)
+build_vex_prefix (const insn_template *t)
 {
   unsigned int register_specifier;
   unsigned int implied_prefix;
@@ -2551,7 +2784,7 @@ build_vex_prefix (const template *t)
      operand.  */
   if (!i.swap_operand
       && i.operands == i.reg_operands
-      && i.tm.opcode_modifier.vex0f
+      && i.tm.opcode_modifier.vexopcode == VEX0F
       && i.tm.opcode_modifier.s
       && i.rex == REX_B)
     {
@@ -2577,7 +2810,10 @@ build_vex_prefix (const template *t)
       i.tm = t[1];
     }
 
-  vector_length = i.tm.opcode_modifier.vex256 ? 1 : 0;
+  if (i.tm.opcode_modifier.vex == VEXScalar)
+    vector_length = avxscalar;
+  else
+    vector_length = i.tm.opcode_modifier.vex == VEX256 ? 1 : 0;
 
   switch ((i.tm.base_opcode >> 8) & 0xff)
     {
@@ -2598,7 +2834,8 @@ build_vex_prefix (const template *t)
     }
 
   /* Use 2-byte VEX prefix if possible.  */
-  if (i.tm.opcode_modifier.vex0f
+  if (i.tm.opcode_modifier.vexopcode == VEX0F
+      && i.tm.opcode_modifier.vexw != VEXW1
       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
     {
       /* 2-byte VEX prefix.  */
@@ -2619,17 +2856,37 @@ build_vex_prefix (const template *t)
       /* 3-byte VEX prefix.  */
       unsigned int m, w;
 
-      if (i.tm.opcode_modifier.vex0f)
-	m = 0x1;
-      else if (i.tm.opcode_modifier.vex0f38)
-	m = 0x2;
-      else if (i.tm.opcode_modifier.vex0f3a)
-	m = 0x3;
-      else
-	abort ();
-
       i.vex.length = 3;
-      i.vex.bytes[0] = 0xc4;
+
+      switch (i.tm.opcode_modifier.vexopcode)
+	{
+	case VEX0F:
+	  m = 0x1;
+	  i.vex.bytes[0] = 0xc4;
+	  break;
+	case VEX0F38:
+	  m = 0x2;
+	  i.vex.bytes[0] = 0xc4;
+	  break;
+	case VEX0F3A:
+	  m = 0x3;
+	  i.vex.bytes[0] = 0xc4;
+	  break;
+	case XOP08:
+	  m = 0x8;
+	  i.vex.bytes[0] = 0x8f;
+	  break;
+	case XOP09:
+	  m = 0x9;
+	  i.vex.bytes[0] = 0x8f;
+	  break;
+	case XOP0A:
+	  m = 0xa;
+	  i.vex.bytes[0] = 0x8f;
+	  break;
+	default:
+	  abort ();
+	}
 
       /* The high 3 bits of the second VEX byte are 1's compliment
 	 of RXB bits from REX.  */
@@ -2637,12 +2894,12 @@ build_vex_prefix (const template *t)
 
       /* Check the REX.W bit.  */
       w = (i.rex & REX_W) ? 1 : 0;
-      if (i.tm.opcode_modifier.vexw0 || i.tm.opcode_modifier.vexw1)
+      if (i.tm.opcode_modifier.vexw)
 	{
 	  if (w)
 	    abort ();
 
-	  if (i.tm.opcode_modifier.vexw1)
+	  if (i.tm.opcode_modifier.vexw == VEXW1)
 	    w = 1;
 	}
 
@@ -2706,7 +2963,7 @@ md_assemble (char *line)
 {
   unsigned int j;
   char mnemonic[MAX_MNEM_SIZE];
-  const template *t;
+  const insn_template *t;
 
   /* Initialize globals.  */
   memset (&i, '\0', sizeof (i));
@@ -2757,6 +3014,7 @@ md_assemble (char *line)
   /* Don't optimize displacement for movabs since it only takes 64bit
      displacement.  */
   if (i.disp_operands
+      && !i.disp32_encoding
       && (flag_code != CODE_64BIT
 	  || strcmp (mnemonic, "movabs") != 0))
     optimize_disp ();
@@ -2802,6 +3060,18 @@ md_assemble (char *line)
     if (!add_prefix (FWAIT_OPCODE))
       return;
 
+  /* Check for lock without a lockable instruction.  Destination operand
+     must be memory unless it is xchg (0x86).  */
+  if (i.prefix[LOCK_PREFIX]
+      && (!i.tm.opcode_modifier.islockable
+	  || i.mem_operands == 0
+	  || (i.tm.base_opcode != 0x86
+	      && !operand_type_check (i.types[i.operands - 1], anymem))))
+    {
+      as_bad (_("expecting lockable instruction after `lock'"));
+      return;
+    }
+
   /* Check string instruction segment overrides.  */
   if (i.tm.opcode_modifier.isstring && i.mem_operands != 0)
     {
@@ -2813,6 +3083,10 @@ md_assemble (char *line)
   if (!process_suffix ())
     return;
 
+  /* Update operand types.  */
+  for (j = 0; j < i.operands; j++)
+    i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
+
   /* Make still unresolved immediate matches conform to size of immediate
      given in i.suffix.  */
   if (!finalize_imm ())
@@ -2821,12 +3095,15 @@ md_assemble (char *line)
   if (i.types[0].bitfield.imm1)
     i.imm_operands = 0;	/* kludge for shift insns.  */
 
-  for (j = 0; j < 3; j++)
-    if (i.types[j].bitfield.inoutportreg
-	|| i.types[j].bitfield.shiftcount
-	|| i.types[j].bitfield.acc
-	|| i.types[j].bitfield.floatacc)
-      i.reg_operands--;
+  /* We only need to check those implicit registers for instructions
+     with 3 operands or less.  */
+  if (i.operands <= 3)
+    for (j = 0; j < i.operands; j++)
+      if (i.types[j].bitfield.inoutportreg
+	  || i.types[j].bitfield.shiftcount
+	  || i.types[j].bitfield.acc
+	  || i.types[j].bitfield.floatacc)
+	i.reg_operands--;
 
   /* ImmExt should be processed after SSE2AVX.  */
   if (!i.tm.opcode_modifier.sse2avx
@@ -2848,8 +3125,12 @@ md_assemble (char *line)
   if (i.tm.opcode_modifier.vex)
     build_vex_prefix (t);
 
-  /* Handle conversion of 'int $3' --> special int3 insn.  */
-  if (i.tm.base_opcode == INT_OPCODE && i.op[0].imms->X_add_number == 3)
+  /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
+     instructions may define INT_OPCODE as well, so avoid this corner
+     case for those instructions that use MODRM.  */
+  if (i.tm.base_opcode == INT_OPCODE
+      && !i.tm.opcode_modifier.modrm
+      && i.op[0].imms->X_add_number == 3)
     {
       i.tm.base_opcode = INT3_OPCODE;
       i.imm_operands = 0;
@@ -2920,7 +3201,7 @@ parse_insn (char *line, char *mnemonic)
   char *token_start = l;
   char *mnem_p;
   int supported;
-  const template *t;
+  const insn_template *t;
   char *dot_p = NULL;
 
   /* Non-zero if we found a prefix only acceptable with string insns.  */
@@ -2961,7 +3242,7 @@ parse_insn (char *line, char *mnemonic)
 	}
 
       /* Look up instruction (or prefix) via hash table.  */
-      current_templates = hash_find (op_hash, mnemonic);
+      current_templates = (const templates *) hash_find (op_hash, mnemonic);
 
       if (*l != END_OF_INSN
 	  && (!is_space_char (*l) || l[1] != END_OF_INSN)
@@ -2991,11 +3272,13 @@ parse_insn (char *line, char *mnemonic)
 	  /* Add prefix, checking for repeated prefixes.  */
 	  switch (add_prefix (current_templates->start->base_opcode))
 	    {
-	    case 0:
+	    case PREFIX_EXIST:
 	      return NULL;
-	    case 2:
+	    case PREFIX_REP:
 	      expecting_string_instruction = current_templates->start->name;
 	      break;
+	    default:
+	      break;
 	    }
 	  /* Skip past PREFIX_SEPARATOR and reset token_start.  */
 	  token_start = ++l;
@@ -3006,14 +3289,20 @@ parse_insn (char *line, char *mnemonic)
 
   if (!current_templates)
     {
-      /* Check if we should swap operand in encoding.  */
+      /* Check if we should swap operand or force 32bit displacement in
+	 encoding.  */
       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
 	i.swap_operand = 1;
+      else if (mnem_p - 4 == dot_p 
+	       && dot_p[1] == 'd'
+	       && dot_p[2] == '3'
+	       && dot_p[3] == '2')
+	i.disp32_encoding = 1;
       else
 	goto check_suffix;
       mnem_p = dot_p;
       *dot_p = '\0';
-      current_templates = hash_find (op_hash, mnemonic);
+      current_templates = (const templates *) hash_find (op_hash, mnemonic);
     }
 
   if (!current_templates)
@@ -3030,7 +3319,8 @@ check_suffix:
 	case QWORD_MNEM_SUFFIX:
 	  i.suffix = mnem_p[-1];
 	  mnem_p[-1] = '\0';
-	  current_templates = hash_find (op_hash, mnemonic);
+	  current_templates = (const templates *) hash_find (op_hash,
+                                                             mnemonic);
 	  break;
 	case SHORT_MNEM_SUFFIX:
 	case LONG_MNEM_SUFFIX:
@@ -3038,7 +3328,8 @@ check_suffix:
 	    {
 	      i.suffix = mnem_p[-1];
 	      mnem_p[-1] = '\0';
-	      current_templates = hash_find (op_hash, mnemonic);
+	      current_templates = (const templates *) hash_find (op_hash,
+                                                                 mnemonic);
 	    }
 	  break;
 
@@ -3051,7 +3342,8 @@ check_suffix:
 	      else
 		i.suffix = LONG_MNEM_SUFFIX;
 	      mnem_p[-1] = '\0';
-	      current_templates = hash_find (op_hash, mnemonic);
+	      current_templates = (const templates *) hash_find (op_hash,
+                                                                 mnemonic);
 	    }
 	  break;
 	}
@@ -3424,7 +3716,7 @@ optimize_imm (void)
 	       than those matching the insn suffix.  */
 	    {
 	      i386_operand_type mask, allowed;
-	      const template *t;
+	      const insn_template *t;
 
 	      operand_type_set (&mask, 0);
 	      operand_type_set (&allowed, 0);
@@ -3472,28 +3764,28 @@ optimize_disp (void)
       {
 	if (i.op[op].disps->X_op == O_constant)
 	  {
-	    offsetT disp = i.op[op].disps->X_add_number;
+	    offsetT op_disp = i.op[op].disps->X_add_number;
 
 	    if (i.types[op].bitfield.disp16
-		&& (disp & ~(offsetT) 0xffff) == 0)
+		&& (op_disp & ~(offsetT) 0xffff) == 0)
 	      {
 		/* If this operand is at most 16 bits, convert
 		   to a signed 16 bit number and don't use 64bit
 		   displacement.  */
-		disp = (((disp & 0xffff) ^ 0x8000) - 0x8000);
+		op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
 		i.types[op].bitfield.disp64 = 0;
 	      }
 	    if (i.types[op].bitfield.disp32
-		&& (disp & ~(((offsetT) 2 << 31) - 1)) == 0)
+		&& (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
 	      {
 		/* If this operand is at most 32 bits, convert
 		   to a signed 32 bit number and don't use 64bit
 		   displacement.  */
-		disp &= (((offsetT) 2 << 31) - 1);
-		disp = (disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
+		op_disp &= (((offsetT) 2 << 31) - 1);
+		op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
 		i.types[op].bitfield.disp64 = 0;
 	      }
-	    if (!disp && i.types[op].bitfield.baseindex)
+	    if (!op_disp && i.types[op].bitfield.baseindex)
 	      {
 		i.types[op].bitfield.disp8 = 0;
 		i.types[op].bitfield.disp16 = 0;
@@ -3505,18 +3797,19 @@ optimize_disp (void)
 	      }
 	    else if (flag_code == CODE_64BIT)
 	      {
-		if (fits_in_signed_long (disp))
+		if (fits_in_signed_long (op_disp))
 		  {
 		    i.types[op].bitfield.disp64 = 0;
 		    i.types[op].bitfield.disp32s = 1;
 		  }
-		if (fits_in_unsigned_long (disp))
+		if (i.prefix[ADDR_PREFIX]
+		    && fits_in_unsigned_long (op_disp))
 		  i.types[op].bitfield.disp32 = 1;
 	      }
 	    if ((i.types[op].bitfield.disp32
 		 || i.types[op].bitfield.disp32s
 		 || i.types[op].bitfield.disp16)
-		&& fits_in_signed_byte (disp))
+		&& fits_in_signed_byte (op_disp))
 	      i.types[op].bitfield.disp8 = 1;
 	  }
 	else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
@@ -3536,11 +3829,37 @@ optimize_disp (void)
       }
 }
 
-static const template *
+/* Check if operands are valid for template T and update VEX operand
+   types.  Return 1 with i.error set if a check fails, 0 otherwise.  */
+
+static int
+VEX_check_operands (const insn_template *t)
+{
+  if (!t->opcode_modifier.vex)
+    return 0;
+
+  /* Only check VEX_Imm4, which must be the first operand.  */
+  if (t->operand_types[0].bitfield.vec_imm4)
+    {
+      if (i.op[0].imms->X_op != O_constant
+	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
+	{
+	  i.error = bad_imm4;
+	  return 1;
+	}
+
+      /* Turn off Imm8 so that update_imm won't complain.  */
+      i.types[0] = vec_imm4;
+    }
+
+  return 0;
+}
+
+static const insn_template *
 match_template (void)
 {
   /* Points to template once we've found it.  */
-  const template *t;
+  const insn_template *t;
   i386_operand_type overlap0, overlap1, overlap2, overlap3;
   i386_operand_type overlap4;
   unsigned int found_reverse_match;
@@ -3572,34 +3891,41 @@ match_template (void)
   else if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
     suffix_check.no_ldsuf = 1;
 
+  /* Must have right number of operands.  */
+  i.error = number_of_operands_mismatch;
+
   for (t = current_templates->start; t < current_templates->end; t++)
     {
       addr_prefix_disp = -1;
 
-      /* Must have right number of operands.  */
       if (i.operands != t->operands)
 	continue;
 
       /* Check processor support.  */
+      i.error = unsupported;
       found_cpu_match = (cpu_flags_match (t)
 			 == CPU_FLAGS_PERFECT_MATCH);
       if (!found_cpu_match)
 	continue;
 
       /* Check old gcc support. */
+      i.error = old_gcc_only;
       if (!old_gcc && t->opcode_modifier.oldgcc)
 	continue;
 
       /* Check AT&T mnemonic.   */
+      i.error = unsupported_with_intel_mnemonic;
       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
 	continue;
 
-      /* Check AT&T syntax Intel syntax.   */
+      /* Check AT&T/Intel syntax.   */
+      i.error = unsupported_syntax;
       if ((intel_syntax && t->opcode_modifier.attsyntax)
 	  || (!intel_syntax && t->opcode_modifier.intelsyntax))
 	continue;
 
       /* Check the suffix, except for some instructions in intel mode.  */
+      i.error = invalid_instruction_suffix;
       if ((!intel_syntax || !t->opcode_modifier.ignoresize)
 	  && ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
 	      || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
@@ -3699,9 +4025,8 @@ match_template (void)
 	    }
 	  }
 
-      /* We check register size only if size of operands can be
-	 encoded the canonical way.  */
-      check_register = t->opcode_modifier.w;
+      /* We check register size if needed.  */
+      check_register = t->opcode_modifier.checkregsize;
       overlap0 = operand_type_and (i.types[0], operand_types[0]);
       switch (t->operands)
 	{
@@ -3840,6 +4165,10 @@ check_reverse:
 	  continue;
 	}
 
+      /* Check if VEX operands are valid.  */
+      if (VEX_check_operands (t))
+	continue;
+
       /* We've found a match; break out of loop.  */
       break;
     }
@@ -3847,12 +4176,44 @@ check_reverse:
   if (t == current_templates->end)
     {
       /* We found no match.  */
-      if (intel_syntax)
-	as_bad (_("ambiguous operand size or operands invalid for `%s'"),
-		current_templates->start->name);
-      else
-	as_bad (_("suffix or operands invalid for `%s'"),
-		current_templates->start->name);
+      const char *err_msg;	/* Text for the reason held in i.error.  */
+      switch (i.error)
+	{
+	default:
+	  abort ();
+	case operand_size_mismatch:
+	  err_msg = _("operand size mismatch");
+	  break;
+	case operand_type_mismatch:
+	  err_msg = _("operand type mismatch");
+	  break;
+	case register_type_mismatch:
+	  err_msg = _("register type mismatch");
+	  break;
+	case number_of_operands_mismatch:
+	  err_msg = _("number of operands mismatch");
+	  break;
+	case invalid_instruction_suffix:
+	  err_msg = _("invalid instruction suffix");
+	  break;
+	case bad_imm4:	/* Operand 0 is not a constant that fits Imm4.  */
+	  err_msg = _("constant doesn't fit in 4 bits");
+	  break;
+	case old_gcc_only:
+	  err_msg = _("only supported with old gcc");
+	  break;
+	case unsupported_with_intel_mnemonic:
+	  err_msg = _("unsupported with Intel mnemonic");
+	  break;
+	case unsupported_syntax:
+	  err_msg = _("unsupported syntax");
+	  break;
+	case unsupported:
+	  err_msg = _("unsupported");
+	  break;
+	}
+      as_bad (_("%s for `%s'"), err_msg,
+	      current_templates->start->name);
       return NULL;
     }
 
@@ -4007,12 +4368,20 @@ process_suffix (void)
 	}
       else if (i.suffix == BYTE_MNEM_SUFFIX)
 	{
-	  if (!check_byte_reg ())
+	  if (intel_syntax
+	      && i.tm.opcode_modifier.ignoresize
+	      && i.tm.opcode_modifier.no_bsuf)
+	    i.suffix = 0;
+	  else if (!check_byte_reg ())
 	    return 0;
 	}
       else if (i.suffix == LONG_MNEM_SUFFIX)
 	{
-	  if (!check_long_reg ())
+	  if (intel_syntax
+	      && i.tm.opcode_modifier.ignoresize
+	      && i.tm.opcode_modifier.no_lsuf)
+	    i.suffix = 0;
+	  else if (!check_long_reg ())
 	    return 0;
 	}
       else if (i.suffix == QWORD_MNEM_SUFFIX)
@@ -4026,7 +4395,11 @@ process_suffix (void)
 	}
       else if (i.suffix == WORD_MNEM_SUFFIX)
 	{
-	  if (!check_word_reg ())
+	  if (intel_syntax
+	      && i.tm.opcode_modifier.ignoresize
+	      && i.tm.opcode_modifier.no_wsuf)
+	    i.suffix = 0;
+	  else if (!check_word_reg ())
 	    return 0;
 	}
       else if (i.suffix == XMMWORD_MNEM_SUFFIX
@@ -4204,10 +4577,6 @@ check_byte_reg (void)
       if (i.types[op].bitfield.reg8)
 	continue;
 
-      /* Don't generate this warning if not needed.  */
-      if (intel_syntax && i.tm.opcode_modifier.byteokintel)
-	continue;
-
       /* crc32 doesn't generate this warning.  */
       if (i.tm.base_opcode == 0xf20f38f0)
 	continue;
@@ -4431,9 +4800,7 @@ check_word_reg (void)
 static int
 update_imm (unsigned int j)
 {
-  i386_operand_type overlap;
-
-  overlap = operand_type_and (i.types[j], i.tm.operand_types[j]);
+  i386_operand_type overlap = i.types[j];
   if ((overlap.bitfield.imm8
        || overlap.bitfield.imm8s
        || overlap.bitfield.imm16
@@ -4497,14 +4864,19 @@ update_imm (unsigned int j)
 static int
 finalize_imm (void)
 {
-  unsigned int j;
+  unsigned int j, n;
 
-  for (j = 0; j < 2; j++)
-    if (update_imm (j) == 0)
-      return 0;
+  /* Run update_imm on the first min (2, i.operands) operands.  */
+  n = i.operands > 2 ? 2 : i.operands;
+  if (n)
+    {
+      for (j = 0; j < n; j++)
+	if (update_imm (j) == 0)
+	  return 0;
 
-  i.types[2] = operand_type_and (i.types[2], i.tm.operand_types[2]);
-  gas_assert (operand_type_check (i.types[2], imm) == 0);
+      /* The 3rd operand can't be an immediate operand.  */
+      gas_assert (operand_type_check (i.types[2], imm) == 0);
+    }
 
   return 1;
 }
@@ -4512,13 +4884,14 @@ finalize_imm (void)
 static int
 bad_implicit_operand (int xmm)
 {
-  const char *reg = xmm ? "xmm0" : "ymm0";
+  const char *ireg = xmm ? "xmm0" : "ymm0";	/* Required register.  */
+
   if (intel_syntax)
     as_bad (_("the last operand of `%s' must be `%s%s'"),
-	    i.tm.name, register_prefix, reg);
+	    i.tm.name, register_prefix, ireg);
   else
     as_bad (_("the first operand of `%s' must be `%s%s'"),
-	    i.tm.name, register_prefix, reg);
+	    i.tm.name, register_prefix, ireg);
   return 0;
 }
 
@@ -4530,17 +4903,15 @@ process_operands (void)
      unnecessary segment overrides.  */
   const seg_entry *default_seg = 0;
 
-  if (i.tm.opcode_modifier.sse2avx
-      && (i.tm.opcode_modifier.vexnds
-	  || i.tm.opcode_modifier.vexndd))
+  if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
     {
-      unsigned int dup = i.operands;
-      unsigned int dest = dup - 1;
+      unsigned int dupl = i.operands;
+      unsigned int dest = dupl - 1;
       unsigned int j;
 
       /* The destination must be an xmm register.  */
       gas_assert (i.reg_operands
-		  && MAX_OPERANDS > dup
+		  && MAX_OPERANDS > dupl
 		  && operand_type_equal (&i.types[dest], &regxmm));
 
       if (i.tm.opcode_modifier.firstxmm0)
@@ -4550,7 +4921,7 @@ process_operands (void)
 	  if (i.op[0].regs->reg_num != 0)
 	    return bad_implicit_operand (1);
 
-	  if (i.tm.opcode_modifier.vex3sources)
+	  if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
 	    {
 	      /* Keep xmm0 for instructions with VEX prefix and 3
 		 sources.  */
@@ -4571,8 +4942,9 @@ process_operands (void)
 	}
       else if (i.tm.opcode_modifier.implicit1stxmm0)
 	{
-	  gas_assert ((MAX_OPERANDS - 1) > dup
-		      && i.tm.opcode_modifier.vex3sources);
+	  gas_assert ((MAX_OPERANDS - 1) > dupl
+		      && (i.tm.opcode_modifier.vexsources
+			  == VEX3SOURCES));
 
 	  /* Add the implicit xmm0 for instructions with VEX prefix
 	     and 3 sources.  */
@@ -4591,11 +4963,11 @@ process_operands (void)
 	  i.reg_operands += 2;
 	  i.tm.operands += 2;
 
-	  dup++;
+	  dupl++;
 	  dest++;
-	  i.op[dup] = i.op[dest];
-	  i.types[dup] = i.types[dest];
-	  i.tm.operand_types[dup] = i.tm.operand_types[dest];
+	  i.op[dupl] = i.op[dest];
+	  i.types[dupl] = i.types[dest];
+	  i.tm.operand_types[dupl] = i.tm.operand_types[dest];
 	}
       else
 	{
@@ -4604,9 +4976,9 @@ duplicate:
 	  i.reg_operands++;
 	  i.tm.operands++;
 
-	  i.op[dup] = i.op[dest];
-	  i.types[dup] = i.types[dest];
-	  i.tm.operand_types[dup] = i.tm.operand_types[dest];
+	  i.op[dupl] = i.op[dest];
+	  i.types[dupl] = i.types[dest];
+	  i.tm.operand_types[dupl] = i.tm.operand_types[dest];
 	}
 
        if (i.tm.opcode_modifier.immext)
@@ -4754,67 +5126,130 @@ build_modrm_byte (void)
 
   /* The first operand of instructions with VEX prefix and 3 sources
      must be VEX_Imm4.  */
-  vex_3_sources = i.tm.opcode_modifier.vex3sources;
+  vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
   if (vex_3_sources)
     {
-      unsigned int nds, reg;
+      unsigned int nds, reg_slot;
+      expressionS *exp;
 
       if (i.tm.opcode_modifier.veximmext
-	  && i.tm.opcode_modifier.immext)
-	{
-	  dest = i.operands - 2;
-	  gas_assert (dest == 3);
-	}
+          && i.tm.opcode_modifier.immext)
+        {
+          dest = i.operands - 2;
+          gas_assert (dest == 3);
+        }
       else
-      dest = i.operands - 1;
+        dest = i.operands - 1;
       nds = dest - 1;
 
-      /* This instruction must have 4 register operands 
-	 or 3 register operands plus 1 memory operand.  
-	 It must have VexNDS and VexImmExt.  */
+      /* There are 2 kinds of instructions:
+         1. 5 operands: 4 register operands or 3 register operands
+         plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and
+         VexW0 or VexW1.  The destination must be either XMM or YMM
+         register.
+         2. 4 operands: 4 register operands or 3 register operands
+         plus 1 memory operand, VexXDS, and VexImmExt  */
       gas_assert ((i.reg_operands == 4
-		      || (i.reg_operands == 3 && i.mem_operands == 1))
-		  && i.tm.opcode_modifier.vexnds
-		  && i.tm.opcode_modifier.veximmext
-	    && (operand_type_equal (&i.tm.operand_types[dest], &regxmm)
-		|| operand_type_equal (&i.tm.operand_types[dest], &regymm)));
-
-      /* Generate an 8bit immediate operand to encode the register
-	 operand.  */
-      expressionS *exp = &im_expressions[i.imm_operands++];
-      i.op[i.operands].imms = exp;
-      i.types[i.operands] = imm8;
-      i.operands++;
-      /* If VexW1 is set, the first operand is the source and
-	 the second operand is encoded in the immediate operand.  */
-      if (i.tm.opcode_modifier.vexw1)
-	{
-	  source = 0;
-	  reg = 1;
-	}
+                   || (i.reg_operands == 3 && i.mem_operands == 1))
+                  && i.tm.opcode_modifier.vexvvvv == VEXXDS
+                  && (i.tm.opcode_modifier.veximmext
+                      || (i.imm_operands == 1
+                          && i.types[0].bitfield.vec_imm4
+                          && (i.tm.opcode_modifier.vexw == VEXW0
+                              || i.tm.opcode_modifier.vexw == VEXW1)
+                          && (operand_type_equal (&i.tm.operand_types[dest], &regxmm)
+                              || operand_type_equal (&i.tm.operand_types[dest], &regymm)))));
+
+      if (i.imm_operands == 0)
+        {
+          /* When there is no immediate operand, generate an 8bit
+             immediate operand to encode the first operand.  */
+          exp = &im_expressions[i.imm_operands++];
+          i.op[i.operands].imms = exp;
+          i.types[i.operands] = imm8;
+          i.operands++;
+          /* If VexW1 is set, the first operand is the source and
+             the second operand is encoded in the immediate operand.  */
+          if (i.tm.opcode_modifier.vexw == VEXW1)
+            {
+              source = 0;
+              reg_slot = 1;
+            }
+          else
+            {
+              source = 1;
+              reg_slot = 0;
+            }
+
+          /* FMA swaps REG and NDS.  */
+          if (i.tm.cpu_flags.bitfield.cpufma)
+            {
+              unsigned int tmp;
+              tmp = reg_slot;
+              reg_slot = nds;
+              nds = tmp;
+            }
+
+          gas_assert (operand_type_equal (&i.tm.operand_types[reg_slot],
+					  &regxmm)
+                      || operand_type_equal (&i.tm.operand_types[reg_slot],
+                                             &regymm));
+          exp->X_op = O_constant;
+          exp->X_add_number
+              = ((i.op[reg_slot].regs->reg_num
+                  + ((i.op[reg_slot].regs->reg_flags & RegRex) ? 8 : 0))
+		 << 4);
+        }
       else
-	{
-	  source = 1;
-	  reg = 0;
-	}      
-      /* FMA4 swaps REG and NDS.  */
-      if (i.tm.cpu_flags.bitfield.cpufma4)
-	{
-	  unsigned int tmp;
-	  tmp = reg;
-	  reg = nds;
-	  nds = tmp;
-	}      
-      gas_assert ((operand_type_equal (&i.tm.operand_types[reg], &regxmm)
-		   || operand_type_equal (&i.tm.operand_types[reg],
-					  &regymm)) 
-		  && (operand_type_equal (&i.tm.operand_types[nds], &regxmm)
-		      || operand_type_equal (&i.tm.operand_types[nds], 
-					     &regymm)));
-      exp->X_op = O_constant;
-      exp->X_add_number
-	= ((i.op[reg].regs->reg_num
-	    + ((i.op[reg].regs->reg_flags & RegRex) ? 8 : 0)) << 4);      
+        {
+          unsigned int imm_slot;
+
+          if (i.tm.opcode_modifier.vexw == VEXW0)
+            {
+              /* If VexW0 is set, the third operand is the source and
+                 the second operand is encoded in the immediate
+                 operand.  */
+              source = 2;
+              reg_slot = 1;
+            }
+          else
+            {
+              /* VexW1 is set, the second operand is the source and
+                 the third operand is encoded in the immediate
+                 operand.  */
+              source = 1;
+              reg_slot = 2;
+            }
+
+          if (i.tm.opcode_modifier.immext)
+            {
+              /* When ImmExt is set, the immediate byte is the last
+                 operand.  */
+              imm_slot = i.operands - 1;
+              source--;
+              reg_slot--;
+            }
+          else
+            {
+              imm_slot = 0;
+
+              /* Turn on Imm8 so that output_imm will generate it.  */
+              i.types[imm_slot].bitfield.imm8 = 1;
+            }
+
+          gas_assert (operand_type_equal (&i.tm.operand_types[reg_slot],
+					  &regxmm)
+		      || operand_type_equal (&i.tm.operand_types[reg_slot],
+					     &regymm));
+          i.op[imm_slot].imms->X_add_number
+              |= ((i.op[reg_slot].regs->reg_num
+                   + ((i.op[reg_slot].regs->reg_flags & RegRex) ? 8 : 0))
+		  << 4);
+        }
+
+      gas_assert (operand_type_equal (&i.tm.operand_types[nds], &regxmm)
+                  || operand_type_equal (&i.tm.operand_types[nds],
+                                         &regymm));
       i.vex.register_specifier = i.op[nds].regs;
     }
   else
@@ -4828,9 +5263,9 @@ build_modrm_byte (void)
      a instruction with VEX prefix and 3 sources.  */
   if (i.mem_operands == 0
       && ((i.reg_operands == 2
-	   && !i.tm.opcode_modifier.vexndd)
+	   && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
 	  || (i.reg_operands == 3
-	      && i.tm.opcode_modifier.vexnds)
+	      && i.tm.opcode_modifier.vexvvvv == VEXXDS)
 	  || (i.reg_operands == 4 && vex_3_sources)))
     {
       switch (i.operands)
@@ -4845,7 +5280,7 @@ build_modrm_byte (void)
 	     is an instruction with VexNDS. */
 	  gas_assert (i.imm_operands == 1
 		      || (i.imm_operands == 0
-			  && (i.tm.opcode_modifier.vexnds
+			  && (i.tm.opcode_modifier.vexvvvv == VEXXDS
 			      || i.types[0].bitfield.shiftcount)));
 	  if (operand_type_check (i.types[0], imm)
 	      || i.types[0].bitfield.shiftcount)
@@ -4864,19 +5299,19 @@ build_modrm_byte (void)
 	  gas_assert ((i.imm_operands == 2
 		       && i.types[0].bitfield.imm8
 		       && i.types[1].bitfield.imm8)
-		      || (i.tm.opcode_modifier.vexnds
+		      || (i.tm.opcode_modifier.vexvvvv == VEXXDS
 			  && i.imm_operands == 1
 			  && (i.types[0].bitfield.imm8
 			      || i.types[i.operands - 1].bitfield.imm8)));
-	  if (i.tm.opcode_modifier.vexnds)
+	  if (i.imm_operands == 2)
+	    source = 2;
+	  else
 	    {
 	      if (i.types[0].bitfield.imm8)
 		source = 1;
 	      else
 		source = 0;
 	    }
-	  else
-	    source = 2;
 	  break;
 	case 5:
 	  break;
@@ -4888,19 +5323,37 @@ build_modrm_byte (void)
 	{
 	  dest = source + 1;
 
-	  if (i.tm.opcode_modifier.vexnds)
+	  if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
 	    {
 	      /* For instructions with VexNDS, the register-only
-		 source operand must be XMM or YMM register. It is
-		 encoded in VEX prefix.  We need to clear RegMem bit
-		 before calling operand_type_equal.  */
-	      i386_operand_type op = i.tm.operand_types[dest];
+		 source operand must be 32/64bit integer, XMM or
+		 YMM register.  It is encoded in VEX prefix.  We
+		 need to clear RegMem bit before calling
+		 operand_type_equal.  */
+
+	      i386_operand_type op;
+	      unsigned int vvvv;
+
+	      /* If only DEST's template type has BaseIndex, the two
+		 sources are swapped: SOURCE is the VEX-encoded one.  */
+	      if (!i.tm.operand_types[source].bitfield.baseindex
+		  && i.tm.operand_types[dest].bitfield.baseindex)
+		{
+		  vvvv = source;
+		  source = dest;
+		}
+	      else
+		vvvv = dest;
+
+	      op = i.tm.operand_types[vvvv];
 	      op.bitfield.regmem = 0;
 	      if ((dest + 1) >= i.operands
-		  || (!operand_type_equal (&op, &regxmm)
+		  || (op.bitfield.reg32 != 1
+		      && op.bitfield.reg64 != 1
+		      && !operand_type_equal (&op, &regxmm)
 		      && !operand_type_equal (&op, &regymm)))
 		abort ();
-	      i.vex.register_specifier = i.op[dest].regs;
+	      i.vex.register_specifier = i.op[vvvv].regs;
 	      dest++;
 	    }
 	}
@@ -5144,11 +5597,56 @@ build_modrm_byte (void)
       else
 	mem = ~0;
 
+      if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
+	{
+	  if (operand_type_check (i.types[0], imm))
+	    i.vex.register_specifier = NULL;
+	  else
+	    {
+	      /* VEX.vvvv encodes one of the sources when the first
+		 operand is not an immediate.  */
+	      if (i.tm.opcode_modifier.vexw == VEXW0)
+		i.vex.register_specifier = i.op[0].regs;
+	      else
+		i.vex.register_specifier = i.op[1].regs;
+	    }
+
+	  /* Destination is an XMM register encoded in the ModRM.reg
+	     and VEX.R bit.  */
+	  i.rm.reg = i.op[2].regs->reg_num;
+	  if ((i.op[2].regs->reg_flags & RegRex) != 0)
+	    i.rex |= REX_R;
+
+	  /* ModRM.rm and VEX.B encode the other source.  */
+	  if (!i.mem_operands)
+	    {
+	      /* The source not encoded in VEX.vvvv goes in ModRM.rm;
+		 VEX.B must come from that same operand.  */
+	      unsigned int rm_op = i.tm.opcode_modifier.vexw == VEXW0 ? 1 : 0;
+
+	      i.rm.mode = 3;
+	      i.rm.regmem = i.op[rm_op].regs->reg_num;
+
+	      if ((i.op[rm_op].regs->reg_flags & RegRex) != 0)
+		i.rex |= REX_B;
+	    }
+	}
+      else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
+	{
+	  i.vex.register_specifier = i.op[2].regs;
+	  if (!i.mem_operands)
+	    {
+	      i.rm.mode = 3;
+	      i.rm.regmem = i.op[1].regs->reg_num;
+	      if ((i.op[1].regs->reg_flags & RegRex) != 0)
+		i.rex |= REX_B;
+	    }
+	}
       /* Fill in i.rm.reg or i.rm.regmem field with register operand
 	 (if any) based on i.tm.extension_opcode.  Again, we must be
 	 careful to make sure that segment/control/debug/test/MMX
 	 registers are coded into the i.rm.reg field.  */
-      if (i.reg_operands)
+      else if (i.reg_operands)
 	{
 	  unsigned int op;
 	  unsigned int vex_reg = ~0;
@@ -5170,7 +5668,7 @@ build_modrm_byte (void)
 
 	  if (vex_3_sources)
 	    op = dest;
-	  else if (i.tm.opcode_modifier.vexnds)
+	  else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
 	    {
 	      /* For instructions with VexNDS, the register-only
 		 source operand is encoded in VEX prefix. */
@@ -5183,47 +5681,73 @@ build_modrm_byte (void)
 		}
 	      else
 		{
-		  vex_reg = op + 1;
-		  gas_assert (vex_reg < i.operands);
+		  /* Check register-only source operand when two source
+		     operands are swapped.  */
+		  if (!i.tm.operand_types[op].bitfield.baseindex
+		      && i.tm.operand_types[op + 1].bitfield.baseindex)
+		    {
+		      vex_reg = op;
+		      op += 2;
+		      gas_assert (mem == (vex_reg + 1)
+				  && op < i.operands);
+		    }
+		  else
+		    {
+		      vex_reg = op + 1;
+		      gas_assert (vex_reg < i.operands);
+		    }
 		}
 	    }
-	  else if (i.tm.opcode_modifier.vexndd)
+	  else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
 	    {
-	      /* For instructions with VexNDD, there should be
-		 no memory operand and the register destination
+	      /* For instructions with VexNDD, the register destination
 		 is encoded in VEX prefix.  */
-	      gas_assert (i.mem_operands == 0
-			  && (op + 2) == i.operands);
-	      vex_reg = op + 1;
+	      if (i.mem_operands == 0)
+		{
+		  /* There is no memory operand.  */
+		  gas_assert ((op + 2) == i.operands);
+		  vex_reg = op + 1;
+		}
+	      else
+		{ 
+		  /* There are only 2 operands.  */
+		  gas_assert (op < 2 && i.operands == 2);
+		  vex_reg = 1;
+		}
 	    }
 	  else
 	    gas_assert (op < i.operands);
 
 	  if (vex_reg != (unsigned int) ~0)
 	    {
-	      gas_assert (i.reg_operands == 2);
+	      i386_operand_type *type = &i.tm.operand_types[vex_reg];
 
-	      if (!operand_type_equal (&i.tm.operand_types[vex_reg],
-				       & regxmm)
-		  && !operand_type_equal (&i.tm.operand_types[vex_reg],
-					  &regymm))
+	      if (type->bitfield.reg32 != 1
+		  && type->bitfield.reg64 != 1
+		  && !operand_type_equal (type, &regxmm)
+		  && !operand_type_equal (type, &regymm))
 		abort ();
+
 	      i.vex.register_specifier = i.op[vex_reg].regs;
 	    }
 
-	  /* If there is an extension opcode to put here, the
-	     register number must be put into the regmem field.  */
-	  if (i.tm.extension_opcode != None)
+	  /* Don't set OP operand twice.  */
+	  if (vex_reg != op)
 	    {
-	      i.rm.regmem = i.op[op].regs->reg_num;
-	      if ((i.op[op].regs->reg_flags & RegRex) != 0)
-		i.rex |= REX_B;
-	    }
-	  else
-	    {
-	      i.rm.reg = i.op[op].regs->reg_num;
-	      if ((i.op[op].regs->reg_flags & RegRex) != 0)
-		i.rex |= REX_R;
+	      /* If there is an extension opcode to put here, the
+		 register number must be put into the regmem field.  */
+	      if (i.tm.extension_opcode != None)
+		{
+		  i.rm.regmem = i.op[op].regs->reg_num;
+		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
+		    i.rex |= REX_B;
+		}
+	      else
+		{
+		  i.rm.reg = i.op[op].regs->reg_num;
+		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
+		    i.rex |= REX_R;
+		}
 	    }
 
 	  /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
@@ -5244,15 +5768,15 @@ static void
 output_branch (void)
 {
   char *p;
+  int size;
   int code16;
   int prefix;
   relax_substateT subtype;
   symbolS *sym;
   offsetT off;
 
-  code16 = 0;
-  if (flag_code == CODE_16BIT)
-    code16 = CODE16;
+  code16 = flag_code == CODE_16BIT ? CODE16 : 0;
+  size = i.disp32_encoding ? BIG : SMALL;
 
   prefix = 0;
   if (i.prefix[DATA_PREFIX] != 0)
@@ -5295,11 +5819,11 @@ output_branch (void)
   *p = i.tm.base_opcode;
 
   if ((unsigned char) *p == JUMP_PC_RELATIVE)
-    subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, SMALL);
+    subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
   else if (cpu_arch_flags.bitfield.cpui386)
-    subtype = ENCODE_RELAX_STATE (COND_JUMP, SMALL);
+    subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
   else
-    subtype = ENCODE_RELAX_STATE (COND_JUMP86, SMALL);
+    subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
   subtype |= code16;
 
   sym = i.op[0].disps->X_add_symbol;
@@ -5499,7 +6023,7 @@ output_insn (void)
 		    {
 check_prefix:
 		      if (prefix != REPE_PREFIX_OPCODE
-			  || (i.prefix[LOCKREP_PREFIX]
+			  || (i.prefix[REP_PREFIX]
 			      != REPE_PREFIX_OPCODE))
 			add_prefix (prefix);
 		    }
@@ -5894,7 +6418,7 @@ x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
    is non-null set it to the length of the string we removed from the
    input line.  Otherwise return NULL.  */
 static char *
-lex_got (enum bfd_reloc_code_real *reloc,
+lex_got (enum bfd_reloc_code_real *rel,
 	 int *adjust,
 	 i386_operand_type *types)
 {
@@ -5905,60 +6429,60 @@ lex_got (enum bfd_reloc_code_real *reloc,
      and adjust the reloc according to the real size in reloc().  */
   static const struct {
     const char *str;
+    int len;
     const enum bfd_reloc_code_real rel[2];
     const i386_operand_type types64;
   } gotrel[] = {
-    { "PLTOFF",   { 0,
-		    BFD_RELOC_X86_64_PLTOFF64 },
+    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
+				       BFD_RELOC_X86_64_PLTOFF64 },
       OPERAND_TYPE_IMM64 },
-    { "PLT",      { BFD_RELOC_386_PLT32,
-		    BFD_RELOC_X86_64_PLT32    },
+    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
+				       BFD_RELOC_X86_64_PLT32    },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "GOTPLT",   { 0,
-		    BFD_RELOC_X86_64_GOTPLT64 },
+    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
+				       BFD_RELOC_X86_64_GOTPLT64 },
       OPERAND_TYPE_IMM64_DISP64 },
-    { "GOTOFF",   { BFD_RELOC_386_GOTOFF,
-		    BFD_RELOC_X86_64_GOTOFF64 },
+    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
+				       BFD_RELOC_X86_64_GOTOFF64 },
       OPERAND_TYPE_IMM64_DISP64 },
-    { "GOTPCREL", { 0,
-		    BFD_RELOC_X86_64_GOTPCREL },
+    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
+				       BFD_RELOC_X86_64_GOTPCREL },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "TLSGD",    { BFD_RELOC_386_TLS_GD,
-		    BFD_RELOC_X86_64_TLSGD    },
+    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
+				       BFD_RELOC_X86_64_TLSGD    },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "TLSLDM",   { BFD_RELOC_386_TLS_LDM,
-		    0                         },
+    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
+				       _dummy_first_bfd_reloc_code_real },
       OPERAND_TYPE_NONE },
-    { "TLSLD",    { 0,
-		    BFD_RELOC_X86_64_TLSLD    },
+    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
+				       BFD_RELOC_X86_64_TLSLD    },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "GOTTPOFF", { BFD_RELOC_386_TLS_IE_32,
-		    BFD_RELOC_X86_64_GOTTPOFF },
+    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
+				       BFD_RELOC_X86_64_GOTTPOFF },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "TPOFF",    { BFD_RELOC_386_TLS_LE_32,
-		    BFD_RELOC_X86_64_TPOFF32  },
+    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
+				       BFD_RELOC_X86_64_TPOFF32  },
       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
-    { "NTPOFF",   { BFD_RELOC_386_TLS_LE,
-		    0                         },
+    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
+				       _dummy_first_bfd_reloc_code_real },
       OPERAND_TYPE_NONE },
-    { "DTPOFF",   { BFD_RELOC_386_TLS_LDO_32,
-		    BFD_RELOC_X86_64_DTPOFF32 },
-
+    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
+				       BFD_RELOC_X86_64_DTPOFF32 },
       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
-    { "GOTNTPOFF",{ BFD_RELOC_386_TLS_GOTIE,
-		    0                         },
+    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
+				       _dummy_first_bfd_reloc_code_real },
       OPERAND_TYPE_NONE },
-    { "INDNTPOFF",{ BFD_RELOC_386_TLS_IE,
-		    0                         },
+    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
+				       _dummy_first_bfd_reloc_code_real },
       OPERAND_TYPE_NONE },
-    { "GOT",      { BFD_RELOC_386_GOT32,
-		    BFD_RELOC_X86_64_GOT32    },
+    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
+				       BFD_RELOC_X86_64_GOT32    },
       OPERAND_TYPE_IMM32_32S_64_DISP32 },
-    { "TLSDESC",  { BFD_RELOC_386_TLS_GOTDESC,
-		    BFD_RELOC_X86_64_GOTPC32_TLSDESC },
+    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
+				       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
       OPERAND_TYPE_IMM32_32S_DISP32 },
-    { "TLSCALL",  { BFD_RELOC_386_TLS_DESC_CALL,
-		    BFD_RELOC_X86_64_TLSDESC_CALL },
+    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
+				       BFD_RELOC_X86_64_TLSDESC_CALL },
       OPERAND_TYPE_IMM32_32S_DISP32 },
   };
   char *cp;
@@ -5973,9 +6497,7 @@ lex_got (enum bfd_reloc_code_real *reloc,
 
   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
     {
-      int len;
-
-      len = strlen (gotrel[j].str);
+      int len = gotrel[j].len;
       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
 	{
 	  if (gotrel[j].rel[object_64bit] != 0)
@@ -5983,7 +6505,7 @@ lex_got (enum bfd_reloc_code_real *reloc,
 	      int first, second;
 	      char *tmpbuf, *past_reloc;
 
-	      *reloc = gotrel[j].rel[object_64bit];
+	      *rel = gotrel[j].rel[object_64bit];
 	      if (adjust)
 		*adjust = len;
 
@@ -6014,7 +6536,7 @@ lex_got (enum bfd_reloc_code_real *reloc,
 
 	      /* Allocate and copy string.  The trailing NUL shouldn't
 		 be necessary, but be safe.  */
-	      tmpbuf = xmalloc (first + second + 2);
+	      tmpbuf = (char *) xmalloc (first + second + 2);
 	      memcpy (tmpbuf, input_line_pointer, first);
 	      if (second != 0 && *past_reloc != ' ')
 		/* Replace the relocation token with ' ', so that
@@ -6040,6 +6562,7 @@ x86_cons (expressionS *exp, int size)
 {
   intel_syntax = -intel_syntax;
 
+  exp->X_md = 0;
   if (size == 4 || (object_64bit && size == 8))
     {
       /* Handle @GOTOFF and the like in an expression.  */
@@ -6086,7 +6609,8 @@ x86_cons (expressionS *exp, int size)
 }
 #endif
 
-static void signed_cons (int size)
+static void
+signed_cons (int size)
 {
   if (flag_code == CODE_64BIT)
     cons_sign = 1;
@@ -6171,16 +6695,17 @@ i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
 {
   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
     {
-      as_bad (_("missing or invalid immediate expression `%s'"),
-	      imm_start);
+      if (imm_start)
+	as_bad (_("missing or invalid immediate expression `%s'"),
+		imm_start);
       return 0;
     }
   else if (exp->X_op == O_constant)
     {
       /* Size it properly later.  */
       i.types[this_operand].bitfield.imm64 = 1;
-      /* If BFD64, sign extend val.  */
-      if (!use_rela_relocations
+      /* If not 64bit, sign extend val.  */
+      if (flag_code != CODE_64BIT
 	  && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
 	exp->X_add_number
 	  = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
@@ -6200,7 +6725,8 @@ i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
 #endif
   else if (!intel_syntax && exp->X_op == O_register)
     {
-      as_bad (_("illegal immediate register operand %s"), imm_start);
+      if (imm_start)
+	as_bad (_("illegal immediate register operand %s"), imm_start);
       return 0;
     }
   else
@@ -6434,7 +6960,8 @@ i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
 	goto inv_disp;
 
       if (S_IS_LOCAL (exp->X_add_symbol)
-	  && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section)
+	  && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
+	  && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
 	section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
       exp->X_op = O_subtract;
       exp->X_op_symbol = GOT_symbol;
@@ -6456,6 +6983,25 @@ i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
       ret = 0;
     }
 
+  else if (flag_code == CODE_64BIT
+	   && !i.prefix[ADDR_PREFIX]
+	   && exp->X_op == O_constant)
+    {
+      /* Since displacement is sign extended to 64bit, don't allow
+	 disp32 and turn off disp32s if it is out of range.  */
+      i.types[this_operand].bitfield.disp32 = 0;
+      if (!fits_in_signed_long (exp->X_add_number))
+	{
+	  i.types[this_operand].bitfield.disp32s = 0;
+	  if (i.types[this_operand].bitfield.baseindex)
+	    {
+	      as_bad (_("0x%lx out range of signed 32bit displacement"),
+		      (long) exp->X_add_number);
+	      ret = 0;
+	    }
+	}
+    }
+
 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
   else if (exp->X_op != O_constant
 	   && OUTPUT_FLAVOR == bfd_target_aout_flavour
@@ -6975,7 +7521,9 @@ md_estimate_size_before_relax (fragP, segment)
 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
       || (IS_ELF
 	  && (S_IS_EXTERNAL (fragP->fr_symbol)
-	      || S_IS_WEAK (fragP->fr_symbol)))
+	      || S_IS_WEAK (fragP->fr_symbol)
+	      || ((symbol_get_bfdsym (fragP->fr_symbol)->flags
+		   & BSF_GNU_INDIRECT_FUNCTION))))
 #endif
 #if defined (OBJ_COFF) && defined (TE_PE)
       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
@@ -6991,7 +7539,7 @@ md_estimate_size_before_relax (fragP, segment)
       int old_fr_fix;
 
       if (fragP->fr_var != NO_RELOC)
-	reloc_type = fragP->fr_var;
+	reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
       else if (size == 2)
 	reloc_type = BFD_RELOC_16_PCREL;
       else
@@ -7256,7 +7804,7 @@ md_apply_fix (fixP, valP, seg)
 	  if ((sym_seg == seg
 	       || (symbol_section_p (fixP->fx_addsy)
 		   && sym_seg != absolute_section))
-	      && !TC_FORCE_RELOCATION (fixP))
+	      && !generic_force_reloc (fixP))
 	    {
 	      /* Yes, we add the values in twice.  This is because
 		 bfd_install_relocation subtracts them out again.  I think
@@ -7440,7 +7988,7 @@ parse_real_register (char *reg_string, char **end_op)
 	      if (*s == ')')
 		{
 		  *end_op = s + 1;
-		  r = hash_find (reg_hash, "st(0)");
+		  r = (const reg_entry *) hash_find (reg_hash, "st(0)");
 		  know (r);
 		  return r + fpr;
 		}
@@ -7464,6 +8012,12 @@ parse_real_register (char *reg_string, char **end_op)
       && !cpu_arch_flags.bitfield.cpui386)
     return (const reg_entry *) NULL;
 
+  if (r->reg_type.bitfield.floatreg
+      && !cpu_arch_flags.bitfield.cpu8087
+      && !cpu_arch_flags.bitfield.cpu287
+      && !cpu_arch_flags.bitfield.cpu387)
+    return (const reg_entry *) NULL;
+
   if (r->reg_type.bitfield.regmmx && !cpu_arch_flags.bitfield.cpummx)
     return (const reg_entry *) NULL;
 
@@ -7606,6 +8160,8 @@ const char *md_shortopts = "qn";
 #define OPTION_MOLD_GCC (OPTION_MD_BASE + 9)
 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
+#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 12)
+#define OPTION_X32 (OPTION_MD_BASE + 13)
 
 struct option md_longopts[] =
 {
@@ -7613,6 +8169,9 @@ struct option md_longopts[] =
 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
      || defined (TE_PE) || defined (TE_PEP))
   {"64", no_argument, NULL, OPTION_64},
+#endif
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+  {"x32", no_argument, NULL, OPTION_X32},
 #endif
   {"divide", no_argument, NULL, OPTION_DIVIDE},
   {"march", required_argument, NULL, OPTION_MARCH},
@@ -7624,6 +8183,7 @@ struct option md_longopts[] =
   {"mold-gcc", no_argument, NULL, OPTION_MOLD_GCC},
   {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
   {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
+  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
   {NULL, no_argument, NULL, 0}
 };
 size_t md_longopts_size = sizeof (md_longopts);
@@ -7631,7 +8191,7 @@ size_t md_longopts_size = sizeof (md_longopts);
 int
 md_parse_option (int c, char *arg)
 {
-  unsigned int i;
+  unsigned int j;
   char *arch, *next;
 
   switch (c)
@@ -7687,6 +8247,28 @@ md_parse_option (int c, char *arg)
       break;
 #endif
 
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+    case OPTION_X32:
+      if (IS_ELF)
+	{
+	  const char **list, **l;
+
+	  list = bfd_target_list ();
+	  for (l = list; *l != NULL; l++)
+	    if (CONST_STRNEQ (*l, "elf32-x86-64"))
+	      {
+		default_arch = "x86_64:32";
+		break;
+	      }
+	  if (*l == NULL)
+	    as_fatal (_("No compiled in support for 32bit x86_64"));
+	  free (list);
+	}
+      else
+	as_fatal (_("32bit x86_64 is only supported for ELF"));
+      break;
+#endif
+
     case OPTION_32:
       default_arch = "i386";
       break;
@@ -7717,16 +8299,19 @@ md_parse_option (int c, char *arg)
 	  next = strchr (arch, '+');
 	  if (next)
 	    *next++ = '\0';
-	  for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
 	    {
-	      if (strcmp (arch, cpu_arch [i].name) == 0)
+	      if (strcmp (arch, cpu_arch [j].name) == 0)
 		{
 		  /* Processor.  */
-		  cpu_arch_name = cpu_arch[i].name;
+		  if (! cpu_arch[j].flags.bitfield.cpui386)
+		    continue;
+
+		  cpu_arch_name = cpu_arch[j].name;
 		  cpu_sub_arch_name = NULL;
-		  cpu_arch_flags = cpu_arch[i].flags;
-		  cpu_arch_isa = cpu_arch[i].type;
-		  cpu_arch_isa_flags = cpu_arch[i].flags;
+		  cpu_arch_flags = cpu_arch[j].flags;
+		  cpu_arch_isa = cpu_arch[j].type;
+		  cpu_arch_isa_flags = cpu_arch[j].flags;
 		  if (!cpu_arch_tune_set)
 		    {
 		      cpu_arch_tune = cpu_arch_isa;
@@ -7734,32 +8319,38 @@ md_parse_option (int c, char *arg)
 		    }
 		  break;
 		}
-	      else if (*cpu_arch [i].name == '.'
-		       && strcmp (arch, cpu_arch [i].name + 1) == 0)
+	      else if (*cpu_arch [j].name == '.'
+		       && strcmp (arch, cpu_arch [j].name + 1) == 0)
 		{
 		  /* ISA entension.  */
 		  i386_cpu_flags flags;
-		  flags = cpu_flags_or (cpu_arch_flags,
-					cpu_arch[i].flags);
+
+		  if (!cpu_arch[j].negated)
+		    flags = cpu_flags_or (cpu_arch_flags,
+					  cpu_arch[j].flags);
+		  else
+		    flags = cpu_flags_and_not (cpu_arch_flags,
+					       cpu_arch[j].flags);
 		  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
 		    {
 		      if (cpu_sub_arch_name)
 			{
 			  char *name = cpu_sub_arch_name;
 			  cpu_sub_arch_name = concat (name,
-						      cpu_arch[i].name,
+						      cpu_arch[j].name,
 						      (const char *) NULL);
 			  free (name);
 			}
 		      else
-			cpu_sub_arch_name = xstrdup (cpu_arch[i].name);
+			cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
 		      cpu_arch_flags = flags;
+		      cpu_arch_isa_flags = flags;
 		    }
 		  break;
 		}
 	    }
 
-	  if (i >= ARRAY_SIZE (cpu_arch))
+	  if (j >= ARRAY_SIZE (cpu_arch))
 	    as_fatal (_("Invalid -march= option: `%s'"), arg);
 
 	  arch = next;
@@ -7770,17 +8361,17 @@ md_parse_option (int c, char *arg)
     case OPTION_MTUNE:
       if (*arg == '.')
 	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
-      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
 	{
-	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	  if (strcmp (arg, cpu_arch [j].name) == 0)
 	    {
 	      cpu_arch_tune_set = 1;
-	      cpu_arch_tune = cpu_arch [i].type;
-	      cpu_arch_tune_flags = cpu_arch[i].flags;
+	      cpu_arch_tune = cpu_arch [j].type;
+	      cpu_arch_tune_flags = cpu_arch[j].flags;
 	      break;
 	    }
 	}
-      if (i >= ARRAY_SIZE (cpu_arch))
+      if (j >= ARRAY_SIZE (cpu_arch))
 	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
       break;
 
@@ -7829,15 +8420,102 @@ md_parse_option (int c, char *arg)
 	as_fatal (_("Invalid -msse-check= option: `%s'"), arg);
       break;
 
+    case OPTION_MAVXSCALAR:
+      if (strcasecmp (arg, "128") == 0)
+	avxscalar = vex128;
+      else if (strcasecmp (arg, "256") == 0)
+	avxscalar = vex256;
+      else
+	as_fatal (_("Invalid -mavxscalar= option: `%s'"), arg);
+      break;
+
     default:
       return 0;
     }
   return 1;
 }
 
+#define MESSAGE_TEMPLATE \
+"                                                                                "
+/* Print cpu_arch names to STREAM: extensions if EXT, else processors.  */
+static void
+show_arch (FILE *stream, int ext, int check)
+{
+  static char message[] = MESSAGE_TEMPLATE;
+  char *start = message + 27;
+  char *p;
+  int size = sizeof (MESSAGE_TEMPLATE);
+  int left;
+  const char *name;
+  int len;
+  unsigned int j;
+
+  p = start;
+  left = size - (start - message);
+  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
+    {
+      /* Entries flagged skip are never listed.  */
+      if (cpu_arch [j].skip)
+	continue;
+
+      name = cpu_arch [j].name;
+      len = cpu_arch [j].len;
+      if (*name == '.')
+	{
+	  /* An extension: list it without its leading '.', if asked.  */
+	  if (ext)
+	    {
+	      name++;
+	      len--;
+	    }
+	  else
+	    continue;
+	}
+      else if (ext)
+	{
+	  /* It is a processor.  Skip if we show only extensions.  */
+	  continue;
+	}
+      else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
+	{
+	  /* CHECK rejects processors without the cpui386 flag.  */
+	  continue;
+	}
+
+      /* Reserve 2 bytes for ", " or ",\0".  */
+      left -= len + 2;
+
+      /* Does the name still fit on the current line?  */
+      if (left >= 0)
+	{
+	  if (p != start)
+	    {
+	      *p++ = ',';
+	      *p++ = ' ';
+	    }
+	  p = mempcpy (p, name, len);
+	}
+      else
+	{
+	  /* Output the current message now and start a new one.  */
+	  *p++ = ',';
+	  *p = '\0';
+	  fprintf (stream, "%s\n", message);
+	  p = start;
+	  left = size - (start - message) - len - 2;
+	  
+	  gas_assert (left >= 0);
+
+	  p = mempcpy (p, name, len);
+	}
+    }
+
+  *p = '\0';
+  fprintf (stream, "%s\n", message);
+}
+
 void
-md_show_usage (stream)
-     FILE *stream;
+md_show_usage (FILE *stream)
 {
 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
   fprintf (stream, _("\
@@ -7855,7 +8533,7 @@ md_show_usage (stream)
 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
      || defined (TE_PE) || defined (TE_PEP))
   fprintf (stream, _("\
-  --32/--64               generate 32bit/64bit code\n"));
+  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
 #endif
 #ifdef SVR4_COMMENT_CHARS
   fprintf (stream, _("\
@@ -7866,28 +8544,23 @@ md_show_usage (stream)
 #endif
   fprintf (stream, _("\
   -march=CPU[,+EXTENSION...]\n\
-                          generate code for CPU and EXTENSION, CPU is one of:\n\
-                           i8086, i186, i286, i386, i486, pentium, pentiumpro,\n\
-                           pentiumii, pentiumiii, pentium4, prescott, nocona,\n\
-                           core, core2, corei7, k6, k6_2, athlon, k8, amdfam10,\n\
-                           generic32, generic64\n\
-                          EXTENSION is combination of:\n\
-                           mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, sse4,\n\
-                           avx, vmx, smx, xsave, movbe, ept, aes, pclmul, fma,\n\
-                           clflush, syscall, rdtscp, 3dnow, 3dnowa, sse4a,\n\
-                           svme, abm, padlock, fma4\n"));
+                          generate code for CPU and EXTENSION, CPU is one of:\n"));
+  show_arch (stream, 0, 1);
+  fprintf (stream, _("\
+                          EXTENSION is combination of:\n"));
+  show_arch (stream, 1, 0);
   fprintf (stream, _("\
-  -mtune=CPU              optimize for CPU, CPU is one of:\n\
-                           i8086, i186, i286, i386, i486, pentium, pentiumpro,\n\
-                           pentiumii, pentiumiii, pentium4, prescott, nocona,\n\
-                           core, core2, corei7, k6, k6_2, athlon, k8, amdfam10,\n\
-                           generic32, generic64\n"));
+  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
+  show_arch (stream, 0, 0);
   fprintf (stream, _("\
   -msse2avx               encode SSE instructions with VEX prefix\n"));
   fprintf (stream, _("\
   -msse-check=[none|error|warning]\n\
                           check SSE instructions\n"));
   fprintf (stream, _("\
+  -mavxscalar=[128|256]   encode scalar AVX instructions with specific vector\n\
+                           length\n"));
+  fprintf (stream, _("\
   -mmnemonic=[att|intel]  use AT&T/Intel mnemonic\n"));
   fprintf (stream, _("\
   -msyntax=[att|intel]    use AT&T/Intel syntax\n"));
@@ -7908,77 +8581,72 @@ md_show_usage (stream)
 const char *
 i386_target_format (void)
 {
-  if (!strcmp (default_arch, "x86_64"))
+  if (!strncmp (default_arch, "x86_64", 6))
     {
-      set_code_flag (CODE_64BIT);
-      if (cpu_flags_all_zero (&cpu_arch_isa_flags))
-	{
-	  cpu_arch_isa_flags.bitfield.cpui186 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui286 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui386 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui486 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui586 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui686 = 1;
-	  cpu_arch_isa_flags.bitfield.cpuclflush = 1;
-	  cpu_arch_isa_flags.bitfield.cpummx= 1;
-	  cpu_arch_isa_flags.bitfield.cpusse = 1;
-	  cpu_arch_isa_flags.bitfield.cpusse2 = 1;
-	}
-      if (cpu_flags_all_zero (&cpu_arch_tune_flags))
-	{
-	  cpu_arch_tune_flags.bitfield.cpui186 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui286 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui386 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui486 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui586 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui686 = 1;
-	  cpu_arch_tune_flags.bitfield.cpuclflush = 1;
-	  cpu_arch_tune_flags.bitfield.cpummx= 1;
-	  cpu_arch_tune_flags.bitfield.cpusse = 1;
-	  cpu_arch_tune_flags.bitfield.cpusse2 = 1;
-	}
+      update_code_flag (CODE_64BIT, 1);
+      if (default_arch[6] == '\0')
+	x86_elf_abi = X86_64_ABI;
+      else
+	x86_elf_abi = X86_64_X32_ABI;
     }
   else if (!strcmp (default_arch, "i386"))
-    {
-      set_code_flag (CODE_32BIT);
-      if (cpu_flags_all_zero (&cpu_arch_isa_flags))
-	{
-	  cpu_arch_isa_flags.bitfield.cpui186 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui286 = 1;
-	  cpu_arch_isa_flags.bitfield.cpui386 = 1;
-	}
-      if (cpu_flags_all_zero (&cpu_arch_tune_flags))
-	{
-	  cpu_arch_tune_flags.bitfield.cpui186 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui286 = 1;
-	  cpu_arch_tune_flags.bitfield.cpui386 = 1;
-	}
-    }
+    update_code_flag (CODE_32BIT, 1);
   else
     as_fatal (_("Unknown architecture"));
+
+  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
+    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
+  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
+    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
+
   switch (OUTPUT_FLAVOR)
     {
-#if defined (TE_PE) || defined (TE_PEP)
-    case bfd_target_coff_flavour:
-      return flag_code == CODE_64BIT ? "pe-x86-64" : "pe-i386";
-#endif
-#ifdef OBJ_MAYBE_AOUT
+#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
     case bfd_target_aout_flavour:
       return AOUT_TARGET_FORMAT;
 #endif
-#ifdef OBJ_MAYBE_COFF
+#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
+# if defined (TE_PE) || defined (TE_PEP)
+    case bfd_target_coff_flavour:
+      return flag_code == CODE_64BIT ? "pe-x86-64" : "pe-i386";
+# elif defined (TE_GO32)
+    case bfd_target_coff_flavour:
+      return "coff-go32";
+# else
     case bfd_target_coff_flavour:
       return "coff-i386";
+# endif
 #endif
 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
     case bfd_target_elf_flavour:
       {
-	if (flag_code == CODE_64BIT)
+	const char *format;
+
+	switch (x86_elf_abi)
 	  {
+	  default:
+	    format = ELF_TARGET_FORMAT;
+	    break;
+	  case X86_64_ABI:
+	    use_rela_relocations = 1;
 	    object_64bit = 1;
+	    format = ELF_TARGET_FORMAT64;
+	    break;
+	  case X86_64_X32_ABI:
 	    use_rela_relocations = 1;
+	    object_64bit = 1;
+	    disallow_64bit_reloc = 1;
+	    format = ELF_TARGET_FORMAT32;
+	    break;
 	  }
-	return flag_code == CODE_64BIT ? ELF_TARGET_FORMAT64 : ELF_TARGET_FORMAT;
+	if (cpu_arch_isa == PROCESSOR_L1OM)
+	  {
+	    if (x86_elf_abi != X86_64_ABI)
+	      as_fatal (_("Intel L1OM is 64bit only"));
+	    return ELF_TARGET_L1OM_FORMAT;
+	  }
+	else
+	  return format;
       }
 #endif
 #if defined (OBJ_MACH_O)
@@ -8269,6 +8937,27 @@ tc_gen_reloc (section, fixp)
   /* Use the rela in 64bit mode.  */
   else
     {
+      if (disallow_64bit_reloc)
+	switch (code)
+	  {
+	  case BFD_RELOC_64:
+	  case BFD_RELOC_X86_64_DTPOFF64:
+	  case BFD_RELOC_X86_64_TPOFF64:
+	  case BFD_RELOC_64_PCREL:
+	  case BFD_RELOC_X86_64_GOTOFF64:
+	  case BFD_RELOC_X86_64_GOT64:
+	  case BFD_RELOC_X86_64_GOTPCREL64:
+	  case BFD_RELOC_X86_64_GOTPC64:
+	  case BFD_RELOC_X86_64_GOTPLT64:
+	  case BFD_RELOC_X86_64_PLTOFF64:
+	    as_bad_where (fixp->fx_file, fixp->fx_line,
+			  _("cannot represent relocation type %s in x32 mode"),
+			  bfd_get_reloc_code_name (code));
+	    break;
+	  default:
+	    break;
+	  }
+
       if (!fixp->fx_pcrel)
 	rel->addend = fixp->fx_offset;
       else
@@ -8384,12 +9073,12 @@ i386_solaris_fix_up_eh_frame (segT sec)
 void
 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
 {
-  expressionS expr;
+  expressionS exp;
 
-  expr.X_op = O_secrel;
-  expr.X_add_symbol = symbol;
-  expr.X_add_number = 0;
-  emit_expr (&expr, size);
+  exp.X_op = O_secrel;
+  exp.X_add_symbol = symbol;
+  exp.X_add_number = 0;
+  emit_expr (&exp, size);
 }
 #endif
 
@@ -8404,10 +9093,10 @@ x86_64_section_letter (int letter, char **ptr_msg)
       if (letter == 'l')
 	return SHF_X86_64_LARGE;
 
-      *ptr_msg = _("Bad .section directive: want a,l,w,x,M,S,G,T in string");
+      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
     }
   else
-    *ptr_msg = _("Bad .section directive: want a,w,x,M,S,G,T in string");
+    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
   return -1;
 }