/* tc-i386.c -- Assemble code for the Intel 80386
- Copyright (C) 1989-2018 Free Software Foundation, Inc.
+ Copyright (C) 1989-2019 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifndef INT_MAX
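+/* The all-ones unsigned value shifted right once, i.e. the largest
+   positive int on two's-complement hosts.  */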
+#define INT_MAX (int) (((unsigned) (-1)) >> 1)
+#endif
+#endif
+
#ifndef REGISTER_WARNINGS
#define REGISTER_WARNINGS 1
#endif
CPU_MOVDIRI_FLAGS, 0 },
{ STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
CPU_MOVDIR64B_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_BF16_FLAGS, 0 },
+ { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+ CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+ CPU_ENQCMD_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
{ STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
{ STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
+ { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
};
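+/* Usage sketch (the extension-name syntax is assumed here, not shown in
+   this file): the new table entries should make forms like
+	as -march=generic64+avx512_bf16 foo.s
+   and the ".arch .avx512_bf16" directive available, with the "no..."
+   spellings switching an extension off again.  */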
#ifdef I386COFF
/* Place the longer NOP first. */
int last;
int offset;
- const unsigned char *nops = patt[max_single_nop_size - 1];
+ const unsigned char *nops;
+
+ if (max_single_nop_size < 1)
+ {
+ as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+ max_single_nop_size);
+ return;
+ }
+
+ nops = patt[max_single_nop_size - 1];
/* Use the smaller one if the requested one isn't available. */
if (nops == NULL)
return x;
}
-static const i386_operand_type acc32 = OPERAND_TYPE_ACC32;
-static const i386_operand_type acc64 = OPERAND_TYPE_ACC64;
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static void ps (symbolS *);
static void
-pi (char *line, i386_insn *x)
+pi (const char *line, i386_insn *x)
{
unsigned int j;
{ OPERAND_TYPE_REG16, "r16" },
{ OPERAND_TYPE_REG32, "r32" },
{ OPERAND_TYPE_REG64, "r64" },
+ { OPERAND_TYPE_ACC8, "acc8" },
+ { OPERAND_TYPE_ACC16, "acc16" },
+ { OPERAND_TYPE_ACC32, "acc32" },
+ { OPERAND_TYPE_ACC64, "acc64" },
{ OPERAND_TYPE_IMM8, "i8" },
{ OPERAND_TYPE_IMM8, "i8s" },
{ OPERAND_TYPE_IMM16, "i16" },
{ OPERAND_TYPE_FLOATACC, "FAcc" },
{ OPERAND_TYPE_SREG2, "SReg2" },
{ OPERAND_TYPE_SREG3, "SReg3" },
- { OPERAND_TYPE_ACC, "Acc" },
{ OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" },
{ OPERAND_TYPE_REGMMX, "rMMX" },
{ OPERAND_TYPE_REGXMM, "rXMM" },
for (j = 0; j < ARRAY_SIZE (type_names); j++)
{
a = operand_type_and (t, type_names[j].mask);
- if (!operand_type_all_zero (&a))
+ if (operand_type_equal (&a, &type_names[j].mask))
fprintf (stdout, "%s, ", type_names[j].name);
}
fflush (stdout);
else if (i.tm.opcode_modifier.vexw)
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
- w = (i.rex & REX_W) ? 1 : 0;
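+ /* Outside of 64-bit mode no REX prefix exists, so W is "ignored"
+ (WIG) and follows the -mvexwig command-line setting.  */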
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
/* Use 2-byte VEX prefix if possible. */
if (w == 0
else if (i.tm.opcode_modifier.vexw)
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
- w = (i.rex & REX_W) ? 1 : 0;
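+ /* Likewise for EVEX: outside of 64-bit mode the W bit is ignored
+ and follows the -mevexwig setting.  */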
+ w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
/* Encode the U bit. */
implied_prefix |= 0x4;
&& i.tm.extension_opcode == 0x4)
|| ((i.tm.base_opcode == 0xf6
|| i.tm.base_opcode == 0xc6)
- && i.tm.extension_opcode == 0x0)))))
+ && i.tm.extension_opcode == 0x0)))
+ || (fits_in_imm7 (i.op[0].imms->X_add_number)
+ && i.tm.base_opcode == 0x83
+ && i.tm.extension_opcode == 0x4)))
|| (i.types[0].bitfield.qword
&& ((i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
{
/* Optimize: -O:
andq $imm31, %r64 -> andl $imm31, %r32
+ andq $imm7, %r64 -> andl $imm7, %r32
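+ (imm7, i.e. a non-negative imm8, because opcode 0x83 sign-extends
+ its byte immediate: a negative byte would clear the upper 32 bits
+ under andl but preserve them under andq)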
testq $imm31, %r64 -> testl $imm31, %r32
xorq %r64, %r64 -> xorl %r32, %r32
subq %r64, %r64 -> subl %r32, %r32
}
}
}
- else if (optimize > 1
- && i.reg_operands == 3
+ else if (i.reg_operands == 3
&& i.op[0].regs == i.op[1].regs
&& !i.types[2].bitfield.xmmword
&& (i.tm.opcode_modifier.vex
&& !i.rounding
&& is_evex_encoding (&i.tm)
&& (i.vec_encoding != vex_encoding_evex
+ || cpu_arch_isa_flags.bitfield.cpuavx512vl
|| i.tm.cpu_flags.bitfield.cpuavx512vl
|| (i.tm.operand_types[2].bitfield.zmmword
- && i.types[2].bitfield.ymmword)
- || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+ && i.types[2].bitfield.ymmword))))
&& ((i.tm.base_opcode == 0x55
|| i.tm.base_opcode == 0x6655
|| i.tm.base_opcode == 0x66df
|| i.tm.base_opcode == 0x6647)
&& i.tm.extension_opcode == None))
{
- /* Optimize: -O2:
+ /* Optimize: -O1:
VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
vpsubq and vpsubw:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VEX VOP %ymmM, %ymmM, %ymmN
-> VEX VOP %xmmM, %xmmM, %xmmN
VOP, one of vpandn and vpxor:
VOP, one of vpandnd and vpandnq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of vpxord and vpxorq:
EVEX VOP %zmmM, %zmmM, %zmmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
EVEX VOP %ymmM, %ymmM, %ymmN
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
- -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+ -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
VOP, one of kxord and kxorq:
VEX VOP %kM, %kM, %kN
-> VEX kxorw %kM, %kM, %kN
*/
if (is_evex_encoding (&i.tm))
{
- if (i.vec_encoding == vex_encoding_evex)
- i.tm.opcode_modifier.evex = EVEX128;
- else
+ if (i.vec_encoding != vex_encoding_evex)
{
i.tm.opcode_modifier.vex = VEX128;
i.tm.opcode_modifier.vexw = VEXW0;
i.tm.opcode_modifier.evex = 0;
}
+ else if (optimize > 1)
+ i.tm.opcode_modifier.evex = EVEX128;
+ else
+ return;
}
else if (i.tm.operand_types[0].bitfield.regmask)
{
i.types[j].bitfield.ymmword = 0;
}
}
+ else if (i.vec_encoding != vex_encoding_evex
+ && !i.types[0].bitfield.zmmword
+ && !i.types[1].bitfield.zmmword
+ && !i.mask
+ && is_evex_encoding (&i.tm)
+ && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
+ || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+ && i.tm.extension_opcode == None)
+ {
+ /* Optimize: -O1:
+ VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+ vmovdqu32 and vmovdqu64:
+ EVEX VOP %xmmM, %xmmN
+ -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+ EVEX VOP %ymmM, %ymmN
+ -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+ EVEX VOP %xmmM, mem
+ -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+ EVEX VOP %ymmM, mem
+ -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+ EVEX VOP mem, %xmmN
+ -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
+ EVEX VOP mem, %ymmN
+ -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+ */
+ for (j = 0; j < 2; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ /* Since the VEX prefix is 2 or 3 bytes while the EVEX prefix is
+ 4 bytes, and EVEX Disp8 is 1 byte while VEX Disp32 is 4 bytes,
+ we prefer EVEX Disp8 over VEX Disp32. */
+ int evex_disp8, vex_disp8;
+ unsigned int memshift = i.memshift;
+ offsetT n = i.op[j].disps->X_add_number;
+
+ evex_disp8 = fits_in_disp8 (n);
+ i.memshift = 0;
+ vex_disp8 = fits_in_disp8 (n);
+ if (evex_disp8 != vex_disp8)
+ {
+ i.memshift = memshift;
+ return;
+ }
+
+ i.types[j].bitfield.disp8 = vex_disp8;
+ break;
+ }
+ if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+ i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+ i.tm.opcode_modifier.vex
+ = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+ i.tm.opcode_modifier.vexw = VEXW0;
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.masking = 0;
+ i.tm.opcode_modifier.disp8memshift = 0;
+ i.memshift = 0;
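+ /* With the EVEX compressed-disp8 scaling gone, re-check which
+ displacements still fit in a single byte.  */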
+ for (j = 0; j < 2; j++)
+ if (operand_type_check (i.types[j], disp)
+ && i.op[j].disps->X_op == O_constant)
+ {
+ i.types[j].bitfield.disp8
+ = fits_in_disp8 (i.op[j].disps->X_add_number);
+ break;
+ }
+ }
}
/* This is the guts of the machine-dependent assembler. LINE points to a
if (is_any_vex_encoding (&i.tm))
{
- if (flag_code == CODE_16BIT)
+ if (!cpu_arch_flags.bitfield.cpui286)
{
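+ /* VEX/EVEX-encoded insns raise #UD in real and VM86 modes, so
+ require an ISA that has protected mode at all (i286 and up).  */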
- as_bad (_("instruction `%s' isn't supported in 16-bit mode."),
+ as_bad (_("instruction `%s' isn't supported outside of protected mode."),
i.tm.name);
return;
}
}
/* If we are in 16-bit mode, do not allow addr16 or data16.
Similarly, in 32-bit mode, do not allow addr32 or data32. */
- if ((current_templates->start->opcode_modifier.size16
- || current_templates->start->opcode_modifier.size32)
+ if ((current_templates->start->opcode_modifier.size == SIZE16
+ || current_templates->start->opcode_modifier.size == SIZE32)
&& flag_code != CODE_64BIT
- && (current_templates->start->opcode_modifier.size32
+ && ((current_templates->start->opcode_modifier.size == SIZE32)
^ (flag_code == CODE_16BIT)))
{
as_bad (_("redundant %s prefix"),
if (!current_templates)
{
check_suffix:
- /* See if we can get a match by trimming off a suffix. */
- switch (mnem_p[-1])
+ if (mnem_p > mnemonic)
{
- case WORD_MNEM_SUFFIX:
- if (intel_syntax && (intel_float_operand (mnemonic) & 2))
- i.suffix = SHORT_MNEM_SUFFIX;
- else
- /* Fall through. */
- case BYTE_MNEM_SUFFIX:
- case QWORD_MNEM_SUFFIX:
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- break;
- case SHORT_MNEM_SUFFIX:
- case LONG_MNEM_SUFFIX:
- if (!intel_syntax)
- {
- i.suffix = mnem_p[-1];
- mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
- }
- break;
-
- /* Intel Syntax. */
- case 'd':
- if (intel_syntax)
+ /* See if we can get a match by trimming off a suffix. */
+ switch (mnem_p[-1])
{
- if (intel_float_operand (mnemonic) == 1)
+ case WORD_MNEM_SUFFIX:
+ if (intel_syntax && (intel_float_operand (mnemonic) & 2))
i.suffix = SHORT_MNEM_SUFFIX;
else
- i.suffix = LONG_MNEM_SUFFIX;
+ /* Fall through. */
+ case BYTE_MNEM_SUFFIX:
+ case QWORD_MNEM_SUFFIX:
+ i.suffix = mnem_p[-1];
mnem_p[-1] = '\0';
current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ mnemonic);
+ break;
+ case SHORT_MNEM_SUFFIX:
+ case LONG_MNEM_SUFFIX:
+ if (!intel_syntax)
+ {
+ i.suffix = mnem_p[-1];
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
+
+ /* Intel Syntax. */
+ case 'd':
+ if (intel_syntax)
+ {
+ if (intel_float_operand (mnemonic) == 1)
+ i.suffix = SHORT_MNEM_SUFFIX;
+ else
+ i.suffix = LONG_MNEM_SUFFIX;
+ mnem_p[-1] = '\0';
+ current_templates = (const templates *) hash_find (op_hash,
+ mnemonic);
+ }
+ break;
}
- break;
}
+
if (!current_templates)
{
as_bad (_("no such instruction: `%s'"), token_start);
zero-extend %eax to %rax. */
if (flag_code == CODE_64BIT
&& t->base_opcode == 0x90
- && operand_type_equal (&i.types [0], &acc32)
- && operand_type_equal (&i.types [1], &acc32))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.dword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.dword)
continue;
/* xrelease mov %eax, <disp> is another special case. It must not
match the accumulator-only encoding of mov. */
{
/* If matched instruction specifies an explicit instruction mnemonic
suffix, use it. */
- if (i.tm.opcode_modifier.size16)
+ if (i.tm.opcode_modifier.size == SIZE16)
i.suffix = WORD_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size32)
+ else if (i.tm.opcode_modifier.size == SIZE32)
i.suffix = LONG_MNEM_SUFFIX;
- else if (i.tm.opcode_modifier.size64)
+ else if (i.tm.opcode_modifier.size == SIZE64)
i.suffix = QWORD_MNEM_SUFFIX;
else if (i.reg_operands)
{
/* exclude fldenv/frstor/fsave/fstenv */
&& i.tm.opcode_modifier.no_ssuf)
{
- i.suffix = stackop_size;
+ if (stackop_size == LONG_MNEM_SUFFIX
+ && i.tm.base_opcode == 0xcf)
+ {
+ /* stackop_size is set to LONG_MNEM_SUFFIX for the
+ .code16gcc directive to support 16-bit mode with
+ 32-bit addressing. For IRET without a suffix, generate
+ a 16-bit IRET (opcode 0xcf) to return from an interrupt
+ handler. */
+ i.suffix = WORD_MNEM_SUFFIX;
+ as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+ }
+ else
+ i.suffix = stackop_size;
}
else if (intel_syntax
&& !i.suffix
else if (i.suffix != QWORD_MNEM_SUFFIX
&& !i.tm.opcode_modifier.ignoresize
&& !i.tm.opcode_modifier.floatmf
- && !i.tm.opcode_modifier.vex
- && !i.tm.opcode_modifier.vexopcode
- && !is_evex_encoding (&i.tm)
+ && !is_any_vex_encoding (&i.tm)
&& ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
|| (flag_code == CODE_64BIT
&& i.tm.opcode_modifier.jumpbyte)))
&& ! (i.operands == 2
&& i.tm.base_opcode == 0x90
&& i.tm.extension_opcode == None
- && operand_type_equal (&i.types [0], &acc64)
- && operand_type_equal (&i.types [1], &acc64)))
+ && i.types[0].bitfield.acc && i.types[0].bitfield.qword
+ && i.types[1].bitfield.acc && i.types[1].bitfield.qword))
i.rex |= REX_W;
break;
if (!IS_ELF)
return FALSE;
+#ifdef TE_SOLARIS
+ /* Don't emit a PLT32 relocation on Solaris: neither the native
+ linker nor krtld supports it. */
+ return FALSE;
+#endif
+
/* Since there is no need to prepare for PLT branch on x86-64, we
can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
be used as a marker for 32-bit PC-relative branches. */
if (!IS_ELF || !x86_used_note)
return;
- x86_isa_1_used |= GNU_PROPERTY_X86_UINT32_VALID;
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
/* The .note.gnu.property section layout:
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+ if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+ x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
if (i.tm.cpu_flags.bitfield.cpu8087
|| i.tm.cpu_flags.bitfield.cpu287
/* Check for "call/jmp *mem", "mov mem, %reg",
"test %reg, mem" and "binop mem, %reg" where binop
is one of adc, add, and, cmp, or, sbb, sub, xor
- instructions. Always generate R_386_GOT32X for
- "sym*GOT" operand in 32-bit mode. */
- if ((generate_relax_relocations
- || (!object_64bit
- && i.rm.mode == 0
- && i.rm.regmem == 5))
+ instructions without data prefix. Always generate
+ R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
+ if (i.prefix[DATA_PREFIX] == 0
+ && (generate_relax_relocations
+ || (!object_64bit
+ && i.rm.mode == 0
+ && i.rm.regmem == 5))
&& (i.rm.mode == 2
|| (i.rm.mode == 0 && i.rm.regmem == 5))
&& ((i.operands == 1
as_bad (_("missing or invalid expression `%s'"), save);
*input_line_pointer = c;
}
+ else if ((got_reloc == BFD_RELOC_386_PLT32
+ || got_reloc == BFD_RELOC_X86_64_PLT32)
+ && exp->X_op != O_symbol)
+ {
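+ /* A PLT relocation needs a bare symbol to resolve against;
+ whatever was parsed here is not one, so reject it.  */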
+ char c = *input_line_pointer;
+ *input_line_pointer = 0;
+ as_bad (_("invalid PLT expression `%s'"), save);
+ *input_line_pointer = c;
+ }
}
}
else
{
case BFD_RELOC_386_PLT32:
case BFD_RELOC_X86_64_PLT32:
- /* Make the jump instruction point to the address of the operand. At
- runtime we merely add the offset to the actual PLT entry. */
- value = -4;
+ /* Make the jump instruction point to the address of the operand.
+ At runtime we merely add the offset to the actual PLT entry.
+ NB: Subtract the offset size only for jump instructions. */
+ if (fixP->fx_pcrel)
+ value = -4;
break;
case BFD_RELOC_386_TLS_GD:
{
optimize_for_space = 1;
/* Turn on all encoding optimizations. */
- optimize = -1;
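+ /* INT_MAX rather than -1, so that the "optimize > 1" (-O2) paths
+ above are also enabled.  */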
+ optimize = INT_MAX;
}
else
{