X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-i386.c;h=4816b54f93c7eb8d509f16bc22ca0d4c9c46529f;hb=462cac5884ed4c38e6180b2e2769aaa5225e695b;hp=2204d00d8c0f21b8a289a5066dab3192ada82a74;hpb=03751133023c5623cf60f8d3222cb79a376331f9;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 2204d00d8c..4816b54f93 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -1,5 +1,5 @@ /* tc-i386.c -- Assemble code for the Intel 80386 - Copyright (C) 1989-2018 Free Software Foundation, Inc. + Copyright (C) 1989-2019 Free Software Foundation, Inc. This file is part of GAS, the GNU Assembler. @@ -33,6 +33,17 @@ #include "elf/x86-64.h" #include "opcodes/i386-init.h" +#ifdef HAVE_LIMITS_H +#include <limits.h> +#else +#ifdef HAVE_SYS_PARAM_H +#include <sys/param.h> +#endif +#ifndef INT_MAX +#define INT_MAX (int) (((unsigned) (-1)) >> 1) +#endif +#endif + #ifndef REGISTER_WARNINGS #define REGISTER_WARNINGS 1 #endif @@ -1069,6 +1080,8 @@ static const arch_entry cpu_arch[] = CPU_MOVDIRI_FLAGS, 0 }, { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN, CPU_MOVDIR64B_FLAGS, 0 }, + { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN, + CPU_AVX512_BF16_FLAGS, 0 }, }; static const noarch_entry cpu_noarch[] = @@ -1108,6 +1121,7 @@ static const noarch_entry cpu_noarch[] = { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS }, { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS }, { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS }, + { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS }, }; #ifdef I386COFF @@ -3453,7 +3467,7 @@ build_vex_prefix (const insn_template *t) else if (i.tm.opcode_modifier.vexw) w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0; else - w = (i.rex & REX_W) ? 1 : 0; + w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0; /* Use 2-byte VEX prefix if possible. 
*/ if (w == 0 @@ -3646,7 +3660,7 @@ build_evex_prefix (void) else if (i.tm.opcode_modifier.vexw) w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0; else - w = (i.rex & REX_W) ? 1 : 0; + w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0; /* Encode the U bit. */ implied_prefix |= 0x4; @@ -3966,8 +3980,7 @@ optimize_encoding (void) } } } - else if (optimize > 1 - && i.reg_operands == 3 + else if (i.reg_operands == 3 && i.op[0].regs == i.op[1].regs && !i.types[2].bitfield.xmmword && (i.tm.opcode_modifier.vex @@ -3975,10 +3988,10 @@ optimize_encoding (void) && !i.rounding && is_evex_encoding (&i.tm) && (i.vec_encoding != vex_encoding_evex + || cpu_arch_isa_flags.bitfield.cpuavx512vl || i.tm.cpu_flags.bitfield.cpuavx512vl || (i.tm.operand_types[2].bitfield.zmmword - && i.types[2].bitfield.ymmword) - || cpu_arch_isa_flags.bitfield.cpuavx512vl))) + && i.types[2].bitfield.ymmword)))) && ((i.tm.base_opcode == 0x55 || i.tm.base_opcode == 0x6655 || i.tm.base_opcode == 0x66df @@ -3995,15 +4008,15 @@ optimize_encoding (void) || i.tm.base_opcode == 0x6647) && i.tm.extension_opcode == None)) { - /* Optimize: -O2: + /* Optimize: -O1: VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd, vpsubq and vpsubw: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) VEX VOP %ymmM, %ymmM, %ymmN -> VEX VOP %xmmM, %xmmM, %xmmN VOP, one of vpandn and vpxor: @@ -4012,17 +4025,17 @@ optimize_encoding (void) VOP, one of vpandnd and vpandnq: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX vpandn %xmmM, 
%xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) VOP, one of vpxord and vpxorq: EVEX VOP %zmmM, %zmmM, %zmmN -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) EVEX VOP %ymmM, %ymmM, %ymmN -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16) - -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) + -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2) VOP, one of kxord and kxorq: VEX VOP %kM, %kM, %kN -> VEX kxorw %kM, %kM, %kN @@ -4032,14 +4045,16 @@ optimize_encoding (void) */ if (is_evex_encoding (&i.tm)) { - if (i.vec_encoding == vex_encoding_evex) - i.tm.opcode_modifier.evex = EVEX128; - else + if (i.vec_encoding != vex_encoding_evex) { i.tm.opcode_modifier.vex = VEX128; i.tm.opcode_modifier.vexw = VEXW0; i.tm.opcode_modifier.evex = 0; } + else if (optimize > 1) + i.tm.opcode_modifier.evex = EVEX128; + else + return; } else if (i.tm.operand_types[0].bitfield.regmask) { @@ -4056,6 +4071,73 @@ optimize_encoding (void) i.types[j].bitfield.ymmword = 0; } } + else if (i.vec_encoding != vex_encoding_evex + && !i.types[0].bitfield.zmmword + && !i.types[1].bitfield.zmmword + && !i.mask + && is_evex_encoding (&i.tm) + && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + && i.tm.extension_opcode == None) + { + /* Optimize: -O1: + VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16, + vmovdqu32 and vmovdqu64: + EVEX VOP %xmmM, %xmmN + -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16) + EVEX VOP %ymmM, %ymmN + -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16) + EVEX VOP %xmmM, mem + -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16) + EVEX VOP %ymmM, mem + -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16) + EVEX VOP mem, %xmmN + -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16) + EVEX VOP mem, %ymmN + -> VEX 
vmovdqa|vmovdqu mem, %ymmN (N < 16) + */ + for (j = 0; j < 2; j++) + if (operand_type_check (i.types[j], disp) + && i.op[j].disps->X_op == O_constant) + { + /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix + has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4 + bytes, we choose EVEX Disp8 over VEX Disp32. */ + int evex_disp8, vex_disp8; + unsigned int memshift = i.memshift; + offsetT n = i.op[j].disps->X_add_number; + + evex_disp8 = fits_in_disp8 (n); + i.memshift = 0; + vex_disp8 = fits_in_disp8 (n); + if (evex_disp8 != vex_disp8) + { + i.memshift = memshift; + return; + } + + i.types[j].bitfield.disp8 = vex_disp8; + break; + } + if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + i.tm.base_opcode ^= 0xf36f ^ 0xf26f; + i.tm.opcode_modifier.vex + = i.types[0].bitfield.ymmword ? VEX256 : VEX128; + i.tm.opcode_modifier.vexw = VEXW0; + i.tm.opcode_modifier.evex = 0; + i.tm.opcode_modifier.masking = 0; + i.tm.opcode_modifier.disp8memshift = 0; + i.memshift = 0; + for (j = 0; j < 2; j++) + if (operand_type_check (i.types[j], disp) + && i.op[j].disps->X_op == O_constant) + { + i.types[j].bitfield.disp8 + = fits_in_disp8 (i.op[j].disps->X_add_number); + break; + } + } } /* This is the guts of the machine-dependent assembler. LINE points to a @@ -4450,10 +4532,10 @@ parse_insn (char *line, char *mnemonic) } /* If we are in 16-bit mode, do not allow addr16 or data16. Similarly, in 32-bit mode, do not allow addr32 or data32. 
*/ - if ((current_templates->start->opcode_modifier.size16 - || current_templates->start->opcode_modifier.size32) + if ((current_templates->start->opcode_modifier.size == SIZE16 + || current_templates->start->opcode_modifier.size == SIZE32) && flag_code != CODE_64BIT - && (current_templates->start->opcode_modifier.size32 + && ((current_templates->start->opcode_modifier.size == SIZE32) ^ (flag_code == CODE_16BIT))) { as_bad (_("redundant %s prefix"), @@ -4561,46 +4643,50 @@ parse_insn (char *line, char *mnemonic) if (!current_templates) { check_suffix: - /* See if we can get a match by trimming off a suffix. */ - switch (mnem_p[-1]) + if (mnem_p > mnemonic) { - case WORD_MNEM_SUFFIX: - if (intel_syntax && (intel_float_operand (mnemonic) & 2)) - i.suffix = SHORT_MNEM_SUFFIX; - else - /* Fall through. */ - case BYTE_MNEM_SUFFIX: - case QWORD_MNEM_SUFFIX: - i.suffix = mnem_p[-1]; - mnem_p[-1] = '\0'; - current_templates = (const templates *) hash_find (op_hash, - mnemonic); - break; - case SHORT_MNEM_SUFFIX: - case LONG_MNEM_SUFFIX: - if (!intel_syntax) + /* See if we can get a match by trimming off a suffix. */ + switch (mnem_p[-1]) { - i.suffix = mnem_p[-1]; - mnem_p[-1] = '\0'; - current_templates = (const templates *) hash_find (op_hash, - mnemonic); - } - break; - - /* Intel Syntax. */ - case 'd': - if (intel_syntax) - { - if (intel_float_operand (mnemonic) == 1) + case WORD_MNEM_SUFFIX: + if (intel_syntax && (intel_float_operand (mnemonic) & 2)) i.suffix = SHORT_MNEM_SUFFIX; else - i.suffix = LONG_MNEM_SUFFIX; + /* Fall through. */ + case BYTE_MNEM_SUFFIX: + case QWORD_MNEM_SUFFIX: + i.suffix = mnem_p[-1]; mnem_p[-1] = '\0'; current_templates = (const templates *) hash_find (op_hash, - mnemonic); + mnemonic); + break; + case SHORT_MNEM_SUFFIX: + case LONG_MNEM_SUFFIX: + if (!intel_syntax) + { + i.suffix = mnem_p[-1]; + mnem_p[-1] = '\0'; + current_templates = (const templates *) hash_find (op_hash, + mnemonic); + } + break; + + /* Intel Syntax. 
*/ + case 'd': + if (intel_syntax) + { + if (intel_float_operand (mnemonic) == 1) + i.suffix = SHORT_MNEM_SUFFIX; + else + i.suffix = LONG_MNEM_SUFFIX; + mnem_p[-1] = '\0'; + current_templates = (const templates *) hash_find (op_hash, + mnemonic); + } + break; } - break; } + if (!current_templates) { as_bad (_("no such instruction: `%s'"), token_start); @@ -6045,11 +6131,11 @@ process_suffix (void) { /* If matched instruction specifies an explicit instruction mnemonic suffix, use it. */ - if (i.tm.opcode_modifier.size16) + if (i.tm.opcode_modifier.size == SIZE16) i.suffix = WORD_MNEM_SUFFIX; - else if (i.tm.opcode_modifier.size32) + else if (i.tm.opcode_modifier.size == SIZE32) i.suffix = LONG_MNEM_SUFFIX; - else if (i.tm.opcode_modifier.size64) + else if (i.tm.opcode_modifier.size == SIZE64) i.suffix = QWORD_MNEM_SUFFIX; else if (i.reg_operands) { @@ -7896,7 +7982,6 @@ x86_cleanup (void) if (!IS_ELF || !x86_used_note) return; - x86_isa_1_used |= GNU_PROPERTY_X86_UINT32_VALID; x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86; /* The .note.gnu.property section layout: @@ -8059,6 +8144,8 @@ output_insn (void) x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2; if (i.tm.cpu_flags.bitfield.cpuavx512_vnni) x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI; + if (i.tm.cpu_flags.bitfield.cpuavx512_bf16) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16; if (i.tm.cpu_flags.bitfield.cpu8087 || i.tm.cpu_flags.bitfield.cpu287 @@ -8419,12 +8506,13 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) /* Check for "call/jmp *mem", "mov mem, %reg", "test %reg, mem" and "binop mem, %reg" where binop is one of adc, add, and, cmp, or, sbb, sub, xor - instructions. Always generate R_386_GOT32X for - "sym*GOT" operand in 32-bit mode. */ - if ((generate_relax_relocations - || (!object_64bit - && i.rm.mode == 0 - && i.rm.regmem == 5)) + instructions without data prefix. Always generate + R_386_GOT32X for "sym*GOT" operand in 32-bit mode. 
*/ + if (i.prefix[DATA_PREFIX] == 0 + && (generate_relax_relocations + || (!object_64bit + && i.rm.mode == 0 + && i.rm.regmem == 5)) && (i.rm.mode == 2 || (i.rm.mode == 0 && i.rm.regmem == 5)) && ((i.operands == 1 @@ -8919,6 +9007,15 @@ x86_cons (expressionS *exp, int size) as_bad (_("missing or invalid expression `%s'"), save); *input_line_pointer = c; } + else if ((got_reloc == BFD_RELOC_386_PLT32 + || got_reloc == BFD_RELOC_X86_64_PLT32) + && exp->X_op != O_symbol) + { + char c = *input_line_pointer; + *input_line_pointer = 0; + as_bad (_("invalid PLT expression `%s'"), save); + *input_line_pointer = c; + } } } else @@ -10533,9 +10630,11 @@ md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED) { case BFD_RELOC_386_PLT32: case BFD_RELOC_X86_64_PLT32: - /* Make the jump instruction point to the address of the operand. At - runtime we merely add the offset to the actual PLT entry. */ - value = -4; + /* Make the jump instruction point to the address of the operand. + At runtime we merely add the offset to the actual PLT entry. + NB: Subtract the offset size only for jump instructions. */ + if (fixP->fx_pcrel) + value = -4; break; case BFD_RELOC_386_TLS_GD: @@ -11327,7 +11426,7 @@ md_parse_option (int c, const char *arg) { optimize_for_space = 1; /* Turn on all encoding optimizations. */ - optimize = -1; + optimize = INT_MAX; } else {