X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-i386.c;h=ac141b84d0b6ee914ab5ed67bdf094da367efeac;hb=5990e377e5a339bce715fabfc3e45b24b459a7af;hp=20cd1adf85ba9f4b3de24275863e65beab00d1f2;hpb=3528c362d9471524cfe8a76c692081838b292d64;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 20cd1adf85..ac141b84d0 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -1,5 +1,5 @@ /* tc-i386.c -- Assemble code for the Intel 80386 - Copyright (C) 1989-2019 Free Software Foundation, Inc. + Copyright (C) 1989-2020 Free Software Foundation, Inc. This file is part of GAS, the GNU Assembler. @@ -44,10 +44,6 @@ #endif #endif -#ifndef REGISTER_WARNINGS -#define REGISTER_WARNINGS 1 -#endif - #ifndef INFER_ADDR_PREFIX #define INFER_ADDR_PREFIX 1 #endif @@ -182,6 +178,7 @@ static char *parse_insn (char *, char *); static char *parse_operands (char *, const char *); static void swap_operands (void); static void swap_2_operands (int, int); +static enum flag_code i386_addressing_mode (void); static void optimize_imm (void); static void optimize_disp (void); static const insn_template *match_template (char); @@ -353,6 +350,12 @@ struct _i386_insn unsigned int prefixes; unsigned char prefix[MAX_PREFIXES]; + /* Register is in low 3 bits of opcode. */ + bfd_boolean short_form; + + /* The operand to a branch insn indicates an absolute branch. */ + bfd_boolean jumpabsolute; + /* Has MMX register operands. */ bfd_boolean has_regmmx; @@ -365,6 +368,9 @@ struct _i386_insn /* Has ZMM register operands. */ bfd_boolean has_regzmm; + /* Has GOTPC or TLS relocation. */ + bfd_boolean has_gotpc_tls_reloc; + /* RM and SIB are the modrm byte and the sib byte where the addressing modes of this insn are encoded. 
*/ modrm_byte rm; @@ -412,7 +418,7 @@ struct _i386_insn enum { vex_encoding_default = 0, - vex_encoding_vex2, + vex_encoding_vex, vex_encoding_vex3, vex_encoding_evex } vec_encoding; @@ -559,6 +565,8 @@ static enum flag_code flag_code; static unsigned int object_64bit; static unsigned int disallow_64bit_reloc; static int use_rela_relocations = 0; +/* __tls_get_addr/___tls_get_addr symbol for TLS. */ +static const char *tls_get_addr; #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \ || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \ @@ -589,9 +597,11 @@ static int shared = 0; 0 if att syntax. */ static int intel_syntax = 0; -/* 1 for Intel64 ISA, - 0 if AMD64 ISA. */ -static int intel64; +static enum x86_64_isa +{ + amd64 = 1, /* AMD64 ISA. */ + intel64 /* Intel64 ISA. */ +} isa64; /* 1 for intel mnemonic, 0 if att mnemonic. */ @@ -619,6 +629,21 @@ static int omit_lock_prefix = 0; "lock addl $0, (%{re}sp)". */ static int avoid_fence = 0; +/* Type of the previous instruction. */ +static struct + { + segT seg; + const char *file; + const char *name; + unsigned int line; + enum last_insn_kind + { + last_insn_other = 0, + last_insn_directive, + last_insn_prefix + } kind; + } last_insn; + /* 1 if the assembler should generate relax relocations. */ static int generate_relax_relocations @@ -632,6 +657,44 @@ static enum check_kind } sse_check, operand_check = check_warning; +/* Non-zero if branches should be aligned within power of 2 boundary. */ +static int align_branch_power = 0; + +/* Types of branches to align. */ +enum align_branch_kind + { + align_branch_none = 0, + align_branch_jcc = 1, + align_branch_fused = 2, + align_branch_jmp = 3, + align_branch_call = 4, + align_branch_indirect = 5, + align_branch_ret = 6 + }; + +/* Type bits of branches to align. 
*/ +enum align_branch_bit + { + align_branch_jcc_bit = 1 << align_branch_jcc, + align_branch_fused_bit = 1 << align_branch_fused, + align_branch_jmp_bit = 1 << align_branch_jmp, + align_branch_call_bit = 1 << align_branch_call, + align_branch_indirect_bit = 1 << align_branch_indirect, + align_branch_ret_bit = 1 << align_branch_ret + }; + +static unsigned int align_branch = (align_branch_jcc_bit + | align_branch_fused_bit + | align_branch_jmp_bit); + +/* The maximum padding size for fused jcc. CMP like instruction can + be 9 bytes and jcc can be 6 bytes. Leave room just in case for + prefixes. */ +#define MAX_FUSED_JCC_PADDING_SIZE 20 + +/* The maximum number of prefixes added for an instruction. */ +static unsigned int align_branch_prefix_size = 5; + /* Optimization: 1. Clear the REX_W bit with register operand if possible. 2. Above plus use 128bit vector instruction to clear the full vector @@ -735,12 +798,19 @@ int x86_cie_data_alignment; /* Interface to relax_segment. There are 3 major relax states for 386 jump insns because the different types of jumps add different sizes to frags when we're - figuring out what sort of jump to choose to reach a given label. */ + figuring out what sort of jump to choose to reach a given label. + + BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align + branches which are handled by md_estimate_size_before_relax() and + i386_generic_table_relax_frag(). */ /* Types. */ #define UNCOND_JUMP 0 #define COND_JUMP 1 #define COND_JUMP86 2 +#define BRANCH_PADDING 3 +#define BRANCH_PREFIX 4 +#define FUSED_JCC_PADDING 5 /* Sizes. */ #define CODE16 1 @@ -1381,6 +1451,12 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) case rs_fill_nop: case rs_align_code: break; + case rs_machine_dependent: + /* Allow NOP padding for jumps and calls. */ + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + break; + /* Fall through. 
*/ default: return; } @@ -1525,7 +1601,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) return; } } - else + else if (fragP->fr_type != rs_machine_dependent) fragP->fr_var = count; if ((count / max_single_nop_size) > max_number_of_nops) @@ -1613,6 +1689,7 @@ operand_type_set (union i386_operand_type *x, unsigned int v) } x->bitfield.class = ClassNone; + x->bitfield.instance = InstanceNone; } static INLINE int @@ -1829,6 +1906,8 @@ operand_type_and (i386_operand_type x, i386_operand_type y) { if (x.bitfield.class != y.bitfield.class) x.bitfield.class = ClassNone; + if (x.bitfield.instance != y.bitfield.instance) + x.bitfield.instance = InstanceNone; switch (ARRAY_SIZE (x.array)) { @@ -1851,6 +1930,7 @@ static INLINE i386_operand_type operand_type_and_not (i386_operand_type x, i386_operand_type y) { gas_assert (y.bitfield.class == ClassNone); + gas_assert (y.bitfield.instance == InstanceNone); switch (ARRAY_SIZE (x.array)) { @@ -1875,6 +1955,9 @@ operand_type_or (i386_operand_type x, i386_operand_type y) gas_assert (x.bitfield.class == ClassNone || y.bitfield.class == ClassNone || x.bitfield.class == y.bitfield.class); + gas_assert (x.bitfield.instance == InstanceNone || + y.bitfield.instance == InstanceNone || + x.bitfield.instance == y.bitfield.instance); switch (ARRAY_SIZE (x.array)) { @@ -1897,6 +1980,7 @@ static INLINE i386_operand_type operand_type_xor (i386_operand_type x, i386_operand_type y) { gas_assert (y.bitfield.class == ClassNone); + gas_assert (y.bitfield.instance == InstanceNone); switch (ARRAY_SIZE (x.array)) { @@ -2055,11 +2139,9 @@ operand_size_match (const insn_template *t) { unsigned int j, match = MATCH_STRAIGHT; - /* Don't check jump instructions. */ + /* Don't check non-absolute jump instructions. 
*/ if (t->opcode_modifier.jump - || t->opcode_modifier.jumpbyte - || t->opcode_modifier.jumpdword - || t->opcode_modifier.jumpintersegment) + && t->opcode_modifier.jump != JUMP_ABSOLUTE) return match; /* Check memory and accumulator operand size. */ @@ -2067,7 +2149,7 @@ operand_size_match (const insn_template *t) { if (i.types[j].bitfield.class != Reg && i.types[j].bitfield.class != RegSIMD - && t->operand_types[j].bitfield.anysize) + && t->opcode_modifier.anysize) continue; if (t->operand_types[j].bitfield.class == Reg @@ -2084,7 +2166,7 @@ operand_size_match (const insn_template *t) break; } - if (t->operand_types[j].bitfield.acc + if (t->operand_types[j].bitfield.instance == Accum && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j))) { match = 0; @@ -2121,7 +2203,7 @@ mismatch: && !match_simd_size (t, j, given)) goto mismatch; - if (t->operand_types[j].bitfield.acc + if (t->operand_types[j].bitfield.instance == Accum && (!match_operand_size (t, j, given) || !match_simd_size (t, j, given))) goto mismatch; @@ -2139,7 +2221,6 @@ operand_type_match (i386_operand_type overlap, { i386_operand_type temp = overlap; - temp.bitfield.jumpabsolute = 0; temp.bitfield.unspecified = 0; temp.bitfield.byte = 0; temp.bitfield.word = 0; @@ -2153,8 +2234,7 @@ operand_type_match (i386_operand_type overlap, if (operand_type_all_zero (&temp)) goto mismatch; - if (given.bitfield.baseindex == overlap.bitfield.baseindex - && given.bitfield.jumpabsolute == overlap.bitfield.jumpabsolute) + if (given.bitfield.baseindex == overlap.bitfield.baseindex) return 1; mismatch: @@ -2164,8 +2244,7 @@ mismatch: /* If given types g0 and g1 are registers they must be of the same type unless the expected operand type register overlap is null. - Memory operand size of certain SIMD instructions is also being checked - here. */ + Some Intel syntax memory operand size checking also happens here. 
*/ static INLINE int operand_type_register_match (i386_operand_type g0, @@ -2177,14 +2256,16 @@ operand_type_register_match (i386_operand_type g0, && g0.bitfield.class != RegSIMD && (!operand_type_check (g0, anymem) || g0.bitfield.unspecified - || t0.bitfield.class != RegSIMD)) + || (t0.bitfield.class != Reg + && t0.bitfield.class != RegSIMD))) return 1; if (g1.bitfield.class != Reg && g1.bitfield.class != RegSIMD && (!operand_type_check (g1, anymem) || g1.bitfield.unspecified - || t1.bitfield.class != RegSIMD)) + || (t1.bitfield.class != Reg + && t1.bitfield.class != RegSIMD))) return 1; if (g0.bitfield.byte == g1.bitfield.byte @@ -3004,6 +3085,11 @@ md_begin (void) x86_dwarf2_return_column = 8; x86_cie_data_alignment = -4; } + + /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it + can be turned into BRANCH_PREFIX frag. */ + if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE) + abort (); } void @@ -3147,13 +3233,11 @@ const type_names[] = { OPERAND_TYPE_FLOATREG, "FReg" }, { OPERAND_TYPE_FLOATACC, "FAcc" }, { OPERAND_TYPE_SREG, "SReg" }, - { OPERAND_TYPE_JUMPABSOLUTE, "Jump Absolute" }, { OPERAND_TYPE_REGMMX, "rMMX" }, { OPERAND_TYPE_REGXMM, "rXMM" }, { OPERAND_TYPE_REGYMM, "rYMM" }, { OPERAND_TYPE_REGZMM, "rZMM" }, { OPERAND_TYPE_REGMASK, "Mask reg" }, - { OPERAND_TYPE_ESSEG, "es" }, }; static void @@ -3842,52 +3926,6 @@ process_immext (void) { expressionS *exp; - if ((i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpusvme) - && i.operands > 0) - { - /* MONITOR/MWAIT as well as SVME instructions have fixed operands - with an opcode suffix which is coded in the same place as an - 8-bit immediate field would be. - Here we check those operands and remove them afterwards. 
*/ - unsigned int x; - - for (x = 0; x < i.operands; x++) - if (register_number (i.op[x].regs) != x) - as_bad (_("can't use register '%s%s' as operand %d in '%s'."), - register_prefix, i.op[x].regs->reg_name, x + 1, - i.tm.name); - - i.operands = 0; - } - - if (i.tm.cpu_flags.bitfield.cpumwaitx && i.operands > 0) - { - /* MONITORX/MWAITX instructions have fixed operands with an opcode - suffix which is coded in the same place as an 8-bit immediate - field would be. - Here we check those operands and remove them afterwards. */ - unsigned int x; - - if (i.operands != 3) - abort(); - - for (x = 0; x < 2; x++) - if (register_number (i.op[x].regs) != x) - goto bad_register_operand; - - /* Check for third operand for mwaitx/monitorx insn. */ - if (register_number (i.op[x].regs) - != (x + (i.tm.extension_opcode == 0xfb))) - { -bad_register_operand: - as_bad (_("can't use register '%s%s' as operand %d in '%s'."), - register_prefix, i.op[x].regs->reg_name, x+1, - i.tm.name); - } - - i.operands = 0; - } - /* These AMD 3DNow! and SSE2 instructions have an opcode suffix which is coded in the same place as an 8-bit immediate field would be. Here we fake an 8-bit immediate operand from the @@ -3954,13 +3992,13 @@ optimize_encoding (void) unsigned int j; if (optimize_for_space + && !is_any_vex_encoding (&i.tm) && i.reg_operands == 1 && i.imm_operands == 1 && !i.types[1].bitfield.byte && i.op[0].imms->X_op == O_constant && fits_in_imm7 (i.op[0].imms->X_add_number) - && ((i.tm.base_opcode == 0xa8 - && i.tm.extension_opcode == None) + && (i.tm.base_opcode == 0xa8 || (i.tm.base_opcode == 0xf6 && i.tm.extension_opcode == 0x0))) { @@ -3973,21 +4011,20 @@ optimize_encoding (void) i.types[1].bitfield.byte = 1; /* Ignore the suffix. */ i.suffix = 0; - if (base_regnum >= 4 - && !(i.op[1].regs->reg_flags & RegRex)) - { - /* Handle SP, BP, SI and DI registers. 
*/ - if (i.types[1].bitfield.word) - j = 16; - else if (i.types[1].bitfield.dword) - j = 32; - else - j = 48; - i.op[1].regs -= j; - } + /* Convert to byte registers. */ + if (i.types[1].bitfield.word) + j = 16; + else if (i.types[1].bitfield.dword) + j = 32; + else + j = 48; + if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4) + j += 8; + i.op[1].regs -= j; } } else if (flag_code == CODE_64BIT + && !is_any_vex_encoding (&i.tm) && ((i.types[1].bitfield.qword && i.reg_operands == 1 && i.imm_operands == 1 @@ -3996,9 +4033,8 @@ optimize_encoding (void) && i.tm.extension_opcode == None && fits_in_unsigned_long (i.op[0].imms->X_add_number)) || (fits_in_imm31 (i.op[0].imms->X_add_number) - && (((i.tm.base_opcode == 0x24 - || i.tm.base_opcode == 0xa8) - && i.tm.extension_opcode == None) + && ((i.tm.base_opcode == 0x24 + || i.tm.base_opcode == 0xa8) || (i.tm.base_opcode == 0x80 && i.tm.extension_opcode == 0x4) || ((i.tm.base_opcode == 0xf6 @@ -4010,13 +4046,11 @@ optimize_encoding (void) || (i.types[0].bitfield.qword && ((i.reg_operands == 2 && i.op[0].regs == i.op[1].regs - && ((i.tm.base_opcode == 0x30 - || i.tm.base_opcode == 0x28) - && i.tm.extension_opcode == None)) + && (i.tm.base_opcode == 0x30 + || i.tm.base_opcode == 0x28)) || (i.reg_operands == 1 && i.operands == 1 - && i.tm.base_opcode == 0x30 - && i.tm.extension_opcode == None))))) + && i.tm.base_opcode == 0x30))))) { /* Optimize: -O: andq $imm31, %r64 -> andl $imm31, %r32 @@ -4050,13 +4084,13 @@ optimize_encoding (void) i.tm.base_opcode = 0xb8; i.tm.extension_opcode = None; i.tm.opcode_modifier.w = 0; - i.tm.opcode_modifier.shortform = 1; i.tm.opcode_modifier.modrm = 0; } } } else if (optimize > 1 && !optimize_for_space + && !is_any_vex_encoding (&i.tm) && i.reg_operands == 2 && i.op[0].regs == i.op[1].regs && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8 @@ -4154,7 +4188,7 @@ optimize_encoding (void) else return; } - else if (i.tm.operand_types[0].bitfield.regmask) + else if 
(i.tm.operand_types[0].bitfield.class == RegMask) { i.tm.base_opcode &= 0xff; i.tm.opcode_modifier.vexw = VEXW0; @@ -4327,14 +4361,17 @@ md_assemble (char *line) if (sse_check != check_none && !i.tm.opcode_modifier.noavx && !i.tm.cpu_flags.bitfield.cpuavx + && !i.tm.cpu_flags.bitfield.cpuavx512f && (i.tm.cpu_flags.bitfield.cpusse || i.tm.cpu_flags.bitfield.cpusse2 || i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpussse3 || i.tm.cpu_flags.bitfield.cpusse4_1 || i.tm.cpu_flags.bitfield.cpusse4_2 + || i.tm.cpu_flags.bitfield.cpusse4a || i.tm.cpu_flags.bitfield.cpupclmul || i.tm.cpu_flags.bitfield.cpuaes + || i.tm.cpu_flags.bitfield.cpusha || i.tm.cpu_flags.bitfield.cpugfni)) { (sse_check == check_warning @@ -4424,8 +4461,9 @@ md_assemble (char *line) } /* Check string instruction segment overrides. */ - if (i.tm.opcode_modifier.isstring && i.mem_operands != 0) + if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0) { + gas_assert (i.mem_operands); if (!check_string ()) return; i.disp_operands = 0; @@ -4453,9 +4491,8 @@ md_assemble (char *line) with 3 operands or less. */ if (i.operands <= 3) for (j = 0; j < i.operands; j++) - if (i.types[j].bitfield.inoutportreg - || i.types[j].bitfield.shiftcount - || (i.types[j].bitfield.acc && !i.types[j].bitfield.xmmword)) + if (i.types[j].bitfield.instance != InstanceNone + && !i.types[j].bitfield.xmmword) i.reg_operands--; /* ImmExt should be processed after SSE2AVX. 
*/ @@ -4501,9 +4538,9 @@ md_assemble (char *line) i.imm_operands = 0; } - if ((i.tm.opcode_modifier.jump - || i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpdword) + if ((i.tm.opcode_modifier.jump == JUMP + || i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_DWORD) && i.op[0].disps->X_op == O_constant) { /* Convert "jmp constant" (and "call constant") to a jump (call) to @@ -4537,6 +4574,7 @@ md_assemble (char *line) if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte && (i.op[x].regs->reg_flags & RegRex64) == 0) { + gas_assert (!(i.op[x].regs->reg_flags & RegRex)); /* In case it is "hi" register, give up. */ if (i.op[x].regs->reg_num > 3) as_bad (_("can't encode register '%s%s' in an " @@ -4555,7 +4593,7 @@ md_assemble (char *line) if (i.rex == 0 && i.rex_encoding) { /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand - that uses legacy register. If it is "hi" register, don't add + that uses legacy register. If it is "hi" register, don't add the REX_OPCODE byte. */ int x; for (x = 0; x < 2; x++) @@ -4564,6 +4602,7 @@ md_assemble (char *line) && (i.op[x].regs->reg_flags & RegRex64) == 0 && i.op[x].regs->reg_num > 3) { + gas_assert (!(i.op[x].regs->reg_flags & RegRex)); i.rex_encoding = FALSE; break; } @@ -4577,6 +4616,17 @@ md_assemble (char *line) /* We are ready to output the insn. 
*/ output_insn (); + + last_insn.seg = now_seg; + + if (i.tm.opcode_modifier.isprefix) + { + last_insn.kind = last_insn_prefix; + last_insn.name = i.tm.name; + last_insn.file = as_where (&last_insn.line); + } + else + last_insn.kind = last_insn_other; } static char * @@ -4673,8 +4723,8 @@ parse_insn (char *line, char *mnemonic) i.dir_encoding = dir_encoding_store; break; case 0x4: - /* {vex2} */ - i.vec_encoding = vex_encoding_vex2; + /* {vex} */ + i.vec_encoding = vex_encoding_vex; break; case 0x5: /* {vex3} */ @@ -4803,8 +4853,8 @@ check_suffix: } } - if (current_templates->start->opcode_modifier.jump - || current_templates->start->opcode_modifier.jumpbyte) + if (current_templates->start->opcode_modifier.jump == JUMP + || current_templates->start->opcode_modifier.jump == JUMP_BYTE) { /* Check for a branch hint. We allow ",pt" and ",pn" for predict taken and predict not taken respectively. @@ -5076,9 +5126,9 @@ optimize_imm (void) else if (i.reg_operands) { /* Figure out a suffix from the last register operand specified. - We can't do this properly yet, ie. excluding InOutPortReg, - but the following works for instructions with immediates. - In any case, we can't set i.suffix yet. */ + We can't do this properly yet, i.e. excluding special register + instances, but the following works for instructions with + immediates. In any case, we can't set i.suffix yet. 
*/ for (op = i.operands; --op >= 0;) if (i.types[op].bitfield.class != Reg) continue; @@ -5693,12 +5743,10 @@ match_template (char mnem_suffix) i386_operand_type overlap0, overlap1, overlap2, overlap3; i386_operand_type overlap4; unsigned int found_reverse_match; - i386_opcode_modifier suffix_check, mnemsuf_check; + i386_opcode_modifier suffix_check; i386_operand_type operand_types [MAX_OPERANDS]; int addr_prefix_disp; - unsigned int j; - unsigned int found_cpu_match, size_match; - unsigned int check_register; + unsigned int j, size_match, check_register; enum i386_error specific_error = 0; #if MAX_OPERANDS != 5 @@ -5708,33 +5756,33 @@ match_template (char mnem_suffix) found_reverse_match = 0; addr_prefix_disp = -1; + /* Prepare for mnemonic suffix check. */ memset (&suffix_check, 0, sizeof (suffix_check)); - if (intel_syntax && i.broadcast) - /* nothing */; - else if (i.suffix == BYTE_MNEM_SUFFIX) - suffix_check.no_bsuf = 1; - else if (i.suffix == WORD_MNEM_SUFFIX) - suffix_check.no_wsuf = 1; - else if (i.suffix == SHORT_MNEM_SUFFIX) - suffix_check.no_ssuf = 1; - else if (i.suffix == LONG_MNEM_SUFFIX) - suffix_check.no_lsuf = 1; - else if (i.suffix == QWORD_MNEM_SUFFIX) - suffix_check.no_qsuf = 1; - else if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX) - suffix_check.no_ldsuf = 1; - - memset (&mnemsuf_check, 0, sizeof (mnemsuf_check)); - if (intel_syntax) + switch (mnem_suffix) { - switch (mnem_suffix) - { - case BYTE_MNEM_SUFFIX: mnemsuf_check.no_bsuf = 1; break; - case WORD_MNEM_SUFFIX: mnemsuf_check.no_wsuf = 1; break; - case SHORT_MNEM_SUFFIX: mnemsuf_check.no_ssuf = 1; break; - case LONG_MNEM_SUFFIX: mnemsuf_check.no_lsuf = 1; break; - case QWORD_MNEM_SUFFIX: mnemsuf_check.no_qsuf = 1; break; - } + case BYTE_MNEM_SUFFIX: + suffix_check.no_bsuf = 1; + break; + case WORD_MNEM_SUFFIX: + suffix_check.no_wsuf = 1; + break; + case SHORT_MNEM_SUFFIX: + suffix_check.no_ssuf = 1; + break; + case LONG_MNEM_SUFFIX: + suffix_check.no_lsuf = 1; + break; + case QWORD_MNEM_SUFFIX: 
+ suffix_check.no_qsuf = 1; + break; + default: + /* NB: In Intel syntax, normally we can check for memory operand + size when there is no mnemonic suffix. But jmp and call have + 2 different encodings with Dword memory operand size, one with + No_ldSuf and the other without. i.suffix is set to + LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf. */ + if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX) + suffix_check.no_ldsuf = 1; } /* Must have right number of operands. */ @@ -5750,9 +5798,7 @@ match_template (char mnem_suffix) /* Check processor support. */ i.error = unsupported; - found_cpu_match = (cpu_flags_match (t) - == CPU_FLAGS_PERFECT_MATCH); - if (!found_cpu_match) + if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH) continue; /* Check AT&T mnemonic. */ @@ -5760,37 +5806,58 @@ match_template (char mnem_suffix) if (intel_mnemonic && t->opcode_modifier.attmnemonic) continue; - /* Check AT&T/Intel syntax and Intel64/AMD64 ISA. */ + /* Check AT&T/Intel syntax. */ i.error = unsupported_syntax; if ((intel_syntax && t->opcode_modifier.attsyntax) - || (!intel_syntax && t->opcode_modifier.intelsyntax) - || (intel64 && t->opcode_modifier.amd64) - || (!intel64 && t->opcode_modifier.intel64)) + || (!intel_syntax && t->opcode_modifier.intelsyntax)) continue; - /* Check the suffix, except for some instructions in intel mode. */ + /* Check Intel64/AMD64 ISA. */ + switch (isa64) + { + default: + /* Default: Don't accept Intel64. */ + if (t->opcode_modifier.isa64 == INTEL64) + continue; + break; + case amd64: + /* -mamd64: Don't accept Intel64 and Intel64 only. */ + if (t->opcode_modifier.isa64 >= INTEL64) + continue; + break; + case intel64: + /* -mintel64: Don't accept AMD64. */ + if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT) + continue; + break; + } + + /* Check the suffix. 
*/ i.error = invalid_instruction_suffix; - if ((!intel_syntax || !t->opcode_modifier.ignoresize) - && ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf) - || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf) - || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf) - || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf) - || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf) - || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))) - continue; - /* In Intel mode all mnemonic suffixes must be explicitly allowed. */ - if ((t->opcode_modifier.no_bsuf && mnemsuf_check.no_bsuf) - || (t->opcode_modifier.no_wsuf && mnemsuf_check.no_wsuf) - || (t->opcode_modifier.no_lsuf && mnemsuf_check.no_lsuf) - || (t->opcode_modifier.no_ssuf && mnemsuf_check.no_ssuf) - || (t->opcode_modifier.no_qsuf && mnemsuf_check.no_qsuf) - || (t->opcode_modifier.no_ldsuf && mnemsuf_check.no_ldsuf)) + if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf) + || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf) + || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf) + || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf) + || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf) + || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf)) continue; size_match = operand_size_match (t); if (!size_match) continue; + /* This is intentionally not + + if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)) + + as the case of a missing * on the operand is accepted (perhaps with + a warning, issued further down). */ + if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE) + { + i.error = operand_type_mismatch; + continue; + } + for (j = 0; j < MAX_OPERANDS; j++) operand_types[j] = t->operand_types[j]; @@ -5832,51 +5899,50 @@ match_template (char mnem_suffix) break; } - /* Address size prefix will turn Disp64/Disp32/Disp16 operand - into Disp32/Disp16/Disp32 operand. */ - if (i.prefix[ADDR_PREFIX] != 0) - { - /* There should be only one Disp operand. 
*/ - switch (flag_code) - { - case CODE_16BIT: - for (j = 0; j < MAX_OPERANDS; j++) - { - if (operand_types[j].bitfield.disp16) - { - addr_prefix_disp = j; - operand_types[j].bitfield.disp32 = 1; - operand_types[j].bitfield.disp16 = 0; - break; - } - } + if (!t->opcode_modifier.jump + || t->opcode_modifier.jump == JUMP_ABSOLUTE) + { + /* There should be only one Disp operand. */ + for (j = 0; j < MAX_OPERANDS; j++) + if (operand_type_check (operand_types[j], disp)) break; - case CODE_32BIT: - for (j = 0; j < MAX_OPERANDS; j++) + if (j < MAX_OPERANDS) + { + bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0); + + addr_prefix_disp = j; + + /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16 + operand into Disp32/Disp32/Disp16/Disp32 operand. */ + switch (flag_code) { - if (operand_types[j].bitfield.disp32) + case CODE_16BIT: + override = !override; + /* Fall through. */ + case CODE_32BIT: + if (operand_types[j].bitfield.disp32 + && operand_types[j].bitfield.disp16) { - addr_prefix_disp = j; - operand_types[j].bitfield.disp32 = 0; - operand_types[j].bitfield.disp16 = 1; - break; + operand_types[j].bitfield.disp16 = override; + operand_types[j].bitfield.disp32 = !override; } - } - break; - case CODE_64BIT: - for (j = 0; j < MAX_OPERANDS; j++) - { - if (operand_types[j].bitfield.disp64) + operand_types[j].bitfield.disp32s = 0; + operand_types[j].bitfield.disp64 = 0; + break; + + case CODE_64BIT: + if (operand_types[j].bitfield.disp32s + || operand_types[j].bitfield.disp64) { - addr_prefix_disp = j; - operand_types[j].bitfield.disp64 = 0; - operand_types[j].bitfield.disp32 = 1; - break; + operand_types[j].bitfield.disp64 &= !override; + operand_types[j].bitfield.disp32s &= !override; + operand_types[j].bitfield.disp32 = override; } + operand_types[j].bitfield.disp16 = 0; + break; } - break; } - } + } /* Force 0x8b encoding for "mov foo@GOT, %eax". 
*/ if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0) @@ -5906,15 +5972,17 @@ match_template (char mnem_suffix) zero-extend %eax to %rax. */ if (flag_code == CODE_64BIT && t->base_opcode == 0x90 - && i.types[0].bitfield.acc && i.types[0].bitfield.dword - && i.types[1].bitfield.acc && i.types[1].bitfield.dword) + && i.types[0].bitfield.instance == Accum + && i.types[0].bitfield.dword + && i.types[1].bitfield.instance == Accum + && i.types[1].bitfield.dword) continue; /* xrelease mov %eax, is another special case. It must not match the accumulator-only encoding of mov. */ if (flag_code != CODE_64BIT && i.hle_prefix && t->base_opcode == 0xa0 - && i.types[0].bitfield.acc + && i.types[0].bitfield.instance == Accum && (i.flags[1] & Operand_Mem)) continue; /* Fall through. */ @@ -6071,8 +6139,6 @@ check_reverse: /* Found either forward/reverse 2, 3 or 4 operand match here: slip through to break. */ } - if (!found_cpu_match) - continue; /* Check if vector and VEX operands are valid. */ if (check_VecOperands (t) || VEX_check_operands (t)) @@ -6166,11 +6232,8 @@ check_reverse: if (!quiet_warnings) { if (!intel_syntax - && (i.types[0].bitfield.jumpabsolute - != operand_types[0].bitfield.jumpabsolute)) - { - as_warn (_("indirect %s without `*'"), t->name); - } + && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))) + as_warn (_("indirect %s without `*'"), t->name); if (t->opcode_modifier.isprefix && t->opcode_modifier.ignoresize) @@ -6214,35 +6277,24 @@ check_reverse: static int check_string (void) { - unsigned int mem_op = i.flags[0] & Operand_Mem ? 0 : 1; + unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0; + unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0; - if (i.tm.operand_types[mem_op].bitfield.esseg) - { - if (i.seg[0] != NULL && i.seg[0] != &es) - { - as_bad (_("`%s' operand %d must use `%ses' segment"), - i.tm.name, - intel_syntax ? 
i.tm.operands - mem_op : mem_op + 1, - register_prefix); - return 0; - } - /* There's only ever one segment override allowed per instruction. - This instruction possibly has a legal segment override on the - second operand, so copy the segment to where non-string - instructions store it, allowing common code. */ - i.seg[0] = i.seg[1]; - } - else if (i.tm.operand_types[mem_op + 1].bitfield.esseg) + if (i.seg[op] != NULL && i.seg[op] != &es) { - if (i.seg[1] != NULL && i.seg[1] != &es) - { - as_bad (_("`%s' operand %d must use `%ses' segment"), - i.tm.name, - intel_syntax ? i.tm.operands - mem_op - 1 : mem_op + 2, - register_prefix); - return 0; - } + as_bad (_("`%s' operand %u must use `%ses' segment"), + i.tm.name, + intel_syntax ? i.tm.operands - es_op : es_op + 1, + register_prefix); + return 0; } + + /* There's only ever one segment override allowed per instruction. + This instruction possibly has a legal segment override on the + second operand, so copy the segment to where non-string + instructions store it, allowing common code. */ + i.seg[op] = i.seg[1]; + return 1; } @@ -6257,60 +6309,37 @@ process_suffix (void) i.suffix = LONG_MNEM_SUFFIX; else if (i.tm.opcode_modifier.size == SIZE64) i.suffix = QWORD_MNEM_SUFFIX; - else if (i.reg_operands) + else if (i.reg_operands + && (i.operands > 1 || i.types[0].bitfield.class == Reg)) { /* If there's no instruction mnemonic suffix we try to invent one - based on register operands. */ + based on GPR operands. */ if (!i.suffix) { /* We take i.suffix from the last register operand specified, Destination register type is more significant than source register type. crc32 in SSE4.2 prefers source register type. 
*/ - if (i.tm.base_opcode == 0xf20f38f0 - && i.types[0].bitfield.class == Reg) - { - if (i.types[0].bitfield.byte) - i.suffix = BYTE_MNEM_SUFFIX; - else if (i.types[0].bitfield.word) - i.suffix = WORD_MNEM_SUFFIX; - else if (i.types[0].bitfield.dword) - i.suffix = LONG_MNEM_SUFFIX; - else if (i.types[0].bitfield.qword) - i.suffix = QWORD_MNEM_SUFFIX; - } + unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1; - if (!i.suffix) - { - int op; - - if (i.tm.base_opcode == 0xf20f38f0) - { - /* We have to know the operand size for crc32. */ - as_bad (_("ambiguous memory operand size for `%s`"), - i.tm.name); - return 0; - } - - for (op = i.operands; --op >= 0;) - if (!i.tm.operand_types[op].bitfield.inoutportreg - && !i.tm.operand_types[op].bitfield.shiftcount) - { - if (i.types[op].bitfield.class != Reg) - continue; - if (i.types[op].bitfield.byte) - i.suffix = BYTE_MNEM_SUFFIX; - else if (i.types[op].bitfield.word) - i.suffix = WORD_MNEM_SUFFIX; - else if (i.types[op].bitfield.dword) - i.suffix = LONG_MNEM_SUFFIX; - else if (i.types[op].bitfield.qword) - i.suffix = QWORD_MNEM_SUFFIX; - else - continue; - break; - } - } + while (op--) + if (i.tm.operand_types[op].bitfield.instance == InstanceNone + || i.tm.operand_types[op].bitfield.instance == Accum) + { + if (i.types[op].bitfield.class != Reg) + continue; + if (i.types[op].bitfield.byte) + i.suffix = BYTE_MNEM_SUFFIX; + else if (i.types[op].bitfield.word) + i.suffix = WORD_MNEM_SUFFIX; + else if (i.types[op].bitfield.dword) + i.suffix = LONG_MNEM_SUFFIX; + else if (i.types[op].bitfield.qword) + i.suffix = QWORD_MNEM_SUFFIX; + else + continue; + break; + } } else if (i.suffix == BYTE_MNEM_SUFFIX) { @@ -6358,30 +6387,31 @@ process_suffix (void) else abort (); } - else if (i.tm.opcode_modifier.defaultsize - && !i.suffix - /* exclude fldenv/frstor/fsave/fstenv */ - && i.tm.opcode_modifier.no_ssuf) + else if (i.tm.opcode_modifier.defaultsize && !i.suffix) { - if (stackop_size == LONG_MNEM_SUFFIX - && 
i.tm.base_opcode == 0xcf) + i.suffix = stackop_size; + if (stackop_size == LONG_MNEM_SUFFIX) { /* stackop_size is set to LONG_MNEM_SUFFIX for the .code16gcc directive to support 16-bit mode with 32-bit address. For IRET without a suffix, generate 16-bit IRET (opcode 0xcf) to return from an interrupt handler. */ - i.suffix = WORD_MNEM_SUFFIX; - as_warn (_("generating 16-bit `iret' for .code16gcc directive")); + if (i.tm.base_opcode == 0xcf) + { + i.suffix = WORD_MNEM_SUFFIX; + as_warn (_("generating 16-bit `iret' for .code16gcc directive")); + } + /* Warn about changed behavior for segment register push/pop. */ + else if ((i.tm.base_opcode | 1) == 0x07) + as_warn (_("generating 32-bit `%s', unlike earlier gas versions"), + i.tm.name); } - else - i.suffix = stackop_size; } - else if (intel_syntax - && !i.suffix - && (i.tm.operand_types[0].bitfield.jumpabsolute - || i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpintersegment + else if (!i.suffix + && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE + || i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */ && i.tm.extension_opcode <= 3))) { @@ -6405,45 +6435,69 @@ process_suffix (void) } } - if (!i.suffix) + if (!i.suffix + && (!i.tm.opcode_modifier.defaultsize + /* Also cover lret/retf/iret in 64-bit mode. */ + || (flag_code == CODE_64BIT + && !i.tm.opcode_modifier.no_lsuf + && !i.tm.opcode_modifier.no_qsuf)) + && !i.tm.opcode_modifier.ignoresize + /* Accept FLDENV et al without suffix. 
*/ + && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf)) { - if (!intel_syntax) + unsigned int suffixes; + + suffixes = !i.tm.opcode_modifier.no_bsuf; + if (!i.tm.opcode_modifier.no_wsuf) + suffixes |= 1 << 1; + if (!i.tm.opcode_modifier.no_lsuf) + suffixes |= 1 << 2; + if (!i.tm.opcode_modifier.no_ldsuf) + suffixes |= 1 << 3; + if (!i.tm.opcode_modifier.no_ssuf) + suffixes |= 1 << 4; + if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf) + suffixes |= 1 << 5; + + /* Are multiple suffixes allowed? */ + if (suffixes & (suffixes - 1)) { - if (i.tm.opcode_modifier.w) + if (intel_syntax + && (!i.tm.opcode_modifier.defaultsize + || operand_check == check_error)) { - as_bad (_("no instruction mnemonic suffix given and " - "no register operands; can't size instruction")); + as_bad (_("ambiguous operand size for `%s'"), i.tm.name); return 0; } - } - else - { - unsigned int suffixes; - - suffixes = !i.tm.opcode_modifier.no_bsuf; - if (!i.tm.opcode_modifier.no_wsuf) - suffixes |= 1 << 1; - if (!i.tm.opcode_modifier.no_lsuf) - suffixes |= 1 << 2; - if (!i.tm.opcode_modifier.no_ldsuf) - suffixes |= 1 << 3; - if (!i.tm.opcode_modifier.no_ssuf) - suffixes |= 1 << 4; - if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf) - suffixes |= 1 << 5; - - /* There are more than suffix matches. */ - if (i.tm.opcode_modifier.w - || ((suffixes & (suffixes - 1)) - && !i.tm.opcode_modifier.defaultsize - && !i.tm.opcode_modifier.ignoresize)) + if (operand_check == check_error) { - as_bad (_("ambiguous operand size for `%s'"), i.tm.name); + as_bad (_("no instruction mnemonic suffix given and " + "no register operands; can't size `%s'"), i.tm.name); return 0; } + if (operand_check == check_warning) + as_warn (_("%s; using default for `%s'"), + intel_syntax + ? 
_("ambiguous operand size") + : _("no instruction mnemonic suffix given and " + "no register operands"), + i.tm.name); + + if (i.tm.opcode_modifier.floatmf) + i.suffix = SHORT_MNEM_SUFFIX; + else if (flag_code == CODE_16BIT) + i.suffix = WORD_MNEM_SUFFIX; + else if (!i.tm.opcode_modifier.no_lsuf) + i.suffix = LONG_MNEM_SUFFIX; + else + i.suffix = QWORD_MNEM_SUFFIX; } } + if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3) + i.short_form = (i.tm.operand_types[0].bitfield.class == Reg) + != (i.tm.operand_types[1].bitfield.class == Reg); + /* Change the opcode based on the operand size given by i.suffix. */ switch (i.suffix) { @@ -6460,7 +6514,7 @@ process_suffix (void) /* It's not a byte, select word/dword operation. */ if (i.tm.opcode_modifier.w) { - if (i.tm.opcode_modifier.shortform) + if (i.short_form) i.tm.base_opcode |= 8; else i.tm.base_opcode |= 1; @@ -6473,7 +6527,7 @@ process_suffix (void) if (i.reg_operands > 0 && i.types[0].bitfield.class == Reg && i.tm.opcode_modifier.addrprefixopreg - && (i.tm.opcode_modifier.immext + && (i.tm.operand_types[0].bitfield.instance == Accum || i.operands == 1)) { /* The address size override prefix changes the size of the @@ -6491,11 +6545,11 @@ process_suffix (void) && !is_any_vex_encoding (&i.tm) && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT) || (flag_code == CODE_64BIT - && i.tm.opcode_modifier.jumpbyte))) + && i.tm.opcode_modifier.jump == JUMP_BYTE))) { unsigned int prefix = DATA_PREFIX_OPCODE; - if (i.tm.opcode_modifier.jumpbyte) /* jcxz, loop */ + if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */ prefix = ADDR_PREFIX_OPCODE; if (!add_prefix (prefix)) @@ -6511,8 +6565,10 @@ process_suffix (void) && ! 
(i.operands == 2 && i.tm.base_opcode == 0x90 && i.tm.extension_opcode == None - && i.types[0].bitfield.acc && i.types[0].bitfield.qword - && i.types[1].bitfield.acc && i.types[1].bitfield.qword)) + && i.types[0].bitfield.instance == Accum + && i.types[0].bitfield.qword + && i.types[1].bitfield.instance == Accum + && i.types[1].bitfield.qword)) i.rex |= REX_W; break; @@ -6521,7 +6577,7 @@ process_suffix (void) if (i.reg_operands != 0 && i.operands > 1 && i.tm.opcode_modifier.addrprefixopreg - && !i.tm.opcode_modifier.immext) + && i.tm.operand_types[0].bitfield.instance != Accum) { /* Check invalid register operand when the address size override prefix changes the size of register operands. */ @@ -6574,34 +6630,14 @@ check_byte_reg (void) continue; /* I/O port address operands are OK too. */ - if (i.tm.operand_types[op].bitfield.inoutportreg) + if (i.tm.operand_types[op].bitfield.instance == RegD + && i.tm.operand_types[op].bitfield.word) continue; - /* crc32 doesn't generate this warning. */ - if (i.tm.base_opcode == 0xf20f38f0) + /* crc32 only wants its source operand checked here. */ + if (i.tm.base_opcode == 0xf20f38f0 && op) continue; - if ((i.types[op].bitfield.word - || i.types[op].bitfield.dword - || i.types[op].bitfield.qword) - && i.op[op].regs->reg_num < 4 - /* Prohibit these changes in 64bit mode, since the lowering - would be more complicated. */ - && flag_code != CODE_64BIT) - { -#if REGISTER_WARNINGS - if (!quiet_warnings) - as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), - register_prefix, - (i.op[op].regs + (i.types[op].bitfield.word - ? REGNAM_AL - REGNAM_AX - : REGNAM_AL - REGNAM_EAX))->reg_name, - register_prefix, - i.op[op].regs->reg_name, - i.suffix); -#endif - continue; - } /* Any other register is bad. */ if (i.types[op].bitfield.class == Reg || i.types[op].bitfield.class == RegMMX @@ -6635,7 +6671,7 @@ check_long_reg (void) them. (eg. 
movzb) */ else if (i.types[op].bitfield.byte && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6646,37 +6682,27 @@ check_long_reg (void) i.suffix); return 0; } - /* Warn if the e prefix on a general reg is missing. */ - else if ((!quiet_warnings || flag_code == CODE_64BIT) - && i.types[op].bitfield.word + /* Error if the e prefix on a general reg is missing. */ + else if (i.types[op].bitfield.word && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.dword) { - /* Prohibit these changes in the 64bit mode, since the - lowering is more complicated. */ - if (flag_code == CODE_64BIT) - { - as_bad (_("incorrect register `%s%s' used with `%c' suffix"), - register_prefix, i.op[op].regs->reg_name, - i.suffix); - return 0; - } -#if REGISTER_WARNINGS - as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), - register_prefix, - (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name, - register_prefix, i.op[op].regs->reg_name, i.suffix); -#endif + as_bad (_("incorrect register `%s%s' used with `%c' suffix"), + register_prefix, i.op[op].regs->reg_name, + i.suffix); + return 0; } /* Warn if the r prefix on a general reg is present. */ else if (i.types[op].bitfield.qword && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.dword) { if (intel_syntax - && i.tm.opcode_modifier.toqword + && (i.tm.opcode_modifier.toqword + /* Also convert to QWORD for MOVSXD. */ + || i.tm.base_opcode == 0x63) && i.types[0].bitfield.class != RegSIMD) { /* Convert to QWORD. We want REX byte. */ @@ -6706,7 +6732,7 @@ check_qword_reg (void) them. (eg. 
movzb) */ else if (i.types[op].bitfield.byte && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6721,7 +6747,7 @@ check_qword_reg (void) else if ((i.types[op].bitfield.word || i.types[op].bitfield.dword) && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.qword) { /* Prohibit these changes in the 64bit mode, since the @@ -6756,7 +6782,7 @@ check_word_reg (void) them. (eg. movzb) */ else if (i.types[op].bitfield.byte && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && (i.tm.operand_types[op].bitfield.word || i.tm.operand_types[op].bitfield.dword)) { @@ -6767,29 +6793,17 @@ check_word_reg (void) i.suffix); return 0; } - /* Warn if the e or r prefix on a general reg is present. */ - else if ((!quiet_warnings || flag_code == CODE_64BIT) - && (i.types[op].bitfield.dword + /* Error if the e or r prefix on a general reg is present. */ + else if ((i.types[op].bitfield.dword || i.types[op].bitfield.qword) && (i.tm.operand_types[op].bitfield.class == Reg - || i.tm.operand_types[op].bitfield.acc) + || i.tm.operand_types[op].bitfield.instance == Accum) && i.tm.operand_types[op].bitfield.word) { - /* Prohibit these changes in the 64bit mode, since the - lowering is more complicated. 
*/ - if (flag_code == CODE_64BIT) - { - as_bad (_("incorrect register `%s%s' used with `%c' suffix"), - register_prefix, i.op[op].regs->reg_name, - i.suffix); - return 0; - } -#if REGISTER_WARNINGS - as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), - register_prefix, - (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name, - register_prefix, i.op[op].regs->reg_name, i.suffix); -#endif + as_bad (_("incorrect register `%s%s' used with `%c' suffix"), + register_prefix, i.op[op].regs->reg_name, + i.suffix); + return 0; } return 1; } @@ -6897,14 +6911,14 @@ process_operands (void) && MAX_OPERANDS > dupl && operand_type_equal (&i.types[dest], &regxmm)); - if (i.tm.operand_types[0].bitfield.acc + if (i.tm.operand_types[0].bitfield.instance == Accum && i.tm.operand_types[0].bitfield.xmmword) { if (i.tm.opcode_modifier.vexsources == VEX3SOURCES) { /* Keep xmm0 for instructions with VEX prefix and 3 sources. */ - i.tm.operand_types[0].bitfield.acc = 0; + i.tm.operand_types[0].bitfield.instance = InstanceNone; i.tm.operand_types[0].bitfield.class = RegSIMD; goto duplicate; } @@ -6969,7 +6983,7 @@ duplicate: if (i.tm.opcode_modifier.immext) process_immext (); } - else if (i.tm.operand_types[0].bitfield.acc + else if (i.tm.operand_types[0].bitfield.instance == Accum && i.tm.operand_types[0].bitfield.xmmword) { unsigned int j; @@ -7065,7 +7079,7 @@ duplicate: on one of their operands, the default segment is ds. */ default_seg = &ds; } - else if (i.tm.opcode_modifier.shortform) + else if (i.short_form) { /* The register or float register operand is in operand 0 or 1.
*/ @@ -7216,9 +7230,11 @@ build_modrm_byte (void) gas_assert (i.imm_operands == 1 || (i.imm_operands == 0 && (i.tm.opcode_modifier.vexvvvv == VEXXDS - || i.types[0].bitfield.shiftcount))); + || (i.types[0].bitfield.instance == RegC + && i.types[0].bitfield.byte)))); if (operand_type_check (i.types[0], imm) - || i.types[0].bitfield.shiftcount) + || (i.types[0].bitfield.instance == RegC + && i.types[0].bitfield.byte)) source = 1; else source = 0; @@ -7682,8 +7698,8 @@ build_modrm_byte (void) for (op = 0; op < i.operands; op++) { if (i.types[op].bitfield.class == Reg - || i.types[op].bitfield.regbnd - || i.types[op].bitfield.regmask + || i.types[op].bitfield.class == RegBND + || i.types[op].bitfield.class == RegMask || i.types[op].bitfield.class == SReg || i.types[op].bitfield.class == RegCR || i.types[op].bitfield.class == RegDR @@ -7809,6 +7825,18 @@ build_modrm_byte (void) return default_seg; } +static unsigned int +flip_code16 (unsigned int code16) +{ + gas_assert (i.tm.operands == 1); + + return !(i.prefix[REX_PREFIX] & REX_W) + && (code16 ? i.tm.operand_types[0].bitfield.disp32 + || i.tm.operand_types[0].bitfield.disp32s + : i.tm.operand_types[0].bitfield.disp16) + ? CODE16 : 0; +} + static void output_branch (void) { @@ -7828,7 +7856,7 @@ output_branch (void) { prefix = 1; i.prefixes -= 1; - code16 ^= CODE16; + code16 ^= flip_code16(code16); } /* Pentium4 branch hints. */ if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */ @@ -7846,12 +7874,12 @@ output_branch (void) /* BND prefixed jump. */ if (i.prefix[BND_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); - i.prefixes -= 1; + prefix++; + i.prefixes--; } - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); /* It's always a symbol; End frag & setup for relax. 
Make sure there is enough room in this frag for the largest @@ -7866,6 +7894,8 @@ output_branch (void) if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE) *p++ = i.prefix[SEG_PREFIX]; + if (i.prefix[BND_PREFIX] != 0) + *p++ = BND_PREFIX_OPCODE; if (i.prefix[REX_PREFIX] != 0) *p++ = i.prefix[REX_PREFIX]; *p = i.tm.base_opcode; @@ -7941,7 +7971,7 @@ output_jump (void) fixS *fixP; bfd_reloc_code_real_type jump_reloc = i.reloc[0]; - if (i.tm.opcode_modifier.jumpbyte) + if (i.tm.opcode_modifier.jump == JUMP_BYTE) { /* This is a loop or jecxz type instruction. */ size = 1; @@ -7970,7 +8000,7 @@ output_jump (void) { FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE); i.prefixes -= 1; - code16 ^= CODE16; + code16 ^= flip_code16(code16); } size = 4; @@ -7978,21 +8008,21 @@ output_jump (void) size = 2; } - if (i.prefix[REX_PREFIX] != 0) + /* BND prefixed jump. */ + if (i.prefix[BND_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]); + FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); i.prefixes -= 1; } - /* BND prefixed jump. 
*/ - if (i.prefix[BND_PREFIX] != 0) + if (i.prefix[REX_PREFIX] != 0) { - FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]); + FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]); i.prefixes -= 1; } - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); p = frag_more (i.tm.opcode_length + size); switch (i.tm.opcode_length) @@ -8045,18 +8075,15 @@ output_interseg_jump (void) i.prefixes -= 1; code16 ^= CODE16; } - if (i.prefix[REX_PREFIX] != 0) - { - prefix++; - i.prefixes -= 1; - } + + gas_assert (!i.prefix[REX_PREFIX]); size = 4; if (code16) size = 2; - if (i.prefixes != 0 && !intel_syntax) - as_warn (_("skipping prefixes on this instruction")); + if (i.prefixes != 0) + as_warn (_("skipping prefixes on `%s'"), i.tm.name); /* 1 opcode; 2 segment; offset */ p = frag_more (prefix + 1 + 2 + size); @@ -8230,32 +8257,227 @@ encoding_length (const fragS *start_frag, offsetT start_off, return len - start_off + (frag_now_ptr - frag_now->fr_literal); } -static void -output_insn (void) +/* Return 1 for test, and, cmp, add, sub, inc and dec which may + be macro-fused with conditional jumps. */ + +static int +maybe_fused_with_jcc_p (void) { - fragS *insn_start_frag; - offsetT insn_start_off; + /* No RIP address. */ + if (i.base_reg && i.base_reg->reg_num == RegIP) + return 0; -#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) - if (IS_ELF && x86_used_note) + /* No VEX/EVEX encoding. */ + if (is_any_vex_encoding (&i.tm)) + return 0; + + /* and, add, sub with destination register. */ + if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25) + || i.tm.base_opcode <= 5 + || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d) + || ((i.tm.base_opcode | 3) == 0x83 + && ((i.tm.extension_opcode | 1) == 0x5 + || i.tm.extension_opcode == 0x0))) + return (i.types[1].bitfield.class == Reg + || i.types[1].bitfield.instance == Accum); + + /* test, cmp with any register. 
*/ + if ((i.tm.base_opcode | 1) == 0x85 + || (i.tm.base_opcode | 1) == 0xa9 + || ((i.tm.base_opcode | 1) == 0xf7 + && i.tm.extension_opcode == 0) + || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d) + || ((i.tm.base_opcode | 3) == 0x83 + && (i.tm.extension_opcode == 0x7))) + return (i.types[0].bitfield.class == Reg + || i.types[0].bitfield.instance == Accum + || i.types[1].bitfield.class == Reg + || i.types[1].bitfield.instance == Accum); + + /* inc, dec with any register. */ + if ((i.tm.cpu_flags.bitfield.cpuno64 + && (i.tm.base_opcode | 0xf) == 0x4f) + || ((i.tm.base_opcode | 1) == 0xff + && i.tm.extension_opcode <= 0x1)) + return (i.types[0].bitfield.class == Reg + || i.types[0].bitfield.instance == Accum); + + return 0; +} + +/* Return 1 if a FUSED_JCC_PADDING frag should be generated. */ + +static int +add_fused_jcc_padding_frag_p (void) +{ + /* NB: Don't work with COND_JUMP86 without i386. */ + if (!align_branch_power + || now_seg == absolute_section + || !cpu_arch_flags.bitfield.cpui386 + || !(align_branch & align_branch_fused_bit)) + return 0; + + if (maybe_fused_with_jcc_p ()) { - if (i.tm.cpu_flags.bitfield.cpucmov) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV; - if (i.tm.cpu_flags.bitfield.cpusse) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE; - if (i.tm.cpu_flags.bitfield.cpusse2) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2; - if (i.tm.cpu_flags.bitfield.cpusse3) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3; - if (i.tm.cpu_flags.bitfield.cpussse3) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3; - if (i.tm.cpu_flags.bitfield.cpusse4_1) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1; - if (i.tm.cpu_flags.bitfield.cpusse4_2) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2; - if (i.tm.cpu_flags.bitfield.cpuavx) - x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX; - if (i.tm.cpu_flags.bitfield.cpuavx2) + if (last_insn.kind == last_insn_other + || last_insn.seg != now_seg) + return 1; + if (flag_debug) + as_warn_where (last_insn.file, 
last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + } + + return 0; +} + +/* Return 1 if a BRANCH_PREFIX frag should be generated. */ + +static int +add_branch_prefix_frag_p (void) +{ + /* NB: Don't work with COND_JUMP86 without i386. Don't add prefix + to PadLock instructions since they include prefixes in opcode. */ + if (!align_branch_power + || !align_branch_prefix_size + || now_seg == absolute_section + || i.tm.cpu_flags.bitfield.cpupadlock + || !cpu_arch_flags.bitfield.cpui386) + return 0; + + /* Don't add prefix if it is a prefix or there is no operand in case + that segment prefix is special. */ + if (!i.operands || i.tm.opcode_modifier.isprefix) + return 0; + + if (last_insn.kind == last_insn_other + || last_insn.seg != now_seg) + return 1; + + if (flag_debug) + as_warn_where (last_insn.file, last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + + return 0; +} + +/* Return 1 if a BRANCH_PADDING frag should be generated. */ + +static int +add_branch_padding_frag_p (enum align_branch_kind *branch_p) +{ + int add_padding; + + /* NB: Don't work with COND_JUMP86 without i386. */ + if (!align_branch_power + || now_seg == absolute_section + || !cpu_arch_flags.bitfield.cpui386) + return 0; + + add_padding = 0; + + /* Check for jcc and direct jmp. */ + if (i.tm.opcode_modifier.jump == JUMP) + { + if (i.tm.base_opcode == JUMP_PC_RELATIVE) + { + *branch_p = align_branch_jmp; + add_padding = align_branch & align_branch_jmp_bit; + } + else + { + *branch_p = align_branch_jcc; + if ((align_branch & align_branch_jcc_bit)) + add_padding = 1; + } + } + else if (is_any_vex_encoding (&i.tm)) + return 0; + else if ((i.tm.base_opcode | 1) == 0xc3) + { + /* Near ret. */ + *branch_p = align_branch_ret; + if ((align_branch & align_branch_ret_bit)) + add_padding = 1; + } + else + { + /* Check for indirect jmp, direct and indirect calls. 
*/ + if (i.tm.base_opcode == 0xe8) + { + /* Direct call. */ + *branch_p = align_branch_call; + if ((align_branch & align_branch_call_bit)) + add_padding = 1; + } + else if (i.tm.base_opcode == 0xff + && (i.tm.extension_opcode == 2 + || i.tm.extension_opcode == 4)) + { + /* Indirect call and jmp. */ + *branch_p = align_branch_indirect; + if ((align_branch & align_branch_indirect_bit)) + add_padding = 1; + } + + if (add_padding + && i.disp_operands + && tls_get_addr + && (i.op[0].disps->X_op == O_symbol + || (i.op[0].disps->X_op == O_subtract + && i.op[0].disps->X_op_symbol == GOT_symbol))) + { + symbolS *s = i.op[0].disps->X_add_symbol; + /* No padding to call to global or undefined tls_get_addr. */ + if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s)) + && strcmp (S_GET_NAME (s), tls_get_addr) == 0) + return 0; + } + } + + if (add_padding + && last_insn.kind != last_insn_other + && last_insn.seg == now_seg) + { + if (flag_debug) + as_warn_where (last_insn.file, last_insn.line, + _("`%s` skips -malign-branch-boundary on `%s`"), + last_insn.name, i.tm.name); + return 0; + } + + return add_padding; +} + +static void +output_insn (void) +{ + fragS *insn_start_frag; + offsetT insn_start_off; + fragS *fragP = NULL; + enum align_branch_kind branch = align_branch_none; + +#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) + if (IS_ELF && x86_used_note) + { + if (i.tm.cpu_flags.bitfield.cpucmov) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV; + if (i.tm.cpu_flags.bitfield.cpusse) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE; + if (i.tm.cpu_flags.bitfield.cpusse2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2; + if (i.tm.cpu_flags.bitfield.cpusse3) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3; + if (i.tm.cpu_flags.bitfield.cpussse3) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3; + if (i.tm.cpu_flags.bitfield.cpusse4_1) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1; + if (i.tm.cpu_flags.bitfield.cpusse4_2) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2; + if 
(i.tm.cpu_flags.bitfield.cpuavx) + x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX; + if (i.tm.cpu_flags.bitfield.cpuavx2) x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2; if (i.tm.cpu_flags.bitfield.cpufma) x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA; @@ -8296,15 +8518,9 @@ output_insn (void) || i.tm.cpu_flags.bitfield.cpu687 || i.tm.cpu_flags.bitfield.cpufisttp) x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87; - /* Don't set GNU_PROPERTY_X86_FEATURE_2_MMX for prefetchtXXX nor - Xfence instructions. */ - if (i.tm.base_opcode != 0xf18 - && i.tm.base_opcode != 0xf0d - && i.tm.base_opcode != 0xfaef8 - && (i.has_regmmx - || i.tm.cpu_flags.bitfield.cpummx - || i.tm.cpu_flags.bitfield.cpua3dnow - || i.tm.cpu_flags.bitfield.cpua3dnowa)) + if (i.has_regmmx + || i.tm.base_opcode == 0xf77 /* emms */ + || i.tm.base_opcode == 0xf0e /* femms */) x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX; if (i.has_regxmm) x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM; @@ -8331,13 +8547,38 @@ output_insn (void) insn_start_frag = frag_now; insn_start_off = frag_now_fix (); + if (add_branch_padding_frag_p (&branch)) + { + char *p; + /* Branch can be 8 bytes. Leave some room for prefixes. */ + unsigned int max_branch_padding_size = 14; + + /* Align section to boundary. */ + record_alignment (now_seg, align_branch_power); + + /* Make room for padding. */ + frag_grow (max_branch_padding_size); + + /* Start of the padding. */ + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, max_branch_padding_size, 0, + ENCODE_RELAX_STATE (BRANCH_PADDING, 0), + NULL, 0, p); + + fragP->tc_frag_data.branch_type = branch; + fragP->tc_frag_data.max_bytes = max_branch_padding_size; + } + /* Output jumps. 
*/ - if (i.tm.opcode_modifier.jump) + if (i.tm.opcode_modifier.jump == JUMP) output_branch (); - else if (i.tm.opcode_modifier.jumpbyte - || i.tm.opcode_modifier.jumpdword) + else if (i.tm.opcode_modifier.jump == JUMP_BYTE + || i.tm.opcode_modifier.jump == JUMP_DWORD) output_jump (); - else if (i.tm.opcode_modifier.jumpintersegment) + else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT) output_interseg_jump (); else { @@ -8369,6 +8610,41 @@ output_insn (void) i.prefix[LOCK_PREFIX] = 0; } + if (branch) + /* Skip if this is a branch. */ + ; + else if (add_fused_jcc_padding_frag_p ()) + { + /* Make room for padding. */ + frag_grow (MAX_FUSED_JCC_PADDING_SIZE); + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0, + ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0), + NULL, 0, p); + + fragP->tc_frag_data.branch_type = align_branch_fused; + fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE; + } + else if (add_branch_prefix_frag_p ()) + { + unsigned int max_prefix_size = align_branch_prefix_size; + + /* Make room for padding. */ + frag_grow (max_prefix_size); + p = frag_more (0); + + fragP = frag_now; + + frag_var (rs_machine_dependent, max_prefix_size, 0, + ENCODE_RELAX_STATE (BRANCH_PREFIX, 0), + NULL, 0, p); + + fragP->tc_frag_data.max_bytes = max_prefix_size; + } + /* Since the VEX/EVEX prefix contains the implicit prefix, we don't need the explicit prefix. */ if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex) @@ -8407,10 +8683,13 @@ output_insn (void) #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF) /* For x32, add a dummy REX_OPCODE prefix for mov/add with R_X86_64_GOTTPOFF relocation so that linker can safely - perform IE->LE optimization. */ + perform IE->LE optimization. A dummy REX_OPCODE prefix + is also needed for lea with R_X86_64_GOTPC32_TLSDESC + relocation for GDesc -> IE/LE optimization. 
*/ if (x86_elf_abi == X86_64_X32_ABI && i.operands == 2 - && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF + && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF + || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC) && i.prefix[REX_PREFIX] == 0) add_prefix (REX_OPCODE); #endif @@ -8516,9 +8795,105 @@ output_insn (void) if (j > 15) as_warn (_("instruction length of %u bytes exceeds the limit of 15"), j); + else if (fragP) + { + /* NB: Don't add prefix with GOTPC relocation since + output_disp() above depends on the fixed encoding + length. Can't add prefix with TLS relocation since + it breaks TLS linker optimization. */ + unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j; + /* Prefix count on the current instruction. */ + unsigned int count = i.vex.length; + unsigned int k; + for (k = 0; k < ARRAY_SIZE (i.prefix); k++) + /* REX byte is encoded in VEX/EVEX prefix. */ + if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length)) + count++; + + /* Count prefixes for extended opcode maps. */ + if (!i.vex.length) + switch (i.tm.opcode_length) + { + case 3: + if (((i.tm.base_opcode >> 16) & 0xff) == 0xf) + { + count++; + switch ((i.tm.base_opcode >> 8) & 0xff) + { + case 0x38: + case 0x3a: + count++; + break; + default: + break; + } + } + break; + case 2: + if (((i.tm.base_opcode >> 8) & 0xff) == 0xf) + count++; + break; + case 1: + break; + default: + abort (); + } + + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PREFIX) + { + /* Set the maximum prefix size in BRANCH_PREFIX + frag. */ + if (fragP->tc_frag_data.max_bytes > max) + fragP->tc_frag_data.max_bytes = max; + if (fragP->tc_frag_data.max_bytes > count) + fragP->tc_frag_data.max_bytes -= count; + else + fragP->tc_frag_data.max_bytes = 0; + } + else + { + /* Remember the maximum prefix size in FUSED_JCC_PADDING + frag. 
*/ + unsigned int max_prefix_size; + if (align_branch_prefix_size > max) + max_prefix_size = max; + else + max_prefix_size = align_branch_prefix_size; + if (max_prefix_size > count) + fragP->tc_frag_data.max_prefix_length + = max_prefix_size - count; + } + + /* Use existing segment prefix if possible. Use CS + segment prefix in 64-bit mode. In 32-bit mode, use SS + segment prefix with ESP/EBP base register and use DS + segment prefix without ESP/EBP base register. */ + if (i.prefix[SEG_PREFIX]) + fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX]; + else if (flag_code == CODE_64BIT) + fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE; + else if (i.base_reg + && (i.base_reg->reg_num == 4 + || i.base_reg->reg_num == 5)) + fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE; + else + fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE; + } } } + /* NB: Don't work with COND_JUMP86 without i386. */ + if (align_branch_power + && now_seg != absolute_section + && cpu_arch_flags.bitfield.cpui386) + { + /* Terminate each frag so that we can add prefix and check for + fused jcc. */ + frag_wane (frag_now); + frag_new (0); + } + #ifdef DEBUG386 if (flag_debug) { @@ -8628,6 +9003,7 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) if (!object_64bit) { reloc_type = BFD_RELOC_386_GOTPC; + i.has_gotpc_tls_reloc = TRUE; i.op[n].imms->X_add_number += encoding_length (insn_start_frag, insn_start_off, p); } @@ -8639,6 +9015,27 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) insn, and that is taken care of in other code. 
*/ reloc_type = BFD_RELOC_X86_64_GOTPC32; } + else if (align_branch_power) + { + switch (reloc_type) + { + case BFD_RELOC_386_TLS_GD: + case BFD_RELOC_386_TLS_LDM: + case BFD_RELOC_386_TLS_IE: + case BFD_RELOC_386_TLS_IE_32: + case BFD_RELOC_386_TLS_GOTIE: + case BFD_RELOC_386_TLS_GOTDESC: + case BFD_RELOC_386_TLS_DESC_CALL: + case BFD_RELOC_X86_64_TLSGD: + case BFD_RELOC_X86_64_TLSLD: + case BFD_RELOC_X86_64_GOTTPOFF: + case BFD_RELOC_X86_64_GOTPC32_TLSDESC: + case BFD_RELOC_X86_64_TLSDESC_CALL: + i.has_gotpc_tls_reloc = TRUE; + default: + break; + } + } fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size, i.op[n].disps, pcrel, reloc_type); @@ -8654,13 +9051,14 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) && i.rm.regmem == 5)) && (i.rm.mode == 2 || (i.rm.mode == 0 && i.rm.regmem == 5)) + && !is_any_vex_encoding(&i.tm) && ((i.operands == 1 && i.tm.base_opcode == 0xff && (i.rm.reg == 2 || i.rm.reg == 4)) || (i.operands == 2 && (i.tm.base_opcode == 0x8b || i.tm.base_opcode == 0x85 - || (i.tm.base_opcode & 0xc7) == 0x03)))) + || (i.tm.base_opcode & ~0x38) == 0x03)))) { if (object_64bit) { @@ -8780,6 +9178,7 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off) reloc_type = BFD_RELOC_X86_64_GOTPC32; else if (size == 8) reloc_type = BFD_RELOC_X86_64_GOTPC64; + i.has_gotpc_tls_reloc = TRUE; i.op[n].imms->X_add_number += encoding_length (insn_start_frag, insn_start_off, p); } @@ -9238,7 +9637,7 @@ check_VecOperations (char *op_string, char *op_end) else if ((mask = parse_register (op_string, &end_op)) != NULL) { /* k0 can't be used for write mask. 
*/ - if (!mask->reg_type.bitfield.regmask || mask->reg_num == 0) + if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num) { as_bad (_("`%s%s' can't be used for write mask"), register_prefix, mask->reg_name); @@ -9516,11 +9915,12 @@ i386_displacement (char *disp_start, char *disp_end) } operand_type_set (&bigdisp, 0); - if ((i.types[this_operand].bitfield.jumpabsolute) - || (!current_templates->start->opcode_modifier.jump - && !current_templates->start->opcode_modifier.jumpdword)) + if (i.jumpabsolute + || i.types[this_operand].bitfield.baseindex + || (current_templates->start->opcode_modifier.jump != JUMP + && current_templates->start->opcode_modifier.jump != JUMP_DWORD)) { - bigdisp.bitfield.disp32 = 1; + i386_addressing_mode (); override = (i.prefix[ADDR_PREFIX] != 0); if (flag_code == CODE_64BIT) { @@ -9529,27 +9929,47 @@ i386_displacement (char *disp_start, char *disp_end) bigdisp.bitfield.disp32s = 1; bigdisp.bitfield.disp64 = 1; } + else + bigdisp.bitfield.disp32 = 1; } else if ((flag_code == CODE_16BIT) ^ override) - { - bigdisp.bitfield.disp32 = 0; bigdisp.bitfield.disp16 = 1; - } + else + bigdisp.bitfield.disp32 = 1; } else { - /* For PC-relative branches, the width of the displacement - is dependent upon data size, not address size. */ + /* For PC-relative branches, the width of the displacement may be + dependent upon data size, but is never dependent upon address size. + Also make sure to not unintentionally match against a non-PC-relative + branch template. 
*/ + static templates aux_templates; + const insn_template *t = current_templates->start; + bfd_boolean has_intel64 = FALSE; + + aux_templates.start = t; + while (++t < current_templates->end) + { + if (t->opcode_modifier.jump + != current_templates->start->opcode_modifier.jump) + break; + if ((t->opcode_modifier.isa64 >= INTEL64)) + has_intel64 = TRUE; + } + if (t < current_templates->end) + { + aux_templates.end = t; + current_templates = &aux_templates; + } + override = (i.prefix[DATA_PREFIX] != 0); if (flag_code == CODE_64BIT) { - if (override || i.suffix == WORD_MNEM_SUFFIX) + if ((override || i.suffix == WORD_MNEM_SUFFIX) + && (!intel64 || !has_intel64)) bigdisp.bitfield.disp16 = 1; else - { - bigdisp.bitfield.disp32 = 1; - bigdisp.bitfield.disp32s = 1; - } + bigdisp.bitfield.disp32s = 1; } else { @@ -9722,6 +10142,11 @@ i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp, } #endif + if (current_templates->start->opcode_modifier.jump == JUMP_BYTE + /* Constants get taken care of by optimize_disp(). */ + && exp->X_op != O_constant) + i.types[this_operand].bitfield.disp8 = 1; + /* Check if this is a displacement only operand. 
*/ bigdisp = i.types[this_operand]; bigdisp.bitfield.disp8 = 0; @@ -9821,16 +10246,16 @@ i386_index_check (const char *operand_string) if (current_templates->start->opcode_modifier.repprefixok) { - i386_operand_type type = current_templates->end[-1].operand_types[0]; + int es_op = current_templates->end[-1].opcode_modifier.isstring + - IS_STRING_ES_OP0; + int op = 0; - if (!type.bitfield.baseindex + if (!current_templates->end[-1].operand_types[0].bitfield.baseindex || ((!i.mem_operands != !intel_syntax) && current_templates->end[-1].operand_types[1] .bitfield.baseindex)) - type = current_templates->end[-1].operand_types[1]; - expected_reg = hash_find (reg_hash, - di_si[addr_mode][type.bitfield.esseg]); - + op = 1; + expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]); } else expected_reg = hash_find (reg_hash, bx[addr_mode]); @@ -10047,7 +10472,7 @@ i386_att_operand (char *operand_string) ++op_string; if (is_space_char (*op_string)) ++op_string; - i.types[this_operand].bitfield.jumpabsolute = 1; + i.jumpabsolute = TRUE; } /* Check if operand is a register. */ @@ -10103,7 +10528,7 @@ i386_att_operand (char *operand_string) ++op_string; if (is_space_char (*op_string)) ++op_string; - i.types[this_operand].bitfield.jumpabsolute = 1; + i.jumpabsolute = TRUE; } goto do_memory_reference; } @@ -10137,7 +10562,7 @@ i386_att_operand (char *operand_string) else if (*op_string == IMMEDIATE_PREFIX) { ++op_string; - if (i.types[this_operand].bitfield.jumpabsolute) + if (i.jumpabsolute) { as_bad (_("immediate operand illegal with absolute jump")); return 0; @@ -10329,7 +10754,8 @@ i386_att_operand (char *operand_string) /* Special case for (%dx) while doing input/output op. 
*/ if (i.base_reg - && i.base_reg->reg_type.bitfield.inoutportreg + && i.base_reg->reg_type.bitfield.instance == RegD + && i.base_reg->reg_type.bitfield.word && i.index_reg == 0 && i.log2_scale_factor == 0 && i.seg[i.mem_operands] == 0 @@ -10404,6 +10830,362 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var) } #endif +/* Return the next non-empty frag. */ + +static fragS * +i386_next_non_empty_frag (fragS *fragP) +{ + /* There may be a frag with a ".fill 0" when there is no room in + the current frag for frag_grow in output_insn. */ + for (fragP = fragP->fr_next; + (fragP != NULL + && fragP->fr_type == rs_fill + && fragP->fr_fix == 0); + fragP = fragP->fr_next) + ; + return fragP; +} + +/* Return the next jcc frag after BRANCH_PADDING. */ + +static fragS * +i386_next_jcc_frag (fragS *fragP) +{ + if (!fragP) + return NULL; + + if (fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PADDING)) + { + fragP = i386_next_non_empty_frag (fragP); + if (fragP->fr_type != rs_machine_dependent) + return NULL; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP) + return fragP; + } + + return NULL; +} + +/* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */ + +static void +i386_classify_machine_dependent_frag (fragS *fragP) +{ + fragS *cmp_fragP; + fragS *pad_fragP; + fragS *branch_fragP; + fragS *next_fragP; + unsigned int max_prefix_length; + + if (fragP->tc_frag_data.classified) + return; + + /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert + FUSED_JCC_PADDING and merge BRANCH_PADDING. */ + for (next_fragP = fragP; + next_fragP != NULL; + next_fragP = next_fragP->fr_next) + { + next_fragP->tc_frag_data.classified = 1; + if (next_fragP->fr_type == rs_machine_dependent) + switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)) + { + case BRANCH_PADDING: + /* The BRANCH_PADDING frag must be followed by a branch + frag. 
*/ + branch_fragP = i386_next_non_empty_frag (next_fragP); + next_fragP->tc_frag_data.u.branch_fragP = branch_fragP; + break; + case FUSED_JCC_PADDING: + /* Check if this is a fused jcc: + FUSED_JCC_PADDING + CMP like instruction + BRANCH_PADDING + COND_JUMP + */ + cmp_fragP = i386_next_non_empty_frag (next_fragP); + pad_fragP = i386_next_non_empty_frag (cmp_fragP); + branch_fragP = i386_next_jcc_frag (pad_fragP); + if (branch_fragP) + { + /* The BRANCH_PADDING frag is merged with the + FUSED_JCC_PADDING frag. */ + next_fragP->tc_frag_data.u.branch_fragP = branch_fragP; + /* CMP like instruction size. */ + next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix; + frag_wane (pad_fragP); + /* Skip to branch_fragP. */ + next_fragP = branch_fragP; + } + else if (next_fragP->tc_frag_data.max_prefix_length) + { + /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't + a fused jcc. */ + next_fragP->fr_subtype + = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0); + next_fragP->tc_frag_data.max_bytes + = next_fragP->tc_frag_data.max_prefix_length; + /* This will be updated in the BRANCH_PREFIX scan. */ + next_fragP->tc_frag_data.max_prefix_length = 0; + } + else + frag_wane (next_fragP); + break; + } + } + + /* Stop if there is no BRANCH_PREFIX. */ + if (!align_branch_prefix_size) + return; + + /* Scan for BRANCH_PREFIX. */ + for (; fragP != NULL; fragP = fragP->fr_next) + { + if (fragP->fr_type != rs_machine_dependent + || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + != BRANCH_PREFIX)) + continue; + + /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and + COND_JUMP_PREFIX. */ + max_prefix_length = 0; + for (next_fragP = fragP; + next_fragP != NULL; + next_fragP = next_fragP->fr_next) + { + if (next_fragP->fr_type == rs_fill) + /* Skip rs_fill frags. */ + continue; + else if (next_fragP->fr_type != rs_machine_dependent) + /* Stop for all other frags. */ + break; + + /* rs_machine_dependent frags. 
*/ + if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX) + { + /* Count BRANCH_PREFIX frags. */ + if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE) + { + max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE; + frag_wane (next_fragP); + } + else + max_prefix_length + += next_fragP->tc_frag_data.max_bytes; + } + else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PADDING) + || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == FUSED_JCC_PADDING)) + { + /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */ + fragP->tc_frag_data.u.padding_fragP = next_fragP; + break; + } + else + /* Stop for other rs_machine_dependent frags. */ + break; + } + + fragP->tc_frag_data.max_prefix_length = max_prefix_length; + + /* Skip to the next frag. */ + fragP = next_fragP; + } +} + +/* Compute padding size for + + FUSED_JCC_PADDING + CMP like instruction + BRANCH_PADDING + COND_JUMP/UNCOND_JUMP + + or + + BRANCH_PADDING + COND_JUMP/UNCOND_JUMP + */ + +static int +i386_branch_padding_size (fragS *fragP, offsetT address) +{ + unsigned int offset, size, padding_size; + fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP; + + /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */ + if (!address) + address = fragP->fr_address; + address += fragP->fr_fix; + + /* CMP like instruction size. */ + size = fragP->tc_frag_data.cmp_size; + + /* The base size of the branch frag. */ + size += branch_fragP->fr_fix; + + /* Add opcode and displacement bytes for the rs_machine_dependent + branch frag. */ + if (branch_fragP->fr_type == rs_machine_dependent) + size += md_relax_table[branch_fragP->fr_subtype].rlx_length; + + /* Check if branch is within boundary and doesn't end at the last + byte. */ + offset = address & ((1U << align_branch_power) - 1); + if ((offset + size) >= (1U << align_branch_power)) + /* Padding needed to avoid crossing boundary. */ + padding_size = (1U << align_branch_power) - offset; + else + /* No padding needed. 
*/ + padding_size = 0; + + /* The return value may be saved in tc_frag_data.length which is + unsigned byte. */ + if (!fits_in_unsigned_byte (padding_size)) + abort (); + + return padding_size; +} + +/* i386_generic_table_relax_frag() + + Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to + grow/shrink padding to align branch frags. Hand others to + relax_frag(). */ + +long +i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch) +{ + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + { + long padding_size = i386_branch_padding_size (fragP, 0); + long grow = padding_size - fragP->tc_frag_data.length; + + /* When the BRANCH_PREFIX frag is used, the computed address + must match the actual address and there should be no padding. */ + if (fragP->tc_frag_data.padding_address + && (fragP->tc_frag_data.padding_address != fragP->fr_address + || padding_size)) + abort (); + + /* Update the padding size. */ + if (grow) + fragP->tc_frag_data.length = padding_size; + + return grow; + } + else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + { + fragS *padding_fragP, *next_fragP; + long padding_size, left_size, last_size; + + padding_fragP = fragP->tc_frag_data.u.padding_fragP; + if (!padding_fragP) + /* Use the padding set by the leading BRANCH_PREFIX frag. */ + return (fragP->tc_frag_data.length + - fragP->tc_frag_data.last_length); + + /* Compute the relative address of the padding frag in the very + first time where the BRANCH_PREFIX frag sizes are zero. */ + if (!fragP->tc_frag_data.padding_address) + fragP->tc_frag_data.padding_address + = padding_fragP->fr_address - (fragP->fr_address - stretch); + + /* First update the last length from the previous iteration. 
*/ + left_size = fragP->tc_frag_data.prefix_length; + for (next_fragP = fragP; + next_fragP != padding_fragP; + next_fragP = next_fragP->fr_next) + if (next_fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX)) + { + if (left_size) + { + int max = next_fragP->tc_frag_data.max_bytes; + if (max) + { + int size; + if (max > left_size) + size = left_size; + else + size = max; + left_size -= size; + next_fragP->tc_frag_data.last_length = size; + } + } + else + next_fragP->tc_frag_data.last_length = 0; + } + + /* Check the padding size for the padding frag. */ + padding_size = i386_branch_padding_size + (padding_fragP, (fragP->fr_address + + fragP->tc_frag_data.padding_address)); + + last_size = fragP->tc_frag_data.prefix_length; + /* Check if there is change from the last iteration. */ + if (padding_size == last_size) + { + /* Update the expected address of the padding frag. */ + padding_fragP->tc_frag_data.padding_address + = (fragP->fr_address + padding_size + + fragP->tc_frag_data.padding_address); + return 0; + } + + if (padding_size > fragP->tc_frag_data.max_prefix_length) + { + /* No padding if there is no sufficient room. Clear the + expected address of the padding frag. */ + padding_fragP->tc_frag_data.padding_address = 0; + padding_size = 0; + } + else + /* Store the expected address of the padding frag. */ + padding_fragP->tc_frag_data.padding_address + = (fragP->fr_address + padding_size + + fragP->tc_frag_data.padding_address); + + fragP->tc_frag_data.prefix_length = padding_size; + + /* Update the length for the current iteration. 
*/ + left_size = padding_size; + for (next_fragP = fragP; + next_fragP != padding_fragP; + next_fragP = next_fragP->fr_next) + if (next_fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype) + == BRANCH_PREFIX)) + { + if (left_size) + { + int max = next_fragP->tc_frag_data.max_bytes; + if (max) + { + int size; + if (max > left_size) + size = left_size; + else + size = max; + left_size -= size; + next_fragP->tc_frag_data.length = size; + } + } + else + next_fragP->tc_frag_data.length = 0; + } + + return (fragP->tc_frag_data.length + - fragP->tc_frag_data.last_length); + } + return relax_frag (segment, fragP, stretch); +} + /* md_estimate_size_before_relax() Called just before relax() for rs_machine_dependent frags. The x86 @@ -10420,6 +11202,14 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var) int md_estimate_size_before_relax (fragS *fragP, segT segment) { + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING) + { + i386_classify_machine_dependent_frag (fragP); + return fragP->tc_frag_data.length; + } + /* We've already got fragP->fr_subtype right; all we have to do is check for un-relaxable symbols. On an ELF system, we can't relax an externally visible symbol, because it may be overridden by a @@ -10553,6 +11343,106 @@ md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED, unsigned int extension = 0; offsetT displacement_from_opcode_start; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING + || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + { + /* Generate nop padding. 
*/ + unsigned int size = fragP->tc_frag_data.length; + if (size) + { + if (size > fragP->tc_frag_data.max_bytes) + abort (); + + if (flag_debug) + { + const char *msg; + const char *branch = "branch"; + const char *prefix = ""; + fragS *padding_fragP; + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + == BRANCH_PREFIX) + { + padding_fragP = fragP->tc_frag_data.u.padding_fragP; + switch (fragP->tc_frag_data.default_prefix) + { + default: + abort (); + break; + case CS_PREFIX_OPCODE: + prefix = " cs"; + break; + case DS_PREFIX_OPCODE: + prefix = " ds"; + break; + case ES_PREFIX_OPCODE: + prefix = " es"; + break; + case FS_PREFIX_OPCODE: + prefix = " fs"; + break; + case GS_PREFIX_OPCODE: + prefix = " gs"; + break; + case SS_PREFIX_OPCODE: + prefix = " ss"; + break; + } + if (padding_fragP) + msg = _("%s:%u: add %d%s at 0x%llx to align " + "%s within %d-byte boundary\n"); + else + msg = _("%s:%u: add additional %d%s at 0x%llx to " + "align %s within %d-byte boundary\n"); + } + else + { + padding_fragP = fragP; + msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align " + "%s within %d-byte boundary\n"); + } + + if (padding_fragP) + switch (padding_fragP->tc_frag_data.branch_type) + { + case align_branch_jcc: + branch = "jcc"; + break; + case align_branch_fused: + branch = "fused jcc"; + break; + case align_branch_jmp: + branch = "jmp"; + break; + case align_branch_call: + branch = "call"; + break; + case align_branch_indirect: + branch = "indiret branch"; + break; + case align_branch_ret: + branch = "ret"; + break; + default: + break; + } + + fprintf (stdout, msg, + fragP->fr_file, fragP->fr_line, size, prefix, + (long long) fragP->fr_address, branch, + 1 << align_branch_power); + } + if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX) + memset (fragP->fr_opcode, + fragP->tc_frag_data.default_prefix, size); + else + i386_generate_nops (fragP, (char *) fragP->fr_opcode, + size, 0); + fragP->fr_fix += size; + } + return; + } + opcode = (unsigned char *) 
fragP->fr_opcode; /* Address we want to reach in file space. */ @@ -10935,7 +11825,8 @@ parse_real_register (char *reg_string, char **end_op) if (!cpu_arch_flags.bitfield.cpuavx512f) { - if (r->reg_type.bitfield.zmmword || r->reg_type.bitfield.regmask) + if (r->reg_type.bitfield.zmmword + || r->reg_type.bitfield.class == RegMask) return (const reg_entry *) NULL; if (!cpu_arch_flags.bitfield.cpuavx) @@ -10948,7 +11839,7 @@ parse_real_register (char *reg_string, char **end_op) } } - if (r->reg_type.bitfield.regbnd && !cpu_arch_flags.bitfield.cpumpx) + if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx) return (const reg_entry *) NULL; /* Don't allow fake index register unless allow_index_reg isn't 0. */ @@ -11110,6 +12001,10 @@ const char *md_shortopts = "qnO::"; #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24) #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25) #define OPTION_MVEXWIG (OPTION_MD_BASE + 26) +#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27) +#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28) +#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29) +#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30) struct option md_longopts[] = { @@ -11145,6 +12040,10 @@ struct option md_longopts[] = {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD}, {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS}, {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG}, + {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY}, + {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE}, + {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH}, + {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES}, {"mamd64", no_argument, NULL, OPTION_MAMD64}, {"mintel64", no_argument, NULL, OPTION_MINTEL64}, {NULL, no_argument, NULL, 0} @@ -11155,7 +12054,7 @@ int 
md_parse_option (int c, const char *arg) { unsigned int j; - char *arch, *next, *saved; + char *arch, *next, *saved, *type; switch (c) { @@ -11533,12 +12432,94 @@ md_parse_option (int c, const char *arg) as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg); break; + case OPTION_MALIGN_BRANCH_BOUNDARY: + { + char *end; + long int align = strtoul (arg, &end, 0); + if (*end == '\0') + { + if (align == 0) + { + align_branch_power = 0; + break; + } + else if (align >= 16) + { + int align_power; + for (align_power = 0; + (align & 1) == 0; + align >>= 1, align_power++) + continue; + /* Limit alignment power to 31. */ + if (align == 1 && align_power < 32) + { + align_branch_power = align_power; + break; + } + } + } + as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg); + } + break; + + case OPTION_MALIGN_BRANCH_PREFIX_SIZE: + { + char *end; + int align = strtoul (arg, &end, 0); + /* Some processors only support 5 prefixes. */ + if (*end == '\0' && align >= 0 && align < 6) + { + align_branch_prefix_size = align; + break; + } + as_fatal (_("invalid -malign-branch-prefix-size= value: %s"), + arg); + } + break; + + case OPTION_MALIGN_BRANCH: + align_branch = 0; + saved = xstrdup (arg); + type = saved; + do + { + next = strchr (type, '+'); + if (next) + *next++ = '\0'; + if (strcasecmp (type, "jcc") == 0) + align_branch |= align_branch_jcc_bit; + else if (strcasecmp (type, "fused") == 0) + align_branch |= align_branch_fused_bit; + else if (strcasecmp (type, "jmp") == 0) + align_branch |= align_branch_jmp_bit; + else if (strcasecmp (type, "call") == 0) + align_branch |= align_branch_call_bit; + else if (strcasecmp (type, "ret") == 0) + align_branch |= align_branch_ret_bit; + else if (strcasecmp (type, "indirect") == 0) + align_branch |= align_branch_indirect_bit; + else + as_fatal (_("invalid -malign-branch= option: `%s'"), arg); + type = next; + } + while (next != NULL); + free (saved); + break; + + case OPTION_MBRANCHES_WITH_32B_BOUNDARIES: + 
align_branch_power = 5; + align_branch_prefix_size = 5; + align_branch = (align_branch_jcc_bit + | align_branch_fused_bit + | align_branch_jmp_bit); + break; + case OPTION_MAMD64: - intel64 = 0; + isa64 = amd64; break; case OPTION_MINTEL64: - intel64 = 1; + isa64 = intel64; break; case 'O': @@ -11785,6 +12766,20 @@ md_show_usage (FILE *stream) fprintf (stream, _("\ generate relax relocations\n")); fprintf (stream, _("\ + -malign-branch-boundary=NUM (default: 0)\n\ + align branches within NUM byte boundary\n")); + fprintf (stream, _("\ + -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\ + TYPE is combination of jcc, fused, jmp, call, ret,\n\ + indirect\n\ + specify types of branches to align\n")); + fprintf (stream, _("\ + -malign-branch-prefix-size=NUM (default: 5)\n\ + align branches with NUM prefixes per instruction\n")); + fprintf (stream, _("\ + -mbranches-within-32B-boundaries\n\ + align branches within 32 byte boundary\n")); + fprintf (stream, _("\ -mamd64 accept only AMD64 ISA [default]\n")); fprintf (stream, _("\ -mintel64 accept only Intel64 ISA\n")); @@ -11868,15 +12863,24 @@ i386_target_format (void) { default: format = ELF_TARGET_FORMAT; +#ifndef TE_SOLARIS + tls_get_addr = "___tls_get_addr"; +#endif break; case X86_64_ABI: use_rela_relocations = 1; object_64bit = 1; +#ifndef TE_SOLARIS + tls_get_addr = "__tls_get_addr"; +#endif format = ELF_TARGET_FORMAT64; break; case X86_64_X32_ABI: use_rela_relocations = 1; object_64bit = 1; +#ifndef TE_SOLARIS + tls_get_addr = "__tls_get_addr"; +#endif disallow_64bit_reloc = 1; format = ELF_TARGET_FORMAT32; break; @@ -11993,6 +12997,21 @@ s_bss (int ignore ATTRIBUTE_UNUSED) #endif +/* Remember constant directive. 
*/ + +void +i386_cons_align (int ignore ATTRIBUTE_UNUSED) +{ + if (last_insn.kind != last_insn_directive + && (bfd_section_flags (now_seg) & SEC_CODE)) + { + last_insn.seg = now_seg; + last_insn.kind = last_insn_directive; + last_insn.name = "constant directive"; + last_insn.file = as_where (&last_insn.line); + } +} + void i386_validate_fix (fixS *fixp) {