X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-i386.c;h=3d935c1bd8d90a3db88e95897a409438d55dafb5;hb=76bc74dc40db159493206725ff7d65ad5cd4897d;hp=26684c8e9847fa7b8fd1ed035d483cbba01715ef;hpb=0b1cf022c8646c5065eed31d3b2889d7a679f88c;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 26684c8e98..3d935c1bd8 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -7,7 +7,7 @@ GAS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + the Free Software Foundation; either version 3, or (at your option) any later version. GAS is distributed in the hope that it will be useful, @@ -32,7 +32,6 @@ #include "subsegs.h" #include "dwarf2dbg.h" #include "dw2gencfi.h" -#include "opcodes/i386-opc.h" #include "elf/x86-64.h" #ifndef REGISTER_WARNINGS @@ -432,7 +431,7 @@ static const arch_entry cpu_arch[] = Cpu186}, {"i286", PROCESSOR_UNKNOWN, Cpu186|Cpu286}, - {"i386", PROCESSOR_GENERIC32, + {"i386", PROCESSOR_I386, Cpu186|Cpu286|Cpu386}, {"i486", PROCESSOR_I486, Cpu186|Cpu286|Cpu386|Cpu486}, @@ -499,6 +498,12 @@ static const arch_entry cpu_arch[] = CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3}, {".ssse3", PROCESSOR_UNKNOWN, CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3}, + {".sse4.1", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1}, + {".sse4.2", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, + {".sse4", PROCESSOR_UNKNOWN, + CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4}, {".3dnow", PROCESSOR_UNKNOWN, CpuMMX|Cpu3dnow}, {".3dnowa", PROCESSOR_UNKNOWN, @@ -603,9 +608,6 @@ i386_align_code (fragS *fragP, int count) static const char f32_14[] = {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const char f32_15[] = - 
{0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ - 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; static const char f16_3[] = {0x8d,0x74,0x00}; /* lea 0(%esi),%esi */ static const char f16_4[] = @@ -622,13 +624,17 @@ i386_align_code (fragS *fragP, int count) static const char f16_8[] = {0x8d,0xb4,0x00,0x00, /* lea 0w(%si),%si */ 0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */ + static const char jump_31[] = + {0xeb,0x1d,0x90,0x90,0x90,0x90,0x90, /* jmp .+31; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; static const char *const f32_patt[] = { f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, - f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14 }; static const char *const f16_patt[] = { - f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8, - f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15 + f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8 }; /* nopl (%[re]ax) */ static const char alt_3[] = @@ -735,57 +741,40 @@ i386_align_code (fragS *fragP, int count) alt_long_14, alt_long_15 }; - if (count <= 0 || count > 15) + /* Only align for at least a positive non-zero boundary. */ + if (count <= 0 || count > MAX_MEM_FOR_RS_ALIGN_CODE) return; /* We need to decide which NOP sequence to use for 32bit and 64bit. When -mtune= is used: - 1. For PROCESSOR_I486, PROCESSOR_PENTIUM and PROCESSOR_GENERIC32, - f32_patt will be used. - 2. For PROCESSOR_K8 and PROCESSOR_AMDFAM10 in 64bit, NOPs with - 0x66 prefix will be used. - 3. For PROCESSOR_CORE2, alt_long_patt will be used. - 4. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA, - PROCESSOR_CORE, PROCESSOR_CORE2, PROCESSOR_K6, PROCESSOR_ATHLON - and PROCESSOR_GENERIC64, alt_short_patt will be used. + 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and + PROCESSOR_GENERIC32, f32_patt will be used. + 2. 
For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA, + PROCESSOR_CORE, PROCESSOR_CORE2, and PROCESSOR_GENERIC64, + alt_long_patt will be used. + 3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and + PROCESSOR_AMDFAM10, alt_short_patt will be used. - When -mtune= isn't used, alt_short_patt will be used if - cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will be used. + When -mtune= isn't used, alt_long_patt will be used if + cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will + be used. When -march= or .arch is used, we can't use anything beyond cpu_arch_isa_flags. */ if (flag_code == CODE_16BIT) { - memcpy (fragP->fr_literal + fragP->fr_fix, - f16_patt[count - 1], count); if (count > 8) - /* Adjust jump offset. */ - fragP->fr_literal[fragP->fr_fix + 1] = count - 2; - } - else if (flag_code == CODE_64BIT && cpu_arch_tune == PROCESSOR_K8) - { - int i; - int nnops = (count + 3) / 4; - int len = count / nnops; - int remains = count - nnops * len; - int pos = 0; - - /* The recommended way to pad 64bit code is to use NOPs preceded - by maximally four 0x66 prefixes. Balance the size of nops. */ - for (i = 0; i < remains; i++) - { - memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len); - fragP->fr_literal[fragP->fr_fix + pos + len] = 0x90; - pos += len + 1; - } - for (; i < nnops; i++) { - memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len - 1); - fragP->fr_literal[fragP->fr_fix + pos + len - 1] = 0x90; - pos += len; + memcpy (fragP->fr_literal + fragP->fr_fix, + jump_31, count); + /* Adjust jump offset. */ + fragP->fr_literal[fragP->fr_fix + 1] = count - 2; } + else + memcpy (fragP->fr_literal + fragP->fr_fix, + f16_patt[count - 1], count); } else { @@ -800,24 +789,25 @@ i386_align_code (fragS *fragP, int count) /* We use cpu_arch_isa_flags to check if we SHOULD optimize for Cpu686. 
*/ if ((cpu_arch_isa_flags & Cpu686) != 0) - patt = alt_short_patt; + patt = alt_long_patt; else patt = f32_patt; break; - case PROCESSOR_CORE2: - patt = alt_long_patt; - break; case PROCESSOR_PENTIUMPRO: case PROCESSOR_PENTIUM4: case PROCESSOR_NOCONA: case PROCESSOR_CORE: + case PROCESSOR_CORE2: + case PROCESSOR_GENERIC64: + patt = alt_long_patt; + break; case PROCESSOR_K6: case PROCESSOR_ATHLON: case PROCESSOR_K8: - case PROCESSOR_GENERIC64: case PROCESSOR_AMDFAM10: patt = alt_short_patt; break; + case PROCESSOR_I386: case PROCESSOR_I486: case PROCESSOR_PENTIUM: case PROCESSOR_GENERIC32: @@ -835,12 +825,9 @@ i386_align_code (fragS *fragP, int count) abort (); break; + case PROCESSOR_I386: case PROCESSOR_I486: case PROCESSOR_PENTIUM: - case PROCESSOR_PENTIUMPRO: - case PROCESSOR_PENTIUM4: - case PROCESSOR_NOCONA: - case PROCESSOR_CORE: case PROCESSOR_K6: case PROCESSOR_ATHLON: case PROCESSOR_K8: @@ -853,6 +840,10 @@ i386_align_code (fragS *fragP, int count) else patt = f32_patt; break; + case PROCESSOR_PENTIUMPRO: + case PROCESSOR_PENTIUM4: + case PROCESSOR_NOCONA: + case PROCESSOR_CORE: case PROCESSOR_CORE2: if ((cpu_arch_isa_flags & Cpu686) != 0) patt = alt_long_patt; @@ -860,13 +851,44 @@ i386_align_code (fragS *fragP, int count) patt = f32_patt; break; case PROCESSOR_GENERIC64: - patt = alt_short_patt; + patt = alt_long_patt; break; } } - memcpy (fragP->fr_literal + fragP->fr_fix, - patt[count - 1], count); + if (patt == f32_patt) + { + /* If the padding is less than 15 bytes, we use the normal + ones. Otherwise, we use a jump instruction and adjust + its offset. */ + if (count < 15) + memcpy (fragP->fr_literal + fragP->fr_fix, + patt[count - 1], count); + else + { + memcpy (fragP->fr_literal + fragP->fr_fix, + jump_31, count); + /* Adjust jump offset. */ + fragP->fr_literal[fragP->fr_fix + 1] = count - 2; + } + } + else + { + /* Maximum length of an instruction is 15 bytes.
If the + padding is greater than 15 bytes and we don't use jump, + we have to break it into smaller pieces. */ + int padding = count; + while (padding > 15) + { + padding -= 15; + memcpy (fragP->fr_literal + fragP->fr_fix + padding, + patt [14], 15); + } + + if (padding) + memcpy (fragP->fr_literal + fragP->fr_fix, + patt [padding - 1], padding); + } } fragP->fr_var = count; } @@ -992,9 +1014,9 @@ add_prefix (unsigned int prefix) if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16 && flag_code == CODE_64BIT) { - if ((i.prefix[REX_PREFIX] & prefix & REX_MODE64) - || ((i.prefix[REX_PREFIX] & (REX_EXTX | REX_EXTY | REX_EXTZ)) - && (prefix & (REX_EXTX | REX_EXTY | REX_EXTZ)))) + if ((i.prefix[REX_PREFIX] & prefix & REX_W) + || ((i.prefix[REX_PREFIX] & (REX_R | REX_X | REX_B)) + && (prefix & (REX_R | REX_X | REX_B)))) ret = 0; q = REX_PREFIX; } @@ -1238,8 +1260,9 @@ md_begin () reg_hash = hash_new (); { const reg_entry *regtab; + unsigned int regtab_size = i386_regtab_size; - for (regtab = i386_regtab; regtab->reg_name != NULL; regtab++) + for (regtab = i386_regtab; regtab_size--; regtab++) { hash_err = hash_insert (reg_hash, regtab->reg_name, (PTR) regtab); if (hash_err) @@ -1294,6 +1317,7 @@ md_begin () #endif digit_chars['-'] = '-'; mnemonic_chars['-'] = '-'; + mnemonic_chars['.'] = '.'; identifier_chars['_'] = '_'; identifier_chars['.'] = '.'; @@ -1353,10 +1377,10 @@ pi (char *line, i386_insn *x) fprintf (stdout, " sib: base %x index %x scale %x\n", x->sib.base, x->sib.index, x->sib.scale); fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n", - (x->rex & REX_MODE64) != 0, - (x->rex & REX_EXTX) != 0, - (x->rex & REX_EXTY) != 0, - (x->rex & REX_EXTZ) != 0); + (x->rex & REX_W) != 0, + (x->rex & REX_R) != 0, + (x->rex & REX_X) != 0, + (x->rex & REX_B) != 0); for (i = 0; i < x->operands; i++) { fprintf (stdout, " #%d: ", i + 1); @@ -1828,8 +1852,11 @@ md_assemble (line) for (x = 0; x < i.operands; x++) if (i.op[x].regs->reg_num != x) - as_bad (_("can't use 
register '%%%s' as operand %d in '%s'."), - i.op[x].regs->reg_name, x + 1, i.tm.name); + as_bad (_("can't use register '%s%s' as operand %d in '%s'."), + register_prefix, + i.op[x].regs->reg_name, + x + 1, + i.tm.name); i.operands = 0; } @@ -1878,7 +1905,7 @@ md_assemble (line) } if ((i.tm.opcode_modifier & Rex64) != 0) - i.rex |= REX_MODE64; + i.rex |= REX_W; /* For 8 bit registers we need an empty rex prefix. Also if the instruction already has a prefix, we need to convert old @@ -1902,9 +1929,9 @@ md_assemble (line) { /* In case it is "hi" register, give up. */ if (i.op[x].regs->reg_num > 3) - as_bad (_("can't encode register '%%%s' in an " + as_bad (_("can't encode register '%s%s' in an " "instruction requiring REX prefix."), - i.op[x].regs->reg_name); + register_prefix, i.op[x].regs->reg_name); /* Otherwise it is equivalent to the extended register. Since the encoding doesn't change this is merely @@ -2621,6 +2648,15 @@ match_template (void) continue; break; case 2: + /* xchg %eax, %eax is a special case. It is an alias for nop + only in 32bit mode and we can use opcode 0x90. In 64bit + mode, we can't use 0x90 for xchg %eax, %eax since it should + zero-extend %eax to %rax. */ + if (flag_code == CODE_64BIT + && t->base_opcode == 0x90 + && i.types [0] == (Acc | Reg32) + && i.types [1] == (Acc | Reg32)) + continue; case 3: case 4: overlap1 = i.types[1] & operand_types[1]; @@ -2628,9 +2664,10 @@ match_template (void) || !MATCH (overlap1, i.types[1], operand_types[1]) /* monitor in SSE3 is a very special case. The first register and the second register may have different - sizes. */ + sizes. The same applies to crc32 in SSE4.2. 
*/ || !((t->base_opcode == 0x0f01 && t->extension_opcode == 0xc8) + || t->base_opcode == 0xf20f38f1 || CONSISTENT_REGISTER_MATCH (overlap0, i.types[0], operand_types[0], overlap1, i.types[1], @@ -2817,19 +2854,44 @@ process_suffix (void) { /* We take i.suffix from the last register operand specified, Destination register type is more significant than source - register type. */ - int op; - - for (op = i.operands; --op >= 0;) - if ((i.types[op] & Reg) - && !(i.tm.operand_types[op] & InOutPortReg)) - { - i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : - (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : - (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + register type. crc32 in SSE4.2 prefers source register + type. */ + if (i.tm.base_opcode == 0xf20f38f1) + { + if ((i.types[0] & Reg)) + i.suffix = ((i.types[0] & Reg16) ? WORD_MNEM_SUFFIX : LONG_MNEM_SUFFIX); - break; - } + } + else if (i.tm.base_opcode == 0xf20f38f0) + { + if ((i.types[0] & Reg8)) + i.suffix = BYTE_MNEM_SUFFIX; + } + + if (!i.suffix) + { + int op; + + if (i.tm.base_opcode == 0xf20f38f1 + || i.tm.base_opcode == 0xf20f38f0) + { + /* We have to know the operand size for crc32. */ + as_bad (_("ambiguous memory operand size for `%s`"), + i.tm.name); + return 0; + } + + for (op = i.operands; --op >= 0;) + if ((i.types[op] & Reg) + && !(i.tm.operand_types[op] & InOutPortReg)) + { + i.suffix = ((i.types[op] & Reg8) ? BYTE_MNEM_SUFFIX : + (i.types[op] & Reg16) ? WORD_MNEM_SUFFIX : + (i.types[op] & Reg64) ? QWORD_MNEM_SUFFIX : + LONG_MNEM_SUFFIX); + break; + } + } } else if (i.suffix == BYTE_MNEM_SUFFIX) { @@ -2974,8 +3036,8 @@ process_suffix (void) if (i.operands != 2 || i.types [0] != (Acc | Reg64) || i.types [1] != (Acc | Reg64) - || strcmp (i.tm.name, "xchg") != 0) - i.rex |= REX_MODE64; + || i.tm.base_opcode != 0x90) + i.rex |= REX_W; } /* Size floating point instruction. */ @@ -3009,6 +3071,10 @@ check_byte_reg (void) || i.tm.base_opcode == 0xfbf)) continue; + /* crc32 doesn't generate this warning. 
*/ + if (i.tm.base_opcode == 0xf20f38f0) + continue; + if ((i.types[op] & WordReg) && i.op[op].regs->reg_num < 4) { /* Prohibit these changes in the 64bit mode, since the @@ -3024,10 +3090,12 @@ check_byte_reg (void) #if REGISTER_WARNINGS if (!quiet_warnings && (i.tm.operand_types[op] & InOutPortReg) == 0) - as_warn (_("using `%%%s' instead of `%%%s' due to `%c' suffix"), + as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), + register_prefix, (i.op[op].regs + (i.types[op] & Reg16 ? REGNAM_AL - REGNAM_AX : REGNAM_AL - REGNAM_EAX))->reg_name, + register_prefix, i.op[op].regs->reg_name, i.suffix); #endif @@ -3039,7 +3107,8 @@ check_byte_reg (void) | Control | Debug | Test | FloatReg | FloatAcc)) { - as_bad (_("`%%%s' not allowed with `%s%c'"), + as_bad (_("`%s%s' not allowed with `%s%c'"), + register_prefix, i.op[op].regs->reg_name, i.tm.name, i.suffix); @@ -3060,7 +3129,8 @@ check_long_reg (void) if ((i.types[op] & Reg8) != 0 && (i.tm.operand_types[op] & (Reg16 | Reg32 | Acc)) != 0) { - as_bad (_("`%%%s' not allowed with `%s%c'"), + as_bad (_("`%s%s' not allowed with `%s%c'"), + register_prefix, i.op[op].regs->reg_name, i.tm.name, i.suffix); @@ -3082,8 +3152,10 @@ check_long_reg (void) } #if REGISTER_WARNINGS else - as_warn (_("using `%%%s' instead of `%%%s' due to `%c' suffix"), + as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), + register_prefix, (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name, + register_prefix, i.op[op].regs->reg_name, i.suffix); #endif @@ -3111,7 +3183,8 @@ check_qword_reg (void) if ((i.types[op] & Reg8) != 0 && (i.tm.operand_types[op] & (Reg16 | Reg32 | Acc)) != 0) { - as_bad (_("`%%%s' not allowed with `%s%c'"), + as_bad (_("`%s%s' not allowed with `%s%c'"), + register_prefix, i.op[op].regs->reg_name, i.tm.name, i.suffix); @@ -3142,7 +3215,8 @@ check_word_reg (void) if ((i.types[op] & Reg8) != 0 && (i.tm.operand_types[op] & (Reg16 | Reg32 | Acc)) != 0) { - as_bad (_("`%%%s' not allowed with `%s%c'"), + as_bad 
(_("`%s%s' not allowed with `%s%c'"), + register_prefix, i.op[op].regs->reg_name, i.tm.name, i.suffix); @@ -3164,8 +3238,10 @@ check_word_reg (void) } else #if REGISTER_WARNINGS - as_warn (_("using `%%%s' instead of `%%%s' due to `%c' suffix"), + as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"), + register_prefix, (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name, + register_prefix, i.op[op].regs->reg_name, i.suffix); #endif @@ -3265,16 +3341,49 @@ process_operands (void) /* The imul $imm, %reg instruction is converted into imul $imm, %reg, %reg, and the clr %reg instruction is converted into xor %reg, %reg. */ - if (i.tm.opcode_modifier & regKludge) - { - unsigned int first_reg_op = (i.types[0] & Reg) ? 0 : 1; - /* Pretend we saw the extra register operand. */ - assert (i.reg_operands == 1 - && i.op[first_reg_op + 1].regs == 0); - i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; - i.types[first_reg_op + 1] = i.types[first_reg_op]; - i.operands++; - i.reg_operands++; + if (i.tm.opcode_modifier & RegKludge) + { + if ((i.tm.cpu_flags & CpuSSE4_1)) + { + /* The first operand in instruction blendvpd, blendvps and + pblendvb in SSE4.1 is implicit and must be xmm0. */ + assert (i.operands == 3 + && i.reg_operands >= 2 + && i.types[0] == RegXMM); + if (i.op[0].regs->reg_num != 0) + { + if (intel_syntax) + as_bad (_("the last operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + else + as_bad (_("the first operand of `%s' must be `%sxmm0'"), + i.tm.name, register_prefix); + return 0; + } + i.op[0] = i.op[1]; + i.op[1] = i.op[2]; + i.types[0] = i.types[1]; + i.types[1] = i.types[2]; + i.operands--; + i.reg_operands--; + + /* We need to adjust fields in i.tm since they are used by + build_modrm_byte. */ + i.tm.operand_types [0] = i.tm.operand_types [1]; + i.tm.operand_types [1] = i.tm.operand_types [2]; + i.tm.operands--; + } + else + { + unsigned int first_reg_op = (i.types[0] & Reg) ? 
0 : 1; + /* Pretend we saw the extra register operand. */ + assert (i.reg_operands == 1 + && i.op[first_reg_op + 1].regs == 0); + i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs; + i.types[first_reg_op + 1] = i.types[first_reg_op]; + i.operands++; + i.reg_operands++; + } } if (i.tm.opcode_modifier & ShortForm) @@ -3289,7 +3398,7 @@ process_operands (void) } i.tm.base_opcode |= (i.op[0].regs->reg_num << 3); if ((i.op[0].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; } else { @@ -3298,7 +3407,7 @@ process_operands (void) /* Register goes in low 3 bits of opcode. */ i.tm.base_opcode |= i.op[op].regs->reg_num; if ((i.op[op].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; if (!quiet_warnings && (i.tm.opcode_modifier & Ugh) != 0) { /* Warn about some common errors, but press on regardless. @@ -3306,15 +3415,15 @@ process_operands (void) if (i.operands == 2) { /* Reversed arguments on faddp, fsubp, etc. */ - as_warn (_("translating to `%s %%%s,%%%s'"), i.tm.name, - i.op[1].regs->reg_name, - i.op[0].regs->reg_name); + as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name, + register_prefix, i.op[1].regs->reg_name, + register_prefix, i.op[0].regs->reg_name); } else { /* Extraneous `l' suffix on fp insn. */ - as_warn (_("translating to `%s %%%s'"), i.tm.name, - i.op[0].regs->reg_name); + as_warn (_("translating to `%s %s%s'"), i.tm.name, + register_prefix, i.op[0].regs->reg_name); } } } @@ -3402,29 +3511,29 @@ build_modrm_byte (void) destination operand, then we assume the source operand may sometimes be a memory operand and so we need to store the destination in the i.rm.reg field. 
*/ - if ((i.tm.operand_types[dest] & AnyMem) == 0) + if ((i.tm.operand_types[dest] & (AnyMem | RegMem)) == 0) { i.rm.reg = i.op[dest].regs->reg_num; i.rm.regmem = i.op[source].regs->reg_num; if ((i.op[dest].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTX; + i.rex |= REX_R; if ((i.op[source].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; } else { i.rm.reg = i.op[source].regs->reg_num; i.rm.regmem = i.op[dest].regs->reg_num; if ((i.op[dest].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; if ((i.op[source].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTX; + i.rex |= REX_R; } - if (flag_code != CODE_64BIT && (i.rex & (REX_EXTX | REX_EXTZ))) + if (flag_code != CODE_64BIT && (i.rex & (REX_R | REX_B))) { if (!((i.types[0] | i.types[1]) & Control)) abort (); - i.rex &= ~(REX_EXTX | REX_EXTZ); + i.rex &= ~(REX_R | REX_B); add_prefix (LOCK_PREFIX_OPCODE); } } @@ -3486,7 +3595,7 @@ build_modrm_byte (void) else i.types[op] |= Disp32S; if ((i.index_reg->reg_flags & RegRex) != 0) - i.rex |= REX_EXTY; + i.rex |= REX_X; } } /* RIP addressing for 64bit mode. */ @@ -3539,7 +3648,7 @@ build_modrm_byte (void) i.rm.regmem = i.base_reg->reg_num; if ((i.base_reg->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; i.sib.base = i.base_reg->reg_num; /* x86-64 ignores REX prefix bit here to avoid decoder complications. 
*/ @@ -3576,7 +3685,7 @@ build_modrm_byte (void) i.sib.index = i.index_reg->reg_num; i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING; if ((i.index_reg->reg_flags & RegRex) != 0) - i.rex |= REX_EXTY; + i.rex |= REX_X; } if (i.disp_operands @@ -3624,13 +3733,13 @@ build_modrm_byte (void) { i.rm.regmem = i.op[op].regs->reg_num; if ((i.op[op].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTZ; + i.rex |= REX_B; } else { i.rm.reg = i.op[op].regs->reg_num; if ((i.op[op].regs->reg_flags & RegRex) != 0) - i.rex |= REX_EXTX; + i.rex |= REX_R; } /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we @@ -3883,11 +3992,12 @@ output_insn (void) unsigned char *q; unsigned int prefix; - /* All opcodes on i386 have either 1 or 2 bytes. Supplemental - Streaming SIMD extensions 3 Instructions have 3 bytes. We may - use one more higher byte to specify a prefix the instruction - requires. */ - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + /* All opcodes on i386 have either 1 or 2 bytes. SSSE3 and + SSE4 instructions have 3 bytes. We may use one more higher + byte to specify a prefix the instruction requires. Exclude + instructions which are in both SSE4 and ABM. */ + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { if (i.tm.base_opcode & 0xff000000) { @@ -3928,7 +4038,8 @@ output_insn (void) } else { - if ((i.tm.cpu_flags & CpuSSSE3) != 0) + if ((i.tm.cpu_flags & (CpuSSSE3 | CpuSSE4)) != 0 + && (i.tm.cpu_flags & CpuABM) == 0) { p = frag_more (3); *p++ = (i.tm.base_opcode >> 16) & 0xff; @@ -3982,6 +4093,40 @@ output_insn (void) #endif /* DEBUG386 */ } +/* Return the size of the displacement operand N. */ + +static int +disp_size (unsigned int n) +{ + int size = 4; + if (i.types[n] & (Disp8 | Disp16 | Disp64)) + { + size = 2; + if (i.types[n] & Disp8) + size = 1; + if (i.types[n] & Disp64) + size = 8; + } + return size; +} + +/* Return the size of the immediate operand N. 
*/ + +static int +imm_size (unsigned int n) +{ + int size = 4; + if (i.types[n] & (Imm8 | Imm8S | Imm16 | Imm64)) + { + size = 2; + if (i.types[n] & (Imm8 | Imm8S)) + size = 1; + if (i.types[n] & Imm64) + size = 8; + } + return size; +} + static void output_disp (fragS *insn_start_frag, offsetT insn_start_off) { @@ -3994,18 +4139,9 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) { if (i.op[n].disps->X_op == O_constant) { - int size; + int size = disp_size (n); offsetT val; - size = 4; - if (i.types[n] & (Disp8 | Disp16 | Disp64)) - { - size = 2; - if (i.types[n] & Disp8) - size = 1; - if (i.types[n] & Disp64) - size = 8; - } val = offset_in_range (i.op[n].disps->X_add_number, size); p = frag_more (size); @@ -4014,45 +4150,32 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off) else { enum bfd_reloc_code_real reloc_type; - int size = 4; - int sign = 0; + int size = disp_size (n); + int sign = (i.types[n] & Disp32S) != 0; int pcrel = (i.flags[n] & Operand_PCrel) != 0; + /* We can't have 8 bit displacement here. */ + assert ((i.types[n] & Disp8) == 0); + /* The PC relative address is computed relative to the instruction boundary, so in case immediate fields follows, we need to adjust the value. */ if (pcrel && i.imm_operands) { - int imm_size = 4; unsigned int n1; + int sz = 0; for (n1 = 0; n1 < i.operands; n1++) if (i.types[n1] & Imm) { - if (i.types[n1] & (Imm8 | Imm8S | Imm16 | Imm64)) - { - imm_size = 2; - if (i.types[n1] & (Imm8 | Imm8S)) - imm_size = 1; - if (i.types[n1] & Imm64) - imm_size = 8; - } - break; + /* Only one immediate is allowed for PC + relative address. */ + assert (sz == 0); + sz = imm_size (n1); + i.op[n].disps->X_add_number -= sz; } /* We should find the immediate. 
*/ - if (n1 == i.operands) - abort (); - i.op[n].disps->X_add_number -= imm_size; - } - - if (i.types[n] & Disp32S) - sign = 1; - - if (i.types[n] & (Disp16 | Disp64)) - { - size = 2; - if (i.types[n] & Disp64) - size = 8; + assert (sz != 0); } p = frag_more (size); @@ -4117,18 +4240,9 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off) { if (i.op[n].imms->X_op == O_constant) { - int size; + int size = imm_size (n); offsetT val; - size = 4; - if (i.types[n] & (Imm8 | Imm8S | Imm16 | Imm64)) - { - size = 2; - if (i.types[n] & (Imm8 | Imm8S)) - size = 1; - else if (i.types[n] & Imm64) - size = 8; - } val = offset_in_range (i.op[n].imms->X_add_number, size); p = frag_more (size); @@ -4141,21 +4255,15 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off) non-absolute imms). Try to support other sizes ... */ enum bfd_reloc_code_real reloc_type; - int size = 4; - int sign = 0; + int size = imm_size (n); + int sign; if ((i.types[n] & (Imm32S)) && (i.suffix == QWORD_MNEM_SUFFIX || (!i.suffix && (i.tm.opcode_modifier & No_lSuf)))) sign = 1; - if (i.types[n] & (Imm8 | Imm8S | Imm16 | Imm64)) - { - size = 2; - if (i.types[n] & (Imm8 | Imm8S)) - size = 1; - if (i.types[n] & Imm64) - size = 8; - } + else + sign = 0; p = frag_more (size); reloc_type = reloc (size, 0, sign, i.reloc[n]); @@ -4383,9 +4491,6 @@ lex_got (enum bfd_reloc_code_real *reloc, if (GOT_symbol == NULL) GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME); - /* Replace the relocation token with ' ', so that - errors like foo@GOTOFF1 will be detected. */ - /* The length of the first part of our input line. */ first = cp - input_line_pointer; @@ -4401,9 +4506,12 @@ lex_got (enum bfd_reloc_code_real *reloc, be necessary, but be safe. 
*/ tmpbuf = xmalloc (first + second + 2); memcpy (tmpbuf, input_line_pointer, first); - tmpbuf[first] = ' '; - memcpy (tmpbuf + first + 1, past_reloc, second); - tmpbuf[first + second + 1] = '\0'; + if (second != 0 && *past_reloc != ' ') + /* Replace the relocation token with ' ', so that + errors like foo@GOTOFF1 will be detected. */ + tmpbuf[first++] = ' '; + memcpy (tmpbuf + first, past_reloc, second); + tmpbuf[first + second] = '\0'; return tmpbuf; } @@ -5741,14 +5849,16 @@ parse_real_register (char *reg_string, char **end_op) ++s; if (*s >= '0' && *s <= '7') { - r = &i386_float_regtab[*s - '0']; + int fpr = *s - '0'; ++s; if (is_space_char (*s)) ++s; if (*s == ')') { *end_op = s + 1; - return r; + r = hash_find (reg_hash, "st(0)"); + know (r); + return r + fpr; } } /* We have "%st(" then garbage. */ @@ -5791,7 +5901,7 @@ parse_register (char *reg_string, char **end_op) know (e->X_op == O_register); know (e->X_add_number >= 0 - && (valueT) e->X_add_number < ARRAY_SIZE (i386_regtab)); + && (valueT) e->X_add_number < i386_regtab_size); r = i386_regtab + e->X_add_number; *end_op = input_line_pointer; } @@ -6022,28 +6132,8 @@ md_show_usage (stream) } -#if defined(TE_PEP) -const char * -x86_64_target_format (void) -{ - if (strcmp (default_arch, "x86_64") == 0) - { - set_code_flag (CODE_64BIT); - return COFF_TARGET_FORMAT; - } - else if (strcmp (default_arch, "i386") == 0) - { - set_code_flag (CODE_32BIT); - return "coff-i386"; - } - - as_fatal (_("Unknown architecture")); - return NULL; -} -#endif - #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \ - || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) + || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (TE_PEP)) /* Pick the target format to use. */ @@ -6074,6 +6164,11 @@ i386_target_format (void) as_fatal (_("Unknown architecture")); switch (OUTPUT_FLAVOR) { +#ifdef TE_PEP + case bfd_target_coff_flavour: + return flag_code == CODE_64BIT ? 
COFF_TARGET_FORMAT : "coff-i386"; + break; +#endif #ifdef OBJ_MAYBE_AOUT case bfd_target_aout_flavour: return AOUT_TARGET_FORMAT;