x86: Support GNU_PROPERTY_X86_ISA_1_V[234] marker
binutils-gdb.git: gas/config/tc-i386.c
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2020 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "elf/x86-64.h"
34 #include "opcodes/i386-init.h"
35
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #else
39 #ifdef HAVE_SYS_PARAM_H
40 #include <sys/param.h>
41 #endif
42 #ifndef INT_MAX
43 #define INT_MAX (int) (((unsigned) (-1)) >> 1)
44 #endif
45 #endif
46
47 #ifndef INFER_ADDR_PREFIX
48 #define INFER_ADDR_PREFIX 1
49 #endif
50
51 #ifndef DEFAULT_ARCH
52 #define DEFAULT_ARCH "i386"
53 #endif
54
55 #ifndef INLINE
56 #if __GNUC__ >= 2
57 #define INLINE __inline__
58 #else
59 #define INLINE
60 #endif
61 #endif
62
63 /* Prefixes will be emitted in the order defined below.
64 WAIT_PREFIX must be the first prefix since FWAIT really is an
65 instruction, and so must come before any prefixes.
66 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
67 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
68 #define WAIT_PREFIX 0
69 #define SEG_PREFIX 1
70 #define ADDR_PREFIX 2
71 #define DATA_PREFIX 3
72 #define REP_PREFIX 4
73 #define HLE_PREFIX REP_PREFIX
74 #define BND_PREFIX REP_PREFIX
75 #define LOCK_PREFIX 5
76 #define REX_PREFIX 6 /* must come last. */
77 #define MAX_PREFIXES 7 /* max prefixes per opcode */
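/* Illustrative example (not from the original source): assembling
   "lock addw %ax, %fs:(%ebx)" in 32-bit mode collects 0x64 in the
   SEG_PREFIX slot, 0x66 in the DATA_PREFIX slot and 0xf0 in the
   LOCK_PREFIX slot, and the bytes are emitted in that slot order
   ahead of the opcode.  */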
78
79 /* we define the syntax here (modulo base,index,scale syntax) */
80 #define REGISTER_PREFIX '%'
81 #define IMMEDIATE_PREFIX '$'
82 #define ABSOLUTE_PREFIX '*'
83
84 /* these are the instruction mnemonic suffixes in AT&T syntax or
85 memory operand size in Intel syntax. */
86 #define WORD_MNEM_SUFFIX 'w'
87 #define BYTE_MNEM_SUFFIX 'b'
88 #define SHORT_MNEM_SUFFIX 's'
89 #define LONG_MNEM_SUFFIX 'l'
90 #define QWORD_MNEM_SUFFIX 'q'
91 /* Intel Syntax.  Use a non-ASCII character since it never appears
92 in instructions. */
93 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
94
95 #define END_OF_INSN '\0'
96
97 /* This matches the C -> StaticRounding alias in the opcode table. */
98 #define commutative staticrounding
99
100 /*
101 'templates' is for grouping together 'template' structures for opcodes
102 of the same name. This is only used for storing the insns in the grand
103 ole hash table of insns.
104 The templates themselves start at START and range up to (but not including)
105 END.
106 */
107 typedef struct
108 {
109 const insn_template *start;
110 const insn_template *end;
111 }
112 templates;
113
114 /* 386 operand encoding bytes: see 386 book for details of this. */
115 typedef struct
116 {
117 unsigned int regmem; /* codes register or memory operand */
118 unsigned int reg; /* codes register operand (or extended opcode) */
119 unsigned int mode; /* how to interpret regmem & reg */
120 }
121 modrm_byte;
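/* For example, the ModR/M byte 0xc3 in "mov %eax, %ebx" (89 c3) decodes
   as mode = 3 (register direct), reg = 0 (%eax, the source here) and
   regmem = 3 (%ebx); modes 0, 1 and 2 select memory forms instead.  */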
122
123 /* x86-64 extension prefix. */
124 typedef int rex_byte;
125
126 /* 386 opcode byte to code indirect addressing. */
127 typedef struct
128 {
129 unsigned base;
130 unsigned index;
131 unsigned scale;
132 }
133 sib_byte;
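/* For example, the memory operand "(%eax,%ebx,4)" gives base = 0 (%eax),
   index = 3 (%ebx) and scale = 2, since SCALE holds the log2 of the
   scale factor (see log2_scale_factor below).  */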
134
135 /* x86 arch names, types and features */
136 typedef struct
137 {
138 const char *name; /* arch name */
139 unsigned int len; /* arch string length */
140 enum processor_type type; /* arch type */
141 i386_cpu_flags flags; /* cpu feature flags */
142 unsigned int skip; /* show_arch should skip this. */
143 }
144 arch_entry;
145
146 /* Used to turn off indicated flags. */
147 typedef struct
148 {
149 const char *name; /* arch name */
150 unsigned int len; /* arch string length */
151 i386_cpu_flags flags; /* cpu feature flags */
152 }
153 noarch_entry;
154
155 static void update_code_flag (int, int);
156 static void set_code_flag (int);
157 static void set_16bit_gcc_code_flag (int);
158 static void set_intel_syntax (int);
159 static void set_intel_mnemonic (int);
160 static void set_allow_index_reg (int);
161 static void set_check (int);
162 static void set_cpu_arch (int);
163 #ifdef TE_PE
164 static void pe_directive_secrel (int);
165 #endif
166 static void signed_cons (int);
167 static char *output_invalid (int c);
168 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
169 const char *);
170 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
171 const char *);
172 static int i386_att_operand (char *);
173 static int i386_intel_operand (char *, int);
174 static int i386_intel_simplify (expressionS *);
175 static int i386_intel_parse_name (const char *, expressionS *);
176 static const reg_entry *parse_register (char *, char **);
177 static char *parse_insn (char *, char *);
178 static char *parse_operands (char *, const char *);
179 static void swap_operands (void);
180 static void swap_2_operands (int, int);
181 static enum flag_code i386_addressing_mode (void);
182 static void optimize_imm (void);
183 static void optimize_disp (void);
184 static const insn_template *match_template (char);
185 static int check_string (void);
186 static int process_suffix (void);
187 static int check_byte_reg (void);
188 static int check_long_reg (void);
189 static int check_qword_reg (void);
190 static int check_word_reg (void);
191 static int finalize_imm (void);
192 static int process_operands (void);
193 static const seg_entry *build_modrm_byte (void);
194 static void output_insn (void);
195 static void output_imm (fragS *, offsetT);
196 static void output_disp (fragS *, offsetT);
197 #ifndef I386COFF
198 static void s_bss (int);
199 #endif
200 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
201 static void handle_large_common (int small ATTRIBUTE_UNUSED);
202
203 /* GNU_PROPERTY_X86_ISA_1_USED. */
204 static unsigned int x86_isa_1_used;
205 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
206 static unsigned int x86_feature_2_used;
207 /* Generate x86 used ISA and feature properties. */
208 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
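/* The bits accumulated in the two variables above (e.g. the
   GNU_PROPERTY_X86_ISA_1_V2 marker when an x86-64-v2 ISA level applies)
   end up in the object's .note.gnu.property section when such notes are
   being generated.  */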
209 #endif
210
211 static const char *default_arch = DEFAULT_ARCH;
212
213 /* parse_register() returns this when a register alias cannot be used. */
214 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
215 { Dw2Inval, Dw2Inval } };
216
217 /* This struct describes rounding control and SAE in the instruction. */
218 struct RC_Operation
219 {
220 enum rc_type
221 {
222 rne = 0,
223 rd,
224 ru,
225 rz,
226 saeonly
227 } type;
228 int operand;
229 };
230
231 static struct RC_Operation rc_op;
232
233 /* The struct describes masking, applied to OPERAND in the instruction.
234 MASK is a pointer to the corresponding mask register. ZEROING tells
235 whether merging or zeroing mask is used. */
236 struct Mask_Operation
237 {
238 const reg_entry *mask;
239 unsigned int zeroing;
240 /* The operand where this operation is associated. */
241 int operand;
242 };
243
244 static struct Mask_Operation mask_op;
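/* For example, in "vaddps %zmm0, %zmm1, %zmm2{%k1}{z}" MASK points at
   %k1, ZEROING is nonzero because of the {z} suffix, and OPERAND is the
   index of the %zmm2 destination.  */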
245
246 /* The struct describes broadcasting, applied to OPERAND. FACTOR is
247 broadcast factor. */
248 struct Broadcast_Operation
249 {
250 /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}. */
251 int type;
252
253 /* Index of broadcasted operand. */
254 int operand;
255
256 /* Number of bytes to broadcast. */
257 int bytes;
258 };
259
260 static struct Broadcast_Operation broadcast_op;
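/* For example, "vaddps (%rax){1to16}, %zmm1, %zmm2" broadcasts a single
   4-byte element from memory to all 16 dword lanes of the destination;
   OPERAND here is 0, the memory operand.  */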
261
262 /* VEX prefix. */
263 typedef struct
264 {
265 /* VEX prefix is either 2 byte or 3 byte. EVEX is 4 byte. */
266 unsigned char bytes[4];
267 unsigned int length;
268 /* Destination or source register specifier. */
269 const reg_entry *register_specifier;
270 } vex_prefix;
271
272 /* 'md_assemble ()' gathers together information and puts it into a
273 i386_insn. */
274
275 union i386_op
276 {
277 expressionS *disps;
278 expressionS *imms;
279 const reg_entry *regs;
280 };
281
282 enum i386_error
283 {
284 operand_size_mismatch,
285 operand_type_mismatch,
286 register_type_mismatch,
287 number_of_operands_mismatch,
288 invalid_instruction_suffix,
289 bad_imm4,
290 unsupported_with_intel_mnemonic,
291 unsupported_syntax,
292 unsupported,
293 invalid_sib_address,
294 invalid_vsib_address,
295 invalid_vector_register_set,
296 invalid_tmm_register_set,
297 unsupported_vector_index_register,
298 unsupported_broadcast,
299 broadcast_needed,
300 unsupported_masking,
301 mask_not_on_destination,
302 no_default_mask,
303 unsupported_rc_sae,
304 rc_sae_operand_not_last_imm,
305 invalid_register_operand,
306 };
307
308 struct _i386_insn
309 {
310 /* TM holds the template for the insn we're currently assembling. */
311 insn_template tm;
312
313 /* SUFFIX holds the instruction size suffix for byte, word, dword
314 or qword, if given. */
315 char suffix;
316
317 /* OPERANDS gives the number of given operands. */
318 unsigned int operands;
319
320 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
321 of given register, displacement, memory operands and immediate
322 operands. */
323 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
324
325 /* TYPES [i] is the type (see above #defines) which tells us how to
326 use OP[i] for the corresponding operand. */
327 i386_operand_type types[MAX_OPERANDS];
328
329 /* Displacement expression, immediate expression, or register for each
330 operand. */
331 union i386_op op[MAX_OPERANDS];
332
333 /* Flags for operands. */
334 unsigned int flags[MAX_OPERANDS];
335 #define Operand_PCrel 1
336 #define Operand_Mem 2
337
338 /* Relocation type for operand */
339 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
340
341 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
342 the base index byte below. */
343 const reg_entry *base_reg;
344 const reg_entry *index_reg;
345 unsigned int log2_scale_factor;
346
347 /* SEG gives the seg_entries of this insn. They are zero unless
348 explicit segment overrides are given. */
349 const seg_entry *seg[2];
350
351 /* Copied first memory operand string, for re-checking. */
352 char *memop1_string;
353
354 /* PREFIX holds all the given prefix opcodes (usually null).
355 PREFIXES is the number of prefix opcodes. */
356 unsigned int prefixes;
357 unsigned char prefix[MAX_PREFIXES];
358
359 /* Register is in low 3 bits of opcode. */
360 bfd_boolean short_form;
361
362 /* The operand to a branch insn indicates an absolute branch. */
363 bfd_boolean jumpabsolute;
364
365 /* Extended states. */
366 enum
367 {
368 /* Use MMX state. */
369 xstate_mmx = 1 << 0,
370 /* Use XMM state. */
371 xstate_xmm = 1 << 1,
372 /* Use YMM state. */
373 xstate_ymm = 1 << 2 | xstate_xmm,
374 /* Use ZMM state. */
375 xstate_zmm = 1 << 3 | xstate_ymm,
376 /* Use TMM state. */
377 xstate_tmm = 1 << 4,
378 /* Use MASK state. */
379 xstate_mask = 1 << 5
380 } xstate;
381
382 /* Has GOTPC or TLS relocation. */
383 bfd_boolean has_gotpc_tls_reloc;
384
385 /* RM and SIB are the modrm byte and the sib byte where the
386 addressing modes of this insn are encoded. */
387 modrm_byte rm;
388 rex_byte rex;
389 rex_byte vrex;
390 sib_byte sib;
391 vex_prefix vex;
392
393 /* Masking attributes. */
394 struct Mask_Operation *mask;
395
396 /* Rounding control and SAE attributes. */
397 struct RC_Operation *rounding;
398
399 /* Broadcasting attributes. */
400 struct Broadcast_Operation *broadcast;
401
402 /* Compressed disp8*N attribute. */
403 unsigned int memshift;
404
405 /* Prefer load or store in encoding. */
406 enum
407 {
408 dir_encoding_default = 0,
409 dir_encoding_load,
410 dir_encoding_store,
411 dir_encoding_swap
412 } dir_encoding;
413
414 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
415 enum
416 {
417 disp_encoding_default = 0,
418 disp_encoding_8bit,
419 disp_encoding_16bit,
420 disp_encoding_32bit
421 } disp_encoding;
422
423 /* Prefer the REX byte in encoding. */
424 bfd_boolean rex_encoding;
425
426 /* Disable instruction size optimization. */
427 bfd_boolean no_optimize;
428
429 /* How to encode vector instructions. */
430 enum
431 {
432 vex_encoding_default = 0,
433 vex_encoding_vex,
434 vex_encoding_vex3,
435 vex_encoding_evex,
436 vex_encoding_error
437 } vec_encoding;
438
439 /* REP prefix. */
440 const char *rep_prefix;
441
442 /* HLE prefix. */
443 const char *hle_prefix;
444
445 /* Have BND prefix. */
446 const char *bnd_prefix;
447
448 /* Have NOTRACK prefix. */
449 const char *notrack_prefix;
450
451 /* Error message. */
452 enum i386_error error;
453 };
454
455 typedef struct _i386_insn i386_insn;
456
457 /* Link each RC type with the corresponding string that is looked for
458 in the asm source. */
459 struct RC_name
460 {
461 enum rc_type type;
462 const char *name;
463 unsigned int len;
464 };
465
466 static const struct RC_name RC_NamesTable[] =
467 {
468 { rne, STRING_COMMA_LEN ("rn-sae") },
469 { rd, STRING_COMMA_LEN ("rd-sae") },
470 { ru, STRING_COMMA_LEN ("ru-sae") },
471 { rz, STRING_COMMA_LEN ("rz-sae") },
472 { saeonly, STRING_COMMA_LEN ("sae") },
473 };
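/* For example, "vaddpd {rz-sae}, %zmm0, %zmm1, %zmm2" looks up "rz-sae"
   in this table and encodes round-toward-zero with all exceptions
   suppressed in the EVEX prefix.  */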
474
475 /* List of chars besides those in app.c:symbol_chars that can start an
476 operand. Used to prevent the scrubber eating vital white-space. */
477 const char extra_symbol_chars[] = "*%-([{}"
478 #ifdef LEX_AT
479 "@"
480 #endif
481 #ifdef LEX_QM
482 "?"
483 #endif
484 ;
485
486 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
487 && !defined (TE_GNU) \
488 && !defined (TE_LINUX) \
489 && !defined (TE_FreeBSD) \
490 && !defined (TE_DragonFly) \
491 && !defined (TE_NetBSD))
492 /* This array holds the chars that always start a comment. If the
493 pre-processor is disabled, these aren't very useful. The option
494 --divide will remove '/' from this list. */
495 const char *i386_comment_chars = "#/";
496 #define SVR4_COMMENT_CHARS 1
497 #define PREFIX_SEPARATOR '\\'
498
499 #else
500 const char *i386_comment_chars = "#";
501 #define PREFIX_SEPARATOR '/'
502 #endif
503
504 /* This array holds the chars that only start a comment at the beginning of
505 a line. If the line seems to have the form '# 123 filename'
506 .line and .file directives will appear in the pre-processed output.
507 Note that input_file.c hand checks for '#' at the beginning of the
508 first line of the input file. This is because the compiler outputs
509 #NO_APP at the beginning of its output.
510 Also note that comments started like this one will always work if
511 '/' isn't otherwise defined. */
512 const char line_comment_chars[] = "#/";
513
514 const char line_separator_chars[] = ";";
515
516 /* Chars that can be used to separate mant from exp in floating point
517 nums. */
518 const char EXP_CHARS[] = "eE";
519
520 /* Chars that mean this number is a floating point constant
521 As in 0f12.456
522 or 0d1.2345e12. */
523 const char FLT_CHARS[] = "fFdDxX";
524
525 /* Tables for lexical analysis. */
526 static char mnemonic_chars[256];
527 static char register_chars[256];
528 static char operand_chars[256];
529 static char identifier_chars[256];
530 static char digit_chars[256];
531
532 /* Lexical macros. */
533 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
534 #define is_operand_char(x) (operand_chars[(unsigned char) x])
535 #define is_register_char(x) (register_chars[(unsigned char) x])
536 #define is_space_char(x) ((x) == ' ')
537 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
538 #define is_digit_char(x) (digit_chars[(unsigned char) x])
539
540 /* All non-digit non-letter characters that may occur in an operand. */
541 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
542
543 /* md_assemble() always leaves the strings it's passed unaltered. To
544 effect this we maintain a stack of saved characters that we've smashed
545 with '\0's (indicating end of strings for various sub-fields of the
546 assembler instruction). */
547 static char save_stack[32];
548 static char *save_stack_p;
549 #define END_STRING_AND_SAVE(s) \
550 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
551 #define RESTORE_END_STRING(s) \
552 do { *(s) = *--save_stack_p; } while (0)
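/* Typical pairing: END_STRING_AND_SAVE (l) NUL-terminates the token at L
   so it can be looked up as a C string, and RESTORE_END_STRING (l) puts
   the smashed character back once parsing of that sub-field is done.  */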
553
554 /* The instruction we're assembling. */
555 static i386_insn i;
556
557 /* Possible templates for current insn. */
558 static const templates *current_templates;
559
560 /* Per instruction expressionS buffers: max displacements & immediates. */
561 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
562 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
563
564 /* Current operand we are working on. */
565 static int this_operand = -1;
566
567 /* We support four modes (.code16, .code16gcc, .code32 and .code64);
568 the FLAG_CODE variable distinguishes the three operand-size modes. */
569
570 enum flag_code {
571 CODE_32BIT,
572 CODE_16BIT,
573 CODE_64BIT };
574
575 static enum flag_code flag_code;
576 static unsigned int object_64bit;
577 static unsigned int disallow_64bit_reloc;
578 static int use_rela_relocations = 0;
579 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
580 static const char *tls_get_addr;
581
582 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
583 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
584 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
585
586 /* The ELF ABI to use. */
587 enum x86_elf_abi
588 {
589 I386_ABI,
590 X86_64_ABI,
591 X86_64_X32_ABI
592 };
593
594 static enum x86_elf_abi x86_elf_abi = I386_ABI;
595 #endif
596
597 #if defined (TE_PE) || defined (TE_PEP)
598 /* Use big object file format. */
599 static int use_big_obj = 0;
600 #endif
601
602 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
603 /* 1 if generating code for a shared library. */
604 static int shared = 0;
605 #endif
606
607 /* 1 for intel syntax,
608 0 if att syntax. */
609 static int intel_syntax = 0;
610
611 static enum x86_64_isa
612 {
613 amd64 = 1, /* AMD64 ISA. */
614 intel64 /* Intel64 ISA. */
615 } isa64;
616
617 /* 1 for intel mnemonic,
618 0 if att mnemonic. */
619 static int intel_mnemonic = !SYSV386_COMPAT;
620
621 /* 1 if pseudo registers are permitted. */
622 static int allow_pseudo_reg = 0;
623
624 /* 1 if register prefix % not required. */
625 static int allow_naked_reg = 0;
626
627 /* 1 if the assembler should add BND prefix for all control-transferring
628 instructions supporting it, even if this prefix wasn't specified
629 explicitly. */
630 static int add_bnd_prefix = 0;
631
632 /* 1 if pseudo index register, eiz/riz, is allowed. */
633 static int allow_index_reg = 0;
634
635 /* 1 if the assembler should ignore LOCK prefix, even if it was
636 specified explicitly. */
637 static int omit_lock_prefix = 0;
638
639 /* 1 if the assembler should encode lfence, mfence, and sfence as
640 "lock addl $0, (%{re}sp)". */
641 static int avoid_fence = 0;
642
643 /* 1 if lfence should be inserted after every load. */
644 static int lfence_after_load = 0;
645
646 /* Non-zero if lfence should be inserted before indirect branch. */
647 static enum lfence_before_indirect_branch_kind
648 {
649 lfence_branch_none = 0,
650 lfence_branch_register,
651 lfence_branch_memory,
652 lfence_branch_all
653 }
654 lfence_before_indirect_branch;
655
656 /* Non-zero if lfence should be inserted before ret. */
657 static enum lfence_before_ret_kind
658 {
659 lfence_before_ret_none = 0,
660 lfence_before_ret_not,
661 lfence_before_ret_or,
662 lfence_before_ret_shl
663 }
664 lfence_before_ret;
665
666 /* Kind and location of the previous instruction: a .byte-style directive, a prefix, or other. */
667 static struct
668 {
669 segT seg;
670 const char *file;
671 const char *name;
672 unsigned int line;
673 enum last_insn_kind
674 {
675 last_insn_other = 0,
676 last_insn_directive,
677 last_insn_prefix
678 } kind;
679 } last_insn;
680
681 /* 1 if the assembler should generate relax relocations. */
682
683 static int generate_relax_relocations
684 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
685
686 static enum check_kind
687 {
688 check_none = 0,
689 check_warning,
690 check_error
691 }
692 sse_check, operand_check = check_warning;
693
694 /* Non-zero if branches should be aligned within power of 2 boundary. */
695 static int align_branch_power = 0;
696
697 /* Types of branches to align. */
698 enum align_branch_kind
699 {
700 align_branch_none = 0,
701 align_branch_jcc = 1,
702 align_branch_fused = 2,
703 align_branch_jmp = 3,
704 align_branch_call = 4,
705 align_branch_indirect = 5,
706 align_branch_ret = 6
707 };
708
709 /* Type bits of branches to align. */
710 enum align_branch_bit
711 {
712 align_branch_jcc_bit = 1 << align_branch_jcc,
713 align_branch_fused_bit = 1 << align_branch_fused,
714 align_branch_jmp_bit = 1 << align_branch_jmp,
715 align_branch_call_bit = 1 << align_branch_call,
716 align_branch_indirect_bit = 1 << align_branch_indirect,
717 align_branch_ret_bit = 1 << align_branch_ret
718 };
719
720 static unsigned int align_branch = (align_branch_jcc_bit
721 | align_branch_fused_bit
722 | align_branch_jmp_bit);
723
724 /* Types of condition jump used by macro-fusion. */
725 enum mf_jcc_kind
726 {
727 mf_jcc_jo = 0, /* base opcode 0x70 */
728 mf_jcc_jc, /* base opcode 0x72 */
729 mf_jcc_je, /* base opcode 0x74 */
730 mf_jcc_jna, /* base opcode 0x76 */
731 mf_jcc_js, /* base opcode 0x78 */
732 mf_jcc_jp, /* base opcode 0x7a */
733 mf_jcc_jl, /* base opcode 0x7c */
734 mf_jcc_jle, /* base opcode 0x7e */
735 };
736
737 /* Types of compare flag-modifying instructions used by macro-fusion. */
738 enum mf_cmp_kind
739 {
740 mf_cmp_test_and, /* test/cmp */
741 mf_cmp_alu_cmp, /* add/sub/cmp */
742 mf_cmp_incdec /* inc/dec */
743 };
744
745 /* The maximum padding size for fused jcc. A CMP-like instruction can
746 be 9 bytes and a jcc can be 6 bytes. Leave room for prefixes just
747 in case. */
748 #define MAX_FUSED_JCC_PADDING_SIZE 20
749
750 /* The maximum number of prefixes added for an instruction. */
751 static unsigned int align_branch_prefix_size = 5;
752
753 /* Optimization:
754 1. Clear the REX_W bit with register operand if possible.
755 2. Above plus use 128bit vector instruction to clear the full vector
756 register.
757 */
758 static int optimize = 0;
759
760 /* Optimization:
761 1. Clear the REX_W bit with register operand if possible.
762 2. Above plus use 128bit vector instruction to clear the full vector
763 register.
764 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
765 "testb $imm7,%r8".
766 */
767 static int optimize_for_space = 0;
768
769 /* Register prefix used for error message. */
770 static const char *register_prefix = "%";
771
772 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
773 leave, push, and pop instructions so that gcc has the same stack
774 frame as in 32 bit mode. */
775 static char stackop_size = '\0';
776
777 /* Non-zero to optimize code alignment. */
778 int optimize_align_code = 1;
779
780 /* Non-zero to quieten some warnings. */
781 static int quiet_warnings = 0;
782
783 /* CPU name. */
784 static const char *cpu_arch_name = NULL;
785 static char *cpu_sub_arch_name = NULL;
786
787 /* CPU feature flags. */
788 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
789
790 /* If we have selected a cpu we are generating instructions for. */
791 static int cpu_arch_tune_set = 0;
792
793 /* Cpu we are generating instructions for. */
794 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
795
796 /* CPU feature flags of cpu we are generating instructions for. */
797 static i386_cpu_flags cpu_arch_tune_flags;
798
799 /* CPU instruction set architecture used. */
800 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
801
802 /* CPU feature flags of instruction set architecture used. */
803 i386_cpu_flags cpu_arch_isa_flags;
804
805 /* If set, conditional jumps are not automatically promoted to handle
806 larger than a byte offset. */
807 static unsigned int no_cond_jump_promotion = 0;
808
809 /* Encode SSE instructions with VEX prefix. */
810 static unsigned int sse2avx;
811
812 /* Encode scalar AVX instructions with specific vector length. */
813 static enum
814 {
815 vex128 = 0,
816 vex256
817 } avxscalar;
818
819 /* Encode VEX WIG instructions with specific vex.w. */
820 static enum
821 {
822 vexw0 = 0,
823 vexw1
824 } vexwig;
825
826 /* Encode scalar EVEX LIG instructions with specific vector length. */
827 static enum
828 {
829 evexl128 = 0,
830 evexl256,
831 evexl512
832 } evexlig;
833
834 /* Encode EVEX WIG instructions with specific evex.w. */
835 static enum
836 {
837 evexw0 = 0,
838 evexw1
839 } evexwig;
840
841 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
842 static enum rc_type evexrcig = rne;
843
844 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
845 static symbolS *GOT_symbol;
846
847 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
848 unsigned int x86_dwarf2_return_column;
849
850 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
851 int x86_cie_data_alignment;
852
853 /* Interface to relax_segment.
854 There are 3 major relax states for 386 jump insns because the
855 different types of jumps add different sizes to frags when we're
856 figuring out what sort of jump to choose to reach a given label.
857
858 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
859 branches which are handled by md_estimate_size_before_relax() and
860 i386_generic_table_relax_frag(). */
861
862 /* Types. */
863 #define UNCOND_JUMP 0
864 #define COND_JUMP 1
865 #define COND_JUMP86 2
866 #define BRANCH_PADDING 3
867 #define BRANCH_PREFIX 4
868 #define FUSED_JCC_PADDING 5
869
870 /* Sizes. */
871 #define CODE16 1
872 #define SMALL 0
873 #define SMALL16 (SMALL | CODE16)
874 #define BIG 2
875 #define BIG16 (BIG | CODE16)
876
877 #ifndef INLINE
878 #ifdef __GNUC__
879 #define INLINE __inline__
880 #else
881 #define INLINE
882 #endif
883 #endif
884
885 #define ENCODE_RELAX_STATE(type, size) \
886 ((relax_substateT) (((type) << 2) | (size)))
887 #define TYPE_FROM_RELAX_STATE(s) \
888 ((s) >> 2)
889 #define DISP_SIZE_FROM_RELAX_STATE(s) \
890 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
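/* Worked example: ENCODE_RELAX_STATE (COND_JUMP, BIG) is (1 << 2) | 2 = 6;
   TYPE_FROM_RELAX_STATE (6) recovers COND_JUMP, and
   DISP_SIZE_FROM_RELAX_STATE (6) yields the 4-byte displacement size.  */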
891
892 /* This table is used by relax_frag to promote short jumps to long
893 ones where necessary. SMALL (short) jumps may be promoted to BIG
894 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
895 don't allow a short jump in a 32 bit code segment to be promoted to
896 a 16 bit offset jump because it's slower (requires data size
897 prefix), and doesn't work, unless the destination is in the bottom
898 64k of the code segment (The top 16 bits of eip are zeroed). */
899
900 const relax_typeS md_relax_table[] =
901 {
902 /* The fields are:
903 1) most positive reach of this state,
904 2) most negative reach of this state,
905 3) how many bytes this mode will have in the variable part of the frag
906 4) which index into the table to try if we can't fit into this one. */
907
908 /* UNCOND_JUMP states. */
909 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
910 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
911 /* dword jmp adds 4 bytes to frag:
912 0 extra opcode bytes, 4 displacement bytes. */
913 {0, 0, 4, 0},
914 /* word jmp adds 2 bytes to frag:
915 0 extra opcode bytes, 2 displacement bytes. */
916 {0, 0, 2, 0},
917
918 /* COND_JUMP states. */
919 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
920 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
921 /* dword conditionals add 5 bytes to frag:
922 1 extra opcode byte, 4 displacement bytes. */
923 {0, 0, 5, 0},
924 /* word conditionals add 3 bytes to frag:
925 1 extra opcode byte, 2 displacement bytes. */
926 {0, 0, 3, 0},
927
928 /* COND_JUMP86 states. */
929 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
930 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
931 /* dword conditionals add 5 bytes to frag:
932 1 extra opcode byte, 4 displacement bytes. */
933 {0, 0, 5, 0},
934 /* word conditionals add 4 bytes to frag:
935 1 displacement byte and a 3 byte long branch insn. */
936 {0, 0, 4, 0}
937 };
938
939 static const arch_entry cpu_arch[] =
940 {
941 /* Do not replace the first two entries - i386_target_format()
942 relies on them being there in this order. */
943 { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
944 CPU_GENERIC32_FLAGS, 0 },
945 { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
946 CPU_GENERIC64_FLAGS, 0 },
947 { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
948 CPU_NONE_FLAGS, 0 },
949 { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
950 CPU_I186_FLAGS, 0 },
951 { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
952 CPU_I286_FLAGS, 0 },
953 { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
954 CPU_I386_FLAGS, 0 },
955 { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
956 CPU_I486_FLAGS, 0 },
957 { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
958 CPU_I586_FLAGS, 0 },
959 { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
960 CPU_I686_FLAGS, 0 },
961 { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
962 CPU_I586_FLAGS, 0 },
963 { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
964 CPU_PENTIUMPRO_FLAGS, 0 },
965 { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
966 CPU_P2_FLAGS, 0 },
967 { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
968 CPU_P3_FLAGS, 0 },
969 { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
970 CPU_P4_FLAGS, 0 },
971 { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
972 CPU_CORE_FLAGS, 0 },
973 { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
974 CPU_NOCONA_FLAGS, 0 },
975 { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
976 CPU_CORE_FLAGS, 1 },
977 { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
978 CPU_CORE_FLAGS, 0 },
979 { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
980 CPU_CORE2_FLAGS, 1 },
981 { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
982 CPU_CORE2_FLAGS, 0 },
983 { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
984 CPU_COREI7_FLAGS, 0 },
985 { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
986 CPU_L1OM_FLAGS, 0 },
987 { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
988 CPU_K1OM_FLAGS, 0 },
989 { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
990 CPU_IAMCU_FLAGS, 0 },
991 { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
992 CPU_K6_FLAGS, 0 },
993 { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
994 CPU_K6_2_FLAGS, 0 },
995 { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
996 CPU_ATHLON_FLAGS, 0 },
997 { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
998 CPU_K8_FLAGS, 1 },
999 { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
1000 CPU_K8_FLAGS, 0 },
1001 { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
1002 CPU_K8_FLAGS, 0 },
1003 { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
1004 CPU_AMDFAM10_FLAGS, 0 },
1005 { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
1006 CPU_BDVER1_FLAGS, 0 },
1007 { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
1008 CPU_BDVER2_FLAGS, 0 },
1009 { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
1010 CPU_BDVER3_FLAGS, 0 },
1011 { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1012 CPU_BDVER4_FLAGS, 0 },
1013 { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1014 CPU_ZNVER1_FLAGS, 0 },
1015 { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1016 CPU_ZNVER2_FLAGS, 0 },
1017 { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1018 CPU_BTVER1_FLAGS, 0 },
1019 { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1020 CPU_BTVER2_FLAGS, 0 },
1021 { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1022 CPU_8087_FLAGS, 0 },
1023 { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1024 CPU_287_FLAGS, 0 },
1025 { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1026 CPU_387_FLAGS, 0 },
1027 { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1028 CPU_687_FLAGS, 0 },
1029 { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1030 CPU_CMOV_FLAGS, 0 },
1031 { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1032 CPU_FXSR_FLAGS, 0 },
1033 { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1034 CPU_MMX_FLAGS, 0 },
1035 { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1036 CPU_SSE_FLAGS, 0 },
1037 { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1038 CPU_SSE2_FLAGS, 0 },
1039 { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1040 CPU_SSE3_FLAGS, 0 },
1041 { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1042 CPU_SSE4A_FLAGS, 0 },
1043 { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1044 CPU_SSSE3_FLAGS, 0 },
1045 { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1046 CPU_SSE4_1_FLAGS, 0 },
1047 { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1048 CPU_SSE4_2_FLAGS, 0 },
1049 { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1050 CPU_SSE4_2_FLAGS, 0 },
1051 { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1052 CPU_AVX_FLAGS, 0 },
1053 { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1054 CPU_AVX2_FLAGS, 0 },
1055 { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1056 CPU_AVX512F_FLAGS, 0 },
1057 { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1058 CPU_AVX512CD_FLAGS, 0 },
1059 { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1060 CPU_AVX512ER_FLAGS, 0 },
1061 { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1062 CPU_AVX512PF_FLAGS, 0 },
1063 { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1064 CPU_AVX512DQ_FLAGS, 0 },
1065 { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1066 CPU_AVX512BW_FLAGS, 0 },
1067 { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1068 CPU_AVX512VL_FLAGS, 0 },
1069 { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1070 CPU_VMX_FLAGS, 0 },
1071 { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1072 CPU_VMFUNC_FLAGS, 0 },
1073 { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1074 CPU_SMX_FLAGS, 0 },
1075 { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1076 CPU_XSAVE_FLAGS, 0 },
1077 { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1078 CPU_XSAVEOPT_FLAGS, 0 },
1079 { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1080 CPU_XSAVEC_FLAGS, 0 },
1081 { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1082 CPU_XSAVES_FLAGS, 0 },
1083 { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1084 CPU_AES_FLAGS, 0 },
1085 { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1086 CPU_PCLMUL_FLAGS, 0 },
1087 { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1088 CPU_PCLMUL_FLAGS, 1 },
1089 { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1090 CPU_FSGSBASE_FLAGS, 0 },
1091 { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1092 CPU_RDRND_FLAGS, 0 },
1093 { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1094 CPU_F16C_FLAGS, 0 },
1095 { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1096 CPU_BMI2_FLAGS, 0 },
1097 { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1098 CPU_FMA_FLAGS, 0 },
1099 { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1100 CPU_FMA4_FLAGS, 0 },
1101 { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1102 CPU_XOP_FLAGS, 0 },
1103 { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1104 CPU_LWP_FLAGS, 0 },
1105 { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1106 CPU_MOVBE_FLAGS, 0 },
1107 { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1108 CPU_CX16_FLAGS, 0 },
1109 { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1110 CPU_EPT_FLAGS, 0 },
1111 { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1112 CPU_LZCNT_FLAGS, 0 },
1113 { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1114 CPU_POPCNT_FLAGS, 0 },
1115 { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1116 CPU_HLE_FLAGS, 0 },
1117 { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1118 CPU_RTM_FLAGS, 0 },
1119 { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1120 CPU_INVPCID_FLAGS, 0 },
1121 { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1122 CPU_CLFLUSH_FLAGS, 0 },
1123 { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1124 CPU_NOP_FLAGS, 0 },
1125 { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1126 CPU_SYSCALL_FLAGS, 0 },
1127 { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1128 CPU_RDTSCP_FLAGS, 0 },
1129 { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1130 CPU_3DNOW_FLAGS, 0 },
1131 { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1132 CPU_3DNOWA_FLAGS, 0 },
1133 { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1134 CPU_PADLOCK_FLAGS, 0 },
1135 { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1136 CPU_SVME_FLAGS, 1 },
1137 { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1138 CPU_SVME_FLAGS, 0 },
1139 { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1140 CPU_SSE4A_FLAGS, 0 },
1141 { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1142 CPU_ABM_FLAGS, 0 },
1143 { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1144 CPU_BMI_FLAGS, 0 },
1145 { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1146 CPU_TBM_FLAGS, 0 },
1147 { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1148 CPU_ADX_FLAGS, 0 },
1149 { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1150 CPU_RDSEED_FLAGS, 0 },
1151 { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1152 CPU_PRFCHW_FLAGS, 0 },
1153 { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1154 CPU_SMAP_FLAGS, 0 },
1155 { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1156 CPU_MPX_FLAGS, 0 },
1157 { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1158 CPU_SHA_FLAGS, 0 },
1159 { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1160 CPU_CLFLUSHOPT_FLAGS, 0 },
1161 { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1162 CPU_PREFETCHWT1_FLAGS, 0 },
1163 { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1164 CPU_SE1_FLAGS, 0 },
1165 { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1166 CPU_CLWB_FLAGS, 0 },
1167 { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1168 CPU_AVX512IFMA_FLAGS, 0 },
1169 { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1170 CPU_AVX512VBMI_FLAGS, 0 },
1171 { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1172 CPU_AVX512_4FMAPS_FLAGS, 0 },
1173 { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1174 CPU_AVX512_4VNNIW_FLAGS, 0 },
1175 { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1176 CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1177 { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1178 CPU_AVX512_VBMI2_FLAGS, 0 },
1179 { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1180 CPU_AVX512_VNNI_FLAGS, 0 },
1181 { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1182 CPU_AVX512_BITALG_FLAGS, 0 },
1183 { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1184 CPU_CLZERO_FLAGS, 0 },
1185 { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1186 CPU_MWAITX_FLAGS, 0 },
1187 { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1188 CPU_OSPKE_FLAGS, 0 },
1189 { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1190 CPU_RDPID_FLAGS, 0 },
1191 { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1192 CPU_PTWRITE_FLAGS, 0 },
1193 { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1194 CPU_IBT_FLAGS, 0 },
1195 { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1196 CPU_SHSTK_FLAGS, 0 },
1197 { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1198 CPU_GFNI_FLAGS, 0 },
1199 { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1200 CPU_VAES_FLAGS, 0 },
1201 { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1202 CPU_VPCLMULQDQ_FLAGS, 0 },
1203 { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1204 CPU_WBNOINVD_FLAGS, 0 },
1205 { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1206 CPU_PCONFIG_FLAGS, 0 },
1207 { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1208 CPU_WAITPKG_FLAGS, 0 },
1209 { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1210 CPU_CLDEMOTE_FLAGS, 0 },
1211 { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1212 CPU_AMX_INT8_FLAGS, 0 },
1213 { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1214 CPU_AMX_BF16_FLAGS, 0 },
1215 { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1216 CPU_AMX_TILE_FLAGS, 0 },
1217 { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1218 CPU_MOVDIRI_FLAGS, 0 },
1219 { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1220 CPU_MOVDIR64B_FLAGS, 0 },
1221 { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1222 CPU_AVX512_BF16_FLAGS, 0 },
1223 { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1224 CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1225 { STRING_COMMA_LEN (".tdx"), PROCESSOR_UNKNOWN,
1226 CPU_TDX_FLAGS, 0 },
1227 { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1228 CPU_ENQCMD_FLAGS, 0 },
1229 { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1230 CPU_SERIALIZE_FLAGS, 0 },
1231 { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1232 CPU_RDPRU_FLAGS, 0 },
1233 { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1234 CPU_MCOMMIT_FLAGS, 0 },
1235 { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1236 CPU_SEV_ES_FLAGS, 0 },
1237 { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1238 CPU_TSXLDTRK_FLAGS, 0 },
1239 { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
1240 CPU_KL_FLAGS, 0 },
1241 { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
1242 CPU_WIDEKL_FLAGS, 0 },
1243 };
1244
1245 static const noarch_entry cpu_noarch[] =
1246 {
1247 { STRING_COMMA_LEN ("no87"), CPU_ANY_X87_FLAGS },
1248 { STRING_COMMA_LEN ("no287"), CPU_ANY_287_FLAGS },
1249 { STRING_COMMA_LEN ("no387"), CPU_ANY_387_FLAGS },
1250 { STRING_COMMA_LEN ("no687"), CPU_ANY_687_FLAGS },
1251 { STRING_COMMA_LEN ("nocmov"), CPU_ANY_CMOV_FLAGS },
1252 { STRING_COMMA_LEN ("nofxsr"), CPU_ANY_FXSR_FLAGS },
1253 { STRING_COMMA_LEN ("nommx"), CPU_ANY_MMX_FLAGS },
1254 { STRING_COMMA_LEN ("nosse"), CPU_ANY_SSE_FLAGS },
1255 { STRING_COMMA_LEN ("nosse2"), CPU_ANY_SSE2_FLAGS },
1256 { STRING_COMMA_LEN ("nosse3"), CPU_ANY_SSE3_FLAGS },
1257 { STRING_COMMA_LEN ("nosse4a"), CPU_ANY_SSE4A_FLAGS },
1258 { STRING_COMMA_LEN ("nossse3"), CPU_ANY_SSSE3_FLAGS },
1259 { STRING_COMMA_LEN ("nosse4.1"), CPU_ANY_SSE4_1_FLAGS },
1260 { STRING_COMMA_LEN ("nosse4.2"), CPU_ANY_SSE4_2_FLAGS },
1261 { STRING_COMMA_LEN ("nosse4"), CPU_ANY_SSE4_1_FLAGS },
1262 { STRING_COMMA_LEN ("noavx"), CPU_ANY_AVX_FLAGS },
1263 { STRING_COMMA_LEN ("noavx2"), CPU_ANY_AVX2_FLAGS },
1264 { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1265 { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1266 { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1267 { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1268 { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1269 { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1270 { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1271 { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1272 { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1273 { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1274 { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1275 { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1276 { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1277 { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1278 { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1279 { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1280 { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1281 { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
1282 { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
1283 { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
1284 { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1285 { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1286 { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1287 { STRING_COMMA_LEN ("noavx512_vp2intersect"),
1288 CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
1289 { STRING_COMMA_LEN ("notdx"), CPU_ANY_TDX_FLAGS },
1290 { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1291 { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
1292 { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
1293 { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
1294 { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
1295 };
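/* These two tables back the .arch directive: ".arch i686" or
   ".arch .avx2" turns feature sets on via cpu_arch[], while
   ".arch .noavx2" uses cpu_noarch[] to turn the indicated flags
   back off.  */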
1296
1297 #ifdef I386COFF
1298 /* Like s_lcomm_internal in gas/read.c but the alignment string
1299 is allowed to be optional. */
1300
1301 static symbolS *
1302 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1303 {
1304 addressT align = 0;
1305
1306 SKIP_WHITESPACE ();
1307
1308 if (needs_align
1309 && *input_line_pointer == ',')
1310 {
1311 align = parse_align (needs_align - 1);
1312
1313 if (align == (addressT) -1)
1314 return NULL;
1315 }
1316 else
1317 {
1318 if (size >= 8)
1319 align = 3;
1320 else if (size >= 4)
1321 align = 2;
1322 else if (size >= 2)
1323 align = 1;
1324 else
1325 align = 0;
1326 }
1327
1328 bss_alloc (symbolP, size, align);
1329 return symbolP;
1330 }
1331
1332 static void
1333 pe_lcomm (int needs_align)
1334 {
1335 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1336 }
1337 #endif
1338
1339 const pseudo_typeS md_pseudo_table[] =
1340 {
1341 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1342 {"align", s_align_bytes, 0},
1343 #else
1344 {"align", s_align_ptwo, 0},
1345 #endif
1346 {"arch", set_cpu_arch, 0},
1347 #ifndef I386COFF
1348 {"bss", s_bss, 0},
1349 #else
1350 {"lcomm", pe_lcomm, 1},
1351 #endif
1352 {"ffloat", float_cons, 'f'},
1353 {"dfloat", float_cons, 'd'},
1354 {"tfloat", float_cons, 'x'},
1355 {"value", cons, 2},
1356 {"slong", signed_cons, 4},
1357 {"noopt", s_ignore, 0},
1358 {"optim", s_ignore, 0},
1359 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1360 {"code16", set_code_flag, CODE_16BIT},
1361 {"code32", set_code_flag, CODE_32BIT},
1362 #ifdef BFD64
1363 {"code64", set_code_flag, CODE_64BIT},
1364 #endif
1365 {"intel_syntax", set_intel_syntax, 1},
1366 {"att_syntax", set_intel_syntax, 0},
1367 {"intel_mnemonic", set_intel_mnemonic, 1},
1368 {"att_mnemonic", set_intel_mnemonic, 0},
1369 {"allow_index_reg", set_allow_index_reg, 1},
1370 {"disallow_index_reg", set_allow_index_reg, 0},
1371 {"sse_check", set_check, 0},
1372 {"operand_check", set_check, 1},
1373 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1374 {"largecomm", handle_large_common, 0},
1375 #else
1376 {"file", dwarf2_directive_file, 0},
1377 {"loc", dwarf2_directive_loc, 0},
1378 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1379 #endif
1380 #ifdef TE_PE
1381 {"secrel32", pe_directive_secrel, 0},
1382 #endif
1383 {0, 0, 0}
1384 };
1385
1386 /* For interface with expression (). */
1387 extern char *input_line_pointer;
1388
1389 /* Hash table for instruction mnemonic lookup. */
1390 static htab_t op_hash;
1391
1392 /* Hash table for register lookup. */
1393 static htab_t reg_hash;
1394 \f
1395 /* Various efficient no-op patterns for aligning code labels.
1396 Note: Don't try to assemble the instructions in the comments.
1397 0L and 0w are not legal. */
1398 static const unsigned char f32_1[] =
1399 {0x90}; /* nop */
1400 static const unsigned char f32_2[] =
1401 {0x66,0x90}; /* xchg %ax,%ax */
1402 static const unsigned char f32_3[] =
1403 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1404 static const unsigned char f32_4[] =
1405 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1406 static const unsigned char f32_6[] =
1407 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1408 static const unsigned char f32_7[] =
1409 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1410 static const unsigned char f16_3[] =
1411 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1412 static const unsigned char f16_4[] =
1413 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1414 static const unsigned char jump_disp8[] =
1415 {0xeb}; /* jmp disp8 */
1416 static const unsigned char jump32_disp32[] =
1417 {0xe9}; /* jmp disp32 */
1418 static const unsigned char jump16_disp32[] =
1419 {0x66,0xe9}; /* jmp disp32 */
1420 /* 32-bit NOP patterns. */
1421 static const unsigned char *const f32_patt[] = {
1422 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1423 };
1424 /* 16-bit NOP patterns. */
1425 static const unsigned char *const f16_patt[] = {
1426 f32_1, f32_2, f16_3, f16_4
1427 };
1428 /* nopl (%[re]ax) */
1429 static const unsigned char alt_3[] =
1430 {0x0f,0x1f,0x00};
1431 /* nopl 0(%[re]ax) */
1432 static const unsigned char alt_4[] =
1433 {0x0f,0x1f,0x40,0x00};
1434 /* nopl 0(%[re]ax,%[re]ax,1) */
1435 static const unsigned char alt_5[] =
1436 {0x0f,0x1f,0x44,0x00,0x00};
1437 /* nopw 0(%[re]ax,%[re]ax,1) */
1438 static const unsigned char alt_6[] =
1439 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1440 /* nopl 0L(%[re]ax) */
1441 static const unsigned char alt_7[] =
1442 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1443 /* nopl 0L(%[re]ax,%[re]ax,1) */
1444 static const unsigned char alt_8[] =
1445 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1446 /* nopw 0L(%[re]ax,%[re]ax,1) */
1447 static const unsigned char alt_9[] =
1448 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1449 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1450 static const unsigned char alt_10[] =
1451 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1452 /* data16 nopw %cs:0L(%eax,%eax,1) */
1453 static const unsigned char alt_11[] =
1454 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1455 /* 32-bit and 64-bit NOP patterns. */
1456 static const unsigned char *const alt_patt[] = {
1457 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1458 alt_9, alt_10, alt_11
1459 };
1460
1461 /* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
1462 size of a single NOP instruction MAX_SINGLE_NOP_SIZE. */
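/* For example, COUNT == 10 with alt_patt and MAX_SINGLE_NOP_SIZE == 7
   emits one 7-byte "nopl 0L(%[re]ax)" followed by the 3-byte
   "nopl (%[re]ax)" remainder pattern.  */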
1463
1464 static void
1465 i386_output_nops (char *where, const unsigned char *const *patt,
1466 int count, int max_single_nop_size)
1467
1468 {
1469 /* Place the longer NOP first. */
1470 int last;
1471 int offset;
1472 const unsigned char *nops;
1473
1474 if (max_single_nop_size < 1)
1475 {
1476 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1477 max_single_nop_size);
1478 return;
1479 }
1480
1481 nops = patt[max_single_nop_size - 1];
1482
1483 /* Use the smaller one if the requested one isn't available. */
1484 if (nops == NULL)
1485 {
1486 max_single_nop_size--;
1487 nops = patt[max_single_nop_size - 1];
1488 }
1489
1490 last = count % max_single_nop_size;
1491
1492 count -= last;
1493 for (offset = 0; offset < count; offset += max_single_nop_size)
1494 memcpy (where + offset, nops, max_single_nop_size);
1495
1496 if (last)
1497 {
1498 nops = patt[last - 1];
1499 if (nops == NULL)
1500 {
1501 /* Use the smaller one plus one-byte NOP if the needed one
1502 isn't available. */
1503 last--;
1504 nops = patt[last - 1];
1505 memcpy (where + offset, nops, last);
1506 where[offset + last] = *patt[0];
1507 }
1508 else
1509 memcpy (where + offset, nops, last);
1510 }
1511 }
1512
1513 static INLINE int
1514 fits_in_imm7 (offsetT num)
1515 {
1516 return (num & 0x7f) == num;
1517 }
1518
1519 static INLINE int
1520 fits_in_imm31 (offsetT num)
1521 {
1522 return (num & 0x7fffffff) == num;
1523 }
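/* Note the masks above mean only non-negative values fit: fits_in_imm7
   (127) is true, while fits_in_imm7 (128) and fits_in_imm7 (-1) are
   both false.  */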
1524
1525 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1526 single NOP instruction LIMIT. */
1527
1528 void
1529 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1530 {
1531 const unsigned char *const *patt = NULL;
1532 int max_single_nop_size;
1533 /* Maximum number of NOPs before switching to jump over NOPs. */
1534 int max_number_of_nops;
1535
1536 switch (fragP->fr_type)
1537 {
1538 case rs_fill_nop:
1539 case rs_align_code:
1540 break;
1541 case rs_machine_dependent:
1542 /* Allow NOP padding for jumps and calls. */
1543 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1544 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1545 break;
1546 /* Fall through. */
1547 default:
1548 return;
1549 }
1550
1551 /* We need to decide which NOP sequence to use for 32bit and
1552 64bit. When -mtune= is used:
1553
1554 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1555 PROCESSOR_GENERIC32, f32_patt will be used.
1556 2. For the rest, alt_patt will be used.
1557
1558 When -mtune= isn't used, alt_patt will be used if
1559 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1560 be used.
1561
1562 When -march= or .arch is used, we can't use anything beyond
1563 cpu_arch_isa_flags. */
1564
1565 if (flag_code == CODE_16BIT)
1566 {
1567 patt = f16_patt;
1568 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1569 /* Limit number of NOPs to 2 in 16-bit mode. */
1570 max_number_of_nops = 2;
1571 }
1572 else
1573 {
1574 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1575 {
1576 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1577 switch (cpu_arch_tune)
1578 {
1579 case PROCESSOR_UNKNOWN:
1580 /* We use cpu_arch_isa_flags to check if we SHOULD
1581 optimize with nops. */
1582 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1583 patt = alt_patt;
1584 else
1585 patt = f32_patt;
1586 break;
1587 case PROCESSOR_PENTIUM4:
1588 case PROCESSOR_NOCONA:
1589 case PROCESSOR_CORE:
1590 case PROCESSOR_CORE2:
1591 case PROCESSOR_COREI7:
1592 case PROCESSOR_L1OM:
1593 case PROCESSOR_K1OM:
1594 case PROCESSOR_GENERIC64:
1595 case PROCESSOR_K6:
1596 case PROCESSOR_ATHLON:
1597 case PROCESSOR_K8:
1598 case PROCESSOR_AMDFAM10:
1599 case PROCESSOR_BD:
1600 case PROCESSOR_ZNVER:
1601 case PROCESSOR_BT:
1602 patt = alt_patt;
1603 break;
1604 case PROCESSOR_I386:
1605 case PROCESSOR_I486:
1606 case PROCESSOR_PENTIUM:
1607 case PROCESSOR_PENTIUMPRO:
1608 case PROCESSOR_IAMCU:
1609 case PROCESSOR_GENERIC32:
1610 patt = f32_patt;
1611 break;
1612 }
1613 }
1614 else
1615 {
1616 switch (fragP->tc_frag_data.tune)
1617 {
1618 case PROCESSOR_UNKNOWN:
1619 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1620 PROCESSOR_UNKNOWN. */
1621 abort ();
1622 break;
1623
1624 case PROCESSOR_I386:
1625 case PROCESSOR_I486:
1626 case PROCESSOR_PENTIUM:
1627 case PROCESSOR_IAMCU:
1628 case PROCESSOR_K6:
1629 case PROCESSOR_ATHLON:
1630 case PROCESSOR_K8:
1631 case PROCESSOR_AMDFAM10:
1632 case PROCESSOR_BD:
1633 case PROCESSOR_ZNVER:
1634 case PROCESSOR_BT:
1635 case PROCESSOR_GENERIC32:
1636 /* We use cpu_arch_isa_flags to check if we CAN optimize
1637 with nops. */
1638 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1639 patt = alt_patt;
1640 else
1641 patt = f32_patt;
1642 break;
1643 case PROCESSOR_PENTIUMPRO:
1644 case PROCESSOR_PENTIUM4:
1645 case PROCESSOR_NOCONA:
1646 case PROCESSOR_CORE:
1647 case PROCESSOR_CORE2:
1648 case PROCESSOR_COREI7:
1649 case PROCESSOR_L1OM:
1650 case PROCESSOR_K1OM:
1651 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1652 patt = alt_patt;
1653 else
1654 patt = f32_patt;
1655 break;
1656 case PROCESSOR_GENERIC64:
1657 patt = alt_patt;
1658 break;
1659 }
1660 }
1661
1662 if (patt == f32_patt)
1663 {
1664 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1665 /* Limit number of NOPs to 2 for older processors. */
1666 max_number_of_nops = 2;
1667 }
1668 else
1669 {
1670 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1671 /* Limit number of NOPs to 7 for newer processors. */
1672 max_number_of_nops = 7;
1673 }
1674 }
1675
1676 if (limit == 0)
1677 limit = max_single_nop_size;
1678
1679 if (fragP->fr_type == rs_fill_nop)
1680 {
1681 /* Output NOPs for .nop directive. */
1682 if (limit > max_single_nop_size)
1683 {
1684 as_bad_where (fragP->fr_file, fragP->fr_line,
1685 _("invalid single nop size: %d "
1686 "(expect within [0, %d])"),
1687 limit, max_single_nop_size);
1688 return;
1689 }
1690 }
1691 else if (fragP->fr_type != rs_machine_dependent)
1692 fragP->fr_var = count;
1693
1694 if ((count / max_single_nop_size) > max_number_of_nops)
1695 {
1696 /* Generate jump over NOPs. */
1697 offsetT disp = count - 2;
1698 if (fits_in_imm7 (disp))
1699 {
1700 /* Use "jmp disp8" if possible. */
1701 count = disp;
1702 where[0] = jump_disp8[0];
1703 where[1] = count;
1704 where += 2;
1705 }
1706 else
1707 {
1708 unsigned int size_of_jump;
1709
1710 if (flag_code == CODE_16BIT)
1711 {
1712 where[0] = jump16_disp32[0];
1713 where[1] = jump16_disp32[1];
1714 size_of_jump = 2;
1715 }
1716 else
1717 {
1718 where[0] = jump32_disp32[0];
1719 size_of_jump = 1;
1720 }
1721
1722 count -= size_of_jump + 4;
1723 if (!fits_in_imm31 (count))
1724 {
1725 as_bad_where (fragP->fr_file, fragP->fr_line,
1726 _("jump over nop padding out of range"));
1727 return;
1728 }
1729
1730 md_number_to_chars (where + size_of_jump, count, 4);
1731 where += size_of_jump + 4;
1732 }
1733 }
1734
1735 /* Generate multiple NOPs. */
1736 i386_output_nops (where, patt, count, limit);
1737 }
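
/* Worked example (illustrative, not part of the original sources): on an
   older CPU with max_single_nop_size == 4 and max_number_of_nops == 2, a
   request for 64 bytes of padding exceeds 2 * 4 bytes of plain NOPs, so a
   short jump over the filler is emitted instead:

	disp = 64 - 2 = 62	fits in imm7
	where[0] = 0xeb		jmp rel8
	where[1] = 62		skip the 62 filler bytes

   i386_output_nops () then fills the remaining 62 bytes, which are never
   executed.  */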
1738
1739 static INLINE int
1740 operand_type_all_zero (const union i386_operand_type *x)
1741 {
1742 switch (ARRAY_SIZE(x->array))
1743 {
1744 case 3:
1745 if (x->array[2])
1746 return 0;
1747 /* Fall through. */
1748 case 2:
1749 if (x->array[1])
1750 return 0;
1751 /* Fall through. */
1752 case 1:
1753 return !x->array[0];
1754 default:
1755 abort ();
1756 }
1757 }
1758
1759 static INLINE void
1760 operand_type_set (union i386_operand_type *x, unsigned int v)
1761 {
1762 switch (ARRAY_SIZE(x->array))
1763 {
1764 case 3:
1765 x->array[2] = v;
1766 /* Fall through. */
1767 case 2:
1768 x->array[1] = v;
1769 /* Fall through. */
1770 case 1:
1771 x->array[0] = v;
1772 /* Fall through. */
1773 break;
1774 default:
1775 abort ();
1776 }
1777
1778 x->bitfield.class = ClassNone;
1779 x->bitfield.instance = InstanceNone;
1780 }
1781
1782 static INLINE int
1783 operand_type_equal (const union i386_operand_type *x,
1784 const union i386_operand_type *y)
1785 {
1786 switch (ARRAY_SIZE(x->array))
1787 {
1788 case 3:
1789 if (x->array[2] != y->array[2])
1790 return 0;
1791 /* Fall through. */
1792 case 2:
1793 if (x->array[1] != y->array[1])
1794 return 0;
1795 /* Fall through. */
1796 case 1:
1797 return x->array[0] == y->array[0];
1798 break;
1799 default:
1800 abort ();
1801 }
1802 }
1803
1804 static INLINE int
1805 cpu_flags_all_zero (const union i386_cpu_flags *x)
1806 {
1807 switch (ARRAY_SIZE(x->array))
1808 {
1809 case 4:
1810 if (x->array[3])
1811 return 0;
1812 /* Fall through. */
1813 case 3:
1814 if (x->array[2])
1815 return 0;
1816 /* Fall through. */
1817 case 2:
1818 if (x->array[1])
1819 return 0;
1820 /* Fall through. */
1821 case 1:
1822 return !x->array[0];
1823 default:
1824 abort ();
1825 }
1826 }
1827
1828 static INLINE int
1829 cpu_flags_equal (const union i386_cpu_flags *x,
1830 const union i386_cpu_flags *y)
1831 {
1832 switch (ARRAY_SIZE(x->array))
1833 {
1834 case 4:
1835 if (x->array[3] != y->array[3])
1836 return 0;
1837 /* Fall through. */
1838 case 3:
1839 if (x->array[2] != y->array[2])
1840 return 0;
1841 /* Fall through. */
1842 case 2:
1843 if (x->array[1] != y->array[1])
1844 return 0;
1845 /* Fall through. */
1846 case 1:
1847 return x->array[0] == y->array[0];
1848 break;
1849 default:
1850 abort ();
1851 }
1852 }
1853
1854 static INLINE int
1855 cpu_flags_check_cpu64 (i386_cpu_flags f)
1856 {
1857 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1858 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1859 }
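
/* For instance (illustrative): a template carrying CpuNo64, such as the one
   for "pusha", fails this check in 64-bit mode, while a Cpu64-only template,
   e.g. the one for "swapgs", fails it outside of 64-bit mode.  All other
   combinations pass.  */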
1860
1861 static INLINE i386_cpu_flags
1862 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1863 {
1864 switch (ARRAY_SIZE (x.array))
1865 {
1866 case 4:
1867 x.array [3] &= y.array [3];
1868 /* Fall through. */
1869 case 3:
1870 x.array [2] &= y.array [2];
1871 /* Fall through. */
1872 case 2:
1873 x.array [1] &= y.array [1];
1874 /* Fall through. */
1875 case 1:
1876 x.array [0] &= y.array [0];
1877 break;
1878 default:
1879 abort ();
1880 }
1881 return x;
1882 }
1883
1884 static INLINE i386_cpu_flags
1885 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1886 {
1887 switch (ARRAY_SIZE (x.array))
1888 {
1889 case 4:
1890 x.array [3] |= y.array [3];
1891 /* Fall through. */
1892 case 3:
1893 x.array [2] |= y.array [2];
1894 /* Fall through. */
1895 case 2:
1896 x.array [1] |= y.array [1];
1897 /* Fall through. */
1898 case 1:
1899 x.array [0] |= y.array [0];
1900 break;
1901 default:
1902 abort ();
1903 }
1904 return x;
1905 }
1906
1907 static INLINE i386_cpu_flags
1908 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1909 {
1910 switch (ARRAY_SIZE (x.array))
1911 {
1912 case 4:
1913 x.array [3] &= ~y.array [3];
1914 /* Fall through. */
1915 case 3:
1916 x.array [2] &= ~y.array [2];
1917 /* Fall through. */
1918 case 2:
1919 x.array [1] &= ~y.array [1];
1920 /* Fall through. */
1921 case 1:
1922 x.array [0] &= ~y.array [0];
1923 break;
1924 default:
1925 abort ();
1926 }
1927 return x;
1928 }
1929
1930 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1931
1932 #define CPU_FLAGS_ARCH_MATCH 0x1
1933 #define CPU_FLAGS_64BIT_MATCH 0x2
1934
1935 #define CPU_FLAGS_PERFECT_MATCH \
1936 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1937
1938 /* Return CPU flags match bits. */
1939
1940 static int
1941 cpu_flags_match (const insn_template *t)
1942 {
1943 i386_cpu_flags x = t->cpu_flags;
1944 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1945
1946 x.bitfield.cpu64 = 0;
1947 x.bitfield.cpuno64 = 0;
1948
1949 if (cpu_flags_all_zero (&x))
1950 {
1951 /* This instruction is available on all archs. */
1952 match |= CPU_FLAGS_ARCH_MATCH;
1953 }
1954 else
1955 {
1956 /* This instruction is available only on some archs. */
1957 i386_cpu_flags cpu = cpu_arch_flags;
1958
1959 /* AVX512VL is not a standalone feature - match it and then strip it. */
1960 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1961 return match;
1962 x.bitfield.cpuavx512vl = 0;
1963
1964 cpu = cpu_flags_and (x, cpu);
1965 if (!cpu_flags_all_zero (&cpu))
1966 {
1967 if (x.bitfield.cpuavx)
1968 {
1969 /* We need to check a few extra flags with AVX. */
1970 if (cpu.bitfield.cpuavx
1971 && (!t->opcode_modifier.sse2avx
1972 || (sse2avx && !i.prefix[DATA_PREFIX]))
1973 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1974 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1975 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1976 match |= CPU_FLAGS_ARCH_MATCH;
1977 }
1978 else if (x.bitfield.cpuavx512f)
1979 {
1980 /* We need to check a few extra flags with AVX512F. */
1981 if (cpu.bitfield.cpuavx512f
1982 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1983 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1984 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1985 match |= CPU_FLAGS_ARCH_MATCH;
1986 }
1987 else
1988 match |= CPU_FLAGS_ARCH_MATCH;
1989 }
1990 }
1991 return match;
1992 }
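
/* Example (illustrative): for a template flagged CpuAVX2, assembling in
   64-bit mode with ".arch .avx2" in effect yields
   CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH, i.e. a perfect match.
   After ".arch .noavx2" only CPU_FLAGS_64BIT_MATCH can be set, so callers
   that demand CPU_FLAGS_PERFECT_MATCH reject the template.  */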
1993
1994 static INLINE i386_operand_type
1995 operand_type_and (i386_operand_type x, i386_operand_type y)
1996 {
1997 if (x.bitfield.class != y.bitfield.class)
1998 x.bitfield.class = ClassNone;
1999 if (x.bitfield.instance != y.bitfield.instance)
2000 x.bitfield.instance = InstanceNone;
2001
2002 switch (ARRAY_SIZE (x.array))
2003 {
2004 case 3:
2005 x.array [2] &= y.array [2];
2006 /* Fall through. */
2007 case 2:
2008 x.array [1] &= y.array [1];
2009 /* Fall through. */
2010 case 1:
2011 x.array [0] &= y.array [0];
2012 break;
2013 default:
2014 abort ();
2015 }
2016 return x;
2017 }
2018
2019 static INLINE i386_operand_type
2020 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2021 {
2022 gas_assert (y.bitfield.class == ClassNone);
2023 gas_assert (y.bitfield.instance == InstanceNone);
2024
2025 switch (ARRAY_SIZE (x.array))
2026 {
2027 case 3:
2028 x.array [2] &= ~y.array [2];
2029 /* Fall through. */
2030 case 2:
2031 x.array [1] &= ~y.array [1];
2032 /* Fall through. */
2033 case 1:
2034 x.array [0] &= ~y.array [0];
2035 break;
2036 default:
2037 abort ();
2038 }
2039 return x;
2040 }
2041
2042 static INLINE i386_operand_type
2043 operand_type_or (i386_operand_type x, i386_operand_type y)
2044 {
2045 gas_assert (x.bitfield.class == ClassNone ||
2046 y.bitfield.class == ClassNone ||
2047 x.bitfield.class == y.bitfield.class);
2048 gas_assert (x.bitfield.instance == InstanceNone ||
2049 y.bitfield.instance == InstanceNone ||
2050 x.bitfield.instance == y.bitfield.instance);
2051
2052 switch (ARRAY_SIZE (x.array))
2053 {
2054 case 3:
2055 x.array [2] |= y.array [2];
2056 /* Fall through. */
2057 case 2:
2058 x.array [1] |= y.array [1];
2059 /* Fall through. */
2060 case 1:
2061 x.array [0] |= y.array [0];
2062 break;
2063 default:
2064 abort ();
2065 }
2066 return x;
2067 }
2068
2069 static INLINE i386_operand_type
2070 operand_type_xor (i386_operand_type x, i386_operand_type y)
2071 {
2072 gas_assert (y.bitfield.class == ClassNone);
2073 gas_assert (y.bitfield.instance == InstanceNone);
2074
2075 switch (ARRAY_SIZE (x.array))
2076 {
2077 case 3:
2078 x.array [2] ^= y.array [2];
2079 /* Fall through. */
2080 case 2:
2081 x.array [1] ^= y.array [1];
2082 /* Fall through. */
2083 case 1:
2084 x.array [0] ^= y.array [0];
2085 break;
2086 default:
2087 abort ();
2088 }
2089 return x;
2090 }
2091
2092 static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
2093 static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
2094 static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
2095 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
2096 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
2097 static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
2098 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
2099 static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
2100 static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
2101 static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
2102 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
2103 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
2104 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
2105 static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
2106 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
2107 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
2108 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2109
2110 enum operand_type
2111 {
2112 reg,
2113 imm,
2114 disp,
2115 anymem
2116 };
2117
2118 static INLINE int
2119 operand_type_check (i386_operand_type t, enum operand_type c)
2120 {
2121 switch (c)
2122 {
2123 case reg:
2124 return t.bitfield.class == Reg;
2125
2126 case imm:
2127 return (t.bitfield.imm8
2128 || t.bitfield.imm8s
2129 || t.bitfield.imm16
2130 || t.bitfield.imm32
2131 || t.bitfield.imm32s
2132 || t.bitfield.imm64);
2133
2134 case disp:
2135 return (t.bitfield.disp8
2136 || t.bitfield.disp16
2137 || t.bitfield.disp32
2138 || t.bitfield.disp32s
2139 || t.bitfield.disp64);
2140
2141 case anymem:
2142 return (t.bitfield.disp8
2143 || t.bitfield.disp16
2144 || t.bitfield.disp32
2145 || t.bitfield.disp32s
2146 || t.bitfield.disp64
2147 || t.bitfield.baseindex);
2148
2149 default:
2150 abort ();
2151 }
2152
2153 return 0;
2154 }
2155
2156 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2157 between operand GIVEN and operand WANTED for instruction template T. */
2158
2159 static INLINE int
2160 match_operand_size (const insn_template *t, unsigned int wanted,
2161 unsigned int given)
2162 {
2163 return !((i.types[given].bitfield.byte
2164 && !t->operand_types[wanted].bitfield.byte)
2165 || (i.types[given].bitfield.word
2166 && !t->operand_types[wanted].bitfield.word)
2167 || (i.types[given].bitfield.dword
2168 && !t->operand_types[wanted].bitfield.dword)
2169 || (i.types[given].bitfield.qword
2170 && !t->operand_types[wanted].bitfield.qword)
2171 || (i.types[given].bitfield.tbyte
2172 && !t->operand_types[wanted].bitfield.tbyte));
2173 }
2174
2175 /* Return 1 if there is no conflict in SIMD register between operand
2176 GIVEN and operand WANTED for instruction template T. */
2177
2178 static INLINE int
2179 match_simd_size (const insn_template *t, unsigned int wanted,
2180 unsigned int given)
2181 {
2182 return !((i.types[given].bitfield.xmmword
2183 && !t->operand_types[wanted].bitfield.xmmword)
2184 || (i.types[given].bitfield.ymmword
2185 && !t->operand_types[wanted].bitfield.ymmword)
2186 || (i.types[given].bitfield.zmmword
2187 && !t->operand_types[wanted].bitfield.zmmword)
2188 || (i.types[given].bitfield.tmmword
2189 && !t->operand_types[wanted].bitfield.tmmword));
2190 }
2191
2192 /* Return 1 if there is no conflict in any size between operand GIVEN
2193 and operand WANTED for instruction template T. */
2194
2195 static INLINE int
2196 match_mem_size (const insn_template *t, unsigned int wanted,
2197 unsigned int given)
2198 {
2199 return (match_operand_size (t, wanted, given)
2200 && !((i.types[given].bitfield.unspecified
2201 && !i.broadcast
2202 && !t->operand_types[wanted].bitfield.unspecified)
2203 || (i.types[given].bitfield.fword
2204 && !t->operand_types[wanted].bitfield.fword)
2205 /* For scalar opcode templates to allow register and memory
2206 operands at the same time, some special casing is needed
2207 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2208 down-conversion vpmov*. */
2209 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2210 && t->operand_types[wanted].bitfield.byte
2211 + t->operand_types[wanted].bitfield.word
2212 + t->operand_types[wanted].bitfield.dword
2213 + t->operand_types[wanted].bitfield.qword
2214 > !!t->opcode_modifier.broadcast)
2215 ? (i.types[given].bitfield.xmmword
2216 || i.types[given].bitfield.ymmword
2217 || i.types[given].bitfield.zmmword)
2218 : !match_simd_size(t, wanted, given))));
2219 }
2220
2221 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2222 operands for instruction template T, and it has MATCH_REVERSE set if there
2223 is no size conflict on any operands for the template with operands reversed
2224 (and the template allows for reversing in the first place). */
2225
2226 #define MATCH_STRAIGHT 1
2227 #define MATCH_REVERSE 2
2228
2229 static INLINE unsigned int
2230 operand_size_match (const insn_template *t)
2231 {
2232 unsigned int j, match = MATCH_STRAIGHT;
2233
2234 /* Don't check non-absolute jump instructions. */
2235 if (t->opcode_modifier.jump
2236 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2237 return match;
2238
2239 /* Check memory and accumulator operand size. */
2240 for (j = 0; j < i.operands; j++)
2241 {
2242 if (i.types[j].bitfield.class != Reg
2243 && i.types[j].bitfield.class != RegSIMD
2244 && t->opcode_modifier.anysize)
2245 continue;
2246
2247 if (t->operand_types[j].bitfield.class == Reg
2248 && !match_operand_size (t, j, j))
2249 {
2250 match = 0;
2251 break;
2252 }
2253
2254 if (t->operand_types[j].bitfield.class == RegSIMD
2255 && !match_simd_size (t, j, j))
2256 {
2257 match = 0;
2258 break;
2259 }
2260
2261 if (t->operand_types[j].bitfield.instance == Accum
2262 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2263 {
2264 match = 0;
2265 break;
2266 }
2267
2268 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2269 {
2270 match = 0;
2271 break;
2272 }
2273 }
2274
2275 if (!t->opcode_modifier.d)
2276 {
2277 mismatch:
2278 if (!match)
2279 i.error = operand_size_mismatch;
2280 return match;
2281 }
2282
2283 /* Check reverse. */
2284 gas_assert (i.operands >= 2 && i.operands <= 3);
2285
2286 for (j = 0; j < i.operands; j++)
2287 {
2288 unsigned int given = i.operands - j - 1;
2289
2290 if (t->operand_types[j].bitfield.class == Reg
2291 && !match_operand_size (t, j, given))
2292 goto mismatch;
2293
2294 if (t->operand_types[j].bitfield.class == RegSIMD
2295 && !match_simd_size (t, j, given))
2296 goto mismatch;
2297
2298 if (t->operand_types[j].bitfield.instance == Accum
2299 && (!match_operand_size (t, j, given)
2300 || !match_simd_size (t, j, given)))
2301 goto mismatch;
2302
2303 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2304 goto mismatch;
2305 }
2306
2307 return match | MATCH_REVERSE;
2308 }
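
/* Example (illustrative): for "mov %eax, %ebx" against the reg->reg "mov"
   template, the straight check passes, and since that template carries the
   D (direction) modifier the reversed operand order is checked as well, so
   the function returns MATCH_STRAIGHT | MATCH_REVERSE.  */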
2309
2310 static INLINE int
2311 operand_type_match (i386_operand_type overlap,
2312 i386_operand_type given)
2313 {
2314 i386_operand_type temp = overlap;
2315
2316 temp.bitfield.unspecified = 0;
2317 temp.bitfield.byte = 0;
2318 temp.bitfield.word = 0;
2319 temp.bitfield.dword = 0;
2320 temp.bitfield.fword = 0;
2321 temp.bitfield.qword = 0;
2322 temp.bitfield.tbyte = 0;
2323 temp.bitfield.xmmword = 0;
2324 temp.bitfield.ymmword = 0;
2325 temp.bitfield.zmmword = 0;
2326 temp.bitfield.tmmword = 0;
2327 if (operand_type_all_zero (&temp))
2328 goto mismatch;
2329
2330 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2331 return 1;
2332
2333 mismatch:
2334 i.error = operand_type_mismatch;
2335 return 0;
2336 }
2337
2338 /* If the given types g0 and g1 are registers, they must be of the same
2339 type unless the expected operand type register overlap is null.
2340 Some Intel syntax memory operand size checking also happens here. */
2341
2342 static INLINE int
2343 operand_type_register_match (i386_operand_type g0,
2344 i386_operand_type t0,
2345 i386_operand_type g1,
2346 i386_operand_type t1)
2347 {
2348 if (g0.bitfield.class != Reg
2349 && g0.bitfield.class != RegSIMD
2350 && (!operand_type_check (g0, anymem)
2351 || g0.bitfield.unspecified
2352 || (t0.bitfield.class != Reg
2353 && t0.bitfield.class != RegSIMD)))
2354 return 1;
2355
2356 if (g1.bitfield.class != Reg
2357 && g1.bitfield.class != RegSIMD
2358 && (!operand_type_check (g1, anymem)
2359 || g1.bitfield.unspecified
2360 || (t1.bitfield.class != Reg
2361 && t1.bitfield.class != RegSIMD)))
2362 return 1;
2363
2364 if (g0.bitfield.byte == g1.bitfield.byte
2365 && g0.bitfield.word == g1.bitfield.word
2366 && g0.bitfield.dword == g1.bitfield.dword
2367 && g0.bitfield.qword == g1.bitfield.qword
2368 && g0.bitfield.xmmword == g1.bitfield.xmmword
2369 && g0.bitfield.ymmword == g1.bitfield.ymmword
2370 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2371 return 1;
2372
2373 if (!(t0.bitfield.byte & t1.bitfield.byte)
2374 && !(t0.bitfield.word & t1.bitfield.word)
2375 && !(t0.bitfield.dword & t1.bitfield.dword)
2376 && !(t0.bitfield.qword & t1.bitfield.qword)
2377 && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2378 && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2379 && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2380 return 1;
2381
2382 i.error = register_type_mismatch;
2383
2384 return 0;
2385 }
2386
2387 static INLINE unsigned int
2388 register_number (const reg_entry *r)
2389 {
2390 unsigned int nr = r->reg_num;
2391
2392 if (r->reg_flags & RegRex)
2393 nr += 8;
2394
2395 if (r->reg_flags & RegVRex)
2396 nr += 16;
2397
2398 return nr;
2399 }
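
/* Examples (illustrative): %r9 is entry 1 of the REX-extended set, so
   reg_num 1 plus RegRex gives 9; %xmm24 has reg_num 0 with both RegRex and
   RegVRex set, giving 0 + 8 + 16 = 24.  */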
2400
2401 static INLINE unsigned int
2402 mode_from_disp_size (i386_operand_type t)
2403 {
2404 if (t.bitfield.disp8)
2405 return 1;
2406 else if (t.bitfield.disp16
2407 || t.bitfield.disp32
2408 || t.bitfield.disp32s)
2409 return 2;
2410 else
2411 return 0;
2412 }
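
/* The value returned here is the ModRM.mod field to use: 1 selects a disp8,
   2 a disp16/disp32, and 0 means no displacement bytes at all.  */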
2413
2414 static INLINE int
2415 fits_in_signed_byte (addressT num)
2416 {
2417 return num + 0x80 <= 0xff;
2418 }
2419
2420 static INLINE int
2421 fits_in_unsigned_byte (addressT num)
2422 {
2423 return num <= 0xff;
2424 }
2425
2426 static INLINE int
2427 fits_in_unsigned_word (addressT num)
2428 {
2429 return num <= 0xffff;
2430 }
2431
2432 static INLINE int
2433 fits_in_signed_word (addressT num)
2434 {
2435 return num + 0x8000 <= 0xffff;
2436 }
2437
2438 static INLINE int
2439 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2440 {
2441 #ifndef BFD64
2442 return 1;
2443 #else
2444 return num + 0x80000000 <= 0xffffffff;
2445 #endif
2446 } /* fits_in_signed_long() */
2447
2448 static INLINE int
2449 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2450 {
2451 #ifndef BFD64
2452 return 1;
2453 #else
2454 return num <= 0xffffffff;
2455 #endif
2456 } /* fits_in_unsigned_long() */
2457
2458 static INLINE int
2459 fits_in_disp8 (offsetT num)
2460 {
2461 int shift = i.memshift;
2462 unsigned int mask;
2463
2464 if (shift == -1)
2465 abort ();
2466
2467 mask = (1 << shift) - 1;
2468
2469 /* Return 0 if NUM isn't properly aligned. */
2470 if ((num & mask))
2471 return 0;
2472
2473 /* Check if NUM will fit in 8bit after shift. */
2474 return fits_in_signed_byte (num >> shift);
2475 }
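
/* Worked example (illustrative) of EVEX disp8*N compression: with
   i.memshift == 6 (a full 64-byte ZMM memory operand), a displacement of
   256 is accepted because 256 & 63 == 0 and 256 >> 6 == 4 fits in a signed
   byte, so a single displacement byte holding 4 is emitted.  A displacement
   of 32 fails the alignment mask and falls back to disp32.  */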
2476
2477 static INLINE int
2478 fits_in_imm4 (offsetT num)
2479 {
2480 return (num & 0xf) == num;
2481 }
2482
2483 static i386_operand_type
2484 smallest_imm_type (offsetT num)
2485 {
2486 i386_operand_type t;
2487
2488 operand_type_set (&t, 0);
2489 t.bitfield.imm64 = 1;
2490
2491 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2492 {
2493 /* This code is disabled on the 486 because all the Imm1 forms
2494 in the opcode table are slower on the i486. They're the
2495 versions with the implicitly specified single-position
2496 displacement, which has another syntax if you really want to
2497 use that form. */
2498 t.bitfield.imm1 = 1;
2499 t.bitfield.imm8 = 1;
2500 t.bitfield.imm8s = 1;
2501 t.bitfield.imm16 = 1;
2502 t.bitfield.imm32 = 1;
2503 t.bitfield.imm32s = 1;
2504 }
2505 else if (fits_in_signed_byte (num))
2506 {
2507 t.bitfield.imm8 = 1;
2508 t.bitfield.imm8s = 1;
2509 t.bitfield.imm16 = 1;
2510 t.bitfield.imm32 = 1;
2511 t.bitfield.imm32s = 1;
2512 }
2513 else if (fits_in_unsigned_byte (num))
2514 {
2515 t.bitfield.imm8 = 1;
2516 t.bitfield.imm16 = 1;
2517 t.bitfield.imm32 = 1;
2518 t.bitfield.imm32s = 1;
2519 }
2520 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2521 {
2522 t.bitfield.imm16 = 1;
2523 t.bitfield.imm32 = 1;
2524 t.bitfield.imm32s = 1;
2525 }
2526 else if (fits_in_signed_long (num))
2527 {
2528 t.bitfield.imm32 = 1;
2529 t.bitfield.imm32s = 1;
2530 }
2531 else if (fits_in_unsigned_long (num))
2532 t.bitfield.imm32 = 1;
2533
2534 return t;
2535 }
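
/* Examples (illustrative): -1 fits in a signed byte, so imm8, imm8s, imm16,
   imm32 and imm32s are all set (imm64 is always set); 0x80 only fits in an
   unsigned byte, so imm8s is left clear; 0x80000000 fits in an unsigned but
   not a signed long, leaving just imm32 (and imm64).  */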
2536
2537 static offsetT
2538 offset_in_range (offsetT val, int size)
2539 {
2540 addressT mask;
2541
2542 switch (size)
2543 {
2544 case 1: mask = ((addressT) 1 << 8) - 1; break;
2545 case 2: mask = ((addressT) 1 << 16) - 1; break;
2546 case 4: mask = ((addressT) 2 << 31) - 1; break;
2547 #ifdef BFD64
2548 case 8: mask = ((addressT) 2 << 63) - 1; break;
2549 #endif
2550 default: abort ();
2551 }
2552
2553 if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2554 {
2555 char buf1[40], buf2[40];
2556
2557 sprint_value (buf1, val);
2558 sprint_value (buf2, val & mask);
2559 as_warn (_("%s shortened to %s"), buf1, buf2);
2560 }
2561 return val & mask;
2562 }
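
/* Example (illustrative): offset_in_range (0x1ff, 1) masks with 0xff; since
   0x1ff & ~0xff is neither 0 nor ~0xff, a "shortened to" warning is issued
   and 0xff is returned.  A value of -1 passes silently because all of the
   discarded bits are ones.  */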
2563
2564 enum PREFIX_GROUP
2565 {
2566 PREFIX_EXIST = 0,
2567 PREFIX_LOCK,
2568 PREFIX_REP,
2569 PREFIX_DS,
2570 PREFIX_OTHER
2571 };
2572
2573 /* Returns
2574 a. PREFIX_EXIST if attempting to add a prefix where one from the
2575 same class already exists.
2576 b. PREFIX_LOCK if lock prefix is added.
2577 c. PREFIX_REP if rep/repne prefix is added.
2578 d. PREFIX_DS if ds prefix is added.
2579 e. PREFIX_OTHER if other prefix is added.
2580 */
2581
2582 static enum PREFIX_GROUP
2583 add_prefix (unsigned int prefix)
2584 {
2585 enum PREFIX_GROUP ret = PREFIX_OTHER;
2586 unsigned int q;
2587
2588 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2589 && flag_code == CODE_64BIT)
2590 {
2591 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2592 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2593 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2594 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2595 ret = PREFIX_EXIST;
2596 q = REX_PREFIX;
2597 }
2598 else
2599 {
2600 switch (prefix)
2601 {
2602 default:
2603 abort ();
2604
2605 case DS_PREFIX_OPCODE:
2606 ret = PREFIX_DS;
2607 /* Fall through. */
2608 case CS_PREFIX_OPCODE:
2609 case ES_PREFIX_OPCODE:
2610 case FS_PREFIX_OPCODE:
2611 case GS_PREFIX_OPCODE:
2612 case SS_PREFIX_OPCODE:
2613 q = SEG_PREFIX;
2614 break;
2615
2616 case REPNE_PREFIX_OPCODE:
2617 case REPE_PREFIX_OPCODE:
2618 q = REP_PREFIX;
2619 ret = PREFIX_REP;
2620 break;
2621
2622 case LOCK_PREFIX_OPCODE:
2623 q = LOCK_PREFIX;
2624 ret = PREFIX_LOCK;
2625 break;
2626
2627 case FWAIT_OPCODE:
2628 q = WAIT_PREFIX;
2629 break;
2630
2631 case ADDR_PREFIX_OPCODE:
2632 q = ADDR_PREFIX;
2633 break;
2634
2635 case DATA_PREFIX_OPCODE:
2636 q = DATA_PREFIX;
2637 break;
2638 }
2639 if (i.prefix[q] != 0)
2640 ret = PREFIX_EXIST;
2641 }
2642
2643 if (ret)
2644 {
2645 if (!i.prefix[q])
2646 ++i.prefixes;
2647 i.prefix[q] |= prefix;
2648 }
2649 else
2650 as_bad (_("same type of prefix used twice"));
2651
2652 return ret;
2653 }
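
/* Examples (illustrative): in 64-bit mode distinct REX bits accumulate, so
   adding REX_W and then REX_B merges both into i.prefix[REX_PREFIX].
   Adding DATA_PREFIX_OPCODE (0x66) twice, however, triggers the "same type
   of prefix used twice" diagnostic, since the second call finds
   i.prefix[DATA_PREFIX] already set.  */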
2654
2655 static void
2656 update_code_flag (int value, int check)
2657 {
2658 PRINTF_LIKE ((*as_error));
2659
2660 flag_code = (enum flag_code) value;
2661 if (flag_code == CODE_64BIT)
2662 {
2663 cpu_arch_flags.bitfield.cpu64 = 1;
2664 cpu_arch_flags.bitfield.cpuno64 = 0;
2665 }
2666 else
2667 {
2668 cpu_arch_flags.bitfield.cpu64 = 0;
2669 cpu_arch_flags.bitfield.cpuno64 = 1;
2670 }
2671 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2672 {
2673 if (check)
2674 as_error = as_fatal;
2675 else
2676 as_error = as_bad;
2677 (*as_error) (_("64bit mode not supported on `%s'."),
2678 cpu_arch_name ? cpu_arch_name : default_arch);
2679 }
2680 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2681 {
2682 if (check)
2683 as_error = as_fatal;
2684 else
2685 as_error = as_bad;
2686 (*as_error) (_("32bit mode not supported on `%s'."),
2687 cpu_arch_name ? cpu_arch_name : default_arch);
2688 }
2689 stackop_size = '\0';
2690 }
2691
2692 static void
2693 set_code_flag (int value)
2694 {
2695 update_code_flag (value, 0);
2696 }
2697
2698 static void
2699 set_16bit_gcc_code_flag (int new_code_flag)
2700 {
2701 flag_code = (enum flag_code) new_code_flag;
2702 if (flag_code != CODE_16BIT)
2703 abort ();
2704 cpu_arch_flags.bitfield.cpu64 = 0;
2705 cpu_arch_flags.bitfield.cpuno64 = 1;
2706 stackop_size = LONG_MNEM_SUFFIX;
2707 }
2708
2709 static void
2710 set_intel_syntax (int syntax_flag)
2711 {
2712 /* Find out if register prefixing is specified. */
2713 int ask_naked_reg = 0;
2714
2715 SKIP_WHITESPACE ();
2716 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2717 {
2718 char *string;
2719 int e = get_symbol_name (&string);
2720
2721 if (strcmp (string, "prefix") == 0)
2722 ask_naked_reg = 1;
2723 else if (strcmp (string, "noprefix") == 0)
2724 ask_naked_reg = -1;
2725 else
2726 as_bad (_("bad argument to syntax directive."));
2727 (void) restore_line_pointer (e);
2728 }
2729 demand_empty_rest_of_line ();
2730
2731 intel_syntax = syntax_flag;
2732
2733 if (ask_naked_reg == 0)
2734 allow_naked_reg = (intel_syntax
2735 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2736 else
2737 allow_naked_reg = (ask_naked_reg < 0);
2738
2739 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2740
2741 identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2742 identifier_chars['$'] = intel_syntax ? '$' : 0;
2743 register_prefix = allow_naked_reg ? "" : "%";
2744 }
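
/* Directive usage (illustrative):

	.intel_syntax noprefix	# eax, DWORD PTR [ebx+4]; no '%' on registers
	.intel_syntax prefix	# Intel syntax, but registers keep '%'
	.att_syntax prefix	# back to the default AT&T syntax

   With no argument, naked registers are allowed only when the target's
   symbol leading char makes them unambiguous.  */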
2745
2746 static void
2747 set_intel_mnemonic (int mnemonic_flag)
2748 {
2749 intel_mnemonic = mnemonic_flag;
2750 }
2751
2752 static void
2753 set_allow_index_reg (int flag)
2754 {
2755 allow_index_reg = flag;
2756 }
2757
2758 static void
2759 set_check (int what)
2760 {
2761 enum check_kind *kind;
2762 const char *str;
2763
2764 if (what)
2765 {
2766 kind = &operand_check;
2767 str = "operand";
2768 }
2769 else
2770 {
2771 kind = &sse_check;
2772 str = "sse";
2773 }
2774
2775 SKIP_WHITESPACE ();
2776
2777 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2778 {
2779 char *string;
2780 int e = get_symbol_name (&string);
2781
2782 if (strcmp (string, "none") == 0)
2783 *kind = check_none;
2784 else if (strcmp (string, "warning") == 0)
2785 *kind = check_warning;
2786 else if (strcmp (string, "error") == 0)
2787 *kind = check_error;
2788 else
2789 as_bad (_("bad argument to %s_check directive."), str);
2790 (void) restore_line_pointer (e);
2791 }
2792 else
2793 as_bad (_("missing argument for %s_check directive"), str);
2794
2795 demand_empty_rest_of_line ();
2796 }
2797
2798 static void
2799 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2800 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2801 {
2802 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2803 static const char *arch;
2804
2805 /* Intel MCU, L1OM and K1OM are only supported on ELF. */
2806 if (!IS_ELF)
2807 return;
2808
2809 if (!arch)
2810 {
2811 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2812 use default_arch. */
2813 arch = cpu_arch_name;
2814 if (!arch)
2815 arch = default_arch;
2816 }
2817
2818 /* If we are targeting Intel MCU, we must enable it. */
2819 if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2820 || new_flag.bitfield.cpuiamcu)
2821 return;
2822
2823 /* If we are targeting Intel L1OM, we must enable it. */
2824 if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2825 || new_flag.bitfield.cpul1om)
2826 return;
2827
2828 /* If we are targeting Intel K1OM, we must enable it. */
2829 if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2830 || new_flag.bitfield.cpuk1om)
2831 return;
2832
2833 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2834 #endif
2835 }
2836
2837 static void
2838 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2839 {
2840 SKIP_WHITESPACE ();
2841
2842 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2843 {
2844 char *string;
2845 int e = get_symbol_name (&string);
2846 unsigned int j;
2847 i386_cpu_flags flags;
2848
2849 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2850 {
2851 if (strcmp (string, cpu_arch[j].name) == 0)
2852 {
2853 check_cpu_arch_compatible (string, cpu_arch[j].flags);
2854
2855 if (*string != '.')
2856 {
2857 cpu_arch_name = cpu_arch[j].name;
2858 cpu_sub_arch_name = NULL;
2859 cpu_arch_flags = cpu_arch[j].flags;
2860 if (flag_code == CODE_64BIT)
2861 {
2862 cpu_arch_flags.bitfield.cpu64 = 1;
2863 cpu_arch_flags.bitfield.cpuno64 = 0;
2864 }
2865 else
2866 {
2867 cpu_arch_flags.bitfield.cpu64 = 0;
2868 cpu_arch_flags.bitfield.cpuno64 = 1;
2869 }
2870 cpu_arch_isa = cpu_arch[j].type;
2871 cpu_arch_isa_flags = cpu_arch[j].flags;
2872 if (!cpu_arch_tune_set)
2873 {
2874 cpu_arch_tune = cpu_arch_isa;
2875 cpu_arch_tune_flags = cpu_arch_isa_flags;
2876 }
2877 break;
2878 }
2879
2880 flags = cpu_flags_or (cpu_arch_flags,
2881 cpu_arch[j].flags);
2882
2883 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2884 {
2885 if (cpu_sub_arch_name)
2886 {
2887 char *name = cpu_sub_arch_name;
2888 cpu_sub_arch_name = concat (name,
2889 cpu_arch[j].name,
2890 (const char *) NULL);
2891 free (name);
2892 }
2893 else
2894 cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2895 cpu_arch_flags = flags;
2896 cpu_arch_isa_flags = flags;
2897 }
2898 else
2899 cpu_arch_isa_flags
2900 = cpu_flags_or (cpu_arch_isa_flags,
2901 cpu_arch[j].flags);
2902 (void) restore_line_pointer (e);
2903 demand_empty_rest_of_line ();
2904 return;
2905 }
2906 }
2907
2908 if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2909 {
2910 /* Disable an ISA extension. */
2911 for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2912 if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2913 {
2914 flags = cpu_flags_and_not (cpu_arch_flags,
2915 cpu_noarch[j].flags);
2916 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2917 {
2918 if (cpu_sub_arch_name)
2919 {
2920 char *name = cpu_sub_arch_name;
2921 cpu_sub_arch_name = concat (name, string,
2922 (const char *) NULL);
2923 free (name);
2924 }
2925 else
2926 cpu_sub_arch_name = xstrdup (string);
2927 cpu_arch_flags = flags;
2928 cpu_arch_isa_flags = flags;
2929 }
2930 (void) restore_line_pointer (e);
2931 demand_empty_rest_of_line ();
2932 return;
2933 }
2934
2935 j = ARRAY_SIZE (cpu_arch);
2936 }
2937
2938 if (j >= ARRAY_SIZE (cpu_arch))
2939 as_bad (_("no such architecture: `%s'"), string);
2940
2941 *input_line_pointer = e;
2942 }
2943 else
2944 as_bad (_("missing cpu architecture"));
2945
2946 no_cond_jump_promotion = 0;
2947 if (*input_line_pointer == ','
2948 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2949 {
2950 char *string;
2951 char e;
2952
2953 ++input_line_pointer;
2954 e = get_symbol_name (&string);
2955
2956 if (strcmp (string, "nojumps") == 0)
2957 no_cond_jump_promotion = 1;
2958 else if (strcmp (string, "jumps") == 0)
2959 ;
2960 else
2961 as_bad (_("no such architecture modifier: `%s'"), string);
2962
2963 (void) restore_line_pointer (e);
2964 }
2965
2966 demand_empty_rest_of_line ();
2967 }
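
/* Directive usage (illustrative):

	.arch i486		# select a base architecture
	.arch .avx2		# enable a single ISA extension
	.arch .noavx2		# disable it again
	.arch pentium,nojumps	# also suppress conditional jump promotion
*/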
2968
2969 enum bfd_architecture
2970 i386_arch (void)
2971 {
2972 if (cpu_arch_isa == PROCESSOR_L1OM)
2973 {
2974 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2975 || flag_code != CODE_64BIT)
2976 as_fatal (_("Intel L1OM is 64bit ELF only"));
2977 return bfd_arch_l1om;
2978 }
2979 else if (cpu_arch_isa == PROCESSOR_K1OM)
2980 {
2981 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2982 || flag_code != CODE_64BIT)
2983 as_fatal (_("Intel K1OM is 64bit ELF only"));
2984 return bfd_arch_k1om;
2985 }
2986 else if (cpu_arch_isa == PROCESSOR_IAMCU)
2987 {
2988 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2989 || flag_code == CODE_64BIT)
2990 as_fatal (_("Intel MCU is 32bit ELF only"));
2991 return bfd_arch_iamcu;
2992 }
2993 else
2994 return bfd_arch_i386;
2995 }
2996
2997 unsigned long
2998 i386_mach (void)
2999 {
3000 if (!strncmp (default_arch, "x86_64", 6))
3001 {
3002 if (cpu_arch_isa == PROCESSOR_L1OM)
3003 {
3004 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3005 || default_arch[6] != '\0')
3006 as_fatal (_("Intel L1OM is 64bit ELF only"));
3007 return bfd_mach_l1om;
3008 }
3009 else if (cpu_arch_isa == PROCESSOR_K1OM)
3010 {
3011 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3012 || default_arch[6] != '\0')
3013 as_fatal (_("Intel K1OM is 64bit ELF only"));
3014 return bfd_mach_k1om;
3015 }
3016 else if (default_arch[6] == '\0')
3017 return bfd_mach_x86_64;
3018 else
3019 return bfd_mach_x64_32;
3020 }
3021 else if (!strcmp (default_arch, "i386")
3022 || !strcmp (default_arch, "iamcu"))
3023 {
3024 if (cpu_arch_isa == PROCESSOR_IAMCU)
3025 {
3026 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3027 as_fatal (_("Intel MCU is 32bit ELF only"));
3028 return bfd_mach_i386_iamcu;
3029 }
3030 else
3031 return bfd_mach_i386_i386;
3032 }
3033 else
3034 as_fatal (_("unknown architecture"));
3035 }
3036 \f
3037 void
3038 md_begin (void)
3039 {
3040 /* Support pseudo prefixes like {disp32}. */
3041 lex_type ['{'] = LEX_BEGIN_NAME;
3042
3043 /* Initialize op_hash hash table. */
3044 op_hash = str_htab_create ();
3045
3046 {
3047 const insn_template *optab;
3048 templates *core_optab;
3049
3050 /* Setup for loop. */
3051 optab = i386_optab;
3052 core_optab = XNEW (templates);
3053 core_optab->start = optab;
3054
3055 while (1)
3056 {
3057 ++optab;
3058 if (optab->name == NULL
3059 || strcmp (optab->name, (optab - 1)->name) != 0)
3060 {
3061 /* different name --> ship out current template list;
3062 add to hash table; & begin anew. */
3063 core_optab->end = optab;
3064 if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
3065 as_fatal (_("duplicate %s"), (optab - 1)->name);
3066
3067 if (optab->name == NULL)
3068 break;
3069 core_optab = XNEW (templates);
3070 core_optab->start = optab;
3071 }
3072 }
3073 }
3074
3075 /* Initialize reg_hash hash table. */
3076 reg_hash = str_htab_create ();
3077 {
3078 const reg_entry *regtab;
3079 unsigned int regtab_size = i386_regtab_size;
3080
3081 for (regtab = i386_regtab; regtab_size--; regtab++)
3082 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3083 as_fatal (_("duplicate %s"), regtab->reg_name);
3084 }
3085
3086 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3087 {
3088 int c;
3089 char *p;
3090
3091 for (c = 0; c < 256; c++)
3092 {
3093 if (ISDIGIT (c))
3094 {
3095 digit_chars[c] = c;
3096 mnemonic_chars[c] = c;
3097 register_chars[c] = c;
3098 operand_chars[c] = c;
3099 }
3100 else if (ISLOWER (c))
3101 {
3102 mnemonic_chars[c] = c;
3103 register_chars[c] = c;
3104 operand_chars[c] = c;
3105 }
3106 else if (ISUPPER (c))
3107 {
3108 mnemonic_chars[c] = TOLOWER (c);
3109 register_chars[c] = mnemonic_chars[c];
3110 operand_chars[c] = c;
3111 }
3112 else if (c == '{' || c == '}')
3113 {
3114 mnemonic_chars[c] = c;
3115 operand_chars[c] = c;
3116 }
3117 #ifdef SVR4_COMMENT_CHARS
3118 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3119 operand_chars[c] = c;
3120 #endif
3121
3122 if (ISALPHA (c) || ISDIGIT (c))
3123 identifier_chars[c] = c;
3124 else if (c >= 128)
3125 {
3126 identifier_chars[c] = c;
3127 operand_chars[c] = c;
3128 }
3129 }
3130
3131 #ifdef LEX_AT
3132 identifier_chars['@'] = '@';
3133 #endif
3134 #ifdef LEX_QM
3135 identifier_chars['?'] = '?';
3136 operand_chars['?'] = '?';
3137 #endif
3138 digit_chars['-'] = '-';
3139 mnemonic_chars['_'] = '_';
3140 mnemonic_chars['-'] = '-';
3141 mnemonic_chars['.'] = '.';
3142 identifier_chars['_'] = '_';
3143 identifier_chars['.'] = '.';
3144
3145 for (p = operand_special_chars; *p != '\0'; p++)
3146 operand_chars[(unsigned char) *p] = *p;
3147 }
3148
3149 if (flag_code == CODE_64BIT)
3150 {
3151 #if defined (OBJ_COFF) && defined (TE_PE)
3152 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3153 ? 32 : 16);
3154 #else
3155 x86_dwarf2_return_column = 16;
3156 #endif
3157 x86_cie_data_alignment = -8;
3158 }
3159 else
3160 {
3161 x86_dwarf2_return_column = 8;
3162 x86_cie_data_alignment = -4;
3163 }
3164
3165 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3166 can be turned into BRANCH_PREFIX frag. */
3167 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3168 abort ();
3169 }
3170
3171 void
3172 i386_print_statistics (FILE *file)
3173 {
3174 htab_print_statistics (file, "i386 opcode", op_hash);
3175 htab_print_statistics (file, "i386 register", reg_hash);
3176 }
3177 \f
3178 #ifdef DEBUG386
3179
3180 /* Debugging routines for md_assemble. */
3181 static void pte (insn_template *);
3182 static void pt (i386_operand_type);
3183 static void pe (expressionS *);
3184 static void ps (symbolS *);
3185
3186 static void
3187 pi (const char *line, i386_insn *x)
3188 {
3189 unsigned int j;
3190
3191 fprintf (stdout, "%s: template ", line);
3192 pte (&x->tm);
3193 fprintf (stdout, " address: base %s index %s scale %x\n",
3194 x->base_reg ? x->base_reg->reg_name : "none",
3195 x->index_reg ? x->index_reg->reg_name : "none",
3196 x->log2_scale_factor);
3197 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3198 x->rm.mode, x->rm.reg, x->rm.regmem);
3199 fprintf (stdout, " sib: base %x index %x scale %x\n",
3200 x->sib.base, x->sib.index, x->sib.scale);
3201 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3202 (x->rex & REX_W) != 0,
3203 (x->rex & REX_R) != 0,
3204 (x->rex & REX_X) != 0,
3205 (x->rex & REX_B) != 0);
3206 for (j = 0; j < x->operands; j++)
3207 {
3208 fprintf (stdout, " #%d: ", j + 1);
3209 pt (x->types[j]);
3210 fprintf (stdout, "\n");
3211 if (x->types[j].bitfield.class == Reg
3212 || x->types[j].bitfield.class == RegMMX
3213 || x->types[j].bitfield.class == RegSIMD
3214 || x->types[j].bitfield.class == RegMask
3215 || x->types[j].bitfield.class == SReg
3216 || x->types[j].bitfield.class == RegCR
3217 || x->types[j].bitfield.class == RegDR
3218 || x->types[j].bitfield.class == RegTR
3219 || x->types[j].bitfield.class == RegBND)
3220 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3221 if (operand_type_check (x->types[j], imm))
3222 pe (x->op[j].imms);
3223 if (operand_type_check (x->types[j], disp))
3224 pe (x->op[j].disps);
3225 }
3226 }
3227
3228 static void
3229 pte (insn_template *t)
3230 {
3231 unsigned int j;
3232 fprintf (stdout, " %d operands ", t->operands);
3233 fprintf (stdout, "opcode %x ", t->base_opcode);
3234 if (t->extension_opcode != None)
3235 fprintf (stdout, "ext %x ", t->extension_opcode);
3236 if (t->opcode_modifier.d)
3237 fprintf (stdout, "D");
3238 if (t->opcode_modifier.w)
3239 fprintf (stdout, "W");
3240 fprintf (stdout, "\n");
3241 for (j = 0; j < t->operands; j++)
3242 {
3243 fprintf (stdout, " #%d type ", j + 1);
3244 pt (t->operand_types[j]);
3245 fprintf (stdout, "\n");
3246 }
3247 }
3248
3249 static void
3250 pe (expressionS *e)
3251 {
3252 fprintf (stdout, " operation %d\n", e->X_op);
3253 fprintf (stdout, " add_number %ld (%lx)\n",
3254 (long) e->X_add_number, (long) e->X_add_number);
3255 if (e->X_add_symbol)
3256 {
3257 fprintf (stdout, " add_symbol ");
3258 ps (e->X_add_symbol);
3259 fprintf (stdout, "\n");
3260 }
3261 if (e->X_op_symbol)
3262 {
3263 fprintf (stdout, " op_symbol ");
3264 ps (e->X_op_symbol);
3265 fprintf (stdout, "\n");
3266 }
3267 }
3268
3269 static void
3270 ps (symbolS *s)
3271 {
3272 fprintf (stdout, "%s type %s%s",
3273 S_GET_NAME (s),
3274 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3275 segment_name (S_GET_SEGMENT (s)));
3276 }
3277
3278 static struct type_name
3279 {
3280 i386_operand_type mask;
3281 const char *name;
3282 }
3283 const type_names[] =
3284 {
3285 { OPERAND_TYPE_REG8, "r8" },
3286 { OPERAND_TYPE_REG16, "r16" },
3287 { OPERAND_TYPE_REG32, "r32" },
3288 { OPERAND_TYPE_REG64, "r64" },
3289 { OPERAND_TYPE_ACC8, "acc8" },
3290 { OPERAND_TYPE_ACC16, "acc16" },
3291 { OPERAND_TYPE_ACC32, "acc32" },
3292 { OPERAND_TYPE_ACC64, "acc64" },
3293 { OPERAND_TYPE_IMM8, "i8" },
3294 { OPERAND_TYPE_IMM8, "i8s" },
3295 { OPERAND_TYPE_IMM16, "i16" },
3296 { OPERAND_TYPE_IMM32, "i32" },
3297 { OPERAND_TYPE_IMM32S, "i32s" },
3298 { OPERAND_TYPE_IMM64, "i64" },
3299 { OPERAND_TYPE_IMM1, "i1" },
3300 { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3301 { OPERAND_TYPE_DISP8, "d8" },
3302 { OPERAND_TYPE_DISP16, "d16" },
3303 { OPERAND_TYPE_DISP32, "d32" },
3304 { OPERAND_TYPE_DISP32S, "d32s" },
3305 { OPERAND_TYPE_DISP64, "d64" },
3306 { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3307 { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3308 { OPERAND_TYPE_CONTROL, "control reg" },
3309 { OPERAND_TYPE_TEST, "test reg" },
3310 { OPERAND_TYPE_DEBUG, "debug reg" },
3311 { OPERAND_TYPE_FLOATREG, "FReg" },
3312 { OPERAND_TYPE_FLOATACC, "FAcc" },
3313 { OPERAND_TYPE_SREG, "SReg" },
3314 { OPERAND_TYPE_REGMMX, "rMMX" },
3315 { OPERAND_TYPE_REGXMM, "rXMM" },
3316 { OPERAND_TYPE_REGYMM, "rYMM" },
3317 { OPERAND_TYPE_REGZMM, "rZMM" },
3318 { OPERAND_TYPE_REGTMM, "rTMM" },
3319 { OPERAND_TYPE_REGMASK, "Mask reg" },
3320 };
3321
3322 static void
3323 pt (i386_operand_type t)
3324 {
3325 unsigned int j;
3326 i386_operand_type a;
3327
3328 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3329 {
3330 a = operand_type_and (t, type_names[j].mask);
3331 if (operand_type_equal (&a, &type_names[j].mask))
3332 fprintf (stdout, "%s, ", type_names[j].name);
3333 }
3334 fflush (stdout);
3335 }
3336
3337 #endif /* DEBUG386 */
3338 \f
3339 static bfd_reloc_code_real_type
3340 reloc (unsigned int size,
3341 int pcrel,
3342 int sign,
3343 bfd_reloc_code_real_type other)
3344 {
3345 if (other != NO_RELOC)
3346 {
3347 reloc_howto_type *rel;
3348
3349 if (size == 8)
3350 switch (other)
3351 {
3352 case BFD_RELOC_X86_64_GOT32:
3353 return BFD_RELOC_X86_64_GOT64;
3354 break;
3355 case BFD_RELOC_X86_64_GOTPLT64:
3356 return BFD_RELOC_X86_64_GOTPLT64;
3357 break;
3358 case BFD_RELOC_X86_64_PLTOFF64:
3359 return BFD_RELOC_X86_64_PLTOFF64;
3360 break;
3361 case BFD_RELOC_X86_64_GOTPC32:
3362 other = BFD_RELOC_X86_64_GOTPC64;
3363 break;
3364 case BFD_RELOC_X86_64_GOTPCREL:
3365 other = BFD_RELOC_X86_64_GOTPCREL64;
3366 break;
3367 case BFD_RELOC_X86_64_TPOFF32:
3368 other = BFD_RELOC_X86_64_TPOFF64;
3369 break;
3370 case BFD_RELOC_X86_64_DTPOFF32:
3371 other = BFD_RELOC_X86_64_DTPOFF64;
3372 break;
3373 default:
3374 break;
3375 }
3376
3377 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3378 if (other == BFD_RELOC_SIZE32)
3379 {
3380 if (size == 8)
3381 other = BFD_RELOC_SIZE64;
3382 if (pcrel)
3383 {
3384 as_bad (_("there are no pc-relative size relocations"));
3385 return NO_RELOC;
3386 }
3387 }
3388 #endif
3389
3390 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3391 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3392 sign = -1;
3393
3394 rel = bfd_reloc_type_lookup (stdoutput, other);
3395 if (!rel)
3396 as_bad (_("unknown relocation (%u)"), other);
3397 else if (size != bfd_get_reloc_size (rel))
3398 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3399 bfd_get_reloc_size (rel),
3400 size);
3401 else if (pcrel && !rel->pc_relative)
3402 as_bad (_("non-pc-relative relocation for pc-relative field"));
3403 else if ((rel->complain_on_overflow == complain_overflow_signed
3404 && !sign)
3405 || (rel->complain_on_overflow == complain_overflow_unsigned
3406 && sign > 0))
3407 as_bad (_("relocated field and relocation type differ in signedness"));
3408 else
3409 return other;
3410 return NO_RELOC;
3411 }
3412
3413 if (pcrel)
3414 {
3415 if (!sign)
3416 as_bad (_("there are no unsigned pc-relative relocations"));
3417 switch (size)
3418 {
3419 case 1: return BFD_RELOC_8_PCREL;
3420 case 2: return BFD_RELOC_16_PCREL;
3421 case 4: return BFD_RELOC_32_PCREL;
3422 case 8: return BFD_RELOC_64_PCREL;
3423 }
3424 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3425 }
3426 else
3427 {
3428 if (sign > 0)
3429 switch (size)
3430 {
3431 case 4: return BFD_RELOC_X86_64_32S;
3432 }
3433 else
3434 switch (size)
3435 {
3436 case 1: return BFD_RELOC_8;
3437 case 2: return BFD_RELOC_16;
3438 case 4: return BFD_RELOC_32;
3439 case 8: return BFD_RELOC_64;
3440 }
3441 as_bad (_("cannot do %s %u byte relocation"),
3442 sign > 0 ? "signed" : "unsigned", size);
3443 }
3444
3445 return NO_RELOC;
3446 }
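
/* Examples (illustrative): reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL; reloc (4, 0, 1, NO_RELOC) yields
   BFD_RELOC_X86_64_32S; reloc (8, 0, 0, BFD_RELOC_X86_64_GOT32) is widened
   to BFD_RELOC_X86_64_GOT64.  */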
3447
3448 /* Here we decide which fixups can be adjusted to make them relative to
3449 the beginning of the section instead of the symbol. Basically we need
3450 to make sure that the dynamic relocations are done correctly, so in
3451 some cases we force the original symbol to be used. */
3452
3453 int
3454 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3455 {
3456 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3457 if (!IS_ELF)
3458 return 1;
3459
3460 /* Don't adjust pc-relative references to merge sections in 64-bit
3461 mode. */
3462 if (use_rela_relocations
3463 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3464 && fixP->fx_pcrel)
3465 return 0;
3466
3467 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3468 and changed later by validate_fix. */
3469 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3470 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3471 return 0;
3472
3473 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3474 for size relocations. */
3475 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3476 || fixP->fx_r_type == BFD_RELOC_SIZE64
3477 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3478 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3479 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3480 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3481 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3482 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3483 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3484 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3485 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3486 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3487 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3488 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3489 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3490 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3491 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3492 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3493 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3494 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3495 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3496 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3497 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3498 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3499 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3500 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3501 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3502 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3503 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3504 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3505 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3506 return 0;
3507 #endif
3508 return 1;
3509 }
3510
3511 static int
3512 intel_float_operand (const char *mnemonic)
3513 {
3514 /* Note that the value returned is meaningful only for opcodes with (memory)
3515 operands, hence the code here is free to improperly handle opcodes that
3516 have no operands (for better performance and smaller code). */
3517
3518 if (mnemonic[0] != 'f')
3519 return 0; /* non-math */
3520
3521 switch (mnemonic[1])
3522 {
3523 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3524 the fs segment override prefix are not currently handled, because
3525 no call path can make opcodes without operands get here */
3526 case 'i':
3527 return 2 /* integer op */;
3528 case 'l':
3529 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3530 return 3; /* fldcw/fldenv */
3531 break;
3532 case 'n':
3533 if (mnemonic[2] != 'o' /* fnop */)
3534 return 3; /* non-waiting control op */
3535 break;
3536 case 'r':
3537 if (mnemonic[2] == 's')
3538 return 3; /* frstor/frstpm */
3539 break;
3540 case 's':
3541 if (mnemonic[2] == 'a')
3542 return 3; /* fsave */
3543 if (mnemonic[2] == 't')
3544 {
3545 switch (mnemonic[3])
3546 {
3547 case 'c': /* fstcw */
3548 case 'd': /* fstdw */
3549 case 'e': /* fstenv */
3550 case 's': /* fsts[gw] */
3551 return 3;
3552 }
3553 }
3554 break;
3555 case 'x':
3556 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3557 return 0; /* fxsave/fxrstor are not really math ops */
3558 break;
3559 }
3560
3561 return 1;
3562 }
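
/* Examples (illustrative): "fild" returns 2 (integer operand), "fldcw" and
   "fnstenv" return 3 (control operations), "fxsave" returns 0 (not really a
   math op), and a plain "fadd" returns 1.  */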
3563
3564 /* Build the VEX prefix. */
3565
3566 static void
3567 build_vex_prefix (const insn_template *t)
3568 {
3569 unsigned int register_specifier;
3570 unsigned int implied_prefix;
3571 unsigned int vector_length;
3572 unsigned int w;
3573
3574 /* Check register specifier. */
3575 if (i.vex.register_specifier)
3576 {
3577 register_specifier =
3578 ~register_number (i.vex.register_specifier) & 0xf;
3579 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3580 }
3581 else
3582 register_specifier = 0xf;
3583
3584 /* Use 2-byte VEX prefix by swapping destination and source operands
3585 if there is more than one register operand. */
3586 if (i.reg_operands > 1
3587 && i.vec_encoding != vex_encoding_vex3
3588 && i.dir_encoding == dir_encoding_default
3589 && i.operands == i.reg_operands
3590 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3591 && i.tm.opcode_modifier.vexopcode == VEX0F
3592 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3593 && i.rex == REX_B)
3594 {
3595 unsigned int xchg = i.operands - 1;
3596 union i386_op temp_op;
3597 i386_operand_type temp_type;
3598
3599 temp_type = i.types[xchg];
3600 i.types[xchg] = i.types[0];
3601 i.types[0] = temp_type;
3602 temp_op = i.op[xchg];
3603 i.op[xchg] = i.op[0];
3604 i.op[0] = temp_op;
3605
3606 gas_assert (i.rm.mode == 3);
3607
3608 i.rex = REX_R;
3609 xchg = i.rm.regmem;
3610 i.rm.regmem = i.rm.reg;
3611 i.rm.reg = xchg;
3612
3613 if (i.tm.opcode_modifier.d)
3614 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3615 ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
3616 else /* Use the next insn. */
3617 i.tm = t[1];
3618 }
3619
3620 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3621 are no memory operands and at least 3 register ones. */
3622 if (i.reg_operands >= 3
3623 && i.vec_encoding != vex_encoding_vex3
3624 && i.reg_operands == i.operands - i.imm_operands
3625 && i.tm.opcode_modifier.vex
3626 && i.tm.opcode_modifier.commutative
3627 && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3628 && i.rex == REX_B
3629 && i.vex.register_specifier
3630 && !(i.vex.register_specifier->reg_flags & RegRex))
3631 {
3632 unsigned int xchg = i.operands - i.reg_operands;
3633 union i386_op temp_op;
3634 i386_operand_type temp_type;
3635
3636 gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
3637 gas_assert (!i.tm.opcode_modifier.sae);
3638 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3639 &i.types[i.operands - 3]));
3640 gas_assert (i.rm.mode == 3);
3641
3642 temp_type = i.types[xchg];
3643 i.types[xchg] = i.types[xchg + 1];
3644 i.types[xchg + 1] = temp_type;
3645 temp_op = i.op[xchg];
3646 i.op[xchg] = i.op[xchg + 1];
3647 i.op[xchg + 1] = temp_op;
3648
3649 i.rex = 0;
3650 xchg = i.rm.regmem | 8;
3651 i.rm.regmem = ~register_specifier & 0xf;
3652 gas_assert (!(i.rm.regmem & 8));
3653 i.vex.register_specifier += xchg - i.rm.regmem;
3654 register_specifier = ~xchg & 0xf;
3655 }
3656
3657 if (i.tm.opcode_modifier.vex == VEXScalar)
3658 vector_length = avxscalar;
3659 else if (i.tm.opcode_modifier.vex == VEX256)
3660 vector_length = 1;
3661 else
3662 {
3663 unsigned int op;
3664
3665 /* Determine vector length from the last multi-length vector
3666 operand. */
3667 vector_length = 0;
3668 for (op = t->operands; op--;)
3669 if (t->operand_types[op].bitfield.xmmword
3670 && t->operand_types[op].bitfield.ymmword
3671 && i.types[op].bitfield.ymmword)
3672 {
3673 vector_length = 1;
3674 break;
3675 }
3676 }
3677
3678 switch ((i.tm.base_opcode >> (i.tm.opcode_length << 3)) & 0xff)
3679 {
3680 case 0:
3681 implied_prefix = 0;
3682 break;
3683 case DATA_PREFIX_OPCODE:
3684 implied_prefix = 1;
3685 break;
3686 case REPE_PREFIX_OPCODE:
3687 implied_prefix = 2;
3688 break;
3689 case REPNE_PREFIX_OPCODE:
3690 implied_prefix = 3;
3691 break;
3692 default:
3693 abort ();
3694 }
3695
3696 /* Check the REX.W bit and VEXW. */
3697 if (i.tm.opcode_modifier.vexw == VEXWIG)
3698 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3699 else if (i.tm.opcode_modifier.vexw)
3700 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3701 else
3702 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3703
3704 /* Use 2-byte VEX prefix if possible. */
3705 if (w == 0
3706 && i.vec_encoding != vex_encoding_vex3
3707 && i.tm.opcode_modifier.vexopcode == VEX0F
3708 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3709 {
3710 /* 2-byte VEX prefix. */
3711 unsigned int r;
3712
3713 i.vex.length = 2;
3714 i.vex.bytes[0] = 0xc5;
3715
3716 /* Check the REX.R bit. */
3717 r = (i.rex & REX_R) ? 0 : 1;
3718 i.vex.bytes[1] = (r << 7
3719 | register_specifier << 3
3720 | vector_length << 2
3721 | implied_prefix);
3722 }
3723 else
3724 {
3725 /* 3-byte VEX prefix. */
3726 unsigned int m;
3727
3728 i.vex.length = 3;
3729
3730 switch (i.tm.opcode_modifier.vexopcode)
3731 {
3732 case VEX0F:
3733 m = 0x1;
3734 i.vex.bytes[0] = 0xc4;
3735 break;
3736 case VEX0F38:
3737 m = 0x2;
3738 i.vex.bytes[0] = 0xc4;
3739 break;
3740 case VEX0F3A:
3741 m = 0x3;
3742 i.vex.bytes[0] = 0xc4;
3743 break;
3744 case XOP08:
3745 m = 0x8;
3746 i.vex.bytes[0] = 0x8f;
3747 break;
3748 case XOP09:
3749 m = 0x9;
3750 i.vex.bytes[0] = 0x8f;
3751 break;
3752 case XOP0A:
3753 m = 0xa;
3754 i.vex.bytes[0] = 0x8f;
3755 break;
3756 default:
3757 abort ();
3758 }
3759
3760 /* The high 3 bits of the second VEX byte are 1's complement
3761 of RXB bits from REX. */
3762 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3763
3764 i.vex.bytes[2] = (w << 7
3765 | register_specifier << 3
3766 | vector_length << 2
3767 | implied_prefix);
3768 }
3769 }
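
/* Resulting VEX layouts (illustrative summary of the code above):

	2-byte:  0xc5 | R vvvv L pp
	3-byte:  0xc4 | R X B m-mmmm | W vvvv L pp
	XOP:     0x8f | R X B m-mmmm | W vvvv L pp

   where R/X/B are the inverted REX bits, vvvv the inverted register
   specifier, L the vector length and pp the implied prefix.  */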
3770
3771 static INLINE bfd_boolean
3772 is_evex_encoding (const insn_template *t)
3773 {
3774 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3775 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3776 || t->opcode_modifier.sae;
3777 }
3778
3779 static INLINE bfd_boolean
3780 is_any_vex_encoding (const insn_template *t)
3781 {
3782 return t->opcode_modifier.vex || t->opcode_modifier.vexopcode
3783 || is_evex_encoding (t);
3784 }
3785
3786 /* Build the EVEX prefix. */
3787
3788 static void
3789 build_evex_prefix (void)
3790 {
3791 unsigned int register_specifier;
3792 unsigned int implied_prefix;
3793 unsigned int m, w;
3794 rex_byte vrex_used = 0;
3795
3796 /* Check register specifier. */
3797 if (i.vex.register_specifier)
3798 {
3799 gas_assert ((i.vrex & REX_X) == 0);
3800
3801 register_specifier = i.vex.register_specifier->reg_num;
3802 if ((i.vex.register_specifier->reg_flags & RegRex))
3803 register_specifier += 8;
3804 /* The upper 16 registers are encoded in the fourth byte of the
3805 EVEX prefix. */
3806 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3807 i.vex.bytes[3] = 0x8;
3808 register_specifier = ~register_specifier & 0xf;
3809 }
3810 else
3811 {
3812 register_specifier = 0xf;
3813
3814 /* Encode upper 16 vector index register in the fourth byte of
3815 the EVEX prefix. */
3816 if (!(i.vrex & REX_X))
3817 i.vex.bytes[3] = 0x8;
3818 else
3819 vrex_used |= REX_X;
3820 }
3821
3822 switch ((i.tm.base_opcode >> 8) & 0xff)
3823 {
3824 case 0:
3825 implied_prefix = 0;
3826 break;
3827 case DATA_PREFIX_OPCODE:
3828 implied_prefix = 1;
3829 break;
3830 case REPE_PREFIX_OPCODE:
3831 implied_prefix = 2;
3832 break;
3833 case REPNE_PREFIX_OPCODE:
3834 implied_prefix = 3;
3835 break;
3836 default:
3837 abort ();
3838 }
3839
3840 /* 4 byte EVEX prefix. */
3841 i.vex.length = 4;
3842 i.vex.bytes[0] = 0x62;
3843
3844 /* mmmm bits. */
3845 switch (i.tm.opcode_modifier.vexopcode)
3846 {
3847 case VEX0F:
3848 m = 1;
3849 break;
3850 case VEX0F38:
3851 m = 2;
3852 break;
3853 case VEX0F3A:
3854 m = 3;
3855 break;
3856 default:
3857 abort ();
3858 break;
3859 }
3860
3861 /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3862 bits from REX. */
3863 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3864
3865 /* The fifth bit of the second EVEX byte is 1's complement of the
3866 REX_R bit in VREX. */
3867 if (!(i.vrex & REX_R))
3868 i.vex.bytes[1] |= 0x10;
3869 else
3870 vrex_used |= REX_R;
3871
3872 if ((i.reg_operands + i.imm_operands) == i.operands)
3873 {
3874 /* When all operands are registers, the REX_X bit in REX is not
3875 used. We reuse it to encode the upper 16 registers, which is
3876 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3877 as 1's complement. */
3878 if ((i.vrex & REX_B))
3879 {
3880 vrex_used |= REX_B;
3881 i.vex.bytes[1] &= ~0x40;
3882 }
3883 }
3884
3885 /* EVEX instructions shouldn't need the REX prefix. */
3886 i.vrex &= ~vrex_used;
3887 gas_assert (i.vrex == 0);
3888
3889 /* Check the REX.W bit and VEXW. */
3890 if (i.tm.opcode_modifier.vexw == VEXWIG)
3891 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3892 else if (i.tm.opcode_modifier.vexw)
3893 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3894 else
3895 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3896
3897 /* Encode the U bit. */
3898 implied_prefix |= 0x4;
3899
3900 /* The third byte of the EVEX prefix. */
3901 i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
3902
3903 /* The fourth byte of the EVEX prefix. */
3904 /* The zeroing-masking bit. */
3905 if (i.mask && i.mask->zeroing)
3906 i.vex.bytes[3] |= 0x80;
3907
3908 /* Don't always set the broadcast bit if there is no RC. */
3909 if (!i.rounding)
3910 {
3911 /* Encode the vector length. */
3912 unsigned int vec_length;
3913
3914 if (!i.tm.opcode_modifier.evex
3915 || i.tm.opcode_modifier.evex == EVEXDYN)
3916 {
3917 unsigned int op;
3918
3919 /* Determine vector length from the last multi-length vector
3920 operand. */
3921 for (op = i.operands; op--;)
3922 if (i.tm.operand_types[op].bitfield.xmmword
3923 + i.tm.operand_types[op].bitfield.ymmword
3924 + i.tm.operand_types[op].bitfield.zmmword > 1)
3925 {
3926 if (i.types[op].bitfield.zmmword)
3927 {
3928 i.tm.opcode_modifier.evex = EVEX512;
3929 break;
3930 }
3931 else if (i.types[op].bitfield.ymmword)
3932 {
3933 i.tm.opcode_modifier.evex = EVEX256;
3934 break;
3935 }
3936 else if (i.types[op].bitfield.xmmword)
3937 {
3938 i.tm.opcode_modifier.evex = EVEX128;
3939 break;
3940 }
3941 else if (i.broadcast && (int) op == i.broadcast->operand)
3942 {
3943 switch (i.broadcast->bytes)
3944 {
3945 case 64:
3946 i.tm.opcode_modifier.evex = EVEX512;
3947 break;
3948 case 32:
3949 i.tm.opcode_modifier.evex = EVEX256;
3950 break;
3951 case 16:
3952 i.tm.opcode_modifier.evex = EVEX128;
3953 break;
3954 default:
3955 abort ();
3956 }
3957 break;
3958 }
3959 }
3960
3961 if (op >= MAX_OPERANDS)
3962 abort ();
3963 }
3964
3965 switch (i.tm.opcode_modifier.evex)
3966 {
3967 case EVEXLIG: /* LL' is ignored */
3968 vec_length = evexlig << 5;
3969 break;
3970 case EVEX128:
3971 vec_length = 0 << 5;
3972 break;
3973 case EVEX256:
3974 vec_length = 1 << 5;
3975 break;
3976 case EVEX512:
3977 vec_length = 2 << 5;
3978 break;
3979 default:
3980 abort ();
3981 break;
3982 }
3983 i.vex.bytes[3] |= vec_length;
3984 /* Encode the broadcast bit. */
3985 if (i.broadcast)
3986 i.vex.bytes[3] |= 0x10;
3987 }
3988 else
3989 {
3990 if (i.rounding->type != saeonly)
3991 i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
3992 else
3993 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3994 }
3995
3996 if (i.mask && i.mask->mask)
3997 i.vex.bytes[3] |= i.mask->mask->reg_num;
3998 }
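/* For reference, a rough picture of the 4-byte EVEX prefix built above:

     byte 0: 0x62
     byte 1: R X B R' 0 0 m m   - inverted REX bits, high-16 R bit,
                                  opcode map (mm)
     byte 2: W v v v v 1 p p    - REX.W, inverted register specifier,
                                  fixed bit, implied 66/F3/F2 prefix
     byte 3: z L'L b V' a a a   - zeroing, vector length, broadcast/RC,
                                  inverted high-16 vvvv bit, mask register  */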
3999
4000 static void
4001 process_immext (void)
4002 {
4003 expressionS *exp;
4004
4005 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4006 which is coded in the same place as an 8-bit immediate field
4007 would be. Here we fake an 8-bit immediate operand from the
4008 opcode suffix stored in tm.extension_opcode.
4009
4010 AVX instructions also use this encoding for some
4011 3-argument instructions. */
4012
4013 gas_assert (i.imm_operands <= 1
4014 && (i.operands <= 2
4015 || (is_any_vex_encoding (&i.tm)
4016 && i.operands <= 4)));
4017
4018 exp = &im_expressions[i.imm_operands++];
4019 i.op[i.operands].imms = exp;
4020 i.types[i.operands] = imm8;
4021 i.operands++;
4022 exp->X_op = O_constant;
4023 exp->X_add_number = i.tm.extension_opcode;
4024 i.tm.extension_opcode = None;
4025 }
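/* As an illustration: 3DNow! `pfadd %mm1, %mm0' is 0F 0F /r with the
   opcode suffix 0x9e following the ModRM byte; the suffix travels
   through the assembler as if it were an imm8 operand.  */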
4026
4027
4028 static int
4029 check_hle (void)
4030 {
4031 switch (i.tm.opcode_modifier.hleprefixok)
4032 {
4033 default:
4034 abort ();
4035 case HLEPrefixNone:
4036 as_bad (_("invalid instruction `%s' after `%s'"),
4037 i.tm.name, i.hle_prefix);
4038 return 0;
4039 case HLEPrefixLock:
4040 if (i.prefix[LOCK_PREFIX])
4041 return 1;
4042 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4043 return 0;
4044 case HLEPrefixAny:
4045 return 1;
4046 case HLEPrefixRelease:
4047 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4048 {
4049 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4050 i.tm.name);
4051 return 0;
4052 }
4053 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4054 {
4055 as_bad (_("memory destination needed for instruction `%s'"
4056 " after `xrelease'"), i.tm.name);
4057 return 0;
4058 }
4059 return 1;
4060 }
4061 }
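/* Examples of the checks above: `xacquire lock incl (%rax)' is
   accepted (HLEPrefixLock), while plain `xacquire incl (%rax)' gets
   the missing-`lock' diagnostic.  `xrelease movl $0, (%rax)' is the
   special HLE store form and needs no `lock' prefix
   (HLEPrefixRelease), but it does require a memory destination.  */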
4062
4063 /* Try the shortest encoding by shortening operand size. */
4064
4065 static void
4066 optimize_encoding (void)
4067 {
4068 unsigned int j;
4069
4070 if (optimize_for_space
4071 && !is_any_vex_encoding (&i.tm)
4072 && i.reg_operands == 1
4073 && i.imm_operands == 1
4074 && !i.types[1].bitfield.byte
4075 && i.op[0].imms->X_op == O_constant
4076 && fits_in_imm7 (i.op[0].imms->X_add_number)
4077 && (i.tm.base_opcode == 0xa8
4078 || (i.tm.base_opcode == 0xf6
4079 && i.tm.extension_opcode == 0x0)))
4080 {
4081 /* Optimize: -Os:
4082 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4083 */
4084 unsigned int base_regnum = i.op[1].regs->reg_num;
4085 if (flag_code == CODE_64BIT || base_regnum < 4)
4086 {
4087 i.types[1].bitfield.byte = 1;
4088 /* Ignore the suffix. */
4089 i.suffix = 0;
4090 /* Convert to byte registers. */
4091 if (i.types[1].bitfield.word)
4092 j = 16;
4093 else if (i.types[1].bitfield.dword)
4094 j = 32;
4095 else
4096 j = 48;
4097 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4098 j += 8;
4099 i.op[1].regs -= j;
4100 }
4101 }
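  /* E.g. `testl $0x7f, %eax' (a9 7f 00 00 00, 5 bytes) becomes
     `testb $0x7f, %al' (a8 7f, 2 bytes); the flag results are
     identical because the immediate fits in the low 7 bits, so the
     sign and all nonzero result bits live in the low byte.  */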
4102 else if (flag_code == CODE_64BIT
4103 && !is_any_vex_encoding (&i.tm)
4104 && ((i.types[1].bitfield.qword
4105 && i.reg_operands == 1
4106 && i.imm_operands == 1
4107 && i.op[0].imms->X_op == O_constant
4108 && ((i.tm.base_opcode == 0xb8
4109 && i.tm.extension_opcode == None
4110 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4111 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4112 && ((i.tm.base_opcode == 0x24
4113 || i.tm.base_opcode == 0xa8)
4114 || (i.tm.base_opcode == 0x80
4115 && i.tm.extension_opcode == 0x4)
4116 || ((i.tm.base_opcode == 0xf6
4117 || (i.tm.base_opcode | 1) == 0xc7)
4118 && i.tm.extension_opcode == 0x0)))
4119 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4120 && i.tm.base_opcode == 0x83
4121 && i.tm.extension_opcode == 0x4)))
4122 || (i.types[0].bitfield.qword
4123 && ((i.reg_operands == 2
4124 && i.op[0].regs == i.op[1].regs
4125 && (i.tm.base_opcode == 0x30
4126 || i.tm.base_opcode == 0x28))
4127 || (i.reg_operands == 1
4128 && i.operands == 1
4129 && i.tm.base_opcode == 0x30)))))
4130 {
4131 /* Optimize: -O:
4132 andq $imm31, %r64 -> andl $imm31, %r32
4133 andq $imm7, %r64 -> andl $imm7, %r32
4134 testq $imm31, %r64 -> testl $imm31, %r32
4135 xorq %r64, %r64 -> xorl %r32, %r32
4136 subq %r64, %r64 -> subl %r32, %r32
4137 movq $imm31, %r64 -> movl $imm31, %r32
4138 movq $imm32, %r64 -> movl $imm32, %r32
4139 */
4140 i.tm.opcode_modifier.norex64 = 1;
4141 if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4142 {
4143 /* Handle
4144 movq $imm31, %r64 -> movl $imm31, %r32
4145 movq $imm32, %r64 -> movl $imm32, %r32
4146 */
4147 i.tm.operand_types[0].bitfield.imm32 = 1;
4148 i.tm.operand_types[0].bitfield.imm32s = 0;
4149 i.tm.operand_types[0].bitfield.imm64 = 0;
4150 i.types[0].bitfield.imm32 = 1;
4151 i.types[0].bitfield.imm32s = 0;
4152 i.types[0].bitfield.imm64 = 0;
4153 i.types[1].bitfield.dword = 1;
4154 i.types[1].bitfield.qword = 0;
4155 if ((i.tm.base_opcode | 1) == 0xc7)
4156 {
4157 /* Handle
4158 movq $imm31, %r64 -> movl $imm31, %r32
4159 */
4160 i.tm.base_opcode = 0xb8;
4161 i.tm.extension_opcode = None;
4162 i.tm.opcode_modifier.w = 0;
4163 i.tm.opcode_modifier.modrm = 0;
4164 }
4165 }
4166 }
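  /* Dropping REX.W (or switching c7 /0 to the shorter b8 form) saves
     bytes without changing the result, since 32-bit operations in
     64-bit mode zero-extend into the upper half of the destination:
     e.g. `xorq %rbx, %rbx' (48 31 db) becomes `xorl %ebx, %ebx'
     (31 db), and `movq $1, %rax' (48 c7 c0 01 00 00 00) becomes
     `movl $1, %eax' (b8 01 00 00 00).  */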
4167 else if (optimize > 1
4168 && !optimize_for_space
4169 && !is_any_vex_encoding (&i.tm)
4170 && i.reg_operands == 2
4171 && i.op[0].regs == i.op[1].regs
4172 && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4173 || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4174 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4175 {
4176 /* Optimize: -O2:
4177 andb %rN, %rN -> testb %rN, %rN
4178 andw %rN, %rN -> testw %rN, %rN
4179 andq %rN, %rN -> testq %rN, %rN
4180 orb %rN, %rN -> testb %rN, %rN
4181 orw %rN, %rN -> testw %rN, %rN
4182 orq %rN, %rN -> testq %rN, %rN
4183
4184 and outside of 64-bit mode
4185
4186 andl %rN, %rN -> testl %rN, %rN
4187 orl %rN, %rN -> testl %rN, %rN
4188 */
4189 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4190 }
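  /* `and'/`or' of a register with itself leaves the value unchanged
     and sets the same flags as `test', which doesn't write its
     destination at all; both encodings are the same length.  The
     32-bit forms are excluded in 64-bit mode because `andl'/`orl'
     would zero the upper half of the 64-bit register while `testl'
     would leave it intact.  */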
4191 else if (i.reg_operands == 3
4192 && i.op[0].regs == i.op[1].regs
4193 && !i.types[2].bitfield.xmmword
4194 && (i.tm.opcode_modifier.vex
4195 || ((!i.mask || i.mask->zeroing)
4196 && !i.rounding
4197 && is_evex_encoding (&i.tm)
4198 && (i.vec_encoding != vex_encoding_evex
4199 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4200 || i.tm.cpu_flags.bitfield.cpuavx512vl
4201 || (i.tm.operand_types[2].bitfield.zmmword
4202 && i.types[2].bitfield.ymmword))))
4203 && ((i.tm.base_opcode == 0x55
4204 || i.tm.base_opcode == 0x6655
4205 || i.tm.base_opcode == 0x66df
4206 || i.tm.base_opcode == 0x57
4207 || i.tm.base_opcode == 0x6657
4208 || i.tm.base_opcode == 0x66ef
4209 || i.tm.base_opcode == 0x66f8
4210 || i.tm.base_opcode == 0x66f9
4211 || i.tm.base_opcode == 0x66fa
4212 || i.tm.base_opcode == 0x66fb
4213 || i.tm.base_opcode == 0x42
4214 || i.tm.base_opcode == 0x6642
4215 || i.tm.base_opcode == 0x47
4216 || i.tm.base_opcode == 0x6647)
4217 && i.tm.extension_opcode == None))
4218 {
4219 /* Optimize: -O1:
4220 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4221 vpsubq and vpsubw:
4222 EVEX VOP %zmmM, %zmmM, %zmmN
4223 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4224 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4225 EVEX VOP %ymmM, %ymmM, %ymmN
4226 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4227 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4228 VEX VOP %ymmM, %ymmM, %ymmN
4229 -> VEX VOP %xmmM, %xmmM, %xmmN
4230 VOP, one of vpandn and vpxor:
4231 VEX VOP %ymmM, %ymmM, %ymmN
4232 -> VEX VOP %xmmM, %xmmM, %xmmN
4233 VOP, one of vpandnd and vpandnq:
4234 EVEX VOP %zmmM, %zmmM, %zmmN
4235 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4236 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4237 EVEX VOP %ymmM, %ymmM, %ymmN
4238 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4239 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4240 VOP, one of vpxord and vpxorq:
4241 EVEX VOP %zmmM, %zmmM, %zmmN
4242 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4243 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4244 EVEX VOP %ymmM, %ymmM, %ymmN
4245 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4246 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4247 VOP, one of kxord and kxorq:
4248 VEX VOP %kM, %kM, %kN
4249 -> VEX kxorw %kM, %kM, %kN
4250 VOP, one of kandnd and kandnq:
4251 VEX VOP %kM, %kM, %kN
4252 -> VEX kandnw %kM, %kM, %kN
4253 */
4254 if (is_evex_encoding (&i.tm))
4255 {
4256 if (i.vec_encoding != vex_encoding_evex)
4257 {
4258 i.tm.opcode_modifier.vex = VEX128;
4259 i.tm.opcode_modifier.vexw = VEXW0;
4260 i.tm.opcode_modifier.evex = 0;
4261 }
4262 else if (optimize > 1)
4263 i.tm.opcode_modifier.evex = EVEX128;
4264 else
4265 return;
4266 }
4267 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4268 {
4269 i.tm.base_opcode &= 0xff;
4270 i.tm.opcode_modifier.vexw = VEXW0;
4271 }
4272 else
4273 i.tm.opcode_modifier.vex = VEX128;
4274
4275 if (i.tm.opcode_modifier.vex)
4276 for (j = 0; j < 3; j++)
4277 {
4278 i.types[j].bitfield.xmmword = 1;
4279 i.types[j].bitfield.ymmword = 0;
4280 }
4281 }
4282 else if (i.vec_encoding != vex_encoding_evex
4283 && !i.types[0].bitfield.zmmword
4284 && !i.types[1].bitfield.zmmword
4285 && !i.mask
4286 && !i.broadcast
4287 && is_evex_encoding (&i.tm)
4288 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
4289 || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
4290 || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
4291 || (i.tm.base_opcode & ~4) == 0x66db
4292 || (i.tm.base_opcode & ~4) == 0x66eb)
4293 && i.tm.extension_opcode == None)
4294 {
4295 /* Optimize: -O1:
4296 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4297 vmovdqu32 and vmovdqu64:
4298 EVEX VOP %xmmM, %xmmN
4299 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4300 EVEX VOP %ymmM, %ymmN
4301 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4302 EVEX VOP %xmmM, mem
4303 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4304 EVEX VOP %ymmM, mem
4305 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4306 EVEX VOP mem, %xmmN
4307 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4308 EVEX VOP mem, %ymmN
4309 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4310 VOP, one of vpand, vpandn, vpor, vpxor:
4311 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4312 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4313 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4314 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4315 EVEX VOP{d,q} mem, %xmmM, %xmmN
4316 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4317 EVEX VOP{d,q} mem, %ymmM, %ymmN
4318 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4319 */
4320 for (j = 0; j < i.operands; j++)
4321 if (operand_type_check (i.types[j], disp)
4322 && i.op[j].disps->X_op == O_constant)
4323 {
4324 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4325 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4326 bytes, we choose EVEX Disp8 over VEX Disp32. */
4327 int evex_disp8, vex_disp8;
4328 unsigned int memshift = i.memshift;
4329 offsetT n = i.op[j].disps->X_add_number;
4330
4331 evex_disp8 = fits_in_disp8 (n);
4332 i.memshift = 0;
4333 vex_disp8 = fits_in_disp8 (n);
4334 if (evex_disp8 != vex_disp8)
4335 {
4336 i.memshift = memshift;
4337 return;
4338 }
4339
4340 i.types[j].bitfield.disp8 = vex_disp8;
4341 break;
4342 }
4343 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
4344 i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
4345 i.tm.opcode_modifier.vex
4346 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4347 i.tm.opcode_modifier.vexw = VEXW0;
4348 /* VPAND, VPOR, and VPXOR are commutative. */
4349 if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
4350 i.tm.opcode_modifier.commutative = 1;
4351 i.tm.opcode_modifier.evex = 0;
4352 i.tm.opcode_modifier.masking = 0;
4353 i.tm.opcode_modifier.broadcast = 0;
4354 i.tm.opcode_modifier.disp8memshift = 0;
4355 i.memshift = 0;
4356 if (j < i.operands)
4357 i.types[j].bitfield.disp8
4358 = fits_in_disp8 (i.op[j].disps->X_add_number);
4359 }
4360 }
4361
4362 /* Return non-zero for load instruction. */
4363
4364 static int
4365 load_insn_p (void)
4366 {
4367 unsigned int dest;
4368 int any_vex_p = is_any_vex_encoding (&i.tm);
4369 unsigned int base_opcode = i.tm.base_opcode | 1;
4370
4371 if (!any_vex_p)
4372 {
4373 /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
4374 prefetcht1, prefetcht2, prefetchw, bndmk, bndcl, bndcu, bndcn,
4375 bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote. */
4376 if (i.tm.opcode_modifier.anysize)
4377 return 0;
4378
4379 /* pop, popf, popa. */
4380 if (strcmp (i.tm.name, "pop") == 0
4381 || i.tm.base_opcode == 0x9d
4382 || i.tm.base_opcode == 0x61)
4383 return 1;
4384
4385 /* movs, cmps, lods, scas. */
4386 if ((i.tm.base_opcode | 0xb) == 0xaf)
4387 return 1;
4388
4389 /* outs, xlatb. */
4390 if (base_opcode == 0x6f
4391 || i.tm.base_opcode == 0xd7)
4392 return 1;
4393 /* NB: AMD-specific insns with implicit memory operands are
4394 intentionally not covered. */
4395 }
4396
4397 /* No memory operand. */
4398 if (!i.mem_operands)
4399 return 0;
4400
4401 if (any_vex_p)
4402 {
4403 /* vldmxcsr. */
4404 if (i.tm.base_opcode == 0xae
4405 && i.tm.opcode_modifier.vex
4406 && i.tm.opcode_modifier.vexopcode == VEX0F
4407 && i.tm.extension_opcode == 2)
4408 return 1;
4409 }
4410 else
4411 {
4412 /* test, not, neg, mul, imul, div, idiv. */
4413 if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4414 && i.tm.extension_opcode != 1)
4415 return 1;
4416
4417 /* inc, dec. */
4418 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4419 return 1;
4420
4421 /* add, or, adc, sbb, and, sub, xor, cmp. */
4422 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4423 return 1;
4424
4425 /* bt, bts, btr, btc. */
4426 if (i.tm.base_opcode == 0xfba
4427 && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4428 return 1;
4429
4430 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4431 if ((base_opcode == 0xc1
4432 || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4433 && i.tm.extension_opcode != 6)
4434 return 1;
4435
4436 /* cmpxchg8b, cmpxchg16b, xrstors. */
4437 if (i.tm.base_opcode == 0xfc7
4438 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
4439 return 1;
4440
4441 /* fxrstor, ldmxcsr, xrstor. */
4442 if (i.tm.base_opcode == 0xfae
4443 && (i.tm.extension_opcode == 1
4444 || i.tm.extension_opcode == 2
4445 || i.tm.extension_opcode == 5))
4446 return 1;
4447
4448 /* lgdt, lidt, lmsw. */
4449 if (i.tm.base_opcode == 0xf01
4450 && (i.tm.extension_opcode == 2
4451 || i.tm.extension_opcode == 3
4452 || i.tm.extension_opcode == 6))
4453 return 1;
4454
4455 /* vmptrld */
4456 if (i.tm.base_opcode == 0xfc7
4457 && i.tm.extension_opcode == 6)
4458 return 1;
4459
4460 /* Check for x87 instructions. */
4461 if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
4462 {
4463 /* Skip fst, fstp, fstenv, fstcw. */
4464 if (i.tm.base_opcode == 0xd9
4465 && (i.tm.extension_opcode == 2
4466 || i.tm.extension_opcode == 3
4467 || i.tm.extension_opcode == 6
4468 || i.tm.extension_opcode == 7))
4469 return 0;
4470
4471 /* Skip fisttp, fist, fistp, fstp. */
4472 if (i.tm.base_opcode == 0xdb
4473 && (i.tm.extension_opcode == 1
4474 || i.tm.extension_opcode == 2
4475 || i.tm.extension_opcode == 3
4476 || i.tm.extension_opcode == 7))
4477 return 0;
4478
4479 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4480 if (i.tm.base_opcode == 0xdd
4481 && (i.tm.extension_opcode == 1
4482 || i.tm.extension_opcode == 2
4483 || i.tm.extension_opcode == 3
4484 || i.tm.extension_opcode == 6
4485 || i.tm.extension_opcode == 7))
4486 return 0;
4487
4488 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4489 if (i.tm.base_opcode == 0xdf
4490 && (i.tm.extension_opcode == 1
4491 || i.tm.extension_opcode == 2
4492 || i.tm.extension_opcode == 3
4493 || i.tm.extension_opcode == 6
4494 || i.tm.extension_opcode == 7))
4495 return 0;
4496
4497 return 1;
4498 }
4499 }
4500
4501 dest = i.operands - 1;
4502
4503 /* Check fake imm8 operand and 3 source operands. */
4504 if ((i.tm.opcode_modifier.immext
4505 || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4506 && i.types[dest].bitfield.imm8)
4507 dest--;
4508
4509 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd */
4510 if (!any_vex_p
4511 && (base_opcode == 0x1
4512 || base_opcode == 0x9
4513 || base_opcode == 0x11
4514 || base_opcode == 0x19
4515 || base_opcode == 0x21
4516 || base_opcode == 0x29
4517 || base_opcode == 0x31
4518 || base_opcode == 0x39
4519 || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
4520 || base_opcode == 0xfc1))
4521 return 1;
4522
4523 /* Check for load instruction. */
4524 return (i.types[dest].bitfield.class != ClassNone
4525 || i.types[dest].bitfield.instance == Accum);
4526 }
4527
4528 /* Output lfence (0F AE E8) after instruction. */
4529
4530 static void
4531 insert_lfence_after (void)
4532 {
4533 if (lfence_after_load && load_insn_p ())
4534 {
4535 /* There are also two REP string instructions that require
4536 special treatment. Specifically, the compare string (CMPS)
4537 and scan string (SCAS) instructions set EFLAGS in a manner
4538 that depends on the data being compared/scanned. When used
4539 with a REP prefix, the number of iterations may therefore
4540 vary depending on this data. If the data is a program secret
4541 chosen by the adversary using an LVI method,
4542 then this data-dependent behavior may leak some aspect
4543 of the secret. */
4544 if (((i.tm.base_opcode | 0x1) == 0xa7
4545 || (i.tm.base_opcode | 0x1) == 0xaf)
4546 && i.prefix[REP_PREFIX])
4547 {
4548 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4549 i.tm.name);
4550 }
4551 char *p = frag_more (3);
4552 *p++ = 0xf;
4553 *p++ = 0xae;
4554 *p = 0xe8;
4555 }
4556 }
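/* E.g. with -mlfence-after-load=yes, `movl (%rax), %ecx' is followed
   by an lfence, so a transiently injected load value (the LVI attack
   this option mitigates) cannot be consumed before the load
   completes.  */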
4557
4558 /* Output lfence (0F AE E8) before instruction. */
4559
4560 static void
4561 insert_lfence_before (void)
4562 {
4563 char *p;
4564
4565 if (is_any_vex_encoding (&i.tm))
4566 return;
4567
4568 if (i.tm.base_opcode == 0xff
4569 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4570 {
4571 /* Insert lfence before indirect branch if needed. */
4572
4573 if (lfence_before_indirect_branch == lfence_branch_none)
4574 return;
4575
4576 if (i.operands != 1)
4577 abort ();
4578
4579 if (i.reg_operands == 1)
4580 {
4581 /* Indirect branch via register. Don't insert lfence with
4582 -mlfence-after-load=yes. */
4583 if (lfence_after_load
4584 || lfence_before_indirect_branch == lfence_branch_memory)
4585 return;
4586 }
4587 else if (i.mem_operands == 1
4588 && lfence_before_indirect_branch != lfence_branch_register)
4589 {
4590 as_warn (_("indirect `%s` with memory operand should be avoided"),
4591 i.tm.name);
4592 return;
4593 }
4594 else
4595 return;
4596
4597 if (last_insn.kind != last_insn_other
4598 && last_insn.seg == now_seg)
4599 {
4600 as_warn_where (last_insn.file, last_insn.line,
4601 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4602 last_insn.name, i.tm.name);
4603 return;
4604 }
4605
4606 p = frag_more (3);
4607 *p++ = 0xf;
4608 *p++ = 0xae;
4609 *p = 0xe8;
4610 return;
4611 }
4612
4613 /* Output or/not/shl and lfence before near ret. */
4614 if (lfence_before_ret != lfence_before_ret_none
4615 && (i.tm.base_opcode == 0xc2
4616 || i.tm.base_opcode == 0xc3))
4617 {
4618 if (last_insn.kind != last_insn_other
4619 && last_insn.seg == now_seg)
4620 {
4621 as_warn_where (last_insn.file, last_insn.line,
4622 _("`%s` skips -mlfence-before-ret on `%s`"),
4623 last_insn.name, i.tm.name);
4624 return;
4625 }
4626
4627 /* A near ret ignores any operand size override in 64-bit code. */
4628 char prefix = flag_code == CODE_64BIT
4629 ? 0x48
4630 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4631
4632 if (lfence_before_ret == lfence_before_ret_not)
4633 {
4634 /* not: f7 14 24 emitted twice, so the stack slot keeps its value;
4635 may add prefix for operand size override or 64-bit code. */
4636 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4637 if (prefix)
4638 *p++ = prefix;
4639 *p++ = 0xf7;
4640 *p++ = 0x14;
4641 *p++ = 0x24;
4642 if (prefix)
4643 *p++ = prefix;
4644 *p++ = 0xf7;
4645 *p++ = 0x14;
4646 *p++ = 0x24;
4647 }
4648 else
4649 {
4650 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4651 if (prefix)
4652 *p++ = prefix;
4653 if (lfence_before_ret == lfence_before_ret_or)
4654 {
4655 /* or: 83 0c 24 00, may add prefix
4656 for operand size override or 64-bit code. */
4657 *p++ = 0x83;
4658 *p++ = 0x0c;
4659 }
4660 else
4661 {
4662 /* shl: c1 24 24 00, may add prefix
4663 for operand size override or 64-bit code. */
4664 *p++ = 0xc1;
4665 *p++ = 0x24;
4666 }
4667
4668 *p++ = 0x24;
4669 *p++ = 0x0;
4670 }
4671
4672 *p++ = 0xf;
4673 *p++ = 0xae;
4674 *p = 0xe8;
4675 }
4676 }
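/* E.g. with -mlfence-before-ret=or in 64-bit code, a plain `ret' is
   preceded by `orq $0x0, (%rsp)' and `lfence': the dummy
   read-modify-write touches the return address without changing it,
   letting the lfence order the value `ret' will consume.  */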
4677
4678 /* This is the guts of the machine-dependent assembler. LINE points to a
4679 machine dependent instruction. This function is supposed to emit
4680 the frags/bytes it assembles to. */
4681
4682 void
4683 md_assemble (char *line)
4684 {
4685 unsigned int j;
4686 char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4687 const insn_template *t;
4688
4689 /* Initialize globals. */
4690 memset (&i, '\0', sizeof (i));
4691 for (j = 0; j < MAX_OPERANDS; j++)
4692 i.reloc[j] = NO_RELOC;
4693 memset (disp_expressions, '\0', sizeof (disp_expressions));
4694 memset (im_expressions, '\0', sizeof (im_expressions));
4695 save_stack_p = save_stack;
4696
4697 /* First parse an instruction mnemonic & call i386_operand for the operands.
4698 We assume that the scrubber has arranged it so that line[0] is the valid
4699 start of a (possibly prefixed) mnemonic. */
4700
4701 line = parse_insn (line, mnemonic);
4702 if (line == NULL)
4703 return;
4704 mnem_suffix = i.suffix;
4705
4706 line = parse_operands (line, mnemonic);
4707 this_operand = -1;
4708 xfree (i.memop1_string);
4709 i.memop1_string = NULL;
4710 if (line == NULL)
4711 return;
4712
4713 /* Now we've parsed the mnemonic into a set of templates, and have the
4714 operands at hand. */
4715
4716 /* All Intel opcodes have reversed operands except for "bound", "enter",
4717 "invlpga", "monitor*", "mwait*", "tpause", and "umwait". We also don't reverse
4718 intersegment "jmp" and "call" instructions with 2 immediate operands so
4719 that the immediate segment precedes the offset, as it does when in AT&T
4720 mode. */
4721 if (intel_syntax
4722 && i.operands > 1
4723 && (strcmp (mnemonic, "bound") != 0)
4724 && (strcmp (mnemonic, "invlpga") != 0)
4725 && (strncmp (mnemonic, "monitor", 7) != 0)
4726 && (strncmp (mnemonic, "mwait", 5) != 0)
4727 && (strcmp (mnemonic, "tpause") != 0)
4728 && (strcmp (mnemonic, "umwait") != 0)
4729 && !(operand_type_check (i.types[0], imm)
4730 && operand_type_check (i.types[1], imm)))
4731 swap_operands ();
4732
4733 /* The order of the immediates should be reversed for the
4734 two-immediate extrq and insertq instructions. */
4735 if (i.imm_operands == 2
4736 && (strcmp (mnemonic, "extrq") == 0
4737 || strcmp (mnemonic, "insertq") == 0))
4738 swap_2_operands (0, 1);
4739
4740 if (i.imm_operands)
4741 optimize_imm ();
4742
4743 /* Don't optimize the displacement for movabs since it only takes a
4744 64-bit displacement. */
4745 if (i.disp_operands
4746 && i.disp_encoding != disp_encoding_32bit
4747 && (flag_code != CODE_64BIT
4748 || strcmp (mnemonic, "movabs") != 0))
4749 optimize_disp ();
4750
4751 /* Next, we find a template that matches the given insn,
4752 making sure the overlap of the given operands types is consistent
4753 with the template operand types. */
4754
4755 if (!(t = match_template (mnem_suffix)))
4756 return;
4757
4758 if (sse_check != check_none
4759 && !i.tm.opcode_modifier.noavx
4760 && !i.tm.cpu_flags.bitfield.cpuavx
4761 && !i.tm.cpu_flags.bitfield.cpuavx512f
4762 && (i.tm.cpu_flags.bitfield.cpusse
4763 || i.tm.cpu_flags.bitfield.cpusse2
4764 || i.tm.cpu_flags.bitfield.cpusse3
4765 || i.tm.cpu_flags.bitfield.cpussse3
4766 || i.tm.cpu_flags.bitfield.cpusse4_1
4767 || i.tm.cpu_flags.bitfield.cpusse4_2
4768 || i.tm.cpu_flags.bitfield.cpupclmul
4769 || i.tm.cpu_flags.bitfield.cpuaes
4770 || i.tm.cpu_flags.bitfield.cpusha
4771 || i.tm.cpu_flags.bitfield.cpugfni))
4772 {
4773 (sse_check == check_warning
4774 ? as_warn
4775 : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4776 }
4777
4778 if (i.tm.opcode_modifier.fwait)
4779 if (!add_prefix (FWAIT_OPCODE))
4780 return;
4781
4782 /* Check if REP prefix is OK. */
4783 if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
4784 {
4785 as_bad (_("invalid instruction `%s' after `%s'"),
4786 i.tm.name, i.rep_prefix);
4787 return;
4788 }
4789
4790 /* Check for lock without a lockable instruction. Destination operand
4791 must be memory unless it is xchg (0x86). */
4792 if (i.prefix[LOCK_PREFIX]
4793 && (!i.tm.opcode_modifier.islockable
4794 || i.mem_operands == 0
4795 || (i.tm.base_opcode != 0x86
4796 && !(i.flags[i.operands - 1] & Operand_Mem))))
4797 {
4798 as_bad (_("expecting lockable instruction after `lock'"));
4799 return;
4800 }
4801
4802 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
4803 if (i.prefix[DATA_PREFIX]
4804 && (is_any_vex_encoding (&i.tm)
4805 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
4806 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
4807 {
4808 as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4809 return;
4810 }
4811
4812 /* Check if HLE prefix is OK. */
4813 if (i.hle_prefix && !check_hle ())
4814 return;
4815
4816 /* Check BND prefix. */
4817 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
4818 as_bad (_("expecting valid branch instruction after `bnd'"));
4819
4820 /* Check NOTRACK prefix. */
4821 if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
4822 as_bad (_("expecting indirect branch instruction after `notrack'"));
4823
4824 if (i.tm.cpu_flags.bitfield.cpumpx)
4825 {
4826 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4827 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
4828 else if (flag_code != CODE_16BIT
4829 ? i.prefix[ADDR_PREFIX]
4830 : i.mem_operands && !i.prefix[ADDR_PREFIX])
4831 as_bad (_("16-bit address isn't allowed in MPX instructions"));
4832 }
4833
4834 /* Insert BND prefix. */
4835 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
4836 {
4837 if (!i.prefix[BND_PREFIX])
4838 add_prefix (BND_PREFIX_OPCODE);
4839 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
4840 {
4841 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
4842 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
4843 }
4844 }
4845
4846 /* Check string instruction segment overrides. */
4847 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
4848 {
4849 gas_assert (i.mem_operands);
4850 if (!check_string ())
4851 return;
4852 i.disp_operands = 0;
4853 }
4854
4855 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
4856 optimize_encoding ();
4857
4858 if (!process_suffix ())
4859 return;
4860
4861 /* Update operand types and check extended states. */
4862 for (j = 0; j < i.operands; j++)
4863 {
4864 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
4865 switch (i.types[j].bitfield.class)
4866 {
4867 default:
4868 break;
4869 case RegMMX:
4870 i.xstate |= xstate_mmx;
4871 break;
4872 case RegMask:
4873 i.xstate |= xstate_mask;
4874 break;
4875 case RegSIMD:
4876 if (i.types[j].bitfield.tmmword)
4877 i.xstate |= xstate_tmm;
4878 else if (i.types[j].bitfield.zmmword)
4879 i.xstate |= xstate_zmm;
4880 else if (i.types[j].bitfield.ymmword)
4881 i.xstate |= xstate_ymm;
4882 else if (i.types[j].bitfield.xmmword)
4883 i.xstate |= xstate_xmm;
4884 break;
4885 }
4886 }
4887
4888 /* Make still unresolved immediate matches conform to size of immediate
4889 given in i.suffix. */
4890 if (!finalize_imm ())
4891 return;
4892
4893 if (i.types[0].bitfield.imm1)
4894 i.imm_operands = 0; /* kludge for shift insns. */
4895
4896 /* We only need to check those implicit registers for instructions
4897 with 3 operands or less. */
4898 if (i.operands <= 3)
4899 for (j = 0; j < i.operands; j++)
4900 if (i.types[j].bitfield.instance != InstanceNone
4901 && !i.types[j].bitfield.xmmword)
4902 i.reg_operands--;
4903
4904 /* For insns with operands there are more diddles to do to the opcode. */
4905 if (i.operands)
4906 {
4907 if (!process_operands ())
4908 return;
4909 }
4910 else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
4911 {
4912 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
4913 as_warn (_("translating to `%sp'"), i.tm.name);
4914 }
4915
4916 if (is_any_vex_encoding (&i.tm))
4917 {
4918 if (!cpu_arch_flags.bitfield.cpui286)
4919 {
4920 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
4921 i.tm.name);
4922 return;
4923 }
4924
4925 /* Check for explicit REX prefix. */
4926 if (i.prefix[REX_PREFIX] || i.rex_encoding)
4927 {
4928 as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
4929 return;
4930 }
4931
4932 if (i.tm.opcode_modifier.vex)
4933 build_vex_prefix (t);
4934 else
4935 build_evex_prefix ();
4936
4937 /* The individual REX.RXBW bits got consumed. */
4938 i.rex &= REX_OPCODE;
4939 }
4940
4941 /* Handle conversion of 'int $3' --> special int3 insn. XOP or FMA4
4942 instructions may define INT_OPCODE as well, so avoid this corner
4943 case for those instructions that use MODRM. */
4944 if (i.tm.base_opcode == INT_OPCODE
4945 && !i.tm.opcode_modifier.modrm
4946 && i.op[0].imms->X_add_number == 3)
4947 {
4948 i.tm.base_opcode = INT3_OPCODE;
4949 i.imm_operands = 0;
4950 }
4951
4952 if ((i.tm.opcode_modifier.jump == JUMP
4953 || i.tm.opcode_modifier.jump == JUMP_BYTE
4954 || i.tm.opcode_modifier.jump == JUMP_DWORD)
4955 && i.op[0].disps->X_op == O_constant)
4956 {
4957 /* Convert "jmp constant" (and "call constant") to a jump (call) to
4958 the absolute address given by the constant. Since ix86 jumps and
4959 calls are pc relative, we need to generate a reloc. */
4960 i.op[0].disps->X_add_symbol = &abs_symbol;
4961 i.op[0].disps->X_op = O_symbol;
4962 }
4963
4964 /* For 8 bit registers we need an empty rex prefix. Also if the
4965 instruction already has a prefix, we need to convert old
4966 registers to new ones. */
4967
4968 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
4969 && (i.op[0].regs->reg_flags & RegRex64) != 0)
4970 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
4971 && (i.op[1].regs->reg_flags & RegRex64) != 0)
4972 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
4973 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
4974 && i.rex != 0))
4975 {
4976 int x;
4977
4978 i.rex |= REX_OPCODE;
4979 for (x = 0; x < 2; x++)
4980 {
4981 /* Look for 8 bit operand that uses old registers. */
4982 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4983 && (i.op[x].regs->reg_flags & RegRex64) == 0)
4984 {
4985 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4986 /* In case it is "hi" register, give up. */
4987 if (i.op[x].regs->reg_num > 3)
4988 as_bad (_("can't encode register '%s%s' in an "
4989 "instruction requiring REX prefix."),
4990 register_prefix, i.op[x].regs->reg_name);
4991
4992 /* Otherwise it is equivalent to the extended register.
4993 Since the encoding doesn't change this is merely
4994 cosmetic cleanup for debug output. */
4995
4996 i.op[x].regs = i.op[x].regs + 8;
4997 }
4998 }
4999 }
5000
5001 if (i.rex == 0 && i.rex_encoding)
5002 {
5003 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5004 that uses legacy register. If it is "hi" register, don't add
5005 the REX_OPCODE byte. */
5006 int x;
5007 for (x = 0; x < 2; x++)
5008 if (i.types[x].bitfield.class == Reg
5009 && i.types[x].bitfield.byte
5010 && (i.op[x].regs->reg_flags & RegRex64) == 0
5011 && i.op[x].regs->reg_num > 3)
5012 {
5013 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5014 i.rex_encoding = FALSE;
5015 break;
5016 }
5017
5018 if (i.rex_encoding)
5019 i.rex = REX_OPCODE;
5020 }
5021
5022 if (i.rex != 0)
5023 add_prefix (REX_OPCODE | i.rex);
5024
5025 insert_lfence_before ();
5026
5027 /* We are ready to output the insn. */
5028 output_insn ();
5029
5030 insert_lfence_after ();
5031
5032 last_insn.seg = now_seg;
5033
5034 if (i.tm.opcode_modifier.isprefix)
5035 {
5036 last_insn.kind = last_insn_prefix;
5037 last_insn.name = i.tm.name;
5038 last_insn.file = as_where (&last_insn.line);
5039 }
5040 else
5041 last_insn.kind = last_insn_other;
5042 }
5043
5044 static char *
5045 parse_insn (char *line, char *mnemonic)
5046 {
5047 char *l = line;
5048 char *token_start = l;
5049 char *mnem_p;
5050 int supported;
5051 const insn_template *t;
5052 char *dot_p = NULL;
5053
5054 while (1)
5055 {
5056 mnem_p = mnemonic;
5057 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5058 {
5059 if (*mnem_p == '.')
5060 dot_p = mnem_p;
5061 mnem_p++;
5062 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5063 {
5064 as_bad (_("no such instruction: `%s'"), token_start);
5065 return NULL;
5066 }
5067 l++;
5068 }
5069 if (!is_space_char (*l)
5070 && *l != END_OF_INSN
5071 && (intel_syntax
5072 || (*l != PREFIX_SEPARATOR
5073 && *l != ',')))
5074 {
5075 as_bad (_("invalid character %s in mnemonic"),
5076 output_invalid (*l));
5077 return NULL;
5078 }
5079 if (token_start == l)
5080 {
5081 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5082 as_bad (_("expecting prefix; got nothing"));
5083 else
5084 as_bad (_("expecting mnemonic; got nothing"));
5085 return NULL;
5086 }
5087
5088 /* Look up instruction (or prefix) via hash table. */
5089 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5090
5091 if (*l != END_OF_INSN
5092 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5093 && current_templates
5094 && current_templates->start->opcode_modifier.isprefix)
5095 {
5096 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5097 {
5098 as_bad ((flag_code != CODE_64BIT
5099 ? _("`%s' is only supported in 64-bit mode")
5100 : _("`%s' is not supported in 64-bit mode")),
5101 current_templates->start->name);
5102 return NULL;
5103 }
5104 /* If we are in 16-bit mode, do not allow addr16 or data16.
5105 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5106 if ((current_templates->start->opcode_modifier.size == SIZE16
5107 || current_templates->start->opcode_modifier.size == SIZE32)
5108 && flag_code != CODE_64BIT
5109 && ((current_templates->start->opcode_modifier.size == SIZE32)
5110 ^ (flag_code == CODE_16BIT)))
5111 {
5112 as_bad (_("redundant %s prefix"),
5113 current_templates->start->name);
5114 return NULL;
5115 }
5116 if (current_templates->start->opcode_length == 0)
5117 {
5118 /* Handle pseudo prefixes. */
5119 switch (current_templates->start->base_opcode)
5120 {
5121 case Prefix_Disp8:
5122 /* {disp8} */
5123 i.disp_encoding = disp_encoding_8bit;
5124 break;
5125 case Prefix_Disp16:
5126 /* {disp16} */
5127 i.disp_encoding = disp_encoding_16bit;
5128 break;
5129 case Prefix_Disp32:
5130 /* {disp32} */
5131 i.disp_encoding = disp_encoding_32bit;
5132 break;
5133 case Prefix_Load:
5134 /* {load} */
5135 i.dir_encoding = dir_encoding_load;
5136 break;
5137 case Prefix_Store:
5138 /* {store} */
5139 i.dir_encoding = dir_encoding_store;
5140 break;
5141 case Prefix_VEX:
5142 /* {vex} */
5143 i.vec_encoding = vex_encoding_vex;
5144 break;
5145 case Prefix_VEX3:
5146 /* {vex3} */
5147 i.vec_encoding = vex_encoding_vex3;
5148 break;
5149 case Prefix_EVEX:
5150 /* {evex} */
5151 i.vec_encoding = vex_encoding_evex;
5152 break;
5153 case Prefix_REX:
5154 /* {rex} */
5155 i.rex_encoding = TRUE;
5156 break;
5157 case Prefix_NoOptimize:
5158 /* {nooptimize} */
5159 i.no_optimize = TRUE;
5160 break;
5161 default:
5162 abort ();
5163 }
5164 }
5165 else
5166 {
5167 /* Add prefix, checking for repeated prefixes. */
5168 switch (add_prefix (current_templates->start->base_opcode))
5169 {
5170 case PREFIX_EXIST:
5171 return NULL;
5172 case PREFIX_DS:
5173 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5174 i.notrack_prefix = current_templates->start->name;
5175 break;
5176 case PREFIX_REP:
5177 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5178 i.hle_prefix = current_templates->start->name;
5179 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5180 i.bnd_prefix = current_templates->start->name;
5181 else
5182 i.rep_prefix = current_templates->start->name;
5183 break;
5184 default:
5185 break;
5186 }
5187 }
5188 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5189 token_start = ++l;
5190 }
5191 else
5192 break;
5193 }
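  /* Pseudo prefixes compose with ordinary mnemonics, e.g.
     `{vex3} vmovaps %xmm0, %xmm1' forces the 3-byte VEX form and
     `{disp32} movl 1(%rax), %ebx' forces a 32-bit displacement.  */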
5194
5195 if (!current_templates)
5196 {
5197 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5198 Check if we should swap operands or force a 32-bit displacement
5199 in the encoding. */
5200 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5201 i.dir_encoding = dir_encoding_swap;
5202 else if (mnem_p - 3 == dot_p
5203 && dot_p[1] == 'd'
5204 && dot_p[2] == '8')
5205 i.disp_encoding = disp_encoding_8bit;
5206 else if (mnem_p - 4 == dot_p
5207 && dot_p[1] == 'd'
5208 && dot_p[2] == '3'
5209 && dot_p[3] == '2')
5210 i.disp_encoding = disp_encoding_32bit;
5211 else
5212 goto check_suffix;
5213 mnem_p = dot_p;
5214 *dot_p = '\0';
5215 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5216 }
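  /* E.g. `movaps.s %xmm0, %xmm1' historically selected the
     swapped-operand encoding, and the `.d8'/`.d32' suffixes forced
     the displacement width; `{load}'/`{store}' and
     `{disp8}'/`{disp32}' are the modern spellings.  */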
5217
5218 if (!current_templates)
5219 {
5220 check_suffix:
5221 if (mnem_p > mnemonic)
5222 {
5223 /* See if we can get a match by trimming off a suffix. */
5224 switch (mnem_p[-1])
5225 {
5226 case WORD_MNEM_SUFFIX:
5227 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5228 i.suffix = SHORT_MNEM_SUFFIX;
5229 else
5230 /* Fall through. */
5231 case BYTE_MNEM_SUFFIX:
5232 case QWORD_MNEM_SUFFIX:
5233 i.suffix = mnem_p[-1];
5234 mnem_p[-1] = '\0';
5235 current_templates
5236 = (const templates *) str_hash_find (op_hash, mnemonic);
5237 break;
5238 case SHORT_MNEM_SUFFIX:
5239 case LONG_MNEM_SUFFIX:
5240 if (!intel_syntax)
5241 {
5242 i.suffix = mnem_p[-1];
5243 mnem_p[-1] = '\0';
5244 current_templates
5245 = (const templates *) str_hash_find (op_hash, mnemonic);
5246 }
5247 break;
5248
5249 /* Intel Syntax. */
5250 case 'd':
5251 if (intel_syntax)
5252 {
5253 if (intel_float_operand (mnemonic) == 1)
5254 i.suffix = SHORT_MNEM_SUFFIX;
5255 else
5256 i.suffix = LONG_MNEM_SUFFIX;
5257 mnem_p[-1] = '\0';
5258 current_templates
5259 = (const templates *) str_hash_find (op_hash, mnemonic);
5260 }
5261 break;
5262 }
5263 }
5264
5265 if (!current_templates)
5266 {
5267 as_bad (_("no such instruction: `%s'"), token_start);
5268 return NULL;
5269 }
5270 }
5271
5272 if (current_templates->start->opcode_modifier.jump == JUMP
5273 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5274 {
5275 /* Check for a branch hint. We allow ",pt" and ",pn" for
5276 predict taken and predict not taken respectively.
5277 I'm not sure that branch hints actually do anything on loop
5278 and jcxz insns (JumpByte) for current Pentium4 chips. They
5279 may work in the future and it doesn't hurt to accept them
5280 now. */
5281 if (l[0] == ',' && l[1] == 'p')
5282 {
5283 if (l[2] == 't')
5284 {
5285 if (!add_prefix (DS_PREFIX_OPCODE))
5286 return NULL;
5287 l += 3;
5288 }
5289 else if (l[2] == 'n')
5290 {
5291 if (!add_prefix (CS_PREFIX_OPCODE))
5292 return NULL;
5293 l += 3;
5294 }
5295 }
5296 }
5297 /* Any other comma loses. */
5298 if (*l == ',')
5299 {
5300 as_bad (_("invalid character %s in mnemonic"),
5301 output_invalid (*l));
5302 return NULL;
5303 }
5304
5305 /* Check if instruction is supported on specified architecture. */
5306 supported = 0;
5307 for (t = current_templates->start; t < current_templates->end; ++t)
5308 {
5309 supported |= cpu_flags_match (t);
5310 if (supported == CPU_FLAGS_PERFECT_MATCH)
5311 {
5312 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
5313 as_warn (_("use .code16 to ensure correct addressing mode"));
5314
5315 return l;
5316 }
5317 }
5318
5319 if (!(supported & CPU_FLAGS_64BIT_MATCH))
5320 as_bad (flag_code == CODE_64BIT
5321 ? _("`%s' is not supported in 64-bit mode")
5322 : _("`%s' is only supported in 64-bit mode"),
5323 current_templates->start->name);
5324 else
5325 as_bad (_("`%s' is not supported on `%s%s'"),
5326 current_templates->start->name,
5327 cpu_arch_name ? cpu_arch_name : default_arch,
5328 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5329
5330 return NULL;
5331 }
5332
5333 static char *
5334 parse_operands (char *l, const char *mnemonic)
5335 {
5336 char *token_start;
5337
5338 /* 1 if operand is pending after ','. */
5339 unsigned int expecting_operand = 0;
5340
5341 /* Non-zero if operand parens not balanced. */
5342 unsigned int paren_not_balanced;
5343
5344 while (*l != END_OF_INSN)
5345 {
5346 /* Skip optional white space before operand. */
5347 if (is_space_char (*l))
5348 ++l;
5349 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5350 {
5351 as_bad (_("invalid character %s before operand %d"),
5352 output_invalid (*l),
5353 i.operands + 1);
5354 return NULL;
5355 }
5356 token_start = l; /* After white space. */
5357 paren_not_balanced = 0;
5358 while (paren_not_balanced || *l != ',')
5359 {
5360 if (*l == END_OF_INSN)
5361 {
5362 if (paren_not_balanced)
5363 {
5364 if (!intel_syntax)
5365 as_bad (_("unbalanced parenthesis in operand %d."),
5366 i.operands + 1);
5367 else
5368 as_bad (_("unbalanced brackets in operand %d."),
5369 i.operands + 1);
5370 return NULL;
5371 }
5372 else
5373 break; /* we are done */
5374 }
5375 else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
5376 {
5377 as_bad (_("invalid character %s in operand %d"),
5378 output_invalid (*l),
5379 i.operands + 1);
5380 return NULL;
5381 }
5382 if (!intel_syntax)
5383 {
5384 if (*l == '(')
5385 ++paren_not_balanced;
5386 if (*l == ')')
5387 --paren_not_balanced;
5388 }
5389 else
5390 {
5391 if (*l == '[')
5392 ++paren_not_balanced;
5393 if (*l == ']')
5394 --paren_not_balanced;
5395 }
5396 l++;
5397 }
5398 if (l != token_start)
5399 { /* Yes, we've read in another operand. */
5400 unsigned int operand_ok;
5401 this_operand = i.operands++;
5402 if (i.operands > MAX_OPERANDS)
5403 {
5404 as_bad (_("spurious operands; (%d operands/instruction max)"),
5405 MAX_OPERANDS);
5406 return NULL;
5407 }
5408 i.types[this_operand].bitfield.unspecified = 1;
5409 /* Now parse operand adding info to 'i' as we go along. */
5410 END_STRING_AND_SAVE (l);
5411
5412 if (i.mem_operands > 1)
5413 {
5414 as_bad (_("too many memory references for `%s'"),
5415 mnemonic);
5416 return NULL;
5417 }
5418
5419 if (intel_syntax)
5420 operand_ok =
5421 i386_intel_operand (token_start,
5422 intel_float_operand (mnemonic));
5423 else
5424 operand_ok = i386_att_operand (token_start);
5425
5426 RESTORE_END_STRING (l);
5427 if (!operand_ok)
5428 return NULL;
5429 }
5430 else
5431 {
5432 if (expecting_operand)
5433 {
5434 expecting_operand_after_comma:
5435 as_bad (_("expecting operand after ','; got nothing"));
5436 return NULL;
5437 }
5438 if (*l == ',')
5439 {
5440 as_bad (_("expecting operand before ','; got nothing"));
5441 return NULL;
5442 }
5443 }
5444
5445 /* Now *l must be either ',' or END_OF_INSN. */
5446 if (*l == ',')
5447 {
5448 if (*++l == END_OF_INSN)
5449 {
5450 /* A trailing comma followed by end of line: complain. */
5451 goto expecting_operand_after_comma;
5452 }
5453 expecting_operand = 1;
5454 }
5455 }
5456 return l;
5457 }
5458
5459 static void
5460 swap_2_operands (int xchg1, int xchg2)
5461 {
5462 union i386_op temp_op;
5463 i386_operand_type temp_type;
5464 unsigned int temp_flags;
5465 enum bfd_reloc_code_real temp_reloc;
5466
5467 temp_type = i.types[xchg2];
5468 i.types[xchg2] = i.types[xchg1];
5469 i.types[xchg1] = temp_type;
5470
5471 temp_flags = i.flags[xchg2];
5472 i.flags[xchg2] = i.flags[xchg1];
5473 i.flags[xchg1] = temp_flags;
5474
5475 temp_op = i.op[xchg2];
5476 i.op[xchg2] = i.op[xchg1];
5477 i.op[xchg1] = temp_op;
5478
5479 temp_reloc = i.reloc[xchg2];
5480 i.reloc[xchg2] = i.reloc[xchg1];
5481 i.reloc[xchg1] = temp_reloc;
5482
5483 if (i.mask)
5484 {
5485 if (i.mask->operand == xchg1)
5486 i.mask->operand = xchg2;
5487 else if (i.mask->operand == xchg2)
5488 i.mask->operand = xchg1;
5489 }
5490 if (i.broadcast)
5491 {
5492 if (i.broadcast->operand == xchg1)
5493 i.broadcast->operand = xchg2;
5494 else if (i.broadcast->operand == xchg2)
5495 i.broadcast->operand = xchg1;
5496 }
5497 if (i.rounding)
5498 {
5499 if (i.rounding->operand == xchg1)
5500 i.rounding->operand = xchg2;
5501 else if (i.rounding->operand == xchg2)
5502 i.rounding->operand = xchg1;
5503 }
5504 }
5505
5506 static void
5507 swap_operands (void)
5508 {
5509 switch (i.operands)
5510 {
5511 case 5:
5512 case 4:
5513 swap_2_operands (1, i.operands - 2);
5514 /* Fall through. */
5515 case 3:
5516 case 2:
5517 swap_2_operands (0, i.operands - 1);
5518 break;
5519 default:
5520 abort ();
5521 }
5522
5523 if (i.mem_operands == 2)
5524 {
5525 const seg_entry *temp_seg;
5526 temp_seg = i.seg[0];
5527 i.seg[0] = i.seg[1];
5528 i.seg[1] = temp_seg;
5529 }
5530 }
5531
5532 /* Try to ensure constant immediates are represented in the smallest
5533 opcode possible. */
5534 static void
5535 optimize_imm (void)
5536 {
5537 char guess_suffix = 0;
5538 int op;
5539
5540 if (i.suffix)
5541 guess_suffix = i.suffix;
5542 else if (i.reg_operands)
5543 {
5544 /* Figure out a suffix from the last register operand specified.
5545 We can't do this properly yet, i.e. excluding special register
5546 instances, but the following works for instructions with
5547 immediates. In any case, we can't set i.suffix yet. */
5548 for (op = i.operands; --op >= 0;)
5549 if (i.types[op].bitfield.class != Reg)
5550 continue;
5551 else if (i.types[op].bitfield.byte)
5552 {
5553 guess_suffix = BYTE_MNEM_SUFFIX;
5554 break;
5555 }
5556 else if (i.types[op].bitfield.word)
5557 {
5558 guess_suffix = WORD_MNEM_SUFFIX;
5559 break;
5560 }
5561 else if (i.types[op].bitfield.dword)
5562 {
5563 guess_suffix = LONG_MNEM_SUFFIX;
5564 break;
5565 }
5566 else if (i.types[op].bitfield.qword)
5567 {
5568 guess_suffix = QWORD_MNEM_SUFFIX;
5569 break;
5570 }
5571 }
5572 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5573 guess_suffix = WORD_MNEM_SUFFIX;
5574
5575 for (op = i.operands; --op >= 0;)
5576 if (operand_type_check (i.types[op], imm))
5577 {
5578 switch (i.op[op].imms->X_op)
5579 {
5580 case O_constant:
5581 /* If a suffix is given, this operand may be shortened. */
5582 switch (guess_suffix)
5583 {
5584 case LONG_MNEM_SUFFIX:
5585 i.types[op].bitfield.imm32 = 1;
5586 i.types[op].bitfield.imm64 = 1;
5587 break;
5588 case WORD_MNEM_SUFFIX:
5589 i.types[op].bitfield.imm16 = 1;
5590 i.types[op].bitfield.imm32 = 1;
5591 i.types[op].bitfield.imm32s = 1;
5592 i.types[op].bitfield.imm64 = 1;
5593 break;
5594 case BYTE_MNEM_SUFFIX:
5595 i.types[op].bitfield.imm8 = 1;
5596 i.types[op].bitfield.imm8s = 1;
5597 i.types[op].bitfield.imm16 = 1;
5598 i.types[op].bitfield.imm32 = 1;
5599 i.types[op].bitfield.imm32s = 1;
5600 i.types[op].bitfield.imm64 = 1;
5601 break;
5602 }
5603
5604 /* If this operand is at most 16 bits, convert it
5605 to a signed 16 bit number before trying to see
5606 whether it will fit in an even smaller size.
5607 This allows a 16-bit operand such as $0xffe0 to
5608 be recognised as within Imm8S range. */
5609 if ((i.types[op].bitfield.imm16)
5610 && (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5611 {
5612 i.op[op].imms->X_add_number =
5613 (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
5614 }
5615 #ifdef BFD64
5616 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
5617 if ((i.types[op].bitfield.imm32)
5618 && ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5619 == 0))
5620 {
5621 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5622 ^ ((offsetT) 1 << 31))
5623 - ((offsetT) 1 << 31));
5624 }
5625 #endif
5626 i.types[op]
5627 = operand_type_or (i.types[op],
5628 smallest_imm_type (i.op[op].imms->X_add_number));
5629
5630 /* We must avoid matching Imm32 templates when only a
5631 64-bit immediate is available. */
5632 if (guess_suffix == QWORD_MNEM_SUFFIX)
5633 i.types[op].bitfield.imm32 = 0;
5634 break;
5635
5636 case O_absent:
5637 case O_register:
5638 abort ();
5639
5640 /* Symbols and expressions. */
5641 default:
5642 /* Convert symbolic operand to proper sizes for matching, but don't
5643 prevent matching a set of insns that only supports sizes other
5644 than those matching the insn suffix. */
5645 {
5646 i386_operand_type mask, allowed;
5647 const insn_template *t;
5648
5649 operand_type_set (&mask, 0);
5650 operand_type_set (&allowed, 0);
5651
5652 for (t = current_templates->start;
5653 t < current_templates->end;
5654 ++t)
5655 {
5656 allowed = operand_type_or (allowed, t->operand_types[op]);
5657 allowed = operand_type_and (allowed, anyimm);
5658 }
5659 switch (guess_suffix)
5660 {
5661 case QWORD_MNEM_SUFFIX:
5662 mask.bitfield.imm64 = 1;
5663 mask.bitfield.imm32s = 1;
5664 break;
5665 case LONG_MNEM_SUFFIX:
5666 mask.bitfield.imm32 = 1;
5667 break;
5668 case WORD_MNEM_SUFFIX:
5669 mask.bitfield.imm16 = 1;
5670 break;
5671 case BYTE_MNEM_SUFFIX:
5672 mask.bitfield.imm8 = 1;
5673 break;
5674 default:
5675 break;
5676 }
5677 allowed = operand_type_and (mask, allowed);
5678 if (!operand_type_all_zero (&allowed))
5679 i.types[op] = operand_type_and (i.types[op], mask);
5680 }
5681 break;
5682 }
5683 }
5684 }
5685
5686 /* Try to use the smallest displacement type too. */
5687 static void
5688 optimize_disp (void)
5689 {
5690 int op;
5691
5692 for (op = i.operands; --op >= 0;)
5693 if (operand_type_check (i.types[op], disp))
5694 {
5695 if (i.op[op].disps->X_op == O_constant)
5696 {
5697 offsetT op_disp = i.op[op].disps->X_add_number;
5698
5699 if (i.types[op].bitfield.disp16
5700 && (op_disp & ~(offsetT) 0xffff) == 0)
5701 {
5702 /* If this operand is at most 16 bits, convert
5703 to a signed 16 bit number and don't use 64bit
5704 displacement. */
5705 op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5706 i.types[op].bitfield.disp64 = 0;
5707 }
5708 #ifdef BFD64
5709 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
5710 if (i.types[op].bitfield.disp32
5711 && (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5712 {
5713 /* If this operand is at most 32 bits, convert
5714 to a signed 32 bit number and don't use 64bit
5715 displacement. */
5716 op_disp &= (((offsetT) 2 << 31) - 1);
5717 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5718 i.types[op].bitfield.disp64 = 0;
5719 }
5720 #endif
5721 if (!op_disp && i.types[op].bitfield.baseindex)
5722 {
5723 i.types[op].bitfield.disp8 = 0;
5724 i.types[op].bitfield.disp16 = 0;
5725 i.types[op].bitfield.disp32 = 0;
5726 i.types[op].bitfield.disp32s = 0;
5727 i.types[op].bitfield.disp64 = 0;
5728 i.op[op].disps = 0;
5729 i.disp_operands--;
5730 }
5731 else if (flag_code == CODE_64BIT)
5732 {
5733 if (fits_in_signed_long (op_disp))
5734 {
5735 i.types[op].bitfield.disp64 = 0;
5736 i.types[op].bitfield.disp32s = 1;
5737 }
5738 if (i.prefix[ADDR_PREFIX]
5739 && fits_in_unsigned_long (op_disp))
5740 i.types[op].bitfield.disp32 = 1;
5741 }
5742 if ((i.types[op].bitfield.disp32
5743 || i.types[op].bitfield.disp32s
5744 || i.types[op].bitfield.disp16)
5745 && fits_in_disp8 (op_disp))
5746 i.types[op].bitfield.disp8 = 1;
5747 }
5748 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5749 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5750 {
5751 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5752 i.op[op].disps, 0, i.reloc[op]);
5753 i.types[op].bitfield.disp8 = 0;
5754 i.types[op].bitfield.disp16 = 0;
5755 i.types[op].bitfield.disp32 = 0;
5756 i.types[op].bitfield.disp32s = 0;
5757 i.types[op].bitfield.disp64 = 0;
5758 }
5759 else
5760 /* We only support 64-bit displacements on constants. */
5761 i.types[op].bitfield.disp64 = 0;
5762 }
5763 }
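/* Note that fits_in_disp8 honours i.memshift, implementing the EVEX
   compressed displacement (disp8*N): e.g. for a full ZMM access such
   as `vmovaps 0x40(%rax), %zmm0' the displacement is scaled down by
   64 and stored as a disp8 of 1.  */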
5764
5765 /* Return 1 if there is a match in broadcast bytes between operand
5766 GIVEN and instruction template T. */
5767
5768 static INLINE int
5769 match_broadcast_size (const insn_template *t, unsigned int given)
5770 {
5771 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5772 && i.types[given].bitfield.byte)
5773 || (t->opcode_modifier.broadcast == WORD_BROADCAST
5774 && i.types[given].bitfield.word)
5775 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5776 && i.types[given].bitfield.dword)
5777 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5778 && i.types[given].bitfield.qword));
5779 }
5780
5781 /* Check if operands are valid for the instruction. */
5782
5783 static int
5784 check_VecOperands (const insn_template *t)
5785 {
5786 unsigned int op;
5787 i386_cpu_flags cpu;
5788
5789 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5790 any one operand implicitly require AVX512VL support if the actual
5791 operand size is YMMword or XMMword. Since this function runs after
5792 template matching, there's no need to check for YMMword/XMMword in
5793 the template. */
5794 cpu = cpu_flags_and (t->cpu_flags, avx512);
5795 if (!cpu_flags_all_zero (&cpu)
5796 && !t->cpu_flags.bitfield.cpuavx512vl
5797 && !cpu_arch_flags.bitfield.cpuavx512vl)
5798 {
5799 for (op = 0; op < t->operands; ++op)
5800 {
5801 if (t->operand_types[op].bitfield.zmmword
5802 && (i.types[op].bitfield.ymmword
5803 || i.types[op].bitfield.xmmword))
5804 {
5805 i.error = unsupported;
5806 return 1;
5807 }
5808 }
5809 }
5810
5811 /* Without VSIB byte, we can't have a vector register for index. */
5812 if (!t->opcode_modifier.sib
5813 && i.index_reg
5814 && (i.index_reg->reg_type.bitfield.xmmword
5815 || i.index_reg->reg_type.bitfield.ymmword
5816 || i.index_reg->reg_type.bitfield.zmmword))
5817 {
5818 i.error = unsupported_vector_index_register;
5819 return 1;
5820 }
5821
5822 /* Check if default mask is allowed. */
5823 if (t->opcode_modifier.nodefmask
5824 && (!i.mask || i.mask->mask->reg_num == 0))
5825 {
5826 i.error = no_default_mask;
5827 return 1;
5828 }
5829
5830 /* For VSIB byte, we need a vector register for index, and all vector
5831 registers must be distinct. */
5832 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
5833 {
5834 if (!i.index_reg
5835 || !((t->opcode_modifier.sib == VECSIB128
5836 && i.index_reg->reg_type.bitfield.xmmword)
5837 || (t->opcode_modifier.sib == VECSIB256
5838 && i.index_reg->reg_type.bitfield.ymmword)
5839 || (t->opcode_modifier.sib == VECSIB512
5840 && i.index_reg->reg_type.bitfield.zmmword)))
5841 {
5842 i.error = invalid_vsib_address;
5843 return 1;
5844 }
5845
5846 gas_assert (i.reg_operands == 2 || i.mask);
5847 if (i.reg_operands == 2 && !i.mask)
5848 {
5849 gas_assert (i.types[0].bitfield.class == RegSIMD);
5850 gas_assert (i.types[0].bitfield.xmmword
5851 || i.types[0].bitfield.ymmword);
5852 gas_assert (i.types[2].bitfield.class == RegSIMD);
5853 gas_assert (i.types[2].bitfield.xmmword
5854 || i.types[2].bitfield.ymmword);
5855 if (operand_check == check_none)
5856 return 0;
5857 if (register_number (i.op[0].regs)
5858 != register_number (i.index_reg)
5859 && register_number (i.op[2].regs)
5860 != register_number (i.index_reg)
5861 && register_number (i.op[0].regs)
5862 != register_number (i.op[2].regs))
5863 return 0;
5864 if (operand_check == check_error)
5865 {
5866 i.error = invalid_vector_register_set;
5867 return 1;
5868 }
5869 as_warn (_("mask, index, and destination registers should be distinct"));
5870 }
5871 else if (i.reg_operands == 1 && i.mask)
5872 {
5873 if (i.types[1].bitfield.class == RegSIMD
5874 && (i.types[1].bitfield.xmmword
5875 || i.types[1].bitfield.ymmword
5876 || i.types[1].bitfield.zmmword)
5877 && (register_number (i.op[1].regs)
5878 == register_number (i.index_reg)))
5879 {
5880 if (operand_check == check_error)
5881 {
5882 i.error = invalid_vector_register_set;
5883 return 1;
5884 }
5885 if (operand_check != check_none)
5886 as_warn (_("index and destination registers should be distinct"));
5887 }
5888 }
5889 }
5890
5891 /* For AMX instructions with three tmmword operands, all tmmword
5892 operands must be distinct.  */
5893 if (t->operand_types[0].bitfield.tmmword
5894 && i.reg_operands == 3)
5895 {
5896 if (register_number (i.op[0].regs)
5897 == register_number (i.op[1].regs)
5898 || register_number (i.op[0].regs)
5899 == register_number (i.op[2].regs)
5900 || register_number (i.op[1].regs)
5901 == register_number (i.op[2].regs))
5902 {
5903 i.error = invalid_tmm_register_set;
5904 return 1;
5905 }
5906 }
5907
5908 /* Check if broadcast is supported by the instruction and is applied
5909 to the memory operand. */
5910 if (i.broadcast)
5911 {
5912 i386_operand_type type, overlap;
5913
5914 /* Check if specified broadcast is supported in this instruction,
5915 and its broadcast bytes match the memory operand. */
5916 op = i.broadcast->operand;
5917 if (!t->opcode_modifier.broadcast
5918 || !(i.flags[op] & Operand_Mem)
5919 || (!i.types[op].bitfield.unspecified
5920 && !match_broadcast_size (t, op)))
5921 {
5922 bad_broadcast:
5923 i.error = unsupported_broadcast;
5924 return 1;
5925 }
5926
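 /* Worked example (illustrative): for "vaddps (%rax){1to16}, %zmm1,
 %zmm2" the template has DWORD_BROADCAST, so the element size is
 1 << (3 - 1) == 4 bytes, and 16 * 4 == 64 bytes, i.e. a full
 ZMM-sized broadcast.  */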
5927 i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
5928 * i.broadcast->type);
5929 operand_type_set (&type, 0);
5930 switch (i.broadcast->bytes)
5931 {
5932 case 2:
5933 type.bitfield.word = 1;
5934 break;
5935 case 4:
5936 type.bitfield.dword = 1;
5937 break;
5938 case 8:
5939 type.bitfield.qword = 1;
5940 break;
5941 case 16:
5942 type.bitfield.xmmword = 1;
5943 break;
5944 case 32:
5945 type.bitfield.ymmword = 1;
5946 break;
5947 case 64:
5948 type.bitfield.zmmword = 1;
5949 break;
5950 default:
5951 goto bad_broadcast;
5952 }
5953
5954 overlap = operand_type_and (type, t->operand_types[op]);
5955 if (t->operand_types[op].bitfield.class == RegSIMD
5956 && t->operand_types[op].bitfield.byte
5957 + t->operand_types[op].bitfield.word
5958 + t->operand_types[op].bitfield.dword
5959 + t->operand_types[op].bitfield.qword > 1)
5960 {
5961 overlap.bitfield.xmmword = 0;
5962 overlap.bitfield.ymmword = 0;
5963 overlap.bitfield.zmmword = 0;
5964 }
5965 if (operand_type_all_zero (&overlap))
5966 goto bad_broadcast;
5967
5968 if (t->opcode_modifier.checkregsize)
5969 {
5970 unsigned int j;
5971
5972 type.bitfield.baseindex = 1;
5973 for (j = 0; j < i.operands; ++j)
5974 {
5975 if (j != op
5976 && !operand_type_register_match (i.types[j],
5977 t->operand_types[j],
5978 type,
5979 t->operand_types[op]))
5980 goto bad_broadcast;
5981 }
5982 }
5983 }
5984 /* If broadcast is supported in this instruction, we need to check
5985 that an operand of one-element size isn't specified without broadcast.  */
5986 else if (t->opcode_modifier.broadcast && i.mem_operands)
5987 {
5988 /* Find memory operand. */
5989 for (op = 0; op < i.operands; op++)
5990 if (i.flags[op] & Operand_Mem)
5991 break;
5992 gas_assert (op < i.operands);
5993 /* Check size of the memory operand. */
5994 if (match_broadcast_size (t, op))
5995 {
5996 i.error = broadcast_needed;
5997 return 1;
5998 }
5999 }
6000 else
6001 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6002
6003 /* Check if requested masking is supported. */
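 /* For example (illustrative): "vaddps %zmm1, %zmm2, %zmm3{%k1}"
 requests merging masking, while a trailing {z} as in
 "vaddps %zmm1, %zmm2, %zmm3{%k1}{z}" requests zeroing masking,
 which DYNAMIC_MASKING templates reject for a memory destination.  */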
6004 if (i.mask)
6005 {
6006 switch (t->opcode_modifier.masking)
6007 {
6008 case BOTH_MASKING:
6009 break;
6010 case MERGING_MASKING:
6011 if (i.mask->zeroing)
6012 {
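 /* Entered directly when the template supports no masking at
 all (t->opcode_modifier.masking == 0).  */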
6013 case 0:
6014 i.error = unsupported_masking;
6015 return 1;
6016 }
6017 break;
6018 case DYNAMIC_MASKING:
6019 /* Memory destinations allow only merging masking. */
6020 if (i.mask->zeroing && i.mem_operands)
6021 {
6022 /* Find memory operand. */
6023 for (op = 0; op < i.operands; op++)
6024 if (i.flags[op] & Operand_Mem)
6025 break;
6026 gas_assert (op < i.operands);
6027 if (op == i.operands - 1)
6028 {
6029 i.error = unsupported_masking;
6030 return 1;
6031 }
6032 }
6033 break;
6034 default:
6035 abort ();
6036 }
6037 }
6038
6039 /* Check if masking is applied to dest operand. */
6040 if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
6041 {
6042 i.error = mask_not_on_destination;
6043 return 1;
6044 }
6045
6046 /* Check RC/SAE. */
6047 if (i.rounding)
6048 {
6049 if (!t->opcode_modifier.sae
6050 || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
6051 {
6052 i.error = unsupported_rc_sae;
6053 return 1;
6054 }
6055 /* If the instruction has several immediate operands and one of
6056 them is rounding, the rounding operand should be the last
6057 immediate operand. */
6058 if (i.imm_operands > 1
6059 && i.rounding->operand != (int) (i.imm_operands - 1))
6060 {
6061 i.error = rc_sae_operand_not_last_imm;
6062 return 1;
6063 }
6064 }
6065
6066 /* Check the special Imm4 cases; must be the first operand. */
6067 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6068 {
6069 if (i.op[0].imms->X_op != O_constant
6070 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6071 {
6072 i.error = bad_imm4;
6073 return 1;
6074 }
6075
6076 /* Turn off Imm<N> so that update_imm won't complain. */
6077 operand_type_set (&i.types[0], 0);
6078 }
6079
6080 /* Check vector Disp8 operand. */
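 /* Worked example of the compressed disp8 (illustrative): a full ZMM
 memory operand gives i.memshift == 6, so "vmovaps 0x40(%rax), %zmm0"
 can encode its displacement as the single byte 0x40 >> 6 == 1
 instead of a 32-bit displacement.  */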
6081 if (t->opcode_modifier.disp8memshift
6082 && i.disp_encoding != disp_encoding_32bit)
6083 {
6084 if (i.broadcast)
6085 i.memshift = t->opcode_modifier.broadcast - 1;
6086 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6087 i.memshift = t->opcode_modifier.disp8memshift;
6088 else
6089 {
6090 const i386_operand_type *type = NULL;
6091
6092 i.memshift = 0;
6093 for (op = 0; op < i.operands; op++)
6094 if (i.flags[op] & Operand_Mem)
6095 {
6096 if (t->opcode_modifier.evex == EVEXLIG)
6097 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6098 else if (t->operand_types[op].bitfield.xmmword
6099 + t->operand_types[op].bitfield.ymmword
6100 + t->operand_types[op].bitfield.zmmword <= 1)
6101 type = &t->operand_types[op];
6102 else if (!i.types[op].bitfield.unspecified)
6103 type = &i.types[op];
6104 }
6105 else if (i.types[op].bitfield.class == RegSIMD
6106 && t->opcode_modifier.evex != EVEXLIG)
6107 {
6108 if (i.types[op].bitfield.zmmword)
6109 i.memshift = 6;
6110 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6111 i.memshift = 5;
6112 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6113 i.memshift = 4;
6114 }
6115
6116 if (type)
6117 {
6118 if (type->bitfield.zmmword)
6119 i.memshift = 6;
6120 else if (type->bitfield.ymmword)
6121 i.memshift = 5;
6122 else if (type->bitfield.xmmword)
6123 i.memshift = 4;
6124 }
6125
6126 /* For the check in fits_in_disp8(). */
6127 if (i.memshift == 0)
6128 i.memshift = -1;
6129 }
6130
6131 for (op = 0; op < i.operands; op++)
6132 if (operand_type_check (i.types[op], disp)
6133 && i.op[op].disps->X_op == O_constant)
6134 {
6135 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6136 {
6137 i.types[op].bitfield.disp8 = 1;
6138 return 0;
6139 }
6140 i.types[op].bitfield.disp8 = 0;
6141 }
6142 }
6143
6144 i.memshift = 0;
6145
6146 return 0;
6147 }
6148
6149 /* Check if encoding requirements are met by the instruction. */
6150
6151 static int
6152 VEX_check_encoding (const insn_template *t)
6153 {
6154 if (i.vec_encoding == vex_encoding_error)
6155 {
6156 i.error = unsupported;
6157 return 1;
6158 }
6159
6160 if (i.vec_encoding == vex_encoding_evex)
6161 {
6162 /* This instruction must be encoded with EVEX prefix. */
6163 if (!is_evex_encoding (t))
6164 {
6165 i.error = unsupported;
6166 return 1;
6167 }
6168 return 0;
6169 }
6170
6171 if (!t->opcode_modifier.vex)
6172 {
6173 /* This instruction template doesn't have VEX prefix. */
6174 if (i.vec_encoding != vex_encoding_default)
6175 {
6176 i.error = unsupported;
6177 return 1;
6178 }
6179 return 0;
6180 }
6181
6182 return 0;
6183 }
6184
6185 static const insn_template *
6186 match_template (char mnem_suffix)
6187 {
6188 /* Points to template once we've found it. */
6189 const insn_template *t;
6190 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6191 i386_operand_type overlap4;
6192 unsigned int found_reverse_match;
6193 i386_opcode_modifier suffix_check;
6194 i386_operand_type operand_types [MAX_OPERANDS];
6195 int addr_prefix_disp;
6196 unsigned int j, size_match, check_register;
6197 enum i386_error specific_error = 0;
6198
6199 #if MAX_OPERANDS != 5
6200 # error "MAX_OPERANDS must be 5."
6201 #endif
6202
6203 found_reverse_match = 0;
6204 addr_prefix_disp = -1;
6205
6206 /* Prepare for mnemonic suffix check. */
6207 memset (&suffix_check, 0, sizeof (suffix_check));
6208 switch (mnem_suffix)
6209 {
6210 case BYTE_MNEM_SUFFIX:
6211 suffix_check.no_bsuf = 1;
6212 break;
6213 case WORD_MNEM_SUFFIX:
6214 suffix_check.no_wsuf = 1;
6215 break;
6216 case SHORT_MNEM_SUFFIX:
6217 suffix_check.no_ssuf = 1;
6218 break;
6219 case LONG_MNEM_SUFFIX:
6220 suffix_check.no_lsuf = 1;
6221 break;
6222 case QWORD_MNEM_SUFFIX:
6223 suffix_check.no_qsuf = 1;
6224 break;
6225 default:
6226 /* NB: In Intel syntax, normally we can check for memory operand
6227 size when there is no mnemonic suffix. But jmp and call have
6228 2 different encodings with Dword memory operand size, one with
6229 No_ldSuf and the other without. i.suffix is set to
6230 LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf. */
6231 if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6232 suffix_check.no_ldsuf = 1;
6233 }
6234
6235 /* Must have right number of operands. */
6236 i.error = number_of_operands_mismatch;
6237
6238 for (t = current_templates->start; t < current_templates->end; t++)
6239 {
6240 addr_prefix_disp = -1;
6241 found_reverse_match = 0;
6242
6243 if (i.operands != t->operands)
6244 continue;
6245
6246 /* Check processor support. */
6247 i.error = unsupported;
6248 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6249 continue;
6250
6251 /* Check AT&T mnemonic. */
6252 i.error = unsupported_with_intel_mnemonic;
6253 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6254 continue;
6255
6256 /* Check AT&T/Intel syntax. */
6257 i.error = unsupported_syntax;
6258 if ((intel_syntax && t->opcode_modifier.attsyntax)
6259 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6260 continue;
6261
6262 /* Check Intel64/AMD64 ISA. */
6263 switch (isa64)
6264 {
6265 default:
6266 /* Default: Don't accept Intel64. */
6267 if (t->opcode_modifier.isa64 == INTEL64)
6268 continue;
6269 break;
6270 case amd64:
6271 /* -mamd64: Accept neither Intel64 nor Intel64-only insns.  */
6272 if (t->opcode_modifier.isa64 >= INTEL64)
6273 continue;
6274 break;
6275 case intel64:
6276 /* -mintel64: Don't accept AMD64. */
6277 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6278 continue;
6279 break;
6280 }
6281
6282 /* Check the suffix. */
6283 i.error = invalid_instruction_suffix;
6284 if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6285 || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6286 || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6287 || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6288 || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6289 || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6290 continue;
6291
6292 size_match = operand_size_match (t);
6293 if (!size_match)
6294 continue;
6295
6296 /* This is intentionally not
6297
6298 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6299
6300 as the case of a missing * on the operand is accepted (perhaps with
6301 a warning, issued further down). */
6302 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6303 {
6304 i.error = operand_type_mismatch;
6305 continue;
6306 }
6307
6308 for (j = 0; j < MAX_OPERANDS; j++)
6309 operand_types[j] = t->operand_types[j];
6310
6311 /* In general, don't allow
6312 - 64-bit operands outside of 64-bit mode,
6313 - 32-bit operands on pre-386. */
6314 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6315 if (((i.suffix == QWORD_MNEM_SUFFIX
6316 && flag_code != CODE_64BIT
6317 && (t->base_opcode != 0x0fc7
6318 || t->extension_opcode != 1 /* cmpxchg8b */))
6319 || (i.suffix == LONG_MNEM_SUFFIX
6320 && !cpu_arch_flags.bitfield.cpui386))
6321 && (intel_syntax
6322 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6323 && !intel_float_operand (t->name))
6324 : intel_float_operand (t->name) != 2)
6325 && (t->operands == i.imm_operands
6326 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6327 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6328 && operand_types[i.imm_operands].bitfield.class != RegMask)
6329 || (operand_types[j].bitfield.class != RegMMX
6330 && operand_types[j].bitfield.class != RegSIMD
6331 && operand_types[j].bitfield.class != RegMask))
6332 && !t->opcode_modifier.sib)
6333 continue;
6334
6335 /* Do not verify operands when there are none. */
6336 if (!t->operands)
6337 {
6338 if (VEX_check_encoding (t))
6339 {
6340 specific_error = i.error;
6341 continue;
6342 }
6343
6344 /* We've found a match; break out of loop. */
6345 break;
6346 }
6347
6348 if (!t->opcode_modifier.jump
6349 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6350 {
6351 /* There should be only one Disp operand. */
6352 for (j = 0; j < MAX_OPERANDS; j++)
6353 if (operand_type_check (operand_types[j], disp))
6354 break;
6355 if (j < MAX_OPERANDS)
6356 {
6357 bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
6358
6359 addr_prefix_disp = j;
6360
6361 /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6362 operand into Disp32/Disp32/Disp16/Disp32 operand. */
6363 switch (flag_code)
6364 {
6365 case CODE_16BIT:
6366 override = !override;
6367 /* Fall through. */
6368 case CODE_32BIT:
6369 if (operand_types[j].bitfield.disp32
6370 && operand_types[j].bitfield.disp16)
6371 {
6372 operand_types[j].bitfield.disp16 = override;
6373 operand_types[j].bitfield.disp32 = !override;
6374 }
6375 operand_types[j].bitfield.disp32s = 0;
6376 operand_types[j].bitfield.disp64 = 0;
6377 break;
6378
6379 case CODE_64BIT:
6380 if (operand_types[j].bitfield.disp32s
6381 || operand_types[j].bitfield.disp64)
6382 {
6383 operand_types[j].bitfield.disp64 &= !override;
6384 operand_types[j].bitfield.disp32s &= !override;
6385 operand_types[j].bitfield.disp32 = override;
6386 }
6387 operand_types[j].bitfield.disp16 = 0;
6388 break;
6389 }
6390 }
6391 }
6392
6393 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6394 if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
6395 continue;
6396
6397 /* We check register size if needed. */
6398 if (t->opcode_modifier.checkregsize)
6399 {
6400 check_register = (1 << t->operands) - 1;
6401 if (i.broadcast)
6402 check_register &= ~(1 << i.broadcast->operand);
6403 }
6404 else
6405 check_register = 0;
6406
6407 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6408 switch (t->operands)
6409 {
6410 case 1:
6411 if (!operand_type_match (overlap0, i.types[0]))
6412 continue;
6413 break;
6414 case 2:
6415 /* xchg %eax, %eax is a special case. It is an alias for nop
6416 only in 32bit mode and we can use opcode 0x90. In 64bit
6417 mode, we can't use 0x90 for xchg %eax, %eax since it should
6418 zero-extend %eax to %rax. */
6419 if (flag_code == CODE_64BIT
6420 && t->base_opcode == 0x90
6421 && i.types[0].bitfield.instance == Accum
6422 && i.types[0].bitfield.dword
6423 && i.types[1].bitfield.instance == Accum
6424 && i.types[1].bitfield.dword)
6425 continue;
6426 /* xrelease mov %eax, <disp> is another special case. It must not
6427 match the accumulator-only encoding of mov. */
6428 if (flag_code != CODE_64BIT
6429 && i.hle_prefix
6430 && t->base_opcode == 0xa0
6431 && i.types[0].bitfield.instance == Accum
6432 && (i.flags[1] & Operand_Mem))
6433 continue;
6434 /* Fall through. */
6435
6436 case 3:
6437 if (!(size_match & MATCH_STRAIGHT))
6438 goto check_reverse;
6439 /* Reverse direction of operands if swapping is possible in the first
6440 place (operands need to be symmetric) and
6441 - the load form is requested, and the template is a store form,
6442 - the store form is requested, and the template is a load form,
6443 - the non-default (swapped) form is requested. */
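 /* For example (illustrative): "mov %eax, %edx" has a store form
 (opcode 0x89) and a load form (0x8b); the {load} and {store}
 pseudo prefixes choose between them here.  */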
6444 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6445 if (t->opcode_modifier.d && i.reg_operands == i.operands
6446 && !operand_type_all_zero (&overlap1))
6447 switch (i.dir_encoding)
6448 {
6449 case dir_encoding_load:
6450 if (operand_type_check (operand_types[i.operands - 1], anymem)
6451 || t->opcode_modifier.regmem)
6452 goto check_reverse;
6453 break;
6454
6455 case dir_encoding_store:
6456 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6457 && !t->opcode_modifier.regmem)
6458 goto check_reverse;
6459 break;
6460
6461 case dir_encoding_swap:
6462 goto check_reverse;
6463
6464 case dir_encoding_default:
6465 break;
6466 }
6467 /* If we want store form, we skip the current load. */
6468 if ((i.dir_encoding == dir_encoding_store
6469 || i.dir_encoding == dir_encoding_swap)
6470 && i.mem_operands == 0
6471 && t->opcode_modifier.load)
6472 continue;
6473 /* Fall through. */
6474 case 4:
6475 case 5:
6476 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6477 if (!operand_type_match (overlap0, i.types[0])
6478 || !operand_type_match (overlap1, i.types[1])
6479 || ((check_register & 3) == 3
6480 && !operand_type_register_match (i.types[0],
6481 operand_types[0],
6482 i.types[1],
6483 operand_types[1])))
6484 {
6485 /* Check if other direction is valid ... */
6486 if (!t->opcode_modifier.d)
6487 continue;
6488
6489 check_reverse:
6490 if (!(size_match & MATCH_REVERSE))
6491 continue;
6492 /* Try reversing direction of operands. */
6493 overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6494 overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6495 if (!operand_type_match (overlap0, i.types[0])
6496 || !operand_type_match (overlap1, i.types[i.operands - 1])
6497 || (check_register
6498 && !operand_type_register_match (i.types[0],
6499 operand_types[i.operands - 1],
6500 i.types[i.operands - 1],
6501 operand_types[0])))
6502 {
6503 /* Does not match either direction. */
6504 continue;
6505 }
6506 /* found_reverse_match holds which of D or FloatR
6507 we've found. */
6508 if (!t->opcode_modifier.d)
6509 found_reverse_match = 0;
6510 else if (operand_types[0].bitfield.tbyte)
6511 found_reverse_match = Opcode_FloatD;
6512 else if (operand_types[0].bitfield.xmmword
6513 || operand_types[i.operands - 1].bitfield.xmmword
6514 || operand_types[0].bitfield.class == RegMMX
6515 || operand_types[i.operands - 1].bitfield.class == RegMMX
6516 || is_any_vex_encoding (t))
6517 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6518 ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6519 else
6520 found_reverse_match = Opcode_D;
6521 if (t->opcode_modifier.floatr)
6522 found_reverse_match |= Opcode_FloatR;
6523 }
6524 else
6525 {
6526 /* Found a forward 2 operand match here. */
6527 switch (t->operands)
6528 {
6529 case 5:
6530 overlap4 = operand_type_and (i.types[4],
6531 operand_types[4]);
6532 /* Fall through. */
6533 case 4:
6534 overlap3 = operand_type_and (i.types[3],
6535 operand_types[3]);
6536 /* Fall through. */
6537 case 3:
6538 overlap2 = operand_type_and (i.types[2],
6539 operand_types[2]);
6540 break;
6541 }
6542
6543 switch (t->operands)
6544 {
6545 case 5:
6546 if (!operand_type_match (overlap4, i.types[4])
6547 || !operand_type_register_match (i.types[3],
6548 operand_types[3],
6549 i.types[4],
6550 operand_types[4]))
6551 continue;
6552 /* Fall through. */
6553 case 4:
6554 if (!operand_type_match (overlap3, i.types[3])
6555 || ((check_register & 0xa) == 0xa
6556 && !operand_type_register_match (i.types[1],
6557 operand_types[1],
6558 i.types[3],
6559 operand_types[3]))
6560 || ((check_register & 0xc) == 0xc
6561 && !operand_type_register_match (i.types[2],
6562 operand_types[2],
6563 i.types[3],
6564 operand_types[3])))
6565 continue;
6566 /* Fall through. */
6567 case 3:
6568 /* Here we make use of the fact that there are no
6569 reverse-match 3-operand instructions.  */
6570 if (!operand_type_match (overlap2, i.types[2])
6571 || ((check_register & 5) == 5
6572 && !operand_type_register_match (i.types[0],
6573 operand_types[0],
6574 i.types[2],
6575 operand_types[2]))
6576 || ((check_register & 6) == 6
6577 && !operand_type_register_match (i.types[1],
6578 operand_types[1],
6579 i.types[2],
6580 operand_types[2])))
6581 continue;
6582 break;
6583 }
6584 }
6585 /* Found either forward/reverse 2, 3 or 4 operand match here:
6586 slip through to break. */
6587 }
6588
6589 /* Check if vector operands are valid. */
6590 if (check_VecOperands (t))
6591 {
6592 specific_error = i.error;
6593 continue;
6594 }
6595
6596 /* Check if VEX/EVEX encoding requirements can be satisfied. */
6597 if (VEX_check_encoding (t))
6598 {
6599 specific_error = i.error;
6600 continue;
6601 }
6602
6603 /* We've found a match; break out of loop. */
6604 break;
6605 }
6606
6607 if (t == current_templates->end)
6608 {
6609 /* We found no match. */
6610 const char *err_msg;
6611 switch (specific_error ? specific_error : i.error)
6612 {
6613 default:
6614 abort ();
6615 case operand_size_mismatch:
6616 err_msg = _("operand size mismatch");
6617 break;
6618 case operand_type_mismatch:
6619 err_msg = _("operand type mismatch");
6620 break;
6621 case register_type_mismatch:
6622 err_msg = _("register type mismatch");
6623 break;
6624 case number_of_operands_mismatch:
6625 err_msg = _("number of operands mismatch");
6626 break;
6627 case invalid_instruction_suffix:
6628 err_msg = _("invalid instruction suffix");
6629 break;
6630 case bad_imm4:
6631 err_msg = _("constant doesn't fit in 4 bits");
6632 break;
6633 case unsupported_with_intel_mnemonic:
6634 err_msg = _("unsupported with Intel mnemonic");
6635 break;
6636 case unsupported_syntax:
6637 err_msg = _("unsupported syntax");
6638 break;
6639 case unsupported:
6640 as_bad (_("unsupported instruction `%s'"),
6641 current_templates->start->name);
6642 return NULL;
6643 case invalid_sib_address:
6644 err_msg = _("invalid SIB address");
6645 break;
6646 case invalid_vsib_address:
6647 err_msg = _("invalid VSIB address");
6648 break;
6649 case invalid_vector_register_set:
6650 err_msg = _("mask, index, and destination registers must be distinct");
6651 break;
6652 case invalid_tmm_register_set:
6653 err_msg = _("all tmm registers must be distinct");
6654 break;
6655 case unsupported_vector_index_register:
6656 err_msg = _("unsupported vector index register");
6657 break;
6658 case unsupported_broadcast:
6659 err_msg = _("unsupported broadcast");
6660 break;
6661 case broadcast_needed:
6662 err_msg = _("broadcast is needed for operand of such type");
6663 break;
6664 case unsupported_masking:
6665 err_msg = _("unsupported masking");
6666 break;
6667 case mask_not_on_destination:
6668 err_msg = _("mask not on destination operand");
6669 break;
6670 case no_default_mask:
6671 err_msg = _("default mask isn't allowed");
6672 break;
6673 case unsupported_rc_sae:
6674 err_msg = _("unsupported static rounding/sae");
6675 break;
6676 case rc_sae_operand_not_last_imm:
6677 if (intel_syntax)
6678 err_msg = _("RC/SAE operand must precede immediate operands");
6679 else
6680 err_msg = _("RC/SAE operand must follow immediate operands");
6681 break;
6682 case invalid_register_operand:
6683 err_msg = _("invalid register operand");
6684 break;
6685 }
6686 as_bad (_("%s for `%s'"), err_msg,
6687 current_templates->start->name);
6688 return NULL;
6689 }
6690
6691 if (!quiet_warnings)
6692 {
6693 if (!intel_syntax
6694 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6695 as_warn (_("indirect %s without `*'"), t->name);
6696
6697 if (t->opcode_modifier.isprefix
6698 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6699 {
6700 /* Warn them that a data or address size prefix doesn't
6701 affect assembly of the next line of code. */
6702 as_warn (_("stand-alone `%s' prefix"), t->name);
6703 }
6704 }
6705
6706 /* Copy the template we found. */
6707 i.tm = *t;
6708
6709 if (addr_prefix_disp != -1)
6710 i.tm.operand_types[addr_prefix_disp]
6711 = operand_types[addr_prefix_disp];
6712
6713 if (found_reverse_match)
6714 {
6715 /* If we found a reverse match we must alter the opcode direction
6716 bit and clear/flip the regmem modifier one. found_reverse_match
6717 holds bits to change (different for int & float insns). */
6718
6719 i.tm.base_opcode ^= found_reverse_match;
6720
6721 i.tm.operand_types[0] = operand_types[i.operands - 1];
6722 i.tm.operand_types[i.operands - 1] = operand_types[0];
6723
6724 /* Certain SIMD insns have their load forms specified in the opcode
6725 table, and hence we need to _set_ RegMem instead of clearing it.
6726 We need to avoid setting the bit though on insns like KMOVW. */
6727 i.tm.opcode_modifier.regmem
6728 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6729 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6730 && !i.tm.opcode_modifier.regmem;
6731 }
6732
6733 return t;
6734 }
6735
6736 static int
6737 check_string (void)
6738 {
6739 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6740 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6741
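 /* For example (illustrative): "movsw %ds:(%si), %fs:(%di)" must be
 rejected here, since the destination of a string insn always uses
 the %es segment.  */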
6742 if (i.seg[op] != NULL && i.seg[op] != &es)
6743 {
6744 as_bad (_("`%s' operand %u must use `%ses' segment"),
6745 i.tm.name,
6746 intel_syntax ? i.tm.operands - es_op : es_op + 1,
6747 register_prefix);
6748 return 0;
6749 }
6750
6751 /* There's only ever one segment override allowed per instruction.
6752 This instruction possibly has a legal segment override on the
6753 second operand, so copy the segment to where non-string
6754 instructions store it, allowing common code. */
6755 i.seg[op] = i.seg[1];
6756
6757 return 1;
6758 }
6759
6760 static int
6761 process_suffix (void)
6762 {
6763 /* If matched instruction specifies an explicit instruction mnemonic
6764 suffix, use it. */
6765 if (i.tm.opcode_modifier.size == SIZE16)
6766 i.suffix = WORD_MNEM_SUFFIX;
6767 else if (i.tm.opcode_modifier.size == SIZE32)
6768 i.suffix = LONG_MNEM_SUFFIX;
6769 else if (i.tm.opcode_modifier.size == SIZE64)
6770 i.suffix = QWORD_MNEM_SUFFIX;
6771 else if (i.reg_operands
6772 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6773 && !i.tm.opcode_modifier.addrprefixopreg)
6774 {
6775 unsigned int numop = i.operands;
6776
6777 /* movsx/movzx want only their source operand considered here, for the
6778 ambiguity checking below. The suffix will be replaced afterwards
6779 to represent the destination (register). */
6780 if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
6781 || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6782 --i.operands;
6783
6784 /* crc32 needs REX.W set regardless of suffix / source operand size. */
6785 if (i.tm.base_opcode == 0xf20f38f0
6786 && i.tm.operand_types[1].bitfield.qword)
6787 i.rex |= REX_W;
6788
6789 /* If there's no instruction mnemonic suffix we try to invent one
6790 based on GPR operands. */
6791 if (!i.suffix)
6792 {
6793 /* We take i.suffix from the last register operand specified.
6794 Destination register type is more significant than source
6795 register type. crc32 in SSE4.2 prefers source register
6796 type. */
6797 unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
6798
6799 while (op--)
6800 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
6801 || i.tm.operand_types[op].bitfield.instance == Accum)
6802 {
6803 if (i.types[op].bitfield.class != Reg)
6804 continue;
6805 if (i.types[op].bitfield.byte)
6806 i.suffix = BYTE_MNEM_SUFFIX;
6807 else if (i.types[op].bitfield.word)
6808 i.suffix = WORD_MNEM_SUFFIX;
6809 else if (i.types[op].bitfield.dword)
6810 i.suffix = LONG_MNEM_SUFFIX;
6811 else if (i.types[op].bitfield.qword)
6812 i.suffix = QWORD_MNEM_SUFFIX;
6813 else
6814 continue;
6815 break;
6816 }
6817
6818 /* As an exception, movsx/movzx silently default to a byte source
6819 in AT&T mode. */
6820 if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
6821 && !i.suffix && !intel_syntax)
6822 i.suffix = BYTE_MNEM_SUFFIX;
6823 }
6824 else if (i.suffix == BYTE_MNEM_SUFFIX)
6825 {
6826 if (intel_syntax
6827 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6828 && i.tm.opcode_modifier.no_bsuf)
6829 i.suffix = 0;
6830 else if (!check_byte_reg ())
6831 return 0;
6832 }
6833 else if (i.suffix == LONG_MNEM_SUFFIX)
6834 {
6835 if (intel_syntax
6836 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6837 && i.tm.opcode_modifier.no_lsuf
6838 && !i.tm.opcode_modifier.todword
6839 && !i.tm.opcode_modifier.toqword)
6840 i.suffix = 0;
6841 else if (!check_long_reg ())
6842 return 0;
6843 }
6844 else if (i.suffix == QWORD_MNEM_SUFFIX)
6845 {
6846 if (intel_syntax
6847 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6848 && i.tm.opcode_modifier.no_qsuf
6849 && !i.tm.opcode_modifier.todword
6850 && !i.tm.opcode_modifier.toqword)
6851 i.suffix = 0;
6852 else if (!check_qword_reg ())
6853 return 0;
6854 }
6855 else if (i.suffix == WORD_MNEM_SUFFIX)
6856 {
6857 if (intel_syntax
6858 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6859 && i.tm.opcode_modifier.no_wsuf)
6860 i.suffix = 0;
6861 else if (!check_word_reg ())
6862 return 0;
6863 }
6864 else if (intel_syntax
6865 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
6866 /* Do nothing if the instruction is going to ignore the prefix. */
6867 ;
6868 else
6869 abort ();
6870
6871 /* Undo the movsx/movzx change done above. */
6872 i.operands = numop;
6873 }
6874 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
6875 && !i.suffix)
6876 {
6877 i.suffix = stackop_size;
6878 if (stackop_size == LONG_MNEM_SUFFIX)
6879 {
6880 /* stackop_size is set to LONG_MNEM_SUFFIX for the
6881 .code16gcc directive to support 16-bit mode with
6882 32-bit address. For IRET without a suffix, generate
6883 16-bit IRET (opcode 0xcf) to return from an interrupt
6884 handler. */
6885 if (i.tm.base_opcode == 0xcf)
6886 {
6887 i.suffix = WORD_MNEM_SUFFIX;
6888 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
6889 }
6890 /* Warn about changed behavior for segment register push/pop. */
6891 else if ((i.tm.base_opcode | 1) == 0x07)
6892 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
6893 i.tm.name);
6894 }
6895 }
6896 else if (!i.suffix
6897 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
6898 || i.tm.opcode_modifier.jump == JUMP_BYTE
6899 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
6900 || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
6901 && i.tm.extension_opcode <= 3)))
6902 {
6903 switch (flag_code)
6904 {
6905 case CODE_64BIT:
6906 if (!i.tm.opcode_modifier.no_qsuf)
6907 {
6908 if (i.tm.opcode_modifier.jump == JUMP_BYTE
6909 || i.tm.opcode_modifier.no_lsuf)
6910 i.suffix = QWORD_MNEM_SUFFIX;
6911 break;
6912 }
6913 /* Fall through. */
6914 case CODE_32BIT:
6915 if (!i.tm.opcode_modifier.no_lsuf)
6916 i.suffix = LONG_MNEM_SUFFIX;
6917 break;
6918 case CODE_16BIT:
6919 if (!i.tm.opcode_modifier.no_wsuf)
6920 i.suffix = WORD_MNEM_SUFFIX;
6921 break;
6922 }
6923 }
6924
6925 if (!i.suffix
6926 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6927 /* Also cover lret/retf/iret in 64-bit mode. */
6928 || (flag_code == CODE_64BIT
6929 && !i.tm.opcode_modifier.no_lsuf
6930 && !i.tm.opcode_modifier.no_qsuf))
6931 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6932 /* Explicit sizing prefixes are assumed to disambiguate insns. */
6933 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
6934 /* Accept FLDENV et al without suffix. */
6935 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
6936 {
6937 unsigned int suffixes, evex = 0;
6938
6939 suffixes = !i.tm.opcode_modifier.no_bsuf;
6940 if (!i.tm.opcode_modifier.no_wsuf)
6941 suffixes |= 1 << 1;
6942 if (!i.tm.opcode_modifier.no_lsuf)
6943 suffixes |= 1 << 2;
6944 if (!i.tm.opcode_modifier.no_ldsuf)
6945 suffixes |= 1 << 3;
6946 if (!i.tm.opcode_modifier.no_ssuf)
6947 suffixes |= 1 << 4;
6948 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
6949 suffixes |= 1 << 5;
6950
6951 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
6952 also suitable for AT&T syntax mode, it was requested that this be
6953 restricted to just Intel syntax. */
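 /* For example (illustrative): Intel syntax "vcvtpd2dq xmm0, [rax]"
 is ambiguous without a PTR size, as both the XMMWORD and the
 YMMWORD memory form produce an XMM destination.  */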
6954 if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
6955 {
6956 unsigned int op;
6957
6958 for (op = 0; op < i.tm.operands; ++op)
6959 {
6960 if (is_evex_encoding (&i.tm)
6961 && !cpu_arch_flags.bitfield.cpuavx512vl)
6962 {
6963 if (i.tm.operand_types[op].bitfield.ymmword)
6964 i.tm.operand_types[op].bitfield.xmmword = 0;
6965 if (i.tm.operand_types[op].bitfield.zmmword)
6966 i.tm.operand_types[op].bitfield.ymmword = 0;
6967 if (!i.tm.opcode_modifier.evex
6968 || i.tm.opcode_modifier.evex == EVEXDYN)
6969 i.tm.opcode_modifier.evex = EVEX512;
6970 }
6971
6972 if (i.tm.operand_types[op].bitfield.xmmword
6973 + i.tm.operand_types[op].bitfield.ymmword
6974 + i.tm.operand_types[op].bitfield.zmmword < 2)
6975 continue;
6976
6977 /* Any properly sized operand disambiguates the insn. */
6978 if (i.types[op].bitfield.xmmword
6979 || i.types[op].bitfield.ymmword
6980 || i.types[op].bitfield.zmmword)
6981 {
6982 suffixes &= ~(7 << 6);
6983 evex = 0;
6984 break;
6985 }
6986
6987 if ((i.flags[op] & Operand_Mem)
6988 && i.tm.operand_types[op].bitfield.unspecified)
6989 {
6990 if (i.tm.operand_types[op].bitfield.xmmword)
6991 suffixes |= 1 << 6;
6992 if (i.tm.operand_types[op].bitfield.ymmword)
6993 suffixes |= 1 << 7;
6994 if (i.tm.operand_types[op].bitfield.zmmword)
6995 suffixes |= 1 << 8;
6996 if (is_evex_encoding (&i.tm))
6997 evex = EVEX512;
6998 }
6999 }
7000 }
7001
7002 /* Are multiple suffixes / operand sizes allowed? */
7003 if (suffixes & (suffixes - 1))
7004 {
7005 if (intel_syntax
7006 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7007 || operand_check == check_error))
7008 {
7009 as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7010 return 0;
7011 }
7012 if (operand_check == check_error)
7013 {
7014 as_bad (_("no instruction mnemonic suffix given and "
7015 "no register operands; can't size `%s'"), i.tm.name);
7016 return 0;
7017 }
7018 if (operand_check == check_warning)
7019 as_warn (_("%s; using default for `%s'"),
7020 intel_syntax
7021 ? _("ambiguous operand size")
7022 : _("no instruction mnemonic suffix given and "
7023 "no register operands"),
7024 i.tm.name);
7025
7026 if (i.tm.opcode_modifier.floatmf)
7027 i.suffix = SHORT_MNEM_SUFFIX;
7028 else if ((i.tm.base_opcode | 8) == 0xfbe
7029 || (i.tm.base_opcode == 0x63
7030 && i.tm.cpu_flags.bitfield.cpu64))
7031 /* handled below */;
7032 else if (evex)
7033 i.tm.opcode_modifier.evex = evex;
7034 else if (flag_code == CODE_16BIT)
7035 i.suffix = WORD_MNEM_SUFFIX;
7036 else if (!i.tm.opcode_modifier.no_lsuf)
7037 i.suffix = LONG_MNEM_SUFFIX;
7038 else
7039 i.suffix = QWORD_MNEM_SUFFIX;
7040 }
7041 }
7042
7043 if ((i.tm.base_opcode | 8) == 0xfbe
7044 || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
7045 {
7046 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7047 In AT&T syntax, if there is no suffix (warned about above), the default
7048 will be byte extension. */
7049 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7050 i.tm.base_opcode |= 1;
7051
7052 /* For further processing, the suffix should represent the destination
7053 (register). This is already the case when one was used with
7054 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7055 no suffix to begin with. */
7056 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7057 {
7058 if (i.types[1].bitfield.word)
7059 i.suffix = WORD_MNEM_SUFFIX;
7060 else if (i.types[1].bitfield.qword)
7061 i.suffix = QWORD_MNEM_SUFFIX;
7062 else
7063 i.suffix = LONG_MNEM_SUFFIX;
7064
7065 i.tm.opcode_modifier.w = 0;
7066 }
7067 }
7068
7069 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7070 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7071 != (i.tm.operand_types[1].bitfield.class == Reg);
7072
7073 /* Change the opcode based on the operand size given by i.suffix. */
7074 switch (i.suffix)
7075 {
7076 /* Size floating point instruction. */
7077 case LONG_MNEM_SUFFIX:
7078 if (i.tm.opcode_modifier.floatmf)
7079 {
7080 i.tm.base_opcode ^= 4;
7081 break;
7082 }
7083 /* fall through */
7084 case WORD_MNEM_SUFFIX:
7085 case QWORD_MNEM_SUFFIX:
7086 /* It's not a byte, select word/dword operation. */
7087 if (i.tm.opcode_modifier.w)
7088 {
7089 if (i.short_form)
7090 i.tm.base_opcode |= 8;
7091 else
7092 i.tm.base_opcode |= 1;
7093 }
7094 /* fall through */
7095 case SHORT_MNEM_SUFFIX:
7096 /* Now select between word & dword operations via the operand
7097 size prefix, except for instructions that will ignore this
7098 prefix anyway. */
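 /* For example (illustrative): "addw %ax, %bx" assembled for 32-bit
 mode gets the 0x66 data size prefix, while "jcxz" there needs the
 0x67 address size prefix since it uses %cx.  */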
7099 if (i.suffix != QWORD_MNEM_SUFFIX
7100 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7101 && !i.tm.opcode_modifier.floatmf
7102 && !is_any_vex_encoding (&i.tm)
7103 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7104 || (flag_code == CODE_64BIT
7105 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7106 {
7107 unsigned int prefix = DATA_PREFIX_OPCODE;
7108
7109 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7110 prefix = ADDR_PREFIX_OPCODE;
7111
7112 if (!add_prefix (prefix))
7113 return 0;
7114 }
7115
7116 /* Set mode64 for an operand. */
7117 if (i.suffix == QWORD_MNEM_SUFFIX
7118 && flag_code == CODE_64BIT
7119 && !i.tm.opcode_modifier.norex64
7120 && !i.tm.opcode_modifier.vexw
7121 /* Special case for xchg %rax,%rax.  It is a NOP and doesn't
7122 need rex64. */
7123 && ! (i.operands == 2
7124 && i.tm.base_opcode == 0x90
7125 && i.tm.extension_opcode == None
7126 && i.types[0].bitfield.instance == Accum
7127 && i.types[0].bitfield.qword
7128 && i.types[1].bitfield.instance == Accum
7129 && i.types[1].bitfield.qword))
7130 i.rex |= REX_W;
7131
7132 break;
7133
7134 case 0:
7135 /* Select word/dword/qword operation with explicit data sizing prefix
7136 when there are no suitable register operands. */
7137 if (i.tm.opcode_modifier.w
7138 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7139 && (!i.reg_operands
7140 || (i.reg_operands == 1
7141 /* ShiftCount */
7142 && (i.tm.operand_types[0].bitfield.instance == RegC
7143 /* InOutPortReg */
7144 || i.tm.operand_types[0].bitfield.instance == RegD
7145 || i.tm.operand_types[1].bitfield.instance == RegD
7146 /* CRC32 */
7147 || i.tm.base_opcode == 0xf20f38f0))))
7148 i.tm.base_opcode |= 1;
7149 break;
7150 }
7151
7152 if (i.tm.opcode_modifier.addrprefixopreg)
7153 {
7154 gas_assert (!i.suffix);
7155 gas_assert (i.reg_operands);
7156
7157 if (i.tm.operand_types[0].bitfield.instance == Accum
7158 || i.operands == 1)
7159 {
7160 /* The address size override prefix changes the size of the
7161 first operand. */
7162 if (flag_code == CODE_64BIT
7163 && i.op[0].regs->reg_type.bitfield.word)
7164 {
7165 as_bad (_("16-bit addressing unavailable for `%s'"),
7166 i.tm.name);
7167 return 0;
7168 }
7169
7170 if ((flag_code == CODE_32BIT
7171 ? i.op[0].regs->reg_type.bitfield.word
7172 : i.op[0].regs->reg_type.bitfield.dword)
7173 && !add_prefix (ADDR_PREFIX_OPCODE))
7174 return 0;
7175 }
7176 else
7177 {
7178 /* Check invalid register operand when the address size override
7179 prefix changes the size of register operands. */
7180 unsigned int op;
7181 enum { need_word, need_dword, need_qword } need;
7182
7183 /* Check the register operand for the address size prefix if
7184 the memory operand has no real registers, like symbol, DISP
7185 or symbol(%rip). */
7186 if (i.mem_operands == 1
7187 && i.reg_operands == 1
7188 && i.operands == 2
7189 && i.types[1].bitfield.class == Reg
7190 && (flag_code == CODE_32BIT
7191 ? i.op[1].regs->reg_type.bitfield.word
7192 : i.op[1].regs->reg_type.bitfield.dword)
7193 && ((i.base_reg == NULL && i.index_reg == NULL)
7194 || (i.base_reg
7195 && i.base_reg->reg_num == RegIP
7196 && i.base_reg->reg_type.bitfield.qword))
7197 && !add_prefix (ADDR_PREFIX_OPCODE))
7198 return 0;
7199
7200 if (flag_code == CODE_32BIT)
7201 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7202 else if (i.prefix[ADDR_PREFIX])
7203 need = need_dword;
7204 else
7205 need = flag_code == CODE_64BIT ? need_qword : need_word;
7206
7207 for (op = 0; op < i.operands; op++)
7208 {
7209 if (i.types[op].bitfield.class != Reg)
7210 continue;
7211
7212 switch (need)
7213 {
7214 case need_word:
7215 if (i.op[op].regs->reg_type.bitfield.word)
7216 continue;
7217 break;
7218 case need_dword:
7219 if (i.op[op].regs->reg_type.bitfield.dword)
7220 continue;
7221 break;
7222 case need_qword:
7223 if (i.op[op].regs->reg_type.bitfield.qword)
7224 continue;
7225 break;
7226 }
7227
7228 as_bad (_("invalid register operand size for `%s'"),
7229 i.tm.name);
7230 return 0;
7231 }
7232 }
7233 }
7234
7235 return 1;
7236 }
7237
7238 static int
7239 check_byte_reg (void)
7240 {
7241 int op;
7242
7243 for (op = i.operands; --op >= 0;)
7244 {
7245 /* Skip non-register operands. */
7246 if (i.types[op].bitfield.class != Reg)
7247 continue;
7248
7249 /* If this is an eight bit register, it's OK. If it's the 16 or
7250 32 bit version of an eight bit register, we will just use the
7251 low portion, and that's OK too. */
7252 if (i.types[op].bitfield.byte)
7253 continue;
7254
7255 /* I/O port address operands are OK too. */
7256 if (i.tm.operand_types[op].bitfield.instance == RegD
7257 && i.tm.operand_types[op].bitfield.word)
7258 continue;
7259
7260 /* crc32 only wants its source operand checked here. */
7261 if (i.tm.base_opcode == 0xf20f38f0 && op)
7262 continue;
7263
7264 /* Any other register is bad. */
7265 as_bad (_("`%s%s' not allowed with `%s%c'"),
7266 register_prefix, i.op[op].regs->reg_name,
7267 i.tm.name, i.suffix);
7268 return 0;
7269 }
7270 return 1;
7271 }
7272
7273 static int
7274 check_long_reg (void)
7275 {
7276 int op;
7277
7278 for (op = i.operands; --op >= 0;)
7279 /* Skip non-register operands. */
7280 if (i.types[op].bitfield.class != Reg)
7281 continue;
7282 /* Reject eight bit registers, except where the template requires
7283 them (e.g. movzb).  */
7284 else if (i.types[op].bitfield.byte
7285 && (i.tm.operand_types[op].bitfield.class == Reg
7286 || i.tm.operand_types[op].bitfield.instance == Accum)
7287 && (i.tm.operand_types[op].bitfield.word
7288 || i.tm.operand_types[op].bitfield.dword))
7289 {
7290 as_bad (_("`%s%s' not allowed with `%s%c'"),
7291 register_prefix,
7292 i.op[op].regs->reg_name,
7293 i.tm.name,
7294 i.suffix);
7295 return 0;
7296 }
7297 /* Error if the e prefix on a general reg is missing. */
7298 else if (i.types[op].bitfield.word
7299 && (i.tm.operand_types[op].bitfield.class == Reg
7300 || i.tm.operand_types[op].bitfield.instance == Accum)
7301 && i.tm.operand_types[op].bitfield.dword)
7302 {
7303 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7304 register_prefix, i.op[op].regs->reg_name,
7305 i.suffix);
7306 return 0;
7307 }
7308 /* Warn if the r prefix on a general reg is present. */
7309 else if (i.types[op].bitfield.qword
7310 && (i.tm.operand_types[op].bitfield.class == Reg
7311 || i.tm.operand_types[op].bitfield.instance == Accum)
7312 && i.tm.operand_types[op].bitfield.dword)
7313 {
7314 if (intel_syntax
7315 && i.tm.opcode_modifier.toqword
7316 && i.types[0].bitfield.class != RegSIMD)
7317 {
7318 /* Convert to QWORD. We want REX byte. */
7319 i.suffix = QWORD_MNEM_SUFFIX;
7320 }
7321 else
7322 {
7323 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7324 register_prefix, i.op[op].regs->reg_name,
7325 i.suffix);
7326 return 0;
7327 }
7328 }
7329 return 1;
7330 }
7331
7332 static int
7333 check_qword_reg (void)
7334 {
7335 int op;
7336
7337 for (op = i.operands; --op >= 0; )
7338 /* Skip non-register operands. */
7339 if (i.types[op].bitfield.class != Reg)
7340 continue;
7341 /* Reject eight bit registers, except where the template requires
7342 them (e.g. movzb).  */
7343 else if (i.types[op].bitfield.byte
7344 && (i.tm.operand_types[op].bitfield.class == Reg
7345 || i.tm.operand_types[op].bitfield.instance == Accum)
7346 && (i.tm.operand_types[op].bitfield.word
7347 || i.tm.operand_types[op].bitfield.dword))
7348 {
7349 as_bad (_("`%s%s' not allowed with `%s%c'"),
7350 register_prefix,
7351 i.op[op].regs->reg_name,
7352 i.tm.name,
7353 i.suffix);
7354 return 0;
7355 }
7356 /* Warn if the r prefix on a general reg is missing. */
7357 else if ((i.types[op].bitfield.word
7358 || i.types[op].bitfield.dword)
7359 && (i.tm.operand_types[op].bitfield.class == Reg
7360 || i.tm.operand_types[op].bitfield.instance == Accum)
7361 && i.tm.operand_types[op].bitfield.qword)
7362 {
7363 /* Prohibit these changes in the 64bit mode, since the
7364 lowering is more complicated. */
7365 if (intel_syntax
7366 && i.tm.opcode_modifier.todword
7367 && i.types[0].bitfield.class != RegSIMD)
7368 {
7369 /* Convert to DWORD. We don't want REX byte. */
7370 i.suffix = LONG_MNEM_SUFFIX;
7371 }
7372 else
7373 {
7374 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7375 register_prefix, i.op[op].regs->reg_name,
7376 i.suffix);
7377 return 0;
7378 }
7379 }
7380 return 1;
7381 }
7382
7383 static int
7384 check_word_reg (void)
7385 {
7386 int op;
7387 for (op = i.operands; --op >= 0;)
7388 /* Skip non-register operands. */
7389 if (i.types[op].bitfield.class != Reg)
7390 continue;
7391 /* Reject eight bit registers, except where the template requires
7392 them (e.g. movzb).  */
7393 else if (i.types[op].bitfield.byte
7394 && (i.tm.operand_types[op].bitfield.class == Reg
7395 || i.tm.operand_types[op].bitfield.instance == Accum)
7396 && (i.tm.operand_types[op].bitfield.word
7397 || i.tm.operand_types[op].bitfield.dword))
7398 {
7399 as_bad (_("`%s%s' not allowed with `%s%c'"),
7400 register_prefix,
7401 i.op[op].regs->reg_name,
7402 i.tm.name,
7403 i.suffix);
7404 return 0;
7405 }
7406 /* Error if the e or r prefix on a general reg is present. */
7407 else if ((i.types[op].bitfield.dword
7408 || i.types[op].bitfield.qword)
7409 && (i.tm.operand_types[op].bitfield.class == Reg
7410 || i.tm.operand_types[op].bitfield.instance == Accum)
7411 && i.tm.operand_types[op].bitfield.word)
7412 {
7413 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7414 register_prefix, i.op[op].regs->reg_name,
7415 i.suffix);
7416 return 0;
7417 }
7418 return 1;
7419 }
7420
7421 static int
7422 update_imm (unsigned int j)
7423 {
7424 i386_operand_type overlap = i.types[j];
7425 if ((overlap.bitfield.imm8
7426 || overlap.bitfield.imm8s
7427 || overlap.bitfield.imm16
7428 || overlap.bitfield.imm32
7429 || overlap.bitfield.imm32s
7430 || overlap.bitfield.imm64)
7431 && !operand_type_equal (&overlap, &imm8)
7432 && !operand_type_equal (&overlap, &imm8s)
7433 && !operand_type_equal (&overlap, &imm16)
7434 && !operand_type_equal (&overlap, &imm32)
7435 && !operand_type_equal (&overlap, &imm32s)
7436 && !operand_type_equal (&overlap, &imm64))
7437 {
7438 if (i.suffix)
7439 {
7440 i386_operand_type temp;
7441
7442 operand_type_set (&temp, 0);
7443 if (i.suffix == BYTE_MNEM_SUFFIX)
7444 {
7445 temp.bitfield.imm8 = overlap.bitfield.imm8;
7446 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7447 }
7448 else if (i.suffix == WORD_MNEM_SUFFIX)
7449 temp.bitfield.imm16 = overlap.bitfield.imm16;
7450 else if (i.suffix == QWORD_MNEM_SUFFIX)
7451 {
7452 temp.bitfield.imm64 = overlap.bitfield.imm64;
7453 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7454 }
7455 else
7456 temp.bitfield.imm32 = overlap.bitfield.imm32;
7457 overlap = temp;
7458 }
7459 else if (operand_type_equal (&overlap, &imm16_32_32s)
7460 || operand_type_equal (&overlap, &imm16_32)
7461 || operand_type_equal (&overlap, &imm16_32s))
7462 {
7463 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7464 overlap = imm16;
7465 else
7466 overlap = imm32s;
7467 }
7468 else if (i.prefix[REX_PREFIX] & REX_W)
7469 overlap = operand_type_and (overlap, imm32s);
7470 else if (i.prefix[DATA_PREFIX])
7471 overlap = operand_type_and (overlap,
7472 flag_code != CODE_16BIT ? imm16 : imm32);
7473 if (!operand_type_equal (&overlap, &imm8)
7474 && !operand_type_equal (&overlap, &imm8s)
7475 && !operand_type_equal (&overlap, &imm16)
7476 && !operand_type_equal (&overlap, &imm32)
7477 && !operand_type_equal (&overlap, &imm32s)
7478 && !operand_type_equal (&overlap, &imm64))
7479 {
7480 as_bad (_("no instruction mnemonic suffix given; "
7481 "can't determine immediate size"));
7482 return 0;
7483 }
7484 }
7485 i.types[j] = overlap;
7486
7487 return 1;
7488 }
7489
7490 static int
7491 finalize_imm (void)
7492 {
7493 unsigned int j, n;
7494
7495 /* Update the first 2 immediate operands. */
7496 n = i.operands > 2 ? 2 : i.operands;
7497 if (n)
7498 {
7499 for (j = 0; j < n; j++)
7500 if (update_imm (j) == 0)
7501 return 0;
7502
7503 /* The 3rd operand can't be immediate operand. */
7504 gas_assert (operand_type_check (i.types[2], imm) == 0);
7505 }
7506
7507 return 1;
7508 }
7509
7510 static int
7511 process_operands (void)
7512 {
7513 /* Default segment register this instruction will use for memory
7514 accesses. 0 means unknown. This is only for optimizing out
7515 unnecessary segment overrides. */
7516 const seg_entry *default_seg = 0;
7517
7518 if (i.tm.opcode_modifier.sse2avx)
7519 {
7520 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7521 need converting. */
7522 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7523 i.prefix[REX_PREFIX] = 0;
7524 i.rex_encoding = 0;
7525 }
7526 /* ImmExt should be processed after SSE2AVX. */
7527 else if (i.tm.opcode_modifier.immext)
7528 process_immext ();
7529
7530 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7531 {
7532 unsigned int dupl = i.operands;
7533 unsigned int dest = dupl - 1;
7534 unsigned int j;
7535
7536 /* The destination must be an xmm register. */
7537 gas_assert (i.reg_operands
7538 && MAX_OPERANDS > dupl
7539 && operand_type_equal (&i.types[dest], &regxmm));
7540
7541 if (i.tm.operand_types[0].bitfield.instance == Accum
7542 && i.tm.operand_types[0].bitfield.xmmword)
7543 {
7544 if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7545 {
7546 /* Keep xmm0 for instructions with VEX prefix and 3
7547 sources. */
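 /* For example (illustrative): with -msse2avx,
 "blendvps %xmm0, %xmm1, %xmm2" becomes VEX-encoded vblendvps,
 where the formerly implicit %xmm0 turns into an explicit source.  */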
7548 i.tm.operand_types[0].bitfield.instance = InstanceNone;
7549 i.tm.operand_types[0].bitfield.class = RegSIMD;
7550 goto duplicate;
7551 }
7552 else
7553 {
7554 /* We remove the first xmm0 and keep the number of
7555 operands unchanged, which in fact duplicates the
7556 destination. */
7557 for (j = 1; j < i.operands; j++)
7558 {
7559 i.op[j - 1] = i.op[j];
7560 i.types[j - 1] = i.types[j];
7561 i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7562 i.flags[j - 1] = i.flags[j];
7563 }
7564 }
7565 }
7566 else if (i.tm.opcode_modifier.implicit1stxmm0)
7567 {
7568 gas_assert ((MAX_OPERANDS - 1) > dupl
7569 && (i.tm.opcode_modifier.vexsources
7570 == VEX3SOURCES));
7571
7572 /* Add the implicit xmm0 for instructions with VEX prefix
7573 and 3 sources. */
7574 for (j = i.operands; j > 0; j--)
7575 {
7576 i.op[j] = i.op[j - 1];
7577 i.types[j] = i.types[j - 1];
7578 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7579 i.flags[j] = i.flags[j - 1];
7580 }
7581 i.op[0].regs
7582 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7583 i.types[0] = regxmm;
7584 i.tm.operand_types[0] = regxmm;
7585
7586 i.operands += 2;
7587 i.reg_operands += 2;
7588 i.tm.operands += 2;
7589
7590 dupl++;
7591 dest++;
7592 i.op[dupl] = i.op[dest];
7593 i.types[dupl] = i.types[dest];
7594 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7595 i.flags[dupl] = i.flags[dest];
7596 }
7597 else
7598 {
7599 duplicate:
7600 i.operands++;
7601 i.reg_operands++;
7602 i.tm.operands++;
7603
7604 i.op[dupl] = i.op[dest];
7605 i.types[dupl] = i.types[dest];
7606 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7607 i.flags[dupl] = i.flags[dest];
7608 }
7609
7610 if (i.tm.opcode_modifier.immext)
7611 process_immext ();
7612 }
7613 else if (i.tm.operand_types[0].bitfield.instance == Accum
7614 && i.tm.operand_types[0].bitfield.xmmword)
7615 {
7616 unsigned int j;
7617
7618 for (j = 1; j < i.operands; j++)
7619 {
7620 i.op[j - 1] = i.op[j];
7621 i.types[j - 1] = i.types[j];
7622
7623 /* We need to adjust fields in i.tm since they are used by
7624 build_modrm_byte. */
7625 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7626
7627 i.flags[j - 1] = i.flags[j];
7628 }
7629
7630 i.operands--;
7631 i.reg_operands--;
7632 i.tm.operands--;
7633 }
7634 else if (i.tm.opcode_modifier.implicitquadgroup)
7635 {
7636 unsigned int regnum, first_reg_in_group, last_reg_in_group;
7637
7638 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7639 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7640 regnum = register_number (i.op[1].regs);
7641 first_reg_in_group = regnum & ~3;
7642 last_reg_in_group = first_reg_in_group + 3;
7643 if (regnum != first_reg_in_group)
7644 as_warn (_("source register `%s%s' implicitly denotes"
7645 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7646 register_prefix, i.op[1].regs->reg_name,
7647 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7648 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7649 i.tm.name);
7650 }
7651 else if (i.tm.opcode_modifier.regkludge)
7652 {
7653 /* The imul $imm, %reg instruction is converted into
7654 imul $imm, %reg, %reg, and the clr %reg instruction
7655 is converted into xor %reg, %reg. */
7656
7657 unsigned int first_reg_op;
7658
7659 if (operand_type_check (i.types[0], reg))
7660 first_reg_op = 0;
7661 else
7662 first_reg_op = 1;
7663 /* Pretend we saw the extra register operand. */
7664 gas_assert (i.reg_operands == 1
7665 && i.op[first_reg_op + 1].regs == 0);
7666 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7667 i.types[first_reg_op + 1] = i.types[first_reg_op];
7668 i.operands++;
7669 i.reg_operands++;
7670 }
7671
7672 if (i.tm.opcode_modifier.modrm)
7673 {
7674 /* The opcode is completed (modulo i.tm.extension_opcode which
7675 must be put into the modrm byte). Now, we make the modrm and
7676 index base bytes based on all the info we've collected. */
7677
7678 default_seg = build_modrm_byte ();
7679 }
7680 else if (i.types[0].bitfield.class == SReg)
7681 {
7682 if (flag_code != CODE_64BIT
7683 ? i.tm.base_opcode == POP_SEG_SHORT
7684 && i.op[0].regs->reg_num == 1
7685 : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7686 && i.op[0].regs->reg_num < 4)
7687 {
7688 as_bad (_("you can't `%s %s%s'"),
7689 i.tm.name, register_prefix, i.op[0].regs->reg_name);
7690 return 0;
7691 }
7692 if (i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1)
7693 {
7694 i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7695 i.tm.opcode_length = 2;
7696 }
7697 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7698 }
7699 else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7700 {
7701 default_seg = &ds;
7702 }
7703 else if (i.tm.opcode_modifier.isstring)
7704 {
7705 /* For the string instructions that allow a segment override
7706 on one of their operands, the default segment is ds. */
7707 default_seg = &ds;
7708 }
7709 else if (i.short_form)
7710 {
7711 /* The register or float register operand is in operand
7712 0 or 1. */
7713 unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7714
7715 /* Register goes in low 3 bits of opcode. */
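 /* E.g. (illustrative) "push %rbx": reg_num 3 yields 0x50 | 3 == 0x53.  */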
7716 i.tm.base_opcode |= i.op[op].regs->reg_num;
7717 if ((i.op[op].regs->reg_flags & RegRex) != 0)
7718 i.rex |= REX_B;
7719 if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7720 {
7721 /* Warn about some common errors, but press on regardless.
7722 The first case can be generated by gcc (<= 2.8.1). */
7723 if (i.operands == 2)
7724 {
7725 /* Reversed arguments on faddp, fsubp, etc. */
7726 as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7727 register_prefix, i.op[!intel_syntax].regs->reg_name,
7728 register_prefix, i.op[intel_syntax].regs->reg_name);
7729 }
7730 else
7731 {
7732 /* Extraneous `l' suffix on fp insn. */
7733 as_warn (_("translating to `%s %s%s'"), i.tm.name,
7734 register_prefix, i.op[0].regs->reg_name);
7735 }
7736 }
7737 }
7738
7739 if ((i.seg[0] || i.prefix[SEG_PREFIX])
7740 && i.tm.base_opcode == 0x8d /* lea */
7741 && !is_any_vex_encoding(&i.tm))
7742 {
7743 if (!quiet_warnings)
7744 as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7745 if (optimize)
7746 {
7747 i.seg[0] = NULL;
7748 i.prefix[SEG_PREFIX] = 0;
7749 }
7750 }
7751
7752 /* If a segment was explicitly specified, and the specified segment
7753 is neither the default nor the one already recorded from a prefix,
7754 use an opcode prefix to select it. If we never figured out what
7755 the default segment is, then default_seg will be zero at this
7756 point, and the specified segment prefix will always be used. */
7757 if (i.seg[0]
7758 && i.seg[0] != default_seg
7759 && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
7760 {
7761 if (!add_prefix (i.seg[0]->seg_prefix))
7762 return 0;
7763 }
7764 return 1;
7765 }
7766
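/* Record REX_BIT for register R in i.rex, mirroring it in i.vrex for the
   upper-16 (E)VEX-only registers. For SSE2AVX templates, a REX bit that
   is already pending is instead folded into the VEX register specifier,
   bumping it to the corresponding high-8 register. */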
7767 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7768 bfd_boolean do_sse2avx)
7769 {
7770 if (r->reg_flags & RegRex)
7771 {
7772 if (i.rex & rex_bit)
7773 as_bad (_("same type of prefix used twice"));
7774 i.rex |= rex_bit;
7775 }
7776 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
7777 {
7778 gas_assert (i.vex.register_specifier == r);
7779 i.vex.register_specifier += 8;
7780 }
7781
7782 if (r->reg_flags & RegVRex)
7783 i.vrex |= rex_bit;
7784 }
7785
7786 static const seg_entry *
7787 build_modrm_byte (void)
7788 {
7789 const seg_entry *default_seg = 0;
7790 unsigned int source, dest;
7791 int vex_3_sources;
7792
7793 vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
7794 if (vex_3_sources)
7795 {
7796 unsigned int nds, reg_slot;
7797 expressionS *exp;
7798
7799 dest = i.operands - 1;
7800 nds = dest - 1;
7801
7802 /* There are 2 kinds of instructions:
7803 1. 5 operands: 4 register operands or 3 register operands
7804 plus 1 memory operand plus one Imm4 operand, VexXDS, and
7805 VexW0 or VexW1. The destination must be either XMM, YMM or
7806 ZMM register.
7807 2. 4 operands: 4 register operands or 3 register operands
7808 plus 1 memory operand, with VexXDS. */
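/* For example, 4-operand VEX insns such as vblendvps encode the extra
   register in bits 7:4 of a trailing imm8: `vblendvps %xmm4, %xmm2,
   %xmm1, %xmm0' carries 0x40 there. */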
7809 gas_assert ((i.reg_operands == 4
7810 || (i.reg_operands == 3 && i.mem_operands == 1))
7811 && i.tm.opcode_modifier.vexvvvv == VEXXDS
7812 && i.tm.opcode_modifier.vexw
7813 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
7814
7815 /* If VexW1 is set, the first non-immediate operand is the source and
7816 the second non-immediate one is encoded in the immediate operand. */
7817 if (i.tm.opcode_modifier.vexw == VEXW1)
7818 {
7819 source = i.imm_operands;
7820 reg_slot = i.imm_operands + 1;
7821 }
7822 else
7823 {
7824 source = i.imm_operands + 1;
7825 reg_slot = i.imm_operands;
7826 }
7827
7828 if (i.imm_operands == 0)
7829 {
7830 /* When there is no immediate operand, generate an 8bit
7831 immediate operand to encode the first operand. */
7832 exp = &im_expressions[i.imm_operands++];
7833 i.op[i.operands].imms = exp;
7834 i.types[i.operands] = imm8;
7835 i.operands++;
7836
7837 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7838 exp->X_op = O_constant;
7839 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
7840 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7841 }
7842 else
7843 {
7844 gas_assert (i.imm_operands == 1);
7845 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
7846 gas_assert (!i.tm.opcode_modifier.immext);
7847
7848 /* Turn on Imm8 again so that output_imm will generate it. */
7849 i.types[0].bitfield.imm8 = 1;
7850
7851 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7852 i.op[0].imms->X_add_number
7853 |= register_number (i.op[reg_slot].regs) << 4;
7854 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7855 }
7856
7857 gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
7858 i.vex.register_specifier = i.op[nds].regs;
7859 }
7860 else
7861 source = dest = 0;
7862
7863 /* i.reg_operands MUST be the number of real register operands;
7864 implicit registers do not count. If there are 3 register
7865 operands, it must be an instruction with VexNDS. For an
7866 instruction with VexNDD, the destination register is encoded
7867 in the VEX prefix. If there are 4 register operands, it must be
7868 an instruction with a VEX prefix and 3 sources. */
7869 if (i.mem_operands == 0
7870 && ((i.reg_operands == 2
7871 && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
7872 || (i.reg_operands == 3
7873 && i.tm.opcode_modifier.vexvvvv == VEXXDS)
7874 || (i.reg_operands == 4 && vex_3_sources)))
7875 {
7876 switch (i.operands)
7877 {
7878 case 2:
7879 source = 0;
7880 break;
7881 case 3:
7882 /* When there are 3 operands, one of them may be an immediate,
7883 which may be the first or the last operand. Otherwise,
7884 the first operand must be the shift count register (%cl) or
7885 it is an instruction with VexNDS. */
7886 gas_assert (i.imm_operands == 1
7887 || (i.imm_operands == 0
7888 && (i.tm.opcode_modifier.vexvvvv == VEXXDS
7889 || (i.types[0].bitfield.instance == RegC
7890 && i.types[0].bitfield.byte))));
7891 if (operand_type_check (i.types[0], imm)
7892 || (i.types[0].bitfield.instance == RegC
7893 && i.types[0].bitfield.byte))
7894 source = 1;
7895 else
7896 source = 0;
7897 break;
7898 case 4:
7899 /* When there are 4 operands, the first two must be 8bit
7900 immediate operands. The source operand will be the 3rd
7901 one.
7902
7903 For instructions with VexNDS, if the first operand is
7904 an imm8, the source operand is the 2nd one. If the last
7905 operand is imm8, the source operand is the first one. */
7906 gas_assert ((i.imm_operands == 2
7907 && i.types[0].bitfield.imm8
7908 && i.types[1].bitfield.imm8)
7909 || (i.tm.opcode_modifier.vexvvvv == VEXXDS
7910 && i.imm_operands == 1
7911 && (i.types[0].bitfield.imm8
7912 || i.types[i.operands - 1].bitfield.imm8
7913 || i.rounding)));
7914 if (i.imm_operands == 2)
7915 source = 2;
7916 else
7917 {
7918 if (i.types[0].bitfield.imm8)
7919 source = 1;
7920 else
7921 source = 0;
7922 }
7923 break;
7924 case 5:
7925 if (is_evex_encoding (&i.tm))
7926 {
7927 /* For EVEX instructions, when there are 5 operands, the
7928 first one must be an immediate operand. If the second one
7929 is an immediate operand, the source operand is the 3rd
7930 one. If the last one is an immediate operand, the source
7931 operand is the 2nd one. */
7932 gas_assert (i.imm_operands == 2
7933 && i.tm.opcode_modifier.sae
7934 && operand_type_check (i.types[0], imm));
7935 if (operand_type_check (i.types[1], imm))
7936 source = 2;
7937 else if (operand_type_check (i.types[4], imm))
7938 source = 1;
7939 else
7940 abort ();
7941 }
7942 break;
7943 default:
7944 abort ();
7945 }
7946
7947 if (!vex_3_sources)
7948 {
7949 dest = source + 1;
7950
7951 /* RC/SAE operand could be between DEST and SRC. That happens
7952 when one operand is GPR and the other one is XMM/YMM/ZMM
7953 register. */
7954 if (i.rounding && i.rounding->operand == (int) dest)
7955 dest++;
7956
7957 if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7958 {
7959 /* For instructions with VexNDS, the register-only source
7960 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
7961 register. It is encoded in VEX prefix. */
7962
7963 i386_operand_type op;
7964 unsigned int vvvv;
7965
7966 /* Swap two source operands if needed. */
7967 if (i.tm.opcode_modifier.swapsources)
7968 {
7969 vvvv = source;
7970 source = dest;
7971 }
7972 else
7973 vvvv = dest;
7974
7975 op = i.tm.operand_types[vvvv];
7976 if ((dest + 1) >= i.operands
7977 || ((op.bitfield.class != Reg
7978 || (!op.bitfield.dword && !op.bitfield.qword))
7979 && op.bitfield.class != RegSIMD
7980 && !operand_type_equal (&op, &regmask)))
7981 abort ();
7982 i.vex.register_specifier = i.op[vvvv].regs;
7983 dest++;
7984 }
7985 }
7986
7987 i.rm.mode = 3;
7988 /* One of the register operands will be encoded in the i.rm.reg
7989 field, the other in the combined i.rm.mode and i.rm.regmem
7990 fields. If no form of this instruction supports a memory
7991 destination operand, then we assume the source operand may
7992 sometimes be a memory operand and so we need to store the
7993 destination in the i.rm.reg field. */
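/* E.g. `add %ecx, %ebx' uses opcode 0x01 whose r/m operand is the
   destination: %ecx (1) goes in i.rm.reg and %ebx (3) in i.rm.regmem,
   giving ModRM 0xcb and the encoding 01 cb. */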
7994 if (!i.tm.opcode_modifier.regmem
7995 && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
7996 {
7997 i.rm.reg = i.op[dest].regs->reg_num;
7998 i.rm.regmem = i.op[source].regs->reg_num;
7999 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8000 set_rex_vrex (i.op[source].regs, REX_B, FALSE);
8001 }
8002 else
8003 {
8004 i.rm.reg = i.op[source].regs->reg_num;
8005 i.rm.regmem = i.op[dest].regs->reg_num;
8006 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8007 set_rex_vrex (i.op[source].regs, REX_R, FALSE);
8008 }
8009 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8010 {
8011 if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8012 abort ();
8013 i.rex &= ~REX_R;
8014 add_prefix (LOCK_PREFIX_OPCODE);
8015 }
8016 }
8017 else
8018 { /* If it's not 2 reg operands... */
8019 unsigned int mem;
8020
8021 if (i.mem_operands)
8022 {
8023 unsigned int fake_zero_displacement = 0;
8024 unsigned int op;
8025
8026 for (op = 0; op < i.operands; op++)
8027 if (i.flags[op] & Operand_Mem)
8028 break;
8029 gas_assert (op < i.operands);
8030
8031 if (i.tm.opcode_modifier.sib)
8032 {
8033 /* The index register of VSIB shouldn't be RegIZ. */
8034 if (i.tm.opcode_modifier.sib != SIBMEM
8035 && i.index_reg->reg_num == RegIZ)
8036 abort ();
8037
8038 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8039 if (!i.base_reg)
8040 {
8041 i.sib.base = NO_BASE_REGISTER;
8042 i.sib.scale = i.log2_scale_factor;
8043 i.types[op].bitfield.disp8 = 0;
8044 i.types[op].bitfield.disp16 = 0;
8045 i.types[op].bitfield.disp64 = 0;
8046 if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
8047 {
8048 /* Must be 32 bit */
8049 i.types[op].bitfield.disp32 = 1;
8050 i.types[op].bitfield.disp32s = 0;
8051 }
8052 else
8053 {
8054 i.types[op].bitfield.disp32 = 0;
8055 i.types[op].bitfield.disp32s = 1;
8056 }
8057 }
8058
8059 /* Since the mandatory SIB always has an index register,
8060 the code logic here remains unchanged. A non-mandatory SIB
8061 without an index register is allowed and will be handled
8062 later. */
8063 if (i.index_reg)
8064 {
8065 if (i.index_reg->reg_num == RegIZ)
8066 i.sib.index = NO_INDEX_REGISTER;
8067 else
8068 i.sib.index = i.index_reg->reg_num;
8069 set_rex_vrex (i.index_reg, REX_X, FALSE);
8070 }
8071 }
8072
8073 default_seg = &ds;
8074
8075 if (i.base_reg == 0)
8076 {
8077 i.rm.mode = 0;
8078 if (!i.disp_operands)
8079 fake_zero_displacement = 1;
8080 if (i.index_reg == 0)
8081 {
8082 i386_operand_type newdisp;
8083
8084 /* Both check for VSIB and mandatory non-vector SIB. */
8085 gas_assert (!i.tm.opcode_modifier.sib
8086 || i.tm.opcode_modifier.sib == SIBMEM);
8087 /* Operand is just <disp> */
8088 if (flag_code == CODE_64BIT)
8089 {
8090 /* 64bit mode replaces 32bit absolute
8091 addressing with RIP relative addressing;
8092 absolute addressing is encoded by one of the
8093 redundant SIB forms. */
8094 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8095 i.sib.base = NO_BASE_REGISTER;
8096 i.sib.index = NO_INDEX_REGISTER;
8097 newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
8098 }
8099 else if ((flag_code == CODE_16BIT)
8100 ^ (i.prefix[ADDR_PREFIX] != 0))
8101 {
8102 i.rm.regmem = NO_BASE_REGISTER_16;
8103 newdisp = disp16;
8104 }
8105 else
8106 {
8107 i.rm.regmem = NO_BASE_REGISTER;
8108 newdisp = disp32;
8109 }
8110 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8111 i.types[op] = operand_type_or (i.types[op], newdisp);
8112 }
8113 else if (!i.tm.opcode_modifier.sib)
8114 {
8115 /* !i.base_reg && i.index_reg */
8116 if (i.index_reg->reg_num == RegIZ)
8117 i.sib.index = NO_INDEX_REGISTER;
8118 else
8119 i.sib.index = i.index_reg->reg_num;
8120 i.sib.base = NO_BASE_REGISTER;
8121 i.sib.scale = i.log2_scale_factor;
8122 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8123 i.types[op].bitfield.disp8 = 0;
8124 i.types[op].bitfield.disp16 = 0;
8125 i.types[op].bitfield.disp64 = 0;
8126 if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
8127 {
8128 /* Must be 32 bit */
8129 i.types[op].bitfield.disp32 = 1;
8130 i.types[op].bitfield.disp32s = 0;
8131 }
8132 else
8133 {
8134 i.types[op].bitfield.disp32 = 0;
8135 i.types[op].bitfield.disp32s = 1;
8136 }
8137 if ((i.index_reg->reg_flags & RegRex) != 0)
8138 i.rex |= REX_X;
8139 }
8140 }
8141 /* RIP addressing for 64bit mode. */
8142 else if (i.base_reg->reg_num == RegIP)
8143 {
8144 gas_assert (!i.tm.opcode_modifier.sib);
8145 i.rm.regmem = NO_BASE_REGISTER;
8146 i.types[op].bitfield.disp8 = 0;
8147 i.types[op].bitfield.disp16 = 0;
8148 i.types[op].bitfield.disp32 = 0;
8149 i.types[op].bitfield.disp32s = 1;
8150 i.types[op].bitfield.disp64 = 0;
8151 i.flags[op] |= Operand_PCrel;
8152 if (! i.disp_operands)
8153 fake_zero_displacement = 1;
8154 }
8155 else if (i.base_reg->reg_type.bitfield.word)
8156 {
8157 gas_assert (!i.tm.opcode_modifier.sib);
8158 switch (i.base_reg->reg_num)
8159 {
8160 case 3: /* (%bx) */
8161 if (i.index_reg == 0)
8162 i.rm.regmem = 7;
8163 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8164 i.rm.regmem = i.index_reg->reg_num - 6;
8165 break;
8166 case 5: /* (%bp) */
8167 default_seg = &ss;
8168 if (i.index_reg == 0)
8169 {
8170 i.rm.regmem = 6;
8171 if (operand_type_check (i.types[op], disp) == 0)
8172 {
8173 /* fake (%bp) into 0(%bp) */
8174 if (i.disp_encoding == disp_encoding_16bit)
8175 i.types[op].bitfield.disp16 = 1;
8176 else
8177 i.types[op].bitfield.disp8 = 1;
8178 fake_zero_displacement = 1;
8179 }
8180 }
8181 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8182 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8183 break;
8184 default: /* (%si) -> 4 or (%di) -> 5 */
8185 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8186 }
8187 if (!fake_zero_displacement
8188 && !i.disp_operands
8189 && i.disp_encoding)
8190 {
8191 fake_zero_displacement = 1;
8192 if (i.disp_encoding == disp_encoding_8bit)
8193 i.types[op].bitfield.disp8 = 1;
8194 else
8195 i.types[op].bitfield.disp16 = 1;
8196 }
8197 i.rm.mode = mode_from_disp_size (i.types[op]);
8198 }
8199 else /* i.base_reg and 32/64 bit mode */
8200 {
8201 if (flag_code == CODE_64BIT
8202 && operand_type_check (i.types[op], disp))
8203 {
8204 i.types[op].bitfield.disp16 = 0;
8205 i.types[op].bitfield.disp64 = 0;
8206 if (i.prefix[ADDR_PREFIX] == 0)
8207 {
8208 i.types[op].bitfield.disp32 = 0;
8209 i.types[op].bitfield.disp32s = 1;
8210 }
8211 else
8212 {
8213 i.types[op].bitfield.disp32 = 1;
8214 i.types[op].bitfield.disp32s = 0;
8215 }
8216 }
8217
8218 if (!i.tm.opcode_modifier.sib)
8219 i.rm.regmem = i.base_reg->reg_num;
8220 if ((i.base_reg->reg_flags & RegRex) != 0)
8221 i.rex |= REX_B;
8222 i.sib.base = i.base_reg->reg_num;
8223 /* x86-64 ignores REX prefix bit here to avoid decoder
8224 complications. */
8225 if (!(i.base_reg->reg_flags & RegRex)
8226 && (i.base_reg->reg_num == EBP_REG_NUM
8227 || i.base_reg->reg_num == ESP_REG_NUM))
8228 default_seg = &ss;
8229 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8230 {
8231 fake_zero_displacement = 1;
8232 if (i.disp_encoding == disp_encoding_32bit)
8233 i.types[op].bitfield.disp32 = 1;
8234 else
8235 i.types[op].bitfield.disp8 = 1;
8236 }
8237 i.sib.scale = i.log2_scale_factor;
8238 if (i.index_reg == 0)
8239 {
8240 /* Only check for VSIB. */
8241 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8242 && i.tm.opcode_modifier.sib != VECSIB256
8243 && i.tm.opcode_modifier.sib != VECSIB512);
8244
8245 /* <disp>(%esp) becomes two byte modrm with no index
8246 register. We've already stored the code for esp
8247 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8248 Any base register besides %esp will not use the
8249 extra modrm byte. */
8250 i.sib.index = NO_INDEX_REGISTER;
8251 }
8252 else if (!i.tm.opcode_modifier.sib)
8253 {
8254 if (i.index_reg->reg_num == RegIZ)
8255 i.sib.index = NO_INDEX_REGISTER;
8256 else
8257 i.sib.index = i.index_reg->reg_num;
8258 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8259 if ((i.index_reg->reg_flags & RegRex) != 0)
8260 i.rex |= REX_X;
8261 }
8262
8263 if (i.disp_operands
8264 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8265 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8266 i.rm.mode = 0;
8267 else
8268 {
8269 if (!fake_zero_displacement
8270 && !i.disp_operands
8271 && i.disp_encoding)
8272 {
8273 fake_zero_displacement = 1;
8274 if (i.disp_encoding == disp_encoding_8bit)
8275 i.types[op].bitfield.disp8 = 1;
8276 else
8277 i.types[op].bitfield.disp32 = 1;
8278 }
8279 i.rm.mode = mode_from_disp_size (i.types[op]);
8280 }
8281 }
8282
8283 if (fake_zero_displacement)
8284 {
8285 /* Fakes a zero displacement assuming that i.types[op]
8286 holds the correct displacement size. */
8287 expressionS *exp;
8288
8289 gas_assert (i.op[op].disps == 0);
8290 exp = &disp_expressions[i.disp_operands++];
8291 i.op[op].disps = exp;
8292 exp->X_op = O_constant;
8293 exp->X_add_number = 0;
8294 exp->X_add_symbol = (symbolS *) 0;
8295 exp->X_op_symbol = (symbolS *) 0;
8296 }
8297
8298 mem = op;
8299 }
8300 else
8301 mem = ~0;
8302
8303 if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8304 {
8305 if (operand_type_check (i.types[0], imm))
8306 i.vex.register_specifier = NULL;
8307 else
8308 {
8309 /* VEX.vvvv encodes one of the sources when the first
8310 operand is not an immediate. */
8311 if (i.tm.opcode_modifier.vexw == VEXW0)
8312 i.vex.register_specifier = i.op[0].regs;
8313 else
8314 i.vex.register_specifier = i.op[1].regs;
8315 }
8316
8317 /* Destination is a XMM register encoded in the ModRM.reg
8318 and VEX.R bit. */
8319 i.rm.reg = i.op[2].regs->reg_num;
8320 if ((i.op[2].regs->reg_flags & RegRex) != 0)
8321 i.rex |= REX_R;
8322
8323 /* ModRM.rm and VEX.B encodes the other source. */
8324 if (!i.mem_operands)
8325 {
8326 i.rm.mode = 3;
8327
8328 if (i.tm.opcode_modifier.vexw == VEXW0)
8329 i.rm.regmem = i.op[1].regs->reg_num;
8330 else
8331 i.rm.regmem = i.op[0].regs->reg_num;
8332
8333 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8334 i.rex |= REX_B;
8335 }
8336 }
8337 else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8338 {
8339 i.vex.register_specifier = i.op[2].regs;
8340 if (!i.mem_operands)
8341 {
8342 i.rm.mode = 3;
8343 i.rm.regmem = i.op[1].regs->reg_num;
8344 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8345 i.rex |= REX_B;
8346 }
8347 }
8348 /* Fill in i.rm.reg or i.rm.regmem field with register operand
8349 (if any) based on i.tm.extension_opcode. Again, we must be
8350 careful to make sure that segment/control/debug/test/MMX
8351 registers are coded into the i.rm.reg field. */
8352 else if (i.reg_operands)
8353 {
8354 unsigned int op;
8355 unsigned int vex_reg = ~0;
8356
8357 for (op = 0; op < i.operands; op++)
8358 if (i.types[op].bitfield.class == Reg
8359 || i.types[op].bitfield.class == RegBND
8360 || i.types[op].bitfield.class == RegMask
8361 || i.types[op].bitfield.class == SReg
8362 || i.types[op].bitfield.class == RegCR
8363 || i.types[op].bitfield.class == RegDR
8364 || i.types[op].bitfield.class == RegTR
8365 || i.types[op].bitfield.class == RegSIMD
8366 || i.types[op].bitfield.class == RegMMX)
8367 break;
8368
8369 if (vex_3_sources)
8370 op = dest;
8371 else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8372 {
8373 /* For instructions with VexNDS, the register-only
8374 source operand is encoded in VEX prefix. */
8375 gas_assert (mem != (unsigned int) ~0);
8376
8377 if (op > mem)
8378 {
8379 vex_reg = op++;
8380 gas_assert (op < i.operands);
8381 }
8382 else
8383 {
8384 /* Check register-only source operand when two source
8385 operands are swapped. */
8386 if (!i.tm.operand_types[op].bitfield.baseindex
8387 && i.tm.operand_types[op + 1].bitfield.baseindex)
8388 {
8389 vex_reg = op;
8390 op += 2;
8391 gas_assert (mem == (vex_reg + 1)
8392 && op < i.operands);
8393 }
8394 else
8395 {
8396 vex_reg = op + 1;
8397 gas_assert (vex_reg < i.operands);
8398 }
8399 }
8400 }
8401 else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8402 {
8403 /* For instructions with VexNDD, the register destination
8404 is encoded in VEX prefix. */
8405 if (i.mem_operands == 0)
8406 {
8407 /* There is no memory operand. */
8408 gas_assert ((op + 2) == i.operands);
8409 vex_reg = op + 1;
8410 }
8411 else
8412 {
8413 /* There are only 2 non-immediate operands. */
8414 gas_assert (op < i.imm_operands + 2
8415 && i.operands == i.imm_operands + 2);
8416 vex_reg = i.imm_operands + 1;
8417 }
8418 }
8419 else
8420 gas_assert (op < i.operands);
8421
8422 if (vex_reg != (unsigned int) ~0)
8423 {
8424 i386_operand_type *type = &i.tm.operand_types[vex_reg];
8425
8426 if ((type->bitfield.class != Reg
8427 || (!type->bitfield.dword && !type->bitfield.qword))
8428 && type->bitfield.class != RegSIMD
8429 && !operand_type_equal (type, &regmask))
8430 abort ();
8431
8432 i.vex.register_specifier = i.op[vex_reg].regs;
8433 }
8434
8435 /* Don't set OP operand twice. */
8436 if (vex_reg != op)
8437 {
8438 /* If there is an extension opcode to put here, the
8439 register number must be put into the regmem field. */
8440 if (i.tm.extension_opcode != None)
8441 {
8442 i.rm.regmem = i.op[op].regs->reg_num;
8443 set_rex_vrex (i.op[op].regs, REX_B,
8444 i.tm.opcode_modifier.sse2avx);
8445 }
8446 else
8447 {
8448 i.rm.reg = i.op[op].regs->reg_num;
8449 set_rex_vrex (i.op[op].regs, REX_R,
8450 i.tm.opcode_modifier.sse2avx);
8451 }
8452 }
8453
8454 /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8455 must set it to 3 to indicate this is a register operand
8456 in the regmem field. */
8457 if (!i.mem_operands)
8458 i.rm.mode = 3;
8459 }
8460
8461 /* Fill in i.rm.reg field with extension opcode (if any). */
8462 if (i.tm.extension_opcode != None)
8463 i.rm.reg = i.tm.extension_opcode;
8464 }
8465 return default_seg;
8466 }
8467
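/* Emit a single opcode/prefix byte, or merely account for it when
   assembling in the absolute section. */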
8468 static INLINE void
8469 frag_opcode_byte (unsigned char byte)
8470 {
8471 if (now_seg != absolute_section)
8472 FRAG_APPEND_1_CHAR (byte);
8473 else
8474 ++abs_section_offset;
8475 }
8476
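/* Return CODE16 if a data size (0x66) prefix flips the branch
   displacement size for the current one-operand branch template, given
   the pre-prefix CODE16 state; REX.W suppresses the flip. */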
8477 static unsigned int
8478 flip_code16 (unsigned int code16)
8479 {
8480 gas_assert (i.tm.operands == 1);
8481
8482 return !(i.prefix[REX_PREFIX] & REX_W)
8483 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8484 || i.tm.operand_types[0].bitfield.disp32s
8485 : i.tm.operand_types[0].bitfield.disp16)
8486 ? CODE16 : 0;
8487 }
8488
8489 static void
8490 output_branch (void)
8491 {
8492 char *p;
8493 int size;
8494 int code16;
8495 int prefix;
8496 relax_substateT subtype;
8497 symbolS *sym;
8498 offsetT off;
8499
8500 if (now_seg == absolute_section)
8501 {
8502 as_bad (_("relaxable branches not supported in absolute section"));
8503 return;
8504 }
8505
8506 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8507 size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
8508
8509 prefix = 0;
8510 if (i.prefix[DATA_PREFIX] != 0)
8511 {
8512 prefix = 1;
8513 i.prefixes -= 1;
8514 code16 ^= flip_code16(code16);
8515 }
8516 /* Pentium4 branch hints. */
8517 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8518 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8519 {
8520 prefix++;
8521 i.prefixes--;
8522 }
8523 if (i.prefix[REX_PREFIX] != 0)
8524 {
8525 prefix++;
8526 i.prefixes--;
8527 }
8528
8529 /* BND prefixed jump. */
8530 if (i.prefix[BND_PREFIX] != 0)
8531 {
8532 prefix++;
8533 i.prefixes--;
8534 }
8535
8536 if (i.prefixes != 0)
8537 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8538
8539 /* It's always a symbol; end the frag & set up for relax.
8540 Make sure there is enough room in this frag for the largest
8541 instruction we may generate in md_convert_frag. This is 2
8542 bytes for the opcode and room for the prefix and largest
8543 displacement. */
8544 frag_grow (prefix + 2 + 4);
8545 /* Prefix and 1 opcode byte go in fr_fix. */
8546 p = frag_more (prefix + 1);
8547 if (i.prefix[DATA_PREFIX] != 0)
8548 *p++ = DATA_PREFIX_OPCODE;
8549 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8550 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8551 *p++ = i.prefix[SEG_PREFIX];
8552 if (i.prefix[BND_PREFIX] != 0)
8553 *p++ = BND_PREFIX_OPCODE;
8554 if (i.prefix[REX_PREFIX] != 0)
8555 *p++ = i.prefix[REX_PREFIX];
8556 *p = i.tm.base_opcode;
8557
8558 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8559 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8560 else if (cpu_arch_flags.bitfield.cpui386)
8561 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8562 else
8563 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8564 subtype |= code16;
8565
8566 sym = i.op[0].disps->X_add_symbol;
8567 off = i.op[0].disps->X_add_number;
8568
8569 if (i.op[0].disps->X_op != O_constant
8570 && i.op[0].disps->X_op != O_symbol)
8571 {
8572 /* Handle complex expressions. */
8573 sym = make_expr_symbol (i.op[0].disps);
8574 off = 0;
8575 }
8576
8577 /* 1 possible extra opcode + 4 byte displacement go in var part.
8578 Pass reloc in fr_var. */
8579 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8580 }
8581
8582 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8583 /* Return TRUE iff PLT32 relocation should be used for branching to
8584 symbol S. */
8585
8586 static bfd_boolean
8587 need_plt32_p (symbolS *s)
8588 {
8589 /* PLT32 relocation is ELF only. */
8590 if (!IS_ELF)
8591 return FALSE;
8592
8593 #ifdef TE_SOLARIS
8594 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8595 krtld support it. */
8596 return FALSE;
8597 #endif
8598
8599 /* Since there is no need to prepare for PLT branch on x86-64, we
8600 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8601 be used as a marker for 32-bit PC-relative branches. */
8602 if (!object_64bit)
8603 return FALSE;
8604
8605 /* Weak or undefined symbols need a PLT32 relocation. */
8606 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8607 return TRUE;
8608
8609 /* A non-global symbol doesn't need a PLT32 relocation. */
8610 if (! S_IS_EXTERNAL (s))
8611 return FALSE;
8612
8613 /* Other global symbols need a PLT32 relocation. NB: Symbols with
8614 non-default visibility are treated as normal global symbols
8615 so that PLT32 relocation can be used as a marker for 32-bit
8616 PC-relative branches. It is useful for linker relaxation. */
8617 return TRUE;
8618 }
8619 #endif
8620
8621 static void
8622 output_jump (void)
8623 {
8624 char *p;
8625 int size;
8626 fixS *fixP;
8627 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8628
8629 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8630 {
8631 /* This is a loop or jecxz type instruction. */
8632 size = 1;
8633 if (i.prefix[ADDR_PREFIX] != 0)
8634 {
8635 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8636 i.prefixes -= 1;
8637 }
8638 /* Pentium4 branch hints. */
8639 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8640 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8641 {
8642 frag_opcode_byte (i.prefix[SEG_PREFIX]);
8643 i.prefixes--;
8644 }
8645 }
8646 else
8647 {
8648 int code16;
8649
8650 code16 = 0;
8651 if (flag_code == CODE_16BIT)
8652 code16 = CODE16;
8653
8654 if (i.prefix[DATA_PREFIX] != 0)
8655 {
8656 frag_opcode_byte (DATA_PREFIX_OPCODE);
8657 i.prefixes -= 1;
8658 code16 ^= flip_code16(code16);
8659 }
8660
8661 size = 4;
8662 if (code16)
8663 size = 2;
8664 }
8665
8666 /* BND prefixed jump. */
8667 if (i.prefix[BND_PREFIX] != 0)
8668 {
8669 frag_opcode_byte (i.prefix[BND_PREFIX]);
8670 i.prefixes -= 1;
8671 }
8672
8673 if (i.prefix[REX_PREFIX] != 0)
8674 {
8675 frag_opcode_byte (i.prefix[REX_PREFIX]);
8676 i.prefixes -= 1;
8677 }
8678
8679 if (i.prefixes != 0)
8680 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8681
8682 if (now_seg == absolute_section)
8683 {
8684 abs_section_offset += i.tm.opcode_length + size;
8685 return;
8686 }
8687
8688 p = frag_more (i.tm.opcode_length + size);
8689 switch (i.tm.opcode_length)
8690 {
8691 case 2:
8692 *p++ = i.tm.base_opcode >> 8;
8693 /* Fall through. */
8694 case 1:
8695 *p++ = i.tm.base_opcode;
8696 break;
8697 default:
8698 abort ();
8699 }
8700
8701 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8702 if (size == 4
8703 && jump_reloc == NO_RELOC
8704 && need_plt32_p (i.op[0].disps->X_add_symbol))
8705 jump_reloc = BFD_RELOC_X86_64_PLT32;
8706 #endif
8707
8708 jump_reloc = reloc (size, 1, 1, jump_reloc);
8709
8710 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8711 i.op[0].disps, 1, jump_reloc);
8712
8713 /* All jumps handled here are signed, but don't use a signed limit
8714 check for 32 and 16 bit jumps as we want to allow wrap around at
8715 4G and 64k respectively. */
8716 if (size == 1)
8717 fixP->fx_signed = 1;
8718 }
8719
8720 static void
8721 output_interseg_jump (void)
8722 {
8723 char *p;
8724 int size;
8725 int prefix;
8726 int code16;
8727
8728 code16 = 0;
8729 if (flag_code == CODE_16BIT)
8730 code16 = CODE16;
8731
8732 prefix = 0;
8733 if (i.prefix[DATA_PREFIX] != 0)
8734 {
8735 prefix = 1;
8736 i.prefixes -= 1;
8737 code16 ^= CODE16;
8738 }
8739
8740 gas_assert (!i.prefix[REX_PREFIX]);
8741
8742 size = 4;
8743 if (code16)
8744 size = 2;
8745
8746 if (i.prefixes != 0)
8747 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8748
8749 if (now_seg == absolute_section)
8750 {
8751 abs_section_offset += prefix + 1 + 2 + size;
8752 return;
8753 }
8754
8755 /* 1 byte of opcode; `size' bytes of offset; 2 bytes of segment. */
8756 p = frag_more (prefix + 1 + 2 + size);
8757
8758 if (i.prefix[DATA_PREFIX] != 0)
8759 *p++ = DATA_PREFIX_OPCODE;
8760
8761 if (i.prefix[REX_PREFIX] != 0)
8762 *p++ = i.prefix[REX_PREFIX];
8763
8764 *p++ = i.tm.base_opcode;
8765 if (i.op[1].imms->X_op == O_constant)
8766 {
8767 offsetT n = i.op[1].imms->X_add_number;
8768
8769 if (size == 2
8770 && !fits_in_unsigned_word (n)
8771 && !fits_in_signed_word (n))
8772 {
8773 as_bad (_("16-bit jump out of range"));
8774 return;
8775 }
8776 md_number_to_chars (p, n, size);
8777 }
8778 else
8779 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8780 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8781
8782 p += size;
8783 if (i.op[0].imms->X_op == O_constant)
8784 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
8785 else
8786 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
8787 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
8788 }
8789
8790 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8791 void
8792 x86_cleanup (void)
8793 {
8794 char *p;
8795 asection *seg = now_seg;
8796 subsegT subseg = now_subseg;
8797 asection *sec;
8798 unsigned int alignment, align_size_1;
8799 unsigned int isa_1_descsz, feature_2_descsz, descsz;
8800 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
8801 unsigned int padding;
8802
8803 if (!IS_ELF || !x86_used_note)
8804 return;
8805
8806 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
8807
8808 /* The .note.gnu.property section layout:
8809
8810 Field Length Contents
8811 ---- ---- ----
8812 n_namsz 4 4
8813 n_descsz 4 The note descriptor size
8814 n_type 4 NT_GNU_PROPERTY_TYPE_0
8815 n_name 4 "GNU"
8816 n_desc n_descsz The program property array
8817 .... .... ....
8818 */
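/* For ELFCLASS64 the descriptors are 8-byte aligned: each raw 12-byte
   (type + size + data) property below pads to 16, so descsz works out
   to 32 and the whole note, including its 16-byte header, to 48 bytes. */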
8819
8820 /* Create the .note.gnu.property section. */
8821 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
8822 bfd_set_section_flags (sec,
8823 (SEC_ALLOC
8824 | SEC_LOAD
8825 | SEC_DATA
8826 | SEC_HAS_CONTENTS
8827 | SEC_READONLY));
8828
8829 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
8830 {
8831 align_size_1 = 7;
8832 alignment = 3;
8833 }
8834 else
8835 {
8836 align_size_1 = 3;
8837 alignment = 2;
8838 }
8839
8840 bfd_set_section_alignment (sec, alignment);
8841 elf_section_type (sec) = SHT_NOTE;
8842
8843 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
8844 + 4-byte data */
8845 isa_1_descsz_raw = 4 + 4 + 4;
8846 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
8847 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
8848
8849 feature_2_descsz_raw = isa_1_descsz;
8850 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
8851 + 4-byte data */
8852 feature_2_descsz_raw += 4 + 4 + 4;
8853 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
8854 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
8855 & ~align_size_1);
8856
8857 descsz = feature_2_descsz;
8858 /* Section size: 4 (n_namsz) + 4 (n_descsz) + 4 (n_type) + 4 (n_name) + descsz. */
8859 p = frag_more (4 + 4 + 4 + 4 + descsz);
8860
8861 /* Write n_namsz. */
8862 md_number_to_chars (p, (valueT) 4, 4);
8863
8864 /* Write n_descsz. */
8865 md_number_to_chars (p + 4, (valueT) descsz, 4);
8866
8867 /* Write n_type. */
8868 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
8869
8870 /* Write n_name. */
8871 memcpy (p + 4 * 3, "GNU", 4);
8872
8873 /* Write 4-byte type. */
8874 md_number_to_chars (p + 4 * 4,
8875 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
8876
8877 /* Write 4-byte data size. */
8878 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
8879
8880 /* Write 4-byte data. */
8881 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
8882
8883 /* Zero out paddings. */
8884 padding = isa_1_descsz - isa_1_descsz_raw;
8885 if (padding)
8886 memset (p + 4 * 7, 0, padding);
8887
8888 /* Write 4-byte type. */
8889 md_number_to_chars (p + isa_1_descsz + 4 * 4,
8890 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
8891
8892 /* Write 4-byte data size. */
8893 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
8894
8895 /* Write 4-byte data. */
8896 md_number_to_chars (p + isa_1_descsz + 4 * 6,
8897 (valueT) x86_feature_2_used, 4);
8898
8899 /* Zero out paddings. */
8900 padding = feature_2_descsz - feature_2_descsz_raw;
8901 if (padding)
8902 memset (p + isa_1_descsz + 4 * 7, 0, padding);
8903
8904 /* We probably can't restore the current segment, for there likely
8905 isn't one yet... */
8906 if (seg && subseg)
8907 subseg_set (seg, subseg);
8908 }
8909 #endif
8910
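/* Return the number of bytes emitted for the current insn so far,
   from START_OFF within START_FRAG up to FRAG_NOW_PTR in frag_now. */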
8911 static unsigned int
8912 encoding_length (const fragS *start_frag, offsetT start_off,
8913 const char *frag_now_ptr)
8914 {
8915 unsigned int len = 0;
8916
8917 if (start_frag != frag_now)
8918 {
8919 const fragS *fr = start_frag;
8920
8921 do {
8922 len += fr->fr_fix;
8923 fr = fr->fr_next;
8924 } while (fr && fr != frag_now);
8925 }
8926
8927 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8928 }
8929
8930 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
8931 be macro-fused with conditional jumps.
8932 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
8933 or has one of the following formats:
8934
8935 cmp m, imm
8936 add m, imm
8937 sub m, imm
8938 test m, imm
8939 and m, imm
8940 inc m
8941 dec m
8942
8943 it is not fusible. */
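/* E.g. `cmp $1, %eax; jne .L1' is a fusion candidate, while
   `cmpl $1, (%rdi); jne .L1' (cmp m, imm) is not. */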
8944
8945 static int
8946 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
8947 {
8948 /* No RIP address. */
8949 if (i.base_reg && i.base_reg->reg_num == RegIP)
8950 return 0;
8951
8952 /* No VEX/EVEX encoding. */
8953 if (is_any_vex_encoding (&i.tm))
8954 return 0;
8955
8956 /* add, sub without add/sub m, imm. */
8957 if (i.tm.base_opcode <= 5
8958 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
8959 || ((i.tm.base_opcode | 3) == 0x83
8960 && (i.tm.extension_opcode == 0x5
8961 || i.tm.extension_opcode == 0x0)))
8962 {
8963 *mf_cmp_p = mf_cmp_alu_cmp;
8964 return !(i.mem_operands && i.imm_operands);
8965 }
8966
8967 /* and without and m, imm. */
8968 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
8969 || ((i.tm.base_opcode | 3) == 0x83
8970 && i.tm.extension_opcode == 0x4))
8971 {
8972 *mf_cmp_p = mf_cmp_test_and;
8973 return !(i.mem_operands && i.imm_operands);
8974 }
8975
8976 /* test without test m, imm. */
8977 if ((i.tm.base_opcode | 1) == 0x85
8978 || (i.tm.base_opcode | 1) == 0xa9
8979 || ((i.tm.base_opcode | 1) == 0xf7
8980 && i.tm.extension_opcode == 0))
8981 {
8982 *mf_cmp_p = mf_cmp_test_and;
8983 return !(i.mem_operands && i.imm_operands);
8984 }
8985
8986 /* cmp without cmp m, imm. */
8987 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
8988 || ((i.tm.base_opcode | 3) == 0x83
8989 && (i.tm.extension_opcode == 0x7)))
8990 {
8991 *mf_cmp_p = mf_cmp_alu_cmp;
8992 return !(i.mem_operands && i.imm_operands);
8993 }
8994
8995 /* inc, dec without inc/dec m. */
8996 if ((i.tm.cpu_flags.bitfield.cpuno64
8997 && (i.tm.base_opcode | 0xf) == 0x4f)
8998 || ((i.tm.base_opcode | 1) == 0xff
8999 && i.tm.extension_opcode <= 0x1))
9000 {
9001 *mf_cmp_p = mf_cmp_incdec;
9002 return !i.mem_operands;
9003 }
9004
9005 return 0;
9006 }
9007
9008 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9009
9010 static int
9011 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9012 {
9013 /* NB: Doesn't work with COND_JUMP86 without i386. */
9014 if (!align_branch_power
9015 || now_seg == absolute_section
9016 || !cpu_arch_flags.bitfield.cpui386
9017 || !(align_branch & align_branch_fused_bit))
9018 return 0;
9019
9020 if (maybe_fused_with_jcc_p (mf_cmp_p))
9021 {
9022 if (last_insn.kind == last_insn_other
9023 || last_insn.seg != now_seg)
9024 return 1;
9025 if (flag_debug)
9026 as_warn_where (last_insn.file, last_insn.line,
9027 _("`%s` skips -malign-branch-boundary on `%s`"),
9028 last_insn.name, i.tm.name);
9029 }
9030
9031 return 0;
9032 }
9033
9034 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9035
9036 static int
9037 add_branch_prefix_frag_p (void)
9038 {
9039 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
9040 to PadLock instructions since they include prefixes in the opcode. */
9041 if (!align_branch_power
9042 || !align_branch_prefix_size
9043 || now_seg == absolute_section
9044 || i.tm.cpu_flags.bitfield.cpupadlock
9045 || !cpu_arch_flags.bitfield.cpui386)
9046 return 0;
9047
9048 /* Don't add a prefix if the insn is itself a prefix, or if there is
9049 no operand, in which case a segment prefix may be special. */
9050 if (!i.operands || i.tm.opcode_modifier.isprefix)
9051 return 0;
9052
9053 if (last_insn.kind == last_insn_other
9054 || last_insn.seg != now_seg)
9055 return 1;
9056
9057 if (flag_debug)
9058 as_warn_where (last_insn.file, last_insn.line,
9059 _("`%s` skips -malign-branch-boundary on `%s`"),
9060 last_insn.name, i.tm.name);
9061
9062 return 0;
9063 }
9064
9065 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9066
9067 static int
9068 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9069 enum mf_jcc_kind *mf_jcc_p)
9070 {
9071 int add_padding;
9072
9073 /* NB: Doesn't work with COND_JUMP86 without i386. */
9074 if (!align_branch_power
9075 || now_seg == absolute_section
9076 || !cpu_arch_flags.bitfield.cpui386)
9077 return 0;
9078
9079 add_padding = 0;
9080
9081 /* Check for jcc and direct jmp. */
9082 if (i.tm.opcode_modifier.jump == JUMP)
9083 {
9084 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9085 {
9086 *branch_p = align_branch_jmp;
9087 add_padding = align_branch & align_branch_jmp_bit;
9088 }
9089 else
9090 {
9091 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9092 table, ignore the lowest bit. */
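/* E.g. je (0x74) and jne (0x75) both map to (0x74 & 0x0e) >> 1 == 2. */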
9093 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9094 *branch_p = align_branch_jcc;
9095 if ((align_branch & align_branch_jcc_bit))
9096 add_padding = 1;
9097 }
9098 }
9099 else if (is_any_vex_encoding (&i.tm))
9100 return 0;
9101 else if ((i.tm.base_opcode | 1) == 0xc3)
9102 {
9103 /* Near ret. */
9104 *branch_p = align_branch_ret;
9105 if ((align_branch & align_branch_ret_bit))
9106 add_padding = 1;
9107 }
9108 else
9109 {
9110 /* Check for indirect jmp, direct and indirect calls. */
9111 if (i.tm.base_opcode == 0xe8)
9112 {
9113 /* Direct call. */
9114 *branch_p = align_branch_call;
9115 if ((align_branch & align_branch_call_bit))
9116 add_padding = 1;
9117 }
9118 else if (i.tm.base_opcode == 0xff
9119 && (i.tm.extension_opcode == 2
9120 || i.tm.extension_opcode == 4))
9121 {
9122 /* Indirect call and jmp. */
9123 *branch_p = align_branch_indirect;
9124 if ((align_branch & align_branch_indirect_bit))
9125 add_padding = 1;
9126 }
9127
9128 if (add_padding
9129 && i.disp_operands
9130 && tls_get_addr
9131 && (i.op[0].disps->X_op == O_symbol
9132 || (i.op[0].disps->X_op == O_subtract
9133 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9134 {
9135 symbolS *s = i.op[0].disps->X_add_symbol;
9136 /* No padding to call to global or undefined tls_get_addr. */
9137 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9138 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9139 return 0;
9140 }
9141 }
9142
9143 if (add_padding
9144 && last_insn.kind != last_insn_other
9145 && last_insn.seg == now_seg)
9146 {
9147 if (flag_debug)
9148 as_warn_where (last_insn.file, last_insn.line,
9149 _("`%s` skips -malign-branch-boundary on `%s`"),
9150 last_insn.name, i.tm.name);
9151 return 0;
9152 }
9153
9154 return add_padding;
9155 }
9156
9157 static void
9158 output_insn (void)
9159 {
9160 fragS *insn_start_frag;
9161 offsetT insn_start_off;
9162 fragS *fragP = NULL;
9163 enum align_branch_kind branch = align_branch_none;
9164 /* The initializer is arbitrary, just to avoid an uninitialized-variable
9165 error; it's actually either assigned in add_branch_padding_frag_p
9166 or never used. */
9167 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9168
9169 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9170 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9171 {
9172 if ((i.xstate & xstate_tmm) == xstate_tmm
9173 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9174 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9175
9176 if (i.tm.cpu_flags.bitfield.cpusse3
9177 || i.tm.cpu_flags.bitfield.cpussse3
9178 || i.tm.cpu_flags.bitfield.cpusse4_1
9179 || i.tm.cpu_flags.bitfield.cpusse4_2
9180 || i.tm.cpu_flags.bitfield.cpucx16
9181 || i.tm.cpu_flags.bitfield.cpupopcnt
9182 /* LAHF-SAHF insns in 64-bit mode. */
9183 || (flag_code == CODE_64BIT
9184 && (i.tm.base_opcode | 1) == 0x9f))
9185 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9186 if (i.tm.cpu_flags.bitfield.cpuavx
9187 || i.tm.cpu_flags.bitfield.cpuavx2
9188 /* Any VEX encoded insns except for CpuAVX512F, CpuAVX512BW,
9189 CpuAVX512DQ, LWP, TBM and AMX. */
9190 || (i.tm.opcode_modifier.vex
9191 && !i.tm.cpu_flags.bitfield.cpuavx512f
9192 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9193 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9194 && !i.tm.cpu_flags.bitfield.cpulwp
9195 && !i.tm.cpu_flags.bitfield.cputbm
9196 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9197 || i.tm.cpu_flags.bitfield.cpuf16c
9198 || i.tm.cpu_flags.bitfield.cpufma
9199 || i.tm.cpu_flags.bitfield.cpulzcnt
9200 || i.tm.cpu_flags.bitfield.cpumovbe
9201 || i.tm.cpu_flags.bitfield.cpuxsave
9202 || i.tm.cpu_flags.bitfield.cpuxsavec
9203 || i.tm.cpu_flags.bitfield.cpuxsaveopt
9204 || i.tm.cpu_flags.bitfield.cpuxsaves)
9205 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9206 if (i.tm.cpu_flags.bitfield.cpuavx512f
9207 || i.tm.cpu_flags.bitfield.cpuavx512bw
9208 || i.tm.cpu_flags.bitfield.cpuavx512dq
9209 || i.tm.cpu_flags.bitfield.cpuavx512vl
9210 /* Any EVEX encoded insns except for AVX512ER, AVX512PF and
9211 VNNIW. */
9212 || (i.tm.opcode_modifier.evex
9213 && !i.tm.cpu_flags.bitfield.cpuavx512er
9214 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9215 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9216 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9217
9218 if (i.tm.cpu_flags.bitfield.cpu8087
9219 || i.tm.cpu_flags.bitfield.cpu287
9220 || i.tm.cpu_flags.bitfield.cpu387
9221 || i.tm.cpu_flags.bitfield.cpu687
9222 || i.tm.cpu_flags.bitfield.cpufisttp)
9223 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9224 if ((i.xstate & xstate_mmx)
9225 || i.tm.base_opcode == 0xf77 /* emms */
9226 || i.tm.base_opcode == 0xf0e /* femms */)
9227 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9228 if (i.index_reg)
9229 {
9230 if (i.index_reg->reg_type.bitfield.zmmword)
9231 i.xstate |= xstate_zmm;
9232 else if (i.index_reg->reg_type.bitfield.ymmword)
9233 i.xstate |= xstate_ymm;
9234 else if (i.index_reg->reg_type.bitfield.xmmword)
9235 i.xstate |= xstate_xmm;
9236 }
9237 if ((i.xstate & xstate_xmm)
9238 || i.tm.cpu_flags.bitfield.cpuwidekl
9239 || i.tm.cpu_flags.bitfield.cpukl)
9240 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9241 if ((i.xstate & xstate_ymm) == xstate_ymm)
9242 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9243 if ((i.xstate & xstate_zmm) == xstate_zmm)
9244 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9245 if (i.mask || (i.xstate & xstate_mask) == xstate_mask)
9246 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9247 if (i.tm.cpu_flags.bitfield.cpufxsr)
9248 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9249 if (i.tm.cpu_flags.bitfield.cpuxsave)
9250 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9251 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9252 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9253 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9254 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9255 }
9256 #endif
9257
9258 /* Tie dwarf2 debug info to the address at the start of the insn.
9259 We can't do this after the insn has been output as the current
9260 frag may have been closed off. eg. by frag_var. */
9261 dwarf2_emit_insn (0);
9262
9263 insn_start_frag = frag_now;
9264 insn_start_off = frag_now_fix ();
9265
9266 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9267 {
9268 char *p;
9269 /* Branch can be 8 bytes. Leave some room for prefixes. */
9270 unsigned int max_branch_padding_size = 14;
9271
9272 /* Align section to boundary. */
9273 record_alignment (now_seg, align_branch_power);
9274
9275 /* Make room for padding. */
9276 frag_grow (max_branch_padding_size);
9277
9278 /* Start of the padding. */
9279 p = frag_more (0);
9280
9281 fragP = frag_now;
9282
9283 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9284 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9285 NULL, 0, p);
9286
9287 fragP->tc_frag_data.mf_type = mf_jcc;
9288 fragP->tc_frag_data.branch_type = branch;
9289 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9290 }
9291
9292 /* Output jumps. */
9293 if (i.tm.opcode_modifier.jump == JUMP)
9294 output_branch ();
9295 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9296 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9297 output_jump ();
9298 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9299 output_interseg_jump ();
9300 else
9301 {
9302 /* Output normal instructions here. */
9303 char *p;
9304 unsigned char *q;
9305 unsigned int j;
9306 unsigned int prefix;
9307 enum mf_cmp_kind mf_cmp;
9308
9309 if (avoid_fence
9310 && (i.tm.base_opcode == 0xfaee8
9311 || i.tm.base_opcode == 0xfaef0
9312 || i.tm.base_opcode == 0xfaef8))
9313 {
9314 /* Encode lfence, mfence, and sfence as
9315 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9316 if (now_seg != absolute_section)
9317 {
9318 offsetT val = 0x240483f0ULL;
9319
9320 p = frag_more (5);
9321 md_number_to_chars (p, val, 5);
9322 }
9323 else
9324 abs_section_offset += 5;
9325 return;
9326 }
9327
9328 /* Some processors fail on the LOCK prefix. This option makes the
9329 assembler ignore the LOCK prefix and serves as a workaround. */
9330 if (omit_lock_prefix)
9331 {
9332 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
9333 return;
9334 i.prefix[LOCK_PREFIX] = 0;
9335 }
9336
9337 if (branch)
9338 /* Skip if this is a branch. */
9339 ;
9340 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9341 {
9342 /* Make room for padding. */
9343 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9344 p = frag_more (0);
9345
9346 fragP = frag_now;
9347
9348 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9349 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9350 NULL, 0, p);
9351
9352 fragP->tc_frag_data.mf_type = mf_cmp;
9353 fragP->tc_frag_data.branch_type = align_branch_fused;
9354 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9355 }
9356 else if (add_branch_prefix_frag_p ())
9357 {
9358 unsigned int max_prefix_size = align_branch_prefix_size;
9359
9360 /* Make room for padding. */
9361 frag_grow (max_prefix_size);
9362 p = frag_more (0);
9363
9364 fragP = frag_now;
9365
9366 frag_var (rs_machine_dependent, max_prefix_size, 0,
9367 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9368 NULL, 0, p);
9369
9370 fragP->tc_frag_data.max_bytes = max_prefix_size;
9371 }
9372
9373 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9374 don't need the explicit prefix. */
9375 if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
9376 {
9377 switch (i.tm.opcode_length)
9378 {
9379 case 3:
9380 if (i.tm.base_opcode & 0xff000000)
9381 {
9382 prefix = (i.tm.base_opcode >> 24) & 0xff;
9383 if (!i.tm.cpu_flags.bitfield.cpupadlock
9384 || prefix != REPE_PREFIX_OPCODE
9385 || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
9386 add_prefix (prefix);
9387 }
9388 break;
9389 case 2:
9390 if ((i.tm.base_opcode & 0xff0000) != 0)
9391 {
9392 prefix = (i.tm.base_opcode >> 16) & 0xff;
9393 add_prefix (prefix);
9394 }
9395 break;
9396 case 1:
9397 break;
9398 case 0:
9399 /* Check for pseudo prefixes. */
9400 as_bad_where (insn_start_frag->fr_file,
9401 insn_start_frag->fr_line,
9402 _("pseudo prefix without instruction"));
9403 return;
9404 default:
9405 abort ();
9406 }
9407
9408 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9409 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9410 R_X86_64_GOTTPOFF relocation so that linker can safely
9411 perform IE->LE optimization. A dummy REX_OPCODE prefix
9412 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9413 relocation for GDesc -> IE/LE optimization. */
9414 if (x86_elf_abi == X86_64_X32_ABI
9415 && i.operands == 2
9416 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9417 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9418 && i.prefix[REX_PREFIX] == 0)
9419 add_prefix (REX_OPCODE);
9420 #endif
9421
9422 /* The prefix bytes. */
9423 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9424 if (*q)
9425 frag_opcode_byte (*q);
9426 }
9427 else
9428 {
9429 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9430 if (*q)
9431 switch (j)
9432 {
9433 case SEG_PREFIX:
9434 case ADDR_PREFIX:
9435 frag_opcode_byte (*q);
9436 break;
9437 default:
9438 /* There should be no other prefixes for instructions
9439 with VEX prefix. */
9440 abort ();
9441 }
9442
9443 /* For EVEX instructions i.vrex should become 0 after
9444 build_evex_prefix. For VEX instructions upper 16 registers
9445 aren't available, so VREX should be 0. */
9446 if (i.vrex)
9447 abort ();
9448 /* Now the VEX prefix. */
9449 if (now_seg != absolute_section)
9450 {
9451 p = frag_more (i.vex.length);
9452 for (j = 0; j < i.vex.length; j++)
9453 p[j] = i.vex.bytes[j];
9454 }
9455 else
9456 abs_section_offset += i.vex.length;
9457 }
9458
9459 /* Now the opcode; be careful about word order here! */
9460 if (now_seg == absolute_section)
9461 abs_section_offset += i.tm.opcode_length;
9462 else if (i.tm.opcode_length == 1)
9463 {
9464 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9465 }
9466 else
9467 {
9468 switch (i.tm.opcode_length)
9469 {
9470 case 4:
9471 p = frag_more (4);
9472 *p++ = (i.tm.base_opcode >> 24) & 0xff;
9473 *p++ = (i.tm.base_opcode >> 16) & 0xff;
9474 break;
9475 case 3:
9476 p = frag_more (3);
9477 *p++ = (i.tm.base_opcode >> 16) & 0xff;
9478 break;
9479 case 2:
9480 p = frag_more (2);
9481 break;
9482 default:
9483 abort ();
9484 break;
9485 }
9486
9487 /* Put out high byte first: can't use md_number_to_chars! */
9488 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9489 *p = i.tm.base_opcode & 0xff;
9490 }
9491
9492 /* Now the modrm byte and sib byte (if present). */
9493 if (i.tm.opcode_modifier.modrm)
9494 {
9495 frag_opcode_byte ((i.rm.regmem << 0)
9496 | (i.rm.reg << 3)
9497 | (i.rm.mode << 6));
9498 /* If i.rm.regmem == ESP (4)
9499 && i.rm.mode != (Register mode)
9500 && not 16 bit
9501 ==> need second modrm byte. */
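/* E.g. `movl (%eax,%ebx,4), %ecx' gets ModRM 0x0c (mode 0, reg %ecx,
   rm 4 == SIB escape) plus SIB 0x98 (scale 4 -> 2, index %ebx, base
   %eax): 8b 0c 98. */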
9502 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9503 && i.rm.mode != 3
9504 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9505 frag_opcode_byte ((i.sib.base << 0)
9506 | (i.sib.index << 3)
9507 | (i.sib.scale << 6));
9508 }
9509
9510 if (i.disp_operands)
9511 output_disp (insn_start_frag, insn_start_off);
9512
9513 if (i.imm_operands)
9514 output_imm (insn_start_frag, insn_start_off);
9515
9516 /*
9517 * frag_now_fix () returning plain abs_section_offset when we're in the
9518 * absolute section, and abs_section_offset not getting updated as data
9519 * gets added to the frag breaks the logic below.
9520 */
9521 if (now_seg != absolute_section)
9522 {
9523 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9524 if (j > 15)
9525 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9526 j);
9527 else if (fragP)
9528 {
9529 /* NB: Don't add prefix with GOTPC relocation since
9530 output_disp() above depends on the fixed encoding
9531 length. Can't add prefix with TLS relocation since
9532 it breaks TLS linker optimization. */
9533 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9534 /* Prefix count on the current instruction. */
9535 unsigned int count = i.vex.length;
9536 unsigned int k;
9537 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9538 /* REX byte is encoded in VEX/EVEX prefix. */
9539 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9540 count++;
9541
9542 /* Count prefixes for extended opcode maps. */
9543 if (!i.vex.length)
9544 switch (i.tm.opcode_length)
9545 {
9546 case 3:
9547 if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
9548 {
9549 count++;
9550 switch ((i.tm.base_opcode >> 8) & 0xff)
9551 {
9552 case 0x38:
9553 case 0x3a:
9554 count++;
9555 break;
9556 default:
9557 break;
9558 }
9559 }
9560 break;
9561 case 2:
9562 if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
9563 count++;
9564 break;
9565 case 1:
9566 break;
9567 default:
9568 abort ();
9569 }
9570
9571 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9572 == BRANCH_PREFIX)
9573 {
9574 /* Set the maximum prefix size in BRANCH_PREFIX
9575 frag. */
9576 if (fragP->tc_frag_data.max_bytes > max)
9577 fragP->tc_frag_data.max_bytes = max;
9578 if (fragP->tc_frag_data.max_bytes > count)
9579 fragP->tc_frag_data.max_bytes -= count;
9580 else
9581 fragP->tc_frag_data.max_bytes = 0;
9582 }
9583 else
9584 {
9585 /* Remember the maximum prefix size in FUSED_JCC_PADDING
9586 frag. */
9587 unsigned int max_prefix_size;
9588 if (align_branch_prefix_size > max)
9589 max_prefix_size = max;
9590 else
9591 max_prefix_size = align_branch_prefix_size;
9592 if (max_prefix_size > count)
9593 fragP->tc_frag_data.max_prefix_length
9594 = max_prefix_size - count;
9595 }
9596
9597 /* Use existing segment prefix if possible. Use CS
9598 segment prefix in 64-bit mode. In 32-bit mode, use SS
9599 segment prefix with ESP/EBP base register and use DS
9600 segment prefix without ESP/EBP base register. */
9601 if (i.prefix[SEG_PREFIX])
9602 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9603 else if (flag_code == CODE_64BIT)
9604 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9605 else if (i.base_reg
9606 && (i.base_reg->reg_num == 4
9607 || i.base_reg->reg_num == 5))
9608 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9609 else
9610 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9611 }
9612 }
9613 }
9614
9615 /* NB: Doesn't work with COND_JUMP86 without i386. */
9616 if (align_branch_power
9617 && now_seg != absolute_section
9618 && cpu_arch_flags.bitfield.cpui386)
9619 {
9620 /* Terminate each frag so that we can add prefix and check for
9621 fused jcc. */
9622 frag_wane (frag_now);
9623 frag_new (0);
9624 }
9625
9626 #ifdef DEBUG386
9627 if (flag_debug)
9628 {
9629 pi ("" /*line*/, &i);
9630 }
9631 #endif /* DEBUG386 */
9632 }
9633
9634 /* Return the size of the displacement operand N. */
9635
9636 static int
9637 disp_size (unsigned int n)
9638 {
9639 int size = 4;
9640
9641 if (i.types[n].bitfield.disp64)
9642 size = 8;
9643 else if (i.types[n].bitfield.disp8)
9644 size = 1;
9645 else if (i.types[n].bitfield.disp16)
9646 size = 2;
9647 return size;
9648 }
9649
9650 /* Return the size of the immediate operand N. */
9651
9652 static int
9653 imm_size (unsigned int n)
9654 {
9655 int size = 4;
9656 if (i.types[n].bitfield.imm64)
9657 size = 8;
9658 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9659 size = 1;
9660 else if (i.types[n].bitfield.imm16)
9661 size = 2;
9662 return size;
9663 }
9664
9665 static void
9666 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9667 {
9668 char *p;
9669 unsigned int n;
9670
9671 for (n = 0; n < i.operands; n++)
9672 {
9673 if (operand_type_check (i.types[n], disp))
9674 {
9675 int size = disp_size (n);
9676
9677 if (now_seg == absolute_section)
9678 abs_section_offset += size;
9679 else if (i.op[n].disps->X_op == O_constant)
9680 {
9681 offsetT val = i.op[n].disps->X_add_number;
9682
9683 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9684 size);
9685 p = frag_more (size);
9686 md_number_to_chars (p, val, size);
9687 }
9688 else
9689 {
9690 enum bfd_reloc_code_real reloc_type;
9691 int sign = i.types[n].bitfield.disp32s;
9692 int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9693 fixS *fixP;
9694
9695 /* We can't have 8 bit displacement here. */
9696 gas_assert (!i.types[n].bitfield.disp8);
9697
9698 /* The PC relative address is computed relative
9699 to the instruction boundary, so in case immediate
9700 fields follow, we need to adjust the value. */
9701 if (pcrel && i.imm_operands)
9702 {
9703 unsigned int n1;
9704 int sz = 0;
9705
9706 for (n1 = 0; n1 < i.operands; n1++)
9707 if (operand_type_check (i.types[n1], imm))
9708 {
9709 /* Only one immediate is allowed for a
9710 PC-relative address. */
9711 gas_assert (sz == 0);
9712 sz = imm_size (n1);
9713 i.op[n].disps->X_add_number -= sz;
9714 }
9715 /* We should find the immediate. */
9716 gas_assert (sz != 0);
9717 }
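/* A sketch: for "testl $0x12345678, foo(%rip)" the 4 immediate
   bytes follow the disp32 field, while the CPU computes the
   RIP-relative address from the end of the instruction, so the
   immediate size is subtracted from the addend to compensate.  */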
9718
9719 p = frag_more (size);
9720 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9721 if (GOT_symbol
9722 && GOT_symbol == i.op[n].disps->X_add_symbol
9723 && (((reloc_type == BFD_RELOC_32
9724 || reloc_type == BFD_RELOC_X86_64_32S
9725 || (reloc_type == BFD_RELOC_64
9726 && object_64bit))
9727 && (i.op[n].disps->X_op == O_symbol
9728 || (i.op[n].disps->X_op == O_add
9729 && ((symbol_get_value_expression
9730 (i.op[n].disps->X_op_symbol)->X_op)
9731 == O_subtract))))
9732 || reloc_type == BFD_RELOC_32_PCREL))
9733 {
9734 if (!object_64bit)
9735 {
9736 reloc_type = BFD_RELOC_386_GOTPC;
9737 i.has_gotpc_tls_reloc = TRUE;
9738 i.op[n].imms->X_add_number +=
9739 encoding_length (insn_start_frag, insn_start_off, p);
9740 }
9741 else if (reloc_type == BFD_RELOC_64)
9742 reloc_type = BFD_RELOC_X86_64_GOTPC64;
9743 else
9744 /* Don't do the adjustment for x86-64, as there
9745 the pcrel addressing is relative to the _next_
9746 insn, and that is taken care of in other code. */
9747 reloc_type = BFD_RELOC_X86_64_GOTPC32;
9748 }
9749 else if (align_branch_power)
9750 {
9751 switch (reloc_type)
9752 {
9753 case BFD_RELOC_386_TLS_GD:
9754 case BFD_RELOC_386_TLS_LDM:
9755 case BFD_RELOC_386_TLS_IE:
9756 case BFD_RELOC_386_TLS_IE_32:
9757 case BFD_RELOC_386_TLS_GOTIE:
9758 case BFD_RELOC_386_TLS_GOTDESC:
9759 case BFD_RELOC_386_TLS_DESC_CALL:
9760 case BFD_RELOC_X86_64_TLSGD:
9761 case BFD_RELOC_X86_64_TLSLD:
9762 case BFD_RELOC_X86_64_GOTTPOFF:
9763 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
9764 case BFD_RELOC_X86_64_TLSDESC_CALL:
9765 i.has_gotpc_tls_reloc = TRUE;
9766 default:
9767 break;
9768 }
9769 }
9770 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
9771 size, i.op[n].disps, pcrel,
9772 reloc_type);
9773 /* Check for "call/jmp *mem", "mov mem, %reg",
9774 "test %reg, mem" and "binop mem, %reg" where binop
9775 is one of adc, add, and, cmp, or, sbb, sub, xor
9776 instructions without data prefix. Always generate
9777 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
9778 if (i.prefix[DATA_PREFIX] == 0
9779 && (generate_relax_relocations
9780 || (!object_64bit
9781 && i.rm.mode == 0
9782 && i.rm.regmem == 5))
9783 && (i.rm.mode == 2
9784 || (i.rm.mode == 0 && i.rm.regmem == 5))
9785 && !is_any_vex_encoding (&i.tm)
9786 && ((i.operands == 1
9787 && i.tm.base_opcode == 0xff
9788 && (i.rm.reg == 2 || i.rm.reg == 4))
9789 || (i.operands == 2
9790 && (i.tm.base_opcode == 0x8b
9791 || i.tm.base_opcode == 0x85
9792 || (i.tm.base_opcode & ~0x38) == 0x03))))
9793 {
9794 if (object_64bit)
9795 {
9796 fixP->fx_tcbit = i.rex != 0;
9797 if (i.base_reg
9798 && (i.base_reg->reg_num == RegIP))
9799 fixP->fx_tcbit2 = 1;
9800 }
9801 else
9802 fixP->fx_tcbit2 = 1;
9803 }
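/* A sketch of the result: with relax relocations enabled,
   "movq foo@GOTPCREL(%rip), %rax" is emitted with fx_tcbit set
   (a REX prefix is present), which later selects
   R_X86_64_REX_GOTPCRELX, letting the linker relax the load to
   "leaq foo(%rip), %rax" when foo is defined locally.  */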
9804 }
9805 }
9806 }
9807 }
9808
9809 static void
9810 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
9811 {
9812 char *p;
9813 unsigned int n;
9814
9815 for (n = 0; n < i.operands; n++)
9816 {
9817 /* Skip SAE/RC Imm operands in EVEX. They are already handled. */
9818 if (i.rounding && (int) n == i.rounding->operand)
9819 continue;
9820
9821 if (operand_type_check (i.types[n], imm))
9822 {
9823 int size = imm_size (n);
9824
9825 if (now_seg == absolute_section)
9826 abs_section_offset += size;
9827 else if (i.op[n].imms->X_op == O_constant)
9828 {
9829 offsetT val;
9830
9831 val = offset_in_range (i.op[n].imms->X_add_number,
9832 size);
9833 p = frag_more (size);
9834 md_number_to_chars (p, val, size);
9835 }
9836 else
9837 {
9838 /* Not absolute_section.
9839 Need a 32-bit fixup (8-bit non-absolute
9840 immediates are not supported). Try to support
9841 other sizes ... */
9842 enum bfd_reloc_code_real reloc_type;
9843 int sign;
9844
9845 if (i.types[n].bitfield.imm32s
9846 && (i.suffix == QWORD_MNEM_SUFFIX
9847 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
9848 sign = 1;
9849 else
9850 sign = 0;
9851
9852 p = frag_more (size);
9853 reloc_type = reloc (size, 0, sign, i.reloc[n]);
9854
9855 /* This is tough to explain. We end up with this one if we
9856 * have operands that look like
9857 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
9858 * obtain the absolute address of the GOT, and it is strongly
9859 * preferable from a performance point of view to avoid using
9860 * a runtime relocation for this. The actual sequence of
9861 * instructions often looks something like:
9862 *
9863 * call .L66
9864 * .L66:
9865 * popl %ebx
9866 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
9867 *
9868 * The call and pop essentially return the absolute address
9869 * of the label .L66 and store it in %ebx. The linker itself
9870 * will ultimately change the first operand of the addl so
9871 * that %ebx points to the GOT, but to keep things simple, the
9872 * .o file must have this operand set so that it generates not
9873 * the absolute address of .L66, but the absolute address of
9874 * itself. This allows the linker to simply treat a GOTPC
9875 * relocation as asking for a pcrel offset to the GOT to be
9876 * added in, and the addend of the relocation is stored in the
9877 * operand field for the instruction itself.
9878 *
9879 * Our job here is to fix the operand so that it would add
9880 * the correct offset so that %ebx would point to itself. The
9881 * thing that is tricky is that .-.L66 will point to the
9882 * beginning of the instruction, so we need to further modify
9883 * the operand so that it will point to itself. There are
9884 * other cases where you have something like:
9885 *
9886 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
9887 *
9888 * and here no correction would be required. Internally in
9889 * the assembler we treat operands of this form as not being
9890 * pcrel since the '.' is explicitly mentioned, and I wonder
9891 * whether it would simplify matters to do it this way. Who
9892 * knows. In earlier versions of the PIC patches, the
9893 * pcrel_adjust field was used to store the correction, but
9894 * since the expression is not pcrel, I felt it would be
9895 * confusing to do it this way. */
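/* A worked example (a sketch, assuming the 81 /0 encoding):
   "addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx" encodes as
   81 c3 <imm32>, so the immediate field starts 2 bytes into the
   instruction.  encoding_length () below then yields 2, which is
   added to the addend so the fixup is computed relative to the
   immediate field itself rather than the instruction start.  */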
9896
9897 if ((reloc_type == BFD_RELOC_32
9898 || reloc_type == BFD_RELOC_X86_64_32S
9899 || reloc_type == BFD_RELOC_64)
9900 && GOT_symbol
9901 && GOT_symbol == i.op[n].imms->X_add_symbol
9902 && (i.op[n].imms->X_op == O_symbol
9903 || (i.op[n].imms->X_op == O_add
9904 && ((symbol_get_value_expression
9905 (i.op[n].imms->X_op_symbol)->X_op)
9906 == O_subtract))))
9907 {
9908 if (!object_64bit)
9909 reloc_type = BFD_RELOC_386_GOTPC;
9910 else if (size == 4)
9911 reloc_type = BFD_RELOC_X86_64_GOTPC32;
9912 else if (size == 8)
9913 reloc_type = BFD_RELOC_X86_64_GOTPC64;
9914 i.has_gotpc_tls_reloc = TRUE;
9915 i.op[n].imms->X_add_number +=
9916 encoding_length (insn_start_frag, insn_start_off, p);
9917 }
9918 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9919 i.op[n].imms, 0, reloc_type);
9920 }
9921 }
9922 }
9923 }
9924 \f
9925 /* x86_cons_fix_new is called via the expression parsing code when a
9926 reloc is needed. We use this hook to get the correct .got reloc. */
9927 static int cons_sign = -1;
9928
9929 void
9930 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
9931 expressionS *exp, bfd_reloc_code_real_type r)
9932 {
9933 r = reloc (len, 0, cons_sign, r);
9934
9935 #ifdef TE_PE
9936 if (exp->X_op == O_secrel)
9937 {
9938 exp->X_op = O_symbol;
9939 r = BFD_RELOC_32_SECREL;
9940 }
9941 #endif
9942
9943 fix_new_exp (frag, off, len, exp, 0, r);
9944 }
9945
9946 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9947 purpose of the `.dc.a' internal pseudo-op. */
9948
9949 int
9950 x86_address_bytes (void)
9951 {
9952 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9953 return 4;
9954 return stdoutput->arch_info->bits_per_address / 8;
9955 }
9956
9957 #if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
9958 || defined (LEX_AT)
9959 # define lex_got(reloc, adjust, types) NULL
9960 #else
9961 /* Parse operands of the form
9962 <symbol>@GOTOFF+<nnn>
9963 and similar .plt or .got references.
9964
9965 If we find one, set up the correct relocation in RELOC and copy the
9966 input string, minus the `@GOTOFF', into a malloc'd buffer for
9967 parsing by the calling routine. Return this buffer, and if ADJUST
9968 is non-null set it to the length of the string we removed from the
9969 input line. Otherwise return NULL. */
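/* For example (a sketch): given the input "foo@GOTOFF+4" in 32-bit
   mode, *REL is set to BFD_RELOC_386_GOTOFF, the returned buffer
   holds "foo +4" (the reloc token is replaced by a space so that
   typos like "foo@GOTOFF1" still fail to parse), and *ADJUST is
   set to 6, the length of "GOTOFF".  */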
9970 static char *
9971 lex_got (enum bfd_reloc_code_real *rel,
9972 int *adjust,
9973 i386_operand_type *types)
9974 {
9975 /* Some of the relocations depend on the size of the field to
9976 be relocated. But in our callers i386_immediate and i386_displacement
9977 we don't yet know the operand size (this will be set by insn
9978 matching). Hence we record the word32 relocation here,
9979 and adjust the reloc according to the real size in reloc(). */
9980 static const struct {
9981 const char *str;
9982 int len;
9983 const enum bfd_reloc_code_real rel[2];
9984 const i386_operand_type types64;
9985 } gotrel[] = {
9986 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9987 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
9988 BFD_RELOC_SIZE32 },
9989 OPERAND_TYPE_IMM32_64 },
9990 #endif
9991 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
9992 BFD_RELOC_X86_64_PLTOFF64 },
9993 OPERAND_TYPE_IMM64 },
9994 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
9995 BFD_RELOC_X86_64_PLT32 },
9996 OPERAND_TYPE_IMM32_32S_DISP32 },
9997 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
9998 BFD_RELOC_X86_64_GOTPLT64 },
9999 OPERAND_TYPE_IMM64_DISP64 },
10000 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10001 BFD_RELOC_X86_64_GOTOFF64 },
10002 OPERAND_TYPE_IMM64_DISP64 },
10003 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10004 BFD_RELOC_X86_64_GOTPCREL },
10005 OPERAND_TYPE_IMM32_32S_DISP32 },
10006 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10007 BFD_RELOC_X86_64_TLSGD },
10008 OPERAND_TYPE_IMM32_32S_DISP32 },
10009 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10010 _dummy_first_bfd_reloc_code_real },
10011 OPERAND_TYPE_NONE },
10012 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10013 BFD_RELOC_X86_64_TLSLD },
10014 OPERAND_TYPE_IMM32_32S_DISP32 },
10015 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10016 BFD_RELOC_X86_64_GOTTPOFF },
10017 OPERAND_TYPE_IMM32_32S_DISP32 },
10018 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10019 BFD_RELOC_X86_64_TPOFF32 },
10020 OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
10021 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10022 _dummy_first_bfd_reloc_code_real },
10023 OPERAND_TYPE_NONE },
10024 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10025 BFD_RELOC_X86_64_DTPOFF32 },
10026 OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
10027 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10028 _dummy_first_bfd_reloc_code_real },
10029 OPERAND_TYPE_NONE },
10030 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10031 _dummy_first_bfd_reloc_code_real },
10032 OPERAND_TYPE_NONE },
10033 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10034 BFD_RELOC_X86_64_GOT32 },
10035 OPERAND_TYPE_IMM32_32S_64_DISP32 },
10036 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10037 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10038 OPERAND_TYPE_IMM32_32S_DISP32 },
10039 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10040 BFD_RELOC_X86_64_TLSDESC_CALL },
10041 OPERAND_TYPE_IMM32_32S_DISP32 },
10042 };
10043 char *cp;
10044 unsigned int j;
10045
10046 #if defined (OBJ_MAYBE_ELF)
10047 if (!IS_ELF)
10048 return NULL;
10049 #endif
10050
10051 for (cp = input_line_pointer; *cp != '@'; cp++)
10052 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10053 return NULL;
10054
10055 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10056 {
10057 int len = gotrel[j].len;
10058 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10059 {
10060 if (gotrel[j].rel[object_64bit] != 0)
10061 {
10062 int first, second;
10063 char *tmpbuf, *past_reloc;
10064
10065 *rel = gotrel[j].rel[object_64bit];
10066
10067 if (types)
10068 {
10069 if (flag_code != CODE_64BIT)
10070 {
10071 types->bitfield.imm32 = 1;
10072 types->bitfield.disp32 = 1;
10073 }
10074 else
10075 *types = gotrel[j].types64;
10076 }
10077
10078 if (j != 0 && GOT_symbol == NULL)
10079 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10080
10081 /* The length of the first part of our input line. */
10082 first = cp - input_line_pointer;
10083
10084 /* The second part goes from after the reloc token until
10085 (and including) an end_of_line char or comma. */
10086 past_reloc = cp + 1 + len;
10087 cp = past_reloc;
10088 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10089 ++cp;
10090 second = cp + 1 - past_reloc;
10091
10092 /* Allocate and copy string. The trailing NUL shouldn't
10093 be necessary, but be safe. */
10094 tmpbuf = XNEWVEC (char, first + second + 2);
10095 memcpy (tmpbuf, input_line_pointer, first);
10096 if (second != 0 && *past_reloc != ' ')
10097 /* Replace the relocation token with ' ', so that
10098 errors like foo@GOTOFF1 will be detected. */
10099 tmpbuf[first++] = ' ';
10100 else
10101 /* Increment length by 1 if the relocation token is
10102 removed. */
10103 len++;
10104 if (adjust)
10105 *adjust = len;
10106 memcpy (tmpbuf + first, past_reloc, second);
10107 tmpbuf[first + second] = '\0';
10108 return tmpbuf;
10109 }
10110
10111 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10112 gotrel[j].str, 1 << (5 + object_64bit));
10113 return NULL;
10114 }
10115 }
10116
10117 /* Might be a symbol version string. Don't as_bad here. */
10118 return NULL;
10119 }
10120 #endif
10121
10122 #ifdef TE_PE
10123 #ifdef lex_got
10124 #undef lex_got
10125 #endif
10126 /* Parse operands of the form
10127 <symbol>@SECREL32+<nnn>
10128
10129 If we find one, set up the correct relocation in RELOC and copy the
10130 input string, minus the `@SECREL32', into a malloc'd buffer for
10131 parsing by the calling routine. Return this buffer, and if ADJUST
10132 is non-null set it to the length of the string we removed from the
10133 input line. Otherwise return NULL.
10134
10135 This function is copied from the ELF version above, adjusted for PE targets. */
10136
10137 static char *
10138 lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
10139 int *adjust ATTRIBUTE_UNUSED,
10140 i386_operand_type *types)
10141 {
10142 static const struct
10143 {
10144 const char *str;
10145 int len;
10146 const enum bfd_reloc_code_real rel[2];
10147 const i386_operand_type types64;
10148 }
10149 gotrel[] =
10150 {
10151 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10152 BFD_RELOC_32_SECREL },
10153 OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
10154 };
10155
10156 char *cp;
10157 unsigned j;
10158
10159 for (cp = input_line_pointer; *cp != '@'; cp++)
10160 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10161 return NULL;
10162
10163 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10164 {
10165 int len = gotrel[j].len;
10166
10167 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10168 {
10169 if (gotrel[j].rel[object_64bit] != 0)
10170 {
10171 int first, second;
10172 char *tmpbuf, *past_reloc;
10173
10174 *rel = gotrel[j].rel[object_64bit];
10175 if (adjust)
10176 *adjust = len;
10177
10178 if (types)
10179 {
10180 if (flag_code != CODE_64BIT)
10181 {
10182 types->bitfield.imm32 = 1;
10183 types->bitfield.disp32 = 1;
10184 }
10185 else
10186 *types = gotrel[j].types64;
10187 }
10188
10189 /* The length of the first part of our input line. */
10190 first = cp - input_line_pointer;
10191
10192 /* The second part goes from after the reloc token until
10193 (and including) an end_of_line char or comma. */
10194 past_reloc = cp + 1 + len;
10195 cp = past_reloc;
10196 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10197 ++cp;
10198 second = cp + 1 - past_reloc;
10199
10200 /* Allocate and copy string. The trailing NUL shouldn't
10201 be necessary, but be safe. */
10202 tmpbuf = XNEWVEC (char, first + second + 2);
10203 memcpy (tmpbuf, input_line_pointer, first);
10204 if (second != 0 && *past_reloc != ' ')
10205 /* Replace the relocation token with ' ', so that
10206 errors like foo@SECREL321 will be detected. */
10207 tmpbuf[first++] = ' ';
10208 memcpy (tmpbuf + first, past_reloc, second);
10209 tmpbuf[first + second] = '\0';
10210 return tmpbuf;
10211 }
10212
10213 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10214 gotrel[j].str, 1 << (5 + object_64bit));
10215 return NULL;
10216 }
10217 }
10218
10219 /* Might be a symbol version string. Don't as_bad here. */
10220 return NULL;
10221 }
10222
10223 #endif /* TE_PE */
10224
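/* Parse an expression for a data directive (.long, .quad, ...),
   handling relocation specifiers, e.g. (a sketch):

   .long foo@GOTOFF
   .quad bar@PLTOFF	(64-bit objects only)

   Returns the relocation to use, or NO_RELOC for a plain value.  */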
10225 bfd_reloc_code_real_type
10226 x86_cons (expressionS *exp, int size)
10227 {
10228 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10229
10230 intel_syntax = -intel_syntax;
10231
10232 exp->X_md = 0;
10233 if (size == 4 || (object_64bit && size == 8))
10234 {
10235 /* Handle @GOTOFF and the like in an expression. */
10236 char *save;
10237 char *gotfree_input_line;
10238 int adjust = 0;
10239
10240 save = input_line_pointer;
10241 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10242 if (gotfree_input_line)
10243 input_line_pointer = gotfree_input_line;
10244
10245 expression (exp);
10246
10247 if (gotfree_input_line)
10248 {
10249 /* expression () has merrily parsed up to the end of line,
10250 or a comma - in the wrong buffer. Transfer how far
10251 input_line_pointer has moved to the right buffer. */
10252 input_line_pointer = (save
10253 + (input_line_pointer - gotfree_input_line)
10254 + adjust);
10255 free (gotfree_input_line);
10256 if (exp->X_op == O_constant
10257 || exp->X_op == O_absent
10258 || exp->X_op == O_illegal
10259 || exp->X_op == O_register
10260 || exp->X_op == O_big)
10261 {
10262 char c = *input_line_pointer;
10263 *input_line_pointer = 0;
10264 as_bad (_("missing or invalid expression `%s'"), save);
10265 *input_line_pointer = c;
10266 }
10267 else if ((got_reloc == BFD_RELOC_386_PLT32
10268 || got_reloc == BFD_RELOC_X86_64_PLT32)
10269 && exp->X_op != O_symbol)
10270 {
10271 char c = *input_line_pointer;
10272 *input_line_pointer = 0;
10273 as_bad (_("invalid PLT expression `%s'"), save);
10274 *input_line_pointer = c;
10275 }
10276 }
10277 }
10278 else
10279 expression (exp);
10280
10281 intel_syntax = -intel_syntax;
10282
10283 if (intel_syntax)
10284 i386_intel_simplify (exp);
10285
10286 return got_reloc;
10287 }
10288
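/* Like cons (), but force a signed relocation in 64-bit mode; used
   for directives such as ".slong" (an assumption based on the usual
   md_pseudo_table registration), where a 32-bit value must resolve
   to a sign-extendable (32S) relocation.  */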
10289 static void
10290 signed_cons (int size)
10291 {
10292 if (flag_code == CODE_64BIT)
10293 cons_sign = 1;
10294 cons (size);
10295 cons_sign = -1;
10296 }
10297
10298 #ifdef TE_PE
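/* Handle the .secrel32 directive, emitting one or more 32-bit
   section-relative values, e.g. (a sketch):

   .secrel32 sym1, sym2  */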
10299 static void
10300 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10301 {
10302 expressionS exp;
10303
10304 do
10305 {
10306 expression (&exp);
10307 if (exp.X_op == O_symbol)
10308 exp.X_op = O_secrel;
10309
10310 emit_expr (&exp, 4);
10311 }
10312 while (*input_line_pointer++ == ',');
10313
10314 input_line_pointer--;
10315 demand_empty_rest_of_line ();
10316 }
10317 #endif
10318
10319 /* Handle Vector operations. */
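/* For example (AT&T syntax, a sketch):

   vaddps (%rax){1to16}, %zmm1, %zmm2	{1to16}: broadcast
   vmovaps %zmm0, %zmm1{%k2}		{%k2}: write mask
   vmovaps %zmm0, %zmm1{%k2}{z}		{z}: zeroing-masking  */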
10320
10321 static char *
10322 check_VecOperations (char *op_string, char *op_end)
10323 {
10324 const reg_entry *mask;
10325 const char *saved;
10326 char *end_op;
10327
10328 while (*op_string
10329 && (op_end == NULL || op_string < op_end))
10330 {
10331 saved = op_string;
10332 if (*op_string == '{')
10333 {
10334 op_string++;
10335
10336 /* Check broadcasts. */
10337 if (strncmp (op_string, "1to", 3) == 0)
10338 {
10339 int bcst_type;
10340
10341 if (i.broadcast)
10342 goto duplicated_vec_op;
10343
10344 op_string += 3;
10345 if (*op_string == '8')
10346 bcst_type = 8;
10347 else if (*op_string == '4')
10348 bcst_type = 4;
10349 else if (*op_string == '2')
10350 bcst_type = 2;
10351 else if (*op_string == '1'
10352 && *(op_string+1) == '6')
10353 {
10354 bcst_type = 16;
10355 op_string++;
10356 }
10357 else
10358 {
10359 as_bad (_("Unsupported broadcast: `%s'"), saved);
10360 return NULL;
10361 }
10362 op_string++;
10363
10364 broadcast_op.type = bcst_type;
10365 broadcast_op.operand = this_operand;
10366 broadcast_op.bytes = 0;
10367 i.broadcast = &broadcast_op;
10368 }
10369 /* Check masking operation. */
10370 else if ((mask = parse_register (op_string, &end_op)) != NULL)
10371 {
10372 if (mask == &bad_reg)
10373 return NULL;
10374
10375 /* k0 can't be used for write mask. */
10376 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10377 {
10378 as_bad (_("`%s%s' can't be used for write mask"),
10379 register_prefix, mask->reg_name);
10380 return NULL;
10381 }
10382
10383 if (!i.mask)
10384 {
10385 mask_op.mask = mask;
10386 mask_op.zeroing = 0;
10387 mask_op.operand = this_operand;
10388 i.mask = &mask_op;
10389 }
10390 else
10391 {
10392 if (i.mask->mask)
10393 goto duplicated_vec_op;
10394
10395 i.mask->mask = mask;
10396
10397 /* Only "{z}" is allowed here. No need to check
10398 zeroing mask explicitly. */
10399 if (i.mask->operand != this_operand)
10400 {
10401 as_bad (_("invalid write mask `%s'"), saved);
10402 return NULL;
10403 }
10404 }
10405
10406 op_string = end_op;
10407 }
10408 /* Check zeroing-flag for masking operation. */
10409 else if (*op_string == 'z')
10410 {
10411 if (!i.mask)
10412 {
10413 mask_op.mask = NULL;
10414 mask_op.zeroing = 1;
10415 mask_op.operand = this_operand;
10416 i.mask = &mask_op;
10417 }
10418 else
10419 {
10420 if (i.mask->zeroing)
10421 {
10422 duplicated_vec_op:
10423 as_bad (_("duplicated `%s'"), saved);
10424 return NULL;
10425 }
10426
10427 i.mask->zeroing = 1;
10428
10429 /* Only "{%k}" is allowed here. No need to check mask
10430 register explicitly. */
10431 if (i.mask->operand != this_operand)
10432 {
10433 as_bad (_("invalid zeroing-masking `%s'"),
10434 saved);
10435 return NULL;
10436 }
10437 }
10438
10439 op_string++;
10440 }
10441 else
10442 goto unknown_vec_op;
10443
10444 if (*op_string != '}')
10445 {
10446 as_bad (_("missing `}' in `%s'"), saved);
10447 return NULL;
10448 }
10449 op_string++;
10450
10451 /* Strip whitespace since the addition of pseudo prefixes
10452 changed how the scrubber treats '{'. */
10453 if (is_space_char (*op_string))
10454 ++op_string;
10455
10456 continue;
10457 }
10458 unknown_vec_op:
10459 /* We don't know this one. */
10460 as_bad (_("unknown vector operation: `%s'"), saved);
10461 return NULL;
10462 }
10463
10464 if (i.mask && i.mask->zeroing && !i.mask->mask)
10465 {
10466 as_bad (_("zeroing-masking only allowed with write mask"));
10467 return NULL;
10468 }
10469
10470 return op_string;
10471 }
10472
10473 static int
10474 i386_immediate (char *imm_start)
10475 {
10476 char *save_input_line_pointer;
10477 char *gotfree_input_line;
10478 segT exp_seg = 0;
10479 expressionS *exp;
10480 i386_operand_type types;
10481
10482 operand_type_set (&types, ~0);
10483
10484 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10485 {
10486 as_bad (_("at most %d immediate operands are allowed"),
10487 MAX_IMMEDIATE_OPERANDS);
10488 return 0;
10489 }
10490
10491 exp = &im_expressions[i.imm_operands++];
10492 i.op[this_operand].imms = exp;
10493
10494 if (is_space_char (*imm_start))
10495 ++imm_start;
10496
10497 save_input_line_pointer = input_line_pointer;
10498 input_line_pointer = imm_start;
10499
10500 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10501 if (gotfree_input_line)
10502 input_line_pointer = gotfree_input_line;
10503
10504 exp_seg = expression (exp);
10505
10506 SKIP_WHITESPACE ();
10507
10508 /* Handle vector operations. */
10509 if (*input_line_pointer == '{')
10510 {
10511 input_line_pointer = check_VecOperations (input_line_pointer,
10512 NULL);
10513 if (input_line_pointer == NULL)
10514 return 0;
10515 }
10516
10517 if (*input_line_pointer)
10518 as_bad (_("junk `%s' after expression"), input_line_pointer);
10519
10520 input_line_pointer = save_input_line_pointer;
10521 if (gotfree_input_line)
10522 {
10523 free (gotfree_input_line);
10524
10525 if (exp->X_op == O_constant || exp->X_op == O_register)
10526 exp->X_op = O_illegal;
10527 }
10528
10529 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10530 }
10531
10532 static int
10533 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10534 i386_operand_type types, const char *imm_start)
10535 {
10536 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10537 {
10538 if (imm_start)
10539 as_bad (_("missing or invalid immediate expression `%s'"),
10540 imm_start);
10541 return 0;
10542 }
10543 else if (exp->X_op == O_constant)
10544 {
10545 /* Size it properly later. */
10546 i.types[this_operand].bitfield.imm64 = 1;
10547 /* If not 64bit, sign extend val. */
10548 if (flag_code != CODE_64BIT
10549 && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
10550 exp->X_add_number
10551 = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
10552 }
10553 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10554 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10555 && exp_seg != absolute_section
10556 && exp_seg != text_section
10557 && exp_seg != data_section
10558 && exp_seg != bss_section
10559 && exp_seg != undefined_section
10560 && !bfd_is_com_section (exp_seg))
10561 {
10562 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10563 return 0;
10564 }
10565 #endif
10566 else if (!intel_syntax && exp_seg == reg_section)
10567 {
10568 if (imm_start)
10569 as_bad (_("illegal immediate register operand %s"), imm_start);
10570 return 0;
10571 }
10572 else
10573 {
10574 /* This is an address. The size of the address will be
10575 determined later, depending on destination register,
10576 suffix, or the default for the section. */
10577 i.types[this_operand].bitfield.imm8 = 1;
10578 i.types[this_operand].bitfield.imm16 = 1;
10579 i.types[this_operand].bitfield.imm32 = 1;
10580 i.types[this_operand].bitfield.imm32s = 1;
10581 i.types[this_operand].bitfield.imm64 = 1;
10582 i.types[this_operand] = operand_type_and (i.types[this_operand],
10583 types);
10584 }
10585
10586 return 1;
10587 }
10588
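/* Parse the scale factor in a memory operand such as
   "(%rax,%rcx,4)"; for 4, i.log2_scale_factor becomes 2.  Returns
   a pointer past the scale expression, or NULL on error.  */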
10589 static char *
10590 i386_scale (char *scale)
10591 {
10592 offsetT val;
10593 char *save = input_line_pointer;
10594
10595 input_line_pointer = scale;
10596 val = get_absolute_expression ();
10597
10598 switch (val)
10599 {
10600 case 1:
10601 i.log2_scale_factor = 0;
10602 break;
10603 case 2:
10604 i.log2_scale_factor = 1;
10605 break;
10606 case 4:
10607 i.log2_scale_factor = 2;
10608 break;
10609 case 8:
10610 i.log2_scale_factor = 3;
10611 break;
10612 default:
10613 {
10614 char sep = *input_line_pointer;
10615
10616 *input_line_pointer = '\0';
10617 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10618 scale);
10619 *input_line_pointer = sep;
10620 input_line_pointer = save;
10621 return NULL;
10622 }
10623 }
10624 if (i.log2_scale_factor != 0 && i.index_reg == 0)
10625 {
10626 as_warn (_("scale factor of %d without an index register"),
10627 1 << i.log2_scale_factor);
10628 i.log2_scale_factor = 0;
10629 }
10630 scale = input_line_pointer;
10631 input_line_pointer = save;
10632 return scale;
10633 }
10634
10635 static int
10636 i386_displacement (char *disp_start, char *disp_end)
10637 {
10638 expressionS *exp;
10639 segT exp_seg = 0;
10640 char *save_input_line_pointer;
10641 char *gotfree_input_line;
10642 int override;
10643 i386_operand_type bigdisp, types = anydisp;
10644 int ret;
10645
10646 if (i.disp_operands == MAX_MEMORY_OPERANDS)
10647 {
10648 as_bad (_("at most %d displacement operands are allowed"),
10649 MAX_MEMORY_OPERANDS);
10650 return 0;
10651 }
10652
10653 operand_type_set (&bigdisp, 0);
10654 if (i.jumpabsolute
10655 || i.types[this_operand].bitfield.baseindex
10656 || (current_templates->start->opcode_modifier.jump != JUMP
10657 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10658 {
10659 i386_addressing_mode ();
10660 override = (i.prefix[ADDR_PREFIX] != 0);
10661 if (flag_code == CODE_64BIT)
10662 {
10663 if (!override)
10664 {
10665 bigdisp.bitfield.disp32s = 1;
10666 bigdisp.bitfield.disp64 = 1;
10667 }
10668 else
10669 bigdisp.bitfield.disp32 = 1;
10670 }
10671 else if ((flag_code == CODE_16BIT) ^ override)
10672 bigdisp.bitfield.disp16 = 1;
10673 else
10674 bigdisp.bitfield.disp32 = 1;
10675 }
10676 else
10677 {
10678 /* For PC-relative branches, the width of the displacement may be
10679 dependent upon data size, but is never dependent upon address size.
10680 Also make sure to not unintentionally match against a non-PC-relative
10681 branch template. */
10682 static templates aux_templates;
10683 const insn_template *t = current_templates->start;
10684 bfd_boolean has_intel64 = FALSE;
10685
10686 aux_templates.start = t;
10687 while (++t < current_templates->end)
10688 {
10689 if (t->opcode_modifier.jump
10690 != current_templates->start->opcode_modifier.jump)
10691 break;
10692 if ((t->opcode_modifier.isa64 >= INTEL64))
10693 has_intel64 = TRUE;
10694 }
10695 if (t < current_templates->end)
10696 {
10697 aux_templates.end = t;
10698 current_templates = &aux_templates;
10699 }
10700
10701 override = (i.prefix[DATA_PREFIX] != 0);
10702 if (flag_code == CODE_64BIT)
10703 {
10704 if ((override || i.suffix == WORD_MNEM_SUFFIX)
10705 && (!intel64 || !has_intel64))
10706 bigdisp.bitfield.disp16 = 1;
10707 else
10708 bigdisp.bitfield.disp32s = 1;
10709 }
10710 else
10711 {
10712 if (!override)
10713 override = (i.suffix == (flag_code != CODE_16BIT
10714 ? WORD_MNEM_SUFFIX
10715 : LONG_MNEM_SUFFIX));
10716 bigdisp.bitfield.disp32 = 1;
10717 if ((flag_code == CODE_16BIT) ^ override)
10718 {
10719 bigdisp.bitfield.disp32 = 0;
10720 bigdisp.bitfield.disp16 = 1;
10721 }
10722 }
10723 }
10724 i.types[this_operand] = operand_type_or (i.types[this_operand],
10725 bigdisp);
10726
10727 exp = &disp_expressions[i.disp_operands];
10728 i.op[this_operand].disps = exp;
10729 i.disp_operands++;
10730 save_input_line_pointer = input_line_pointer;
10731 input_line_pointer = disp_start;
10732 END_STRING_AND_SAVE (disp_end);
10733
10734 #ifndef GCC_ASM_O_HACK
10735 #define GCC_ASM_O_HACK 0
10736 #endif
10737 #if GCC_ASM_O_HACK
10738 END_STRING_AND_SAVE (disp_end + 1);
10739 if (i.types[this_operand].bitfield.baseindex
10740 && displacement_string_end[-1] == '+')
10741 {
10742 /* This hack is to avoid a warning when using the "o"
10743 constraint within gcc asm statements.
10744 For instance:
10745
10746 #define _set_tssldt_desc(n,addr,limit,type) \
10747 __asm__ __volatile__ ( \
10748 "movw %w2,%0\n\t" \
10749 "movw %w1,2+%0\n\t" \
10750 "rorl $16,%1\n\t" \
10751 "movb %b1,4+%0\n\t" \
10752 "movb %4,5+%0\n\t" \
10753 "movb $0,6+%0\n\t" \
10754 "movb %h1,7+%0\n\t" \
10755 "rorl $16,%1" \
10756 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10757
10758 This works great except that the output assembler ends
10759 up looking a bit weird if it turns out that there is
10760 no offset. You end up producing code that looks like:
10761
10762 #APP
10763 movw $235,(%eax)
10764 movw %dx,2+(%eax)
10765 rorl $16,%edx
10766 movb %dl,4+(%eax)
10767 movb $137,5+(%eax)
10768 movb $0,6+(%eax)
10769 movb %dh,7+(%eax)
10770 rorl $16,%edx
10771 #NO_APP
10772
10773 So here we provide the missing zero. */
10774
10775 *displacement_string_end = '0';
10776 }
10777 #endif
10778 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10779 if (gotfree_input_line)
10780 input_line_pointer = gotfree_input_line;
10781
10782 exp_seg = expression (exp);
10783
10784 SKIP_WHITESPACE ();
10785 if (*input_line_pointer)
10786 as_bad (_("junk `%s' after expression"), input_line_pointer);
10787 #if GCC_ASM_O_HACK
10788 RESTORE_END_STRING (disp_end + 1);
10789 #endif
10790 input_line_pointer = save_input_line_pointer;
10791 if (gotfree_input_line)
10792 {
10793 free (gotfree_input_line);
10794
10795 if (exp->X_op == O_constant || exp->X_op == O_register)
10796 exp->X_op = O_illegal;
10797 }
10798
10799 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10800
10801 RESTORE_END_STRING (disp_end);
10802
10803 return ret;
10804 }
10805
10806 static int
10807 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10808 i386_operand_type types, const char *disp_start)
10809 {
10810 i386_operand_type bigdisp;
10811 int ret = 1;
10812
10813 /* We do this to make sure that the section symbol is in
10814 the symbol table. We will ultimately change the relocation
10815 to be relative to the beginning of the section. */
10816 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10817 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10818 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10819 {
10820 if (exp->X_op != O_symbol)
10821 goto inv_disp;
10822
10823 if (S_IS_LOCAL (exp->X_add_symbol)
10824 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10825 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10826 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10827 exp->X_op = O_subtract;
10828 exp->X_op_symbol = GOT_symbol;
10829 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10830 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10831 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10832 i.reloc[this_operand] = BFD_RELOC_64;
10833 else
10834 i.reloc[this_operand] = BFD_RELOC_32;
10835 }
10836
10837 else if (exp->X_op == O_absent
10838 || exp->X_op == O_illegal
10839 || exp->X_op == O_big)
10840 {
10841 inv_disp:
10842 as_bad (_("missing or invalid displacement expression `%s'"),
10843 disp_start);
10844 ret = 0;
10845 }
10846
10847 else if (flag_code == CODE_64BIT
10848 && !i.prefix[ADDR_PREFIX]
10849 && exp->X_op == O_constant)
10850 {
10851 /* Since the displacement is sign-extended to 64 bits, don't allow
10852 disp32, and turn off disp32s if it is out of range. */
10853 i.types[this_operand].bitfield.disp32 = 0;
10854 if (!fits_in_signed_long (exp->X_add_number))
10855 {
10856 i.types[this_operand].bitfield.disp32s = 0;
10857 if (i.types[this_operand].bitfield.baseindex)
10858 {
10859 as_bad (_("0x%lx out range of signed 32bit displacement"),
10860 (long) exp->X_add_number);
10861 ret = 0;
10862 }
10863 }
10864 }
10865
10866 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10867 else if (exp->X_op != O_constant
10868 && OUTPUT_FLAVOR == bfd_target_aout_flavour
10869 && exp_seg != absolute_section
10870 && exp_seg != text_section
10871 && exp_seg != data_section
10872 && exp_seg != bss_section
10873 && exp_seg != undefined_section
10874 && !bfd_is_com_section (exp_seg))
10875 {
10876 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10877 ret = 0;
10878 }
10879 #endif
10880
10881 if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
10882 /* Constants get taken care of by optimize_disp(). */
10883 && exp->X_op != O_constant)
10884 i.types[this_operand].bitfield.disp8 = 1;
10885
10886 /* Check if this is a displacement only operand. */
10887 bigdisp = i.types[this_operand];
10888 bigdisp.bitfield.disp8 = 0;
10889 bigdisp.bitfield.disp16 = 0;
10890 bigdisp.bitfield.disp32 = 0;
10891 bigdisp.bitfield.disp32s = 0;
10892 bigdisp.bitfield.disp64 = 0;
10893 if (operand_type_all_zero (&bigdisp))
10894 i.types[this_operand] = operand_type_and (i.types[this_operand],
10895 types);
10896
10897 return ret;
10898 }
10899
10900 /* Return the active addressing mode, taking address override and
10901 registers forming the address into consideration. Update the
10902 address override prefix if necessary. */
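/* For example (a sketch): in 64-bit mode, "movl (%eax), %edx" uses
   a 32-bit base register, so addr_mode becomes CODE_32BIT and a
   0x67 address-size prefix is added automatically.  */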
10903
10904 static enum flag_code
10905 i386_addressing_mode (void)
10906 {
10907 enum flag_code addr_mode;
10908
10909 if (i.prefix[ADDR_PREFIX])
10910 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
10911 else if (flag_code == CODE_16BIT
10912 && current_templates->start->cpu_flags.bitfield.cpumpx
10913 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
10914 from md_assemble() by "is not a valid base/index expression"
10915 when there is a base and/or index. */
10916 && !i.types[this_operand].bitfield.baseindex)
10917 {
10918 /* MPX insn memory operands with neither base nor index must be forced
10919 to use 32-bit addressing in 16-bit mode. */
10920 addr_mode = CODE_32BIT;
10921 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10922 ++i.prefixes;
10923 gas_assert (!i.types[this_operand].bitfield.disp16);
10924 gas_assert (!i.types[this_operand].bitfield.disp32);
10925 }
10926 else
10927 {
10928 addr_mode = flag_code;
10929
10930 #if INFER_ADDR_PREFIX
10931 if (i.mem_operands == 0)
10932 {
10933 /* Infer address prefix from the first memory operand. */
10934 const reg_entry *addr_reg = i.base_reg;
10935
10936 if (addr_reg == NULL)
10937 addr_reg = i.index_reg;
10938
10939 if (addr_reg)
10940 {
10941 if (addr_reg->reg_type.bitfield.dword)
10942 addr_mode = CODE_32BIT;
10943 else if (flag_code != CODE_64BIT
10944 && addr_reg->reg_type.bitfield.word)
10945 addr_mode = CODE_16BIT;
10946
10947 if (addr_mode != flag_code)
10948 {
10949 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10950 i.prefixes += 1;
10951 /* Change the size of any displacement too. At most one
10952 of Disp16 or Disp32 is set.
10953 FIXME. There doesn't seem to be any real need for
10954 separate Disp16 and Disp32 flags. The same goes for
10955 Imm16 and Imm32. Removing them would probably clean
10956 up the code quite a lot. */
10957 if (flag_code != CODE_64BIT
10958 && (i.types[this_operand].bitfield.disp16
10959 || i.types[this_operand].bitfield.disp32))
10960 i.types[this_operand]
10961 = operand_type_xor (i.types[this_operand], disp16_32);
10962 }
10963 }
10964 }
10965 #endif
10966 }
10967
10968 return addr_mode;
10969 }
10970
10971 /* Make sure the memory operand we've been dealt is valid.
10972 Return 1 on success, 0 on failure. */
10973
10974 static int
10975 i386_index_check (const char *operand_string)
10976 {
10977 const char *kind = "base/index";
10978 enum flag_code addr_mode = i386_addressing_mode ();
10979
10980 if (current_templates->start->opcode_modifier.isstring
10981 && !current_templates->start->cpu_flags.bitfield.cpupadlock
10982 && (current_templates->end[-1].opcode_modifier.isstring
10983 || i.mem_operands))
10984 {
10985 /* Memory operands of string insns are special in that they only allow
10986 a single register (rDI, rSI, or rBX) as their memory address. */
10987 const reg_entry *expected_reg;
10988 static const char *di_si[][2] =
10989 {
10990 { "esi", "edi" },
10991 { "si", "di" },
10992 { "rsi", "rdi" }
10993 };
10994 static const char *bx[] = { "ebx", "bx", "rbx" };
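/* For example (a sketch): "stosb" expects %es:(%rdi) (or (%edi) /
   (%di) depending on address size), "lodsb" expects (%rsi), and
   "xlatb" expects (%rbx).  */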
10995
10996 kind = "string address";
10997
10998 if (current_templates->start->opcode_modifier.repprefixok)
10999 {
11000 int es_op = current_templates->end[-1].opcode_modifier.isstring
11001 - IS_STRING_ES_OP0;
11002 int op = 0;
11003
11004 if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
11005 || ((!i.mem_operands != !intel_syntax)
11006 && current_templates->end[-1].operand_types[1]
11007 .bitfield.baseindex))
11008 op = 1;
11009 expected_reg
11010 = (const reg_entry *) str_hash_find (reg_hash,
11011 di_si[addr_mode][op == es_op]);
11012 }
11013 else
11014 expected_reg
11015 = (const reg_entry *) str_hash_find (reg_hash, bx[addr_mode]);
11016
11017 if (i.base_reg != expected_reg
11018 || i.index_reg
11019 || operand_type_check (i.types[this_operand], disp))
11020 {
11021 /* The second memory operand must have the same size as
11022 the first one. */
11023 if (i.mem_operands
11024 && i.base_reg
11025 && !((addr_mode == CODE_64BIT
11026 && i.base_reg->reg_type.bitfield.qword)
11027 || (addr_mode == CODE_32BIT
11028 ? i.base_reg->reg_type.bitfield.dword
11029 : i.base_reg->reg_type.bitfield.word)))
11030 goto bad_address;
11031
11032 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11033 operand_string,
11034 intel_syntax ? '[' : '(',
11035 register_prefix,
11036 expected_reg->reg_name,
11037 intel_syntax ? ']' : ')');
11038 return 1;
11039 }
11040 else
11041 return 1;
11042
11043 bad_address:
11044 as_bad (_("`%s' is not a valid %s expression"),
11045 operand_string, kind);
11046 return 0;
11047 }
11048 else
11049 {
11050 if (addr_mode != CODE_16BIT)
11051 {
11052 /* 32-bit/64-bit checks. */
11053 if (i.disp_encoding == disp_encoding_16bit)
11054 {
11055 bad_disp:
11056 as_bad (_("invalid `%s' prefix"),
11057 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11058 return 0;
11059 }
11060
11061 if ((i.base_reg
11062 && ((addr_mode == CODE_64BIT
11063 ? !i.base_reg->reg_type.bitfield.qword
11064 : !i.base_reg->reg_type.bitfield.dword)
11065 || (i.index_reg && i.base_reg->reg_num == RegIP)
11066 || i.base_reg->reg_num == RegIZ))
11067 || (i.index_reg
11068 && !i.index_reg->reg_type.bitfield.xmmword
11069 && !i.index_reg->reg_type.bitfield.ymmword
11070 && !i.index_reg->reg_type.bitfield.zmmword
11071 && ((addr_mode == CODE_64BIT
11072 ? !i.index_reg->reg_type.bitfield.qword
11073 : !i.index_reg->reg_type.bitfield.dword)
11074 || !i.index_reg->reg_type.bitfield.baseindex)))
11075 goto bad_address;
11076
11077 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11078 if (current_templates->start->base_opcode == 0xf30f1b
11079 || (current_templates->start->base_opcode & ~1) == 0x0f1a
11080 || current_templates->start->opcode_modifier.sib == SIBMEM)
11081 {
11082 /* They cannot use RIP-relative addressing. */
11083 if (i.base_reg && i.base_reg->reg_num == RegIP)
11084 {
11085 as_bad (_("`%s' cannot be used here"), operand_string);
11086 return 0;
11087 }
11088
11089 /* bndldx and bndstx ignore their scale factor. */
11090 if ((current_templates->start->base_opcode & ~1) == 0x0f1a
11091 && i.log2_scale_factor)
11092 as_warn (_("register scaling is being ignored here"));
11093 }
11094 }
11095 else
11096 {
11097 /* 16-bit checks. */
11098 if (i.disp_encoding == disp_encoding_32bit)
11099 goto bad_disp;
11100
11101 if ((i.base_reg
11102 && (!i.base_reg->reg_type.bitfield.word
11103 || !i.base_reg->reg_type.bitfield.baseindex))
11104 || (i.index_reg
11105 && (!i.index_reg->reg_type.bitfield.word
11106 || !i.index_reg->reg_type.bitfield.baseindex
11107 || !(i.base_reg
11108 && i.base_reg->reg_num < 6
11109 && i.index_reg->reg_num >= 6
11110 && i.log2_scale_factor == 0))))
11111 goto bad_address;
11112 }
11113 }
11114 return 1;
11115 }
11116
11117 /* Handle vector immediates. */
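/* For example (AT&T syntax, a sketch):

   vaddps {rn-sae}, %zmm1, %zmm2, %zmm3	static rounding (round to nearest)
   vucomiss {sae}, %xmm1, %xmm0		suppress all exceptions  */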
11118
11119 static int
11120 RC_SAE_immediate (const char *imm_start)
11121 {
11122 unsigned int match_found, j;
11123 const char *pstr = imm_start;
11124 expressionS *exp;
11125
11126 if (*pstr != '{')
11127 return 0;
11128
11129 pstr++;
11130 match_found = 0;
11131 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11132 {
11133 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11134 {
11135 if (!i.rounding)
11136 {
11137 rc_op.type = RC_NamesTable[j].type;
11138 rc_op.operand = this_operand;
11139 i.rounding = &rc_op;
11140 }
11141 else
11142 {
11143 as_bad (_("duplicated `%s'"), imm_start);
11144 return 0;
11145 }
11146 pstr += RC_NamesTable[j].len;
11147 match_found = 1;
11148 break;
11149 }
11150 }
11151 if (!match_found)
11152 return 0;
11153
11154 if (*pstr++ != '}')
11155 {
11156 as_bad (_("Missing '}': '%s'"), imm_start);
11157 return 0;
11158 }
11159 /* RC/SAE immediate string should contain nothing more. */
11160 if (*pstr != 0)
11161 {
11162 as_bad (_("Junk after '}': '%s'"), imm_start);
11163 return 0;
11164 }
11165
11166 exp = &im_expressions[i.imm_operands++];
11167 i.op[this_operand].imms = exp;
11168
11169 exp->X_op = O_constant;
11170 exp->X_add_number = 0;
11171 exp->X_add_symbol = (symbolS *) 0;
11172 exp->X_op_symbol = (symbolS *) 0;
11173
11174 i.types[this_operand].bitfield.imm8 = 1;
11175 return 1;
11176 }
11177
11178 /* Only string instructions can have a second memory operand, so
11179 reduce current_templates to just those if it contains any. */
11180 static int
11181 maybe_adjust_templates (void)
11182 {
11183 const insn_template *t;
11184
11185 gas_assert (i.mem_operands == 1);
11186
11187 for (t = current_templates->start; t < current_templates->end; ++t)
11188 if (t->opcode_modifier.isstring)
11189 break;
11190
11191 if (t < current_templates->end)
11192 {
11193 static templates aux_templates;
11194 bfd_boolean recheck;
11195
11196 aux_templates.start = t;
11197 for (; t < current_templates->end; ++t)
11198 if (!t->opcode_modifier.isstring)
11199 break;
11200 aux_templates.end = t;
11201
11202 /* Determine whether to re-check the first memory operand. */
11203 recheck = (aux_templates.start != current_templates->start
11204 || t != current_templates->end);
11205
11206 current_templates = &aux_templates;
11207
11208 if (recheck)
11209 {
11210 i.mem_operands = 0;
11211 if (i.memop1_string != NULL
11212 && i386_index_check (i.memop1_string) == 0)
11213 return 0;
11214 i.mem_operands = 1;
11215 }
11216 }
11217
11218 return 1;
11219 }
11220
11221 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11222 on error. */
11223
11224 static int
11225 i386_att_operand (char *operand_string)
11226 {
11227 const reg_entry *r;
11228 char *end_op;
11229 char *op_string = operand_string;
11230
11231 if (is_space_char (*op_string))
11232 ++op_string;
11233
11234 /* We check for an absolute prefix (differentiating,
11235 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11236 if (*op_string == ABSOLUTE_PREFIX)
11237 {
11238 ++op_string;
11239 if (is_space_char (*op_string))
11240 ++op_string;
11241 i.jumpabsolute = TRUE;
11242 }
11243
11244 /* Check if operand is a register. */
11245 if ((r = parse_register (op_string, &end_op)) != NULL)
11246 {
11247 i386_operand_type temp;
11248
11249 if (r == &bad_reg)
11250 return 0;
11251
11252 /* Check for a segment override by searching for ':' after a
11253 segment register. */
11254 op_string = end_op;
11255 if (is_space_char (*op_string))
11256 ++op_string;
11257 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11258 {
11259 switch (r->reg_num)
11260 {
11261 case 0:
11262 i.seg[i.mem_operands] = &es;
11263 break;
11264 case 1:
11265 i.seg[i.mem_operands] = &cs;
11266 break;
11267 case 2:
11268 i.seg[i.mem_operands] = &ss;
11269 break;
11270 case 3:
11271 i.seg[i.mem_operands] = &ds;
11272 break;
11273 case 4:
11274 i.seg[i.mem_operands] = &fs;
11275 break;
11276 case 5:
11277 i.seg[i.mem_operands] = &gs;
11278 break;
11279 }
11280
11281 /* Skip the ':' and whitespace. */
11282 ++op_string;
11283 if (is_space_char (*op_string))
11284 ++op_string;
11285
11286 if (!is_digit_char (*op_string)
11287 && !is_identifier_char (*op_string)
11288 && *op_string != '('
11289 && *op_string != ABSOLUTE_PREFIX)
11290 {
11291 as_bad (_("bad memory operand `%s'"), op_string);
11292 return 0;
11293 }
11294 /* Handle case of %es:*foo. */
11295 if (*op_string == ABSOLUTE_PREFIX)
11296 {
11297 ++op_string;
11298 if (is_space_char (*op_string))
11299 ++op_string;
11300 i.jumpabsolute = TRUE;
11301 }
11302 goto do_memory_reference;
11303 }
11304
11305 /* Handle vector operations. */
11306 if (*op_string == '{')
11307 {
11308 op_string = check_VecOperations (op_string, NULL);
11309 if (op_string == NULL)
11310 return 0;
11311 }
11312
11313 if (*op_string)
11314 {
11315 as_bad (_("junk `%s' after register"), op_string);
11316 return 0;
11317 }
11318 temp = r->reg_type;
11319 temp.bitfield.baseindex = 0;
11320 i.types[this_operand] = operand_type_or (i.types[this_operand],
11321 temp);
11322 i.types[this_operand].bitfield.unspecified = 0;
11323 i.op[this_operand].regs = r;
11324 i.reg_operands++;
11325 }
11326 else if (*op_string == REGISTER_PREFIX)
11327 {
11328 as_bad (_("bad register name `%s'"), op_string);
11329 return 0;
11330 }
11331 else if (*op_string == IMMEDIATE_PREFIX)
11332 {
11333 ++op_string;
11334 if (i.jumpabsolute)
11335 {
11336 as_bad (_("immediate operand illegal with absolute jump"));
11337 return 0;
11338 }
11339 if (!i386_immediate (op_string))
11340 return 0;
11341 }
11342 else if (RC_SAE_immediate (operand_string))
11343 {
11344 /* If it is a RC or SAE immediate, do nothing. */
11345 ;
11346 }
11347 else if (is_digit_char (*op_string)
11348 || is_identifier_char (*op_string)
11349 || *op_string == '"'
11350 || *op_string == '(')
11351 {
11352 /* This is a memory reference of some sort. */
11353 char *base_string;
11354
11355 /* Start and end of displacement string expression (if found). */
11356 char *displacement_string_start;
11357 char *displacement_string_end;
11358 char *vop_start;
11359
11360 do_memory_reference:
11361 if (i.mem_operands == 1 && !maybe_adjust_templates ())
11362 return 0;
11363 if ((i.mem_operands == 1
11364 && !current_templates->start->opcode_modifier.isstring)
11365 || i.mem_operands == 2)
11366 {
11367 as_bad (_("too many memory references for `%s'"),
11368 current_templates->start->name);
11369 return 0;
11370 }
11371
11372 /* Check for base index form. We detect the base index form by
11373 looking for a ')' at the end of the operand, searching
11374 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11375 after the '('. */
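/* For example (a sketch): in "-4(%ebp,%esi,2)" the trailing ')'
   is found first, the matching '(' precedes "%ebp", and the text
   before it, "-4", becomes the displacement expression.  */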
11376 base_string = op_string + strlen (op_string);
11377
11378 /* Handle vector operations. */
11379 vop_start = strchr (op_string, '{');
11380 if (vop_start && vop_start < base_string)
11381 {
11382 if (check_VecOperations (vop_start, base_string) == NULL)
11383 return 0;
11384 base_string = vop_start;
11385 }
11386
11387 --base_string;
11388 if (is_space_char (*base_string))
11389 --base_string;
11390
11391 /* If we only have a displacement, set up for it to be parsed later. */
11392 displacement_string_start = op_string;
11393 displacement_string_end = base_string + 1;
11394
11395 if (*base_string == ')')
11396 {
11397 char *temp_string;
11398 unsigned int parens_balanced = 1;
11399 /* We've already checked that the numbers of left & right ()'s
11400 are equal, so this loop will not be infinite. */
11401 do
11402 {
11403 base_string--;
11404 if (*base_string == ')')
11405 parens_balanced++;
11406 if (*base_string == '(')
11407 parens_balanced--;
11408 }
11409 while (parens_balanced);
11410
11411 temp_string = base_string;
11412
11413 /* Skip past '(' and whitespace. */
11414 ++base_string;
11415 if (is_space_char (*base_string))
11416 ++base_string;
11417
11418 if (*base_string == ','
11419 || ((i.base_reg = parse_register (base_string, &end_op))
11420 != NULL))
11421 {
11422 displacement_string_end = temp_string;
11423
11424 i.types[this_operand].bitfield.baseindex = 1;
11425
11426 if (i.base_reg)
11427 {
11428 if (i.base_reg == &bad_reg)
11429 return 0;
11430 base_string = end_op;
11431 if (is_space_char (*base_string))
11432 ++base_string;
11433 }
11434
11435 /* There may be an index reg or scale factor here. */
11436 if (*base_string == ',')
11437 {
11438 ++base_string;
11439 if (is_space_char (*base_string))
11440 ++base_string;
11441
11442 if ((i.index_reg = parse_register (base_string, &end_op))
11443 != NULL)
11444 {
11445 if (i.index_reg == &bad_reg)
11446 return 0;
11447 base_string = end_op;
11448 if (is_space_char (*base_string))
11449 ++base_string;
11450 if (*base_string == ',')
11451 {
11452 ++base_string;
11453 if (is_space_char (*base_string))
11454 ++base_string;
11455 }
11456 else if (*base_string != ')')
11457 {
11458 as_bad (_("expecting `,' or `)' "
11459 "after index register in `%s'"),
11460 operand_string);
11461 return 0;
11462 }
11463 }
11464 else if (*base_string == REGISTER_PREFIX)
11465 {
11466 end_op = strchr (base_string, ',');
11467 if (end_op)
11468 *end_op = '\0';
11469 as_bad (_("bad register name `%s'"), base_string);
11470 return 0;
11471 }
11472
11473 /* Check for scale factor. */
11474 if (*base_string != ')')
11475 {
11476 char *end_scale = i386_scale (base_string);
11477
11478 if (!end_scale)
11479 return 0;
11480
11481 base_string = end_scale;
11482 if (is_space_char (*base_string))
11483 ++base_string;
11484 if (*base_string != ')')
11485 {
11486 as_bad (_("expecting `)' "
11487 "after scale factor in `%s'"),
11488 operand_string);
11489 return 0;
11490 }
11491 }
11492 else if (!i.index_reg)
11493 {
11494 as_bad (_("expecting index register or scale factor "
11495 "after `,'; got '%c'"),
11496 *base_string);
11497 return 0;
11498 }
11499 }
11500 else if (*base_string != ')')
11501 {
11502 as_bad (_("expecting `,' or `)' "
11503 "after base register in `%s'"),
11504 operand_string);
11505 return 0;
11506 }
11507 }
11508 else if (*base_string == REGISTER_PREFIX)
11509 {
11510 end_op = strchr (base_string, ',');
11511 if (end_op)
11512 *end_op = '\0';
11513 as_bad (_("bad register name `%s'"), base_string);
11514 return 0;
11515 }
11516 }
11517
11518 /* If there's an expression beginning the operand, parse it,
11519 assuming displacement_string_start and
11520 displacement_string_end are meaningful. */
11521 if (displacement_string_start != displacement_string_end)
11522 {
11523 if (!i386_displacement (displacement_string_start,
11524 displacement_string_end))
11525 return 0;
11526 }
11527
11528 /* Special case for (%dx) while doing input/output op. */
11529 if (i.base_reg
11530 && i.base_reg->reg_type.bitfield.instance == RegD
11531 && i.base_reg->reg_type.bitfield.word
11532 && i.index_reg == 0
11533 && i.log2_scale_factor == 0
11534 && i.seg[i.mem_operands] == 0
11535 && !operand_type_check (i.types[this_operand], disp))
11536 {
11537 i.types[this_operand] = i.base_reg->reg_type;
11538 return 1;
11539 }
11540
11541 if (i386_index_check (operand_string) == 0)
11542 return 0;
11543 i.flags[this_operand] |= Operand_Mem;
11544 if (i.mem_operands == 0)
11545 i.memop1_string = xstrdup (operand_string);
11546 i.mem_operands++;
11547 }
11548 else
11549 {
11550 /* It's not a memory operand; argh! */
11551 as_bad (_("invalid char %s beginning operand %d `%s'"),
11552 output_invalid (*op_string),
11553 this_operand + 1,
11554 op_string);
11555 return 0;
11556 }
11557 return 1; /* Normal return. */
11558 }
11559 \f
11560 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11561 that an rs_machine_dependent frag may reach. */
11562
11563 unsigned int
11564 i386_frag_max_var (fragS *frag)
11565 {
11566 /* The only relaxable frags are for jumps.
11567 Unconditional jumps can grow by 4 bytes (the rel32 displacement) and conditional ones by 5 (an extra 0x0f opcode byte plus the rel32 displacement). */
11568 gas_assert (frag->fr_type == rs_machine_dependent);
11569 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11570 }
11571
11572 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11573 static int
11574 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11575 {
11576 /* STT_GNU_IFUNC symbol must go through PLT. */
11577 if ((symbol_get_bfdsym (fr_symbol)->flags
11578 & BSF_GNU_INDIRECT_FUNCTION) != 0)
11579 return 0;
11580
11581 if (!S_IS_EXTERNAL (fr_symbol))
11582 /* Symbol may be weak or local. */
11583 return !S_IS_WEAK (fr_symbol);
11584
11585 /* Global symbols with non-default visibility can't be preempted. */
11586 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11587 return 1;
11588
11589 if (fr_var != NO_RELOC)
11590 switch ((enum bfd_reloc_code_real) fr_var)
11591 {
11592 case BFD_RELOC_386_PLT32:
11593 case BFD_RELOC_X86_64_PLT32:
11594 /* Symbol with PLT relocation may be preempted. */
11595 return 0;
11596 default:
11597 abort ();
11598 }
11599
11600 /* Global symbols with default visibility in a shared library may be
11601 preempted by another definition. */
11602 return !shared;
11603 }
11604 #endif
11605
11606 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11607 Note: this also works for Skylake and Cascade Lake.
11608 ---------------------------------------------------------------------
11609 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
11610 | ------ | ----------- | ------- | -------- |
11611 | Jo | N | N | Y |
11612 | Jno | N | N | Y |
11613 | Jc/Jb | Y | N | Y |
11614 | Jae/Jnb | Y | N | Y |
11615 | Je/Jz | Y | Y | Y |
11616 | Jne/Jnz | Y | Y | Y |
11617 | Jna/Jbe | Y | N | Y |
11618 | Ja/Jnbe | Y | N | Y |
11619 | Js | N | N | Y |
11620 | Jns | N | N | Y |
11621 | Jp/Jpe | N | N | Y |
11622 | Jnp/Jpo | N | N | Y |
11623 | Jl/Jnge | Y | Y | Y |
11624 | Jge/Jnl | Y | Y | Y |
11625 | Jle/Jng | Y | Y | Y |
11626 | Jg/Jnle | Y | Y | Y |
11627 --------------------------------------------------------------------- */
11628 static int
11629 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11630 {
11631 if (mf_cmp == mf_cmp_alu_cmp)
11632 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11633 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11634 if (mf_cmp == mf_cmp_incdec)
11635 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11636 || mf_jcc == mf_jcc_jle);
11637 if (mf_cmp == mf_cmp_test_and)
11638 return 1;
11639 return 0;
11640 }
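/* Examples per the table above (editor's note, illustrative only):
     test %eax, %eax ; js  .L1   -- fusible (TEST pairs with any Jcc)
     cmp  %eax, %ebx ; ja  .L1   -- fusible
     inc  %ecx       ; je  .L1   -- fusible (INC/DEC pair with
				    Je/Jne/Jl/Jge/Jle/Jg only)
     add  $1, %eax   ; jo  .L1   -- not fusible (Jo column is N
				    for ADD/SUB/CMP).  */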
11641
11642 /* Return the next non-empty frag. */
11643
11644 static fragS *
11645 i386_next_non_empty_frag (fragS *fragP)
11646 {
11647 /* There may be a frag with a ".fill 0" when there is no room in
11648 the current frag for frag_grow in output_insn. */
11649 for (fragP = fragP->fr_next;
11650 (fragP != NULL
11651 && fragP->fr_type == rs_fill
11652 && fragP->fr_fix == 0);
11653 fragP = fragP->fr_next)
11654 ;
11655 return fragP;
11656 }
11657
11658 /* Return the next jcc frag after BRANCH_PADDING. */
11659
11660 static fragS *
11661 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11662 {
11663 fragS *branch_fragP;
11664 if (!pad_fragP)
11665 return NULL;
11666
11667 if (pad_fragP->fr_type == rs_machine_dependent
11668 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11669 == BRANCH_PADDING))
11670 {
11671 branch_fragP = i386_next_non_empty_frag (pad_fragP);
11672 if (branch_fragP->fr_type != rs_machine_dependent)
11673 return NULL;
11674 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11675 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11676 pad_fragP->tc_frag_data.mf_type))
11677 return branch_fragP;
11678 }
11679
11680 return NULL;
11681 }
11682
11683 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
11684
11685 static void
11686 i386_classify_machine_dependent_frag (fragS *fragP)
11687 {
11688 fragS *cmp_fragP;
11689 fragS *pad_fragP;
11690 fragS *branch_fragP;
11691 fragS *next_fragP;
11692 unsigned int max_prefix_length;
11693
11694 if (fragP->tc_frag_data.classified)
11695 return;
11696
11697 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
11698 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
11699 for (next_fragP = fragP;
11700 next_fragP != NULL;
11701 next_fragP = next_fragP->fr_next)
11702 {
11703 next_fragP->tc_frag_data.classified = 1;
11704 if (next_fragP->fr_type == rs_machine_dependent)
11705 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11706 {
11707 case BRANCH_PADDING:
11708 /* The BRANCH_PADDING frag must be followed by a branch
11709 frag. */
11710 branch_fragP = i386_next_non_empty_frag (next_fragP);
11711 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11712 break;
11713 case FUSED_JCC_PADDING:
11714 /* Check if this is a fused jcc:
11715 FUSED_JCC_PADDING
11716 CMP like instruction
11717 BRANCH_PADDING
11718 COND_JUMP
11719 */
11720 cmp_fragP = i386_next_non_empty_frag (next_fragP);
11721 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11722 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11723 if (branch_fragP)
11724 {
11725 /* The BRANCH_PADDING frag is merged with the
11726 FUSED_JCC_PADDING frag. */
11727 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11728 /* CMP like instruction size. */
11729 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11730 frag_wane (pad_fragP);
11731 /* Skip to branch_fragP. */
11732 next_fragP = branch_fragP;
11733 }
11734 else if (next_fragP->tc_frag_data.max_prefix_length)
11735 {
11736 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11737 a fused jcc. */
11738 next_fragP->fr_subtype
11739 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11740 next_fragP->tc_frag_data.max_bytes
11741 = next_fragP->tc_frag_data.max_prefix_length;
11742 /* This will be updated in the BRANCH_PREFIX scan. */
11743 next_fragP->tc_frag_data.max_prefix_length = 0;
11744 }
11745 else
11746 frag_wane (next_fragP);
11747 break;
11748 }
11749 }
11750
11751 /* Stop if there is no BRANCH_PREFIX. */
11752 if (!align_branch_prefix_size)
11753 return;
11754
11755 /* Scan for BRANCH_PREFIX. */
11756 for (; fragP != NULL; fragP = fragP->fr_next)
11757 {
11758 if (fragP->fr_type != rs_machine_dependent
11759 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11760 != BRANCH_PREFIX))
11761 continue;
11762
11763 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11764 COND_JUMP_PREFIX. */
11765 max_prefix_length = 0;
11766 for (next_fragP = fragP;
11767 next_fragP != NULL;
11768 next_fragP = next_fragP->fr_next)
11769 {
11770 if (next_fragP->fr_type == rs_fill)
11771 /* Skip rs_fill frags. */
11772 continue;
11773 else if (next_fragP->fr_type != rs_machine_dependent)
11774 /* Stop for all other frags. */
11775 break;
11776
11777 /* rs_machine_dependent frags. */
11778 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11779 == BRANCH_PREFIX)
11780 {
11781 /* Count BRANCH_PREFIX frags. */
11782 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11783 {
11784 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11785 frag_wane (next_fragP);
11786 }
11787 else
11788 max_prefix_length
11789 += next_fragP->tc_frag_data.max_bytes;
11790 }
11791 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11792 == BRANCH_PADDING)
11793 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11794 == FUSED_JCC_PADDING))
11795 {
11796 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
11797 fragP->tc_frag_data.u.padding_fragP = next_fragP;
11798 break;
11799 }
11800 else
11801 /* Stop for other rs_machine_dependent frags. */
11802 break;
11803 }
11804
11805 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11806
11807 /* Skip to the next frag. */
11808 fragP = next_fragP;
11809 }
11810 }
11811
11812 /* Compute padding size for
11813
11814 FUSED_JCC_PADDING
11815 CMP like instruction
11816 BRANCH_PADDING
11817 COND_JUMP/UNCOND_JUMP
11818
11819 or
11820
11821 BRANCH_PADDING
11822 COND_JUMP/UNCOND_JUMP
11823 */
11824
11825 static int
11826 i386_branch_padding_size (fragS *fragP, offsetT address)
11827 {
11828 unsigned int offset, size, padding_size;
11829 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11830
11831 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
11832 if (!address)
11833 address = fragP->fr_address;
11834 address += fragP->fr_fix;
11835
11836 /* CMP like instruction size. */
11837 size = fragP->tc_frag_data.cmp_size;
11838
11839 /* The base size of the branch frag. */
11840 size += branch_fragP->fr_fix;
11841
11842 /* Add opcode and displacement bytes for the rs_machine_dependent
11843 branch frag. */
11844 if (branch_fragP->fr_type == rs_machine_dependent)
11845 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
11846
11847 /* Check if branch is within boundary and doesn't end at the last
11848 byte. */
11849 offset = address & ((1U << align_branch_power) - 1);
11850 if ((offset + size) >= (1U << align_branch_power))
11851 /* Padding needed to avoid crossing boundary. */
11852 padding_size = (1U << align_branch_power) - offset;
11853 else
11854 /* No padding needed. */
11855 padding_size = 0;
11856
11857 /* The return value may be saved in tc_frag_data.length which is
11858 an unsigned byte. */
11859 if (!fits_in_unsigned_byte (padding_size))
11860 abort ();
11861
11862 return padding_size;
11863 }
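/* Worked example (editor's note): with align_branch_power == 5 the
   window is 32 bytes.  If the fused cmp+jcc starts at offset 30
   within the window (address & 31 == 30) and is 6 bytes long, then
   30 + 6 >= 32, so padding_size = 32 - 30 = 2, and two bytes of
   padding push the pair to the start of the next 32-byte window.  */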
11864
11865 /* i386_generic_table_relax_frag()
11866
11867 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11868 grow/shrink padding to align branch frags. Hand others to
11869 relax_frag(). */
11870
11871 long
11872 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11873 {
11874 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11875 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11876 {
11877 long padding_size = i386_branch_padding_size (fragP, 0);
11878 long grow = padding_size - fragP->tc_frag_data.length;
11879
11880 /* When the BRANCH_PREFIX frag is used, the computed address
11881 must match the actual address and there should be no padding. */
11882 if (fragP->tc_frag_data.padding_address
11883 && (fragP->tc_frag_data.padding_address != fragP->fr_address
11884 || padding_size))
11885 abort ();
11886
11887 /* Update the padding size. */
11888 if (grow)
11889 fragP->tc_frag_data.length = padding_size;
11890
11891 return grow;
11892 }
11893 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11894 {
11895 fragS *padding_fragP, *next_fragP;
11896 long padding_size, left_size, last_size;
11897
11898 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11899 if (!padding_fragP)
11900 /* Use the padding set by the leading BRANCH_PREFIX frag. */
11901 return (fragP->tc_frag_data.length
11902 - fragP->tc_frag_data.last_length);
11903
11904 /* Compute the relative address of the padding frag the very first
11905 time, when the BRANCH_PREFIX frag sizes are zero. */
11906 if (!fragP->tc_frag_data.padding_address)
11907 fragP->tc_frag_data.padding_address
11908 = padding_fragP->fr_address - (fragP->fr_address - stretch);
11909
11910 /* First update the last length from the previous iteration. */
11911 left_size = fragP->tc_frag_data.prefix_length;
11912 for (next_fragP = fragP;
11913 next_fragP != padding_fragP;
11914 next_fragP = next_fragP->fr_next)
11915 if (next_fragP->fr_type == rs_machine_dependent
11916 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11917 == BRANCH_PREFIX))
11918 {
11919 if (left_size)
11920 {
11921 int max = next_fragP->tc_frag_data.max_bytes;
11922 if (max)
11923 {
11924 int size;
11925 if (max > left_size)
11926 size = left_size;
11927 else
11928 size = max;
11929 left_size -= size;
11930 next_fragP->tc_frag_data.last_length = size;
11931 }
11932 }
11933 else
11934 next_fragP->tc_frag_data.last_length = 0;
11935 }
11936
11937 /* Check the padding size for the padding frag. */
11938 padding_size = i386_branch_padding_size
11939 (padding_fragP, (fragP->fr_address
11940 + fragP->tc_frag_data.padding_address));
11941
11942 last_size = fragP->tc_frag_data.prefix_length;
11943 /* Check if there is a change from the last iteration. */
11944 if (padding_size == last_size)
11945 {
11946 /* Update the expected address of the padding frag. */
11947 padding_fragP->tc_frag_data.padding_address
11948 = (fragP->fr_address + padding_size
11949 + fragP->tc_frag_data.padding_address);
11950 return 0;
11951 }
11952
11953 if (padding_size > fragP->tc_frag_data.max_prefix_length)
11954 {
11955 /* No padding if there is no sufficient room. Clear the
11956 expected address of the padding frag. */
11957 padding_fragP->tc_frag_data.padding_address = 0;
11958 padding_size = 0;
11959 }
11960 else
11961 /* Store the expected address of the padding frag. */
11962 padding_fragP->tc_frag_data.padding_address
11963 = (fragP->fr_address + padding_size
11964 + fragP->tc_frag_data.padding_address);
11965
11966 fragP->tc_frag_data.prefix_length = padding_size;
11967
11968 /* Update the length for the current iteration. */
11969 left_size = padding_size;
11970 for (next_fragP = fragP;
11971 next_fragP != padding_fragP;
11972 next_fragP = next_fragP->fr_next)
11973 if (next_fragP->fr_type == rs_machine_dependent
11974 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11975 == BRANCH_PREFIX))
11976 {
11977 if (left_size)
11978 {
11979 int max = next_fragP->tc_frag_data.max_bytes;
11980 if (max)
11981 {
11982 int size;
11983 if (max > left_size)
11984 size = left_size;
11985 else
11986 size = max;
11987 left_size -= size;
11988 next_fragP->tc_frag_data.length = size;
11989 }
11990 }
11991 else
11992 next_fragP->tc_frag_data.length = 0;
11993 }
11994
11995 return (fragP->tc_frag_data.length
11996 - fragP->tc_frag_data.last_length);
11997 }
11998 return relax_frag (segment, fragP, stretch);
11999 }
12000
12001 /* md_estimate_size_before_relax()
12002
12003 Called just before relax() for rs_machine_dependent frags. The x86
12004 assembler uses these frags to handle variable size jump
12005 instructions.
12006
12007 Any symbol that is now undefined will not become defined.
12008 Return the correct fr_subtype in the frag.
12009 Return the initial "guess for variable size of frag" to caller.
12010 The guess is actually the growth beyond the fixed part. Whatever
12011 we do to grow the fixed or variable part contributes to our
12012 returned value. */
12013
12014 int
12015 md_estimate_size_before_relax (fragS *fragP, segT segment)
12016 {
12017 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12018 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12019 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12020 {
12021 i386_classify_machine_dependent_frag (fragP);
12022 return fragP->tc_frag_data.length;
12023 }
12024
12025 /* We've already got fragP->fr_subtype right; all we have to do is
12026 check for un-relaxable symbols. On an ELF system, we can't relax
12027 an externally visible symbol, because it may be overridden by a
12028 shared library. */
12029 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12030 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12031 || (IS_ELF
12032 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12033 fragP->fr_var))
12034 #endif
12035 #if defined (OBJ_COFF) && defined (TE_PE)
12036 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12037 && S_IS_WEAK (fragP->fr_symbol))
12038 #endif
12039 )
12040 {
12041 /* Symbol is undefined in this segment, or we need to keep a
12042 reloc so that weak symbols can be overridden. */
12043 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12044 enum bfd_reloc_code_real reloc_type;
12045 unsigned char *opcode;
12046 int old_fr_fix;
12047
12048 if (fragP->fr_var != NO_RELOC)
12049 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12050 else if (size == 2)
12051 reloc_type = BFD_RELOC_16_PCREL;
12052 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12053 else if (need_plt32_p (fragP->fr_symbol))
12054 reloc_type = BFD_RELOC_X86_64_PLT32;
12055 #endif
12056 else
12057 reloc_type = BFD_RELOC_32_PCREL;
12058
12059 old_fr_fix = fragP->fr_fix;
12060 opcode = (unsigned char *) fragP->fr_opcode;
12061
12062 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12063 {
12064 case UNCOND_JUMP:
12065 /* Make jmp (0xeb) a (d)word displacement jump. */
12066 opcode[0] = 0xe9;
12067 fragP->fr_fix += size;
12068 fix_new (fragP, old_fr_fix, size,
12069 fragP->fr_symbol,
12070 fragP->fr_offset, 1,
12071 reloc_type);
12072 break;
12073
12074 case COND_JUMP86:
12075 if (size == 2
12076 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12077 {
12078 /* Negate the condition, and branch past an
12079 unconditional jump. */
12080 opcode[0] ^= 1;
12081 opcode[1] = 3;
12082 /* Insert an unconditional jump. */
12083 opcode[2] = 0xe9;
12084 /* We added two extra opcode bytes, and have a two byte
12085 offset. */
12086 fragP->fr_fix += 2 + 2;
12087 fix_new (fragP, old_fr_fix + 2, 2,
12088 fragP->fr_symbol,
12089 fragP->fr_offset, 1,
12090 reloc_type);
12091 break;
12092 }
12093 /* Fall through. */
12094
12095 case COND_JUMP:
12096 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12097 {
12098 fixS *fixP;
12099
12100 fragP->fr_fix += 1;
12101 fixP = fix_new (fragP, old_fr_fix, 1,
12102 fragP->fr_symbol,
12103 fragP->fr_offset, 1,
12104 BFD_RELOC_8_PCREL);
12105 fixP->fx_signed = 1;
12106 break;
12107 }
12108
12109 /* This changes the byte-displacement jump 0x7N
12110 to the (d)word-displacement jump 0x0f,0x8N. */
12111 opcode[1] = opcode[0] + 0x10;
12112 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12113 /* We've added an opcode byte. */
12114 fragP->fr_fix += 1 + size;
12115 fix_new (fragP, old_fr_fix + 1, size,
12116 fragP->fr_symbol,
12117 fragP->fr_offset, 1,
12118 reloc_type);
12119 break;
12120
12121 default:
12122 BAD_CASE (fragP->fr_subtype);
12123 break;
12124 }
12125 frag_wane (fragP);
12126 return fragP->fr_fix - old_fr_fix;
12127 }
12128
12129 /* Guess size depending on current relax state. Initially the relax
12130 state will correspond to a short jump and we return 1, because
12131 the variable part of the frag (the branch offset) is one byte
12132 long. However, we can relax a section more than once and in that
12133 case we must either set fr_subtype back to the unrelaxed state,
12134 or return the value for the appropriate branch. */
12135 return md_relax_table[fragP->fr_subtype].rlx_length;
12136 }
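/* Byte-level illustration of the promotions above (editor's note):
     jmp: EB rel8 -> E9 rel32 (fr_fix grows by 4, fixup on the disp)
     je:  74 rel8 -> 0F 84 rel32 (opcode[1] = 0x74 + 0x10)
     je in 16-bit 8086-compatible code: 74 -> 75 03 E9 rel16, i.e.
     the negated condition branches over an inserted unconditional
     jump.  */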
12137
12138 /* Called after relax() is finished.
12139
12140 In: Address of frag.
12141 fr_type == rs_machine_dependent.
12142 fr_subtype is what the address relaxed to.
12143
12144 Out: Any fixSs and constants are set up.
12145 Caller will turn frag into a ".space 0". */
12146
12147 void
12148 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12149 fragS *fragP)
12150 {
12151 unsigned char *opcode;
12152 unsigned char *where_to_put_displacement = NULL;
12153 offsetT target_address;
12154 offsetT opcode_address;
12155 unsigned int extension = 0;
12156 offsetT displacement_from_opcode_start;
12157
12158 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12159 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12160 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12161 {
12162 /* Generate nop padding. */
12163 unsigned int size = fragP->tc_frag_data.length;
12164 if (size)
12165 {
12166 if (size > fragP->tc_frag_data.max_bytes)
12167 abort ();
12168
12169 if (flag_debug)
12170 {
12171 const char *msg;
12172 const char *branch = "branch";
12173 const char *prefix = "";
12174 fragS *padding_fragP;
12175 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12176 == BRANCH_PREFIX)
12177 {
12178 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12179 switch (fragP->tc_frag_data.default_prefix)
12180 {
12181 default:
12182 abort ();
12183 break;
12184 case CS_PREFIX_OPCODE:
12185 prefix = " cs";
12186 break;
12187 case DS_PREFIX_OPCODE:
12188 prefix = " ds";
12189 break;
12190 case ES_PREFIX_OPCODE:
12191 prefix = " es";
12192 break;
12193 case FS_PREFIX_OPCODE:
12194 prefix = " fs";
12195 break;
12196 case GS_PREFIX_OPCODE:
12197 prefix = " gs";
12198 break;
12199 case SS_PREFIX_OPCODE:
12200 prefix = " ss";
12201 break;
12202 }
12203 if (padding_fragP)
12204 msg = _("%s:%u: add %d%s at 0x%llx to align "
12205 "%s within %d-byte boundary\n");
12206 else
12207 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12208 "align %s within %d-byte boundary\n");
12209 }
12210 else
12211 {
12212 padding_fragP = fragP;
12213 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12214 "%s within %d-byte boundary\n");
12215 }
12216
12217 if (padding_fragP)
12218 switch (padding_fragP->tc_frag_data.branch_type)
12219 {
12220 case align_branch_jcc:
12221 branch = "jcc";
12222 break;
12223 case align_branch_fused:
12224 branch = "fused jcc";
12225 break;
12226 case align_branch_jmp:
12227 branch = "jmp";
12228 break;
12229 case align_branch_call:
12230 branch = "call";
12231 break;
12232 case align_branch_indirect:
12233 branch = "indiret branch";
12234 break;
12235 case align_branch_ret:
12236 branch = "ret";
12237 break;
12238 default:
12239 break;
12240 }
12241
12242 fprintf (stdout, msg,
12243 fragP->fr_file, fragP->fr_line, size, prefix,
12244 (long long) fragP->fr_address, branch,
12245 1 << align_branch_power);
12246 }
12247 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12248 memset (fragP->fr_opcode,
12249 fragP->tc_frag_data.default_prefix, size);
12250 else
12251 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12252 size, 0);
12253 fragP->fr_fix += size;
12254 }
12255 return;
12256 }
12257
12258 opcode = (unsigned char *) fragP->fr_opcode;
12259
12260 /* Address we want to reach in file space. */
12261 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12262
12263 /* Address opcode resides at in file space. */
12264 opcode_address = fragP->fr_address + fragP->fr_fix;
12265
12266 /* Displacement from opcode start to fill into instruction. */
12267 displacement_from_opcode_start = target_address - opcode_address;
12268
12269 if ((fragP->fr_subtype & BIG) == 0)
12270 {
12271 /* Don't have to change opcode. */
12272 extension = 1; /* 1 opcode + 1 displacement */
12273 where_to_put_displacement = &opcode[1];
12274 }
12275 else
12276 {
12277 if (no_cond_jump_promotion
12278 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12279 as_warn_where (fragP->fr_file, fragP->fr_line,
12280 _("long jump required"));
12281
12282 switch (fragP->fr_subtype)
12283 {
12284 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12285 extension = 4; /* 1 opcode + 4 displacement */
12286 opcode[0] = 0xe9;
12287 where_to_put_displacement = &opcode[1];
12288 break;
12289
12290 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12291 extension = 2; /* 1 opcode + 2 displacement */
12292 opcode[0] = 0xe9;
12293 where_to_put_displacement = &opcode[1];
12294 break;
12295
12296 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12297 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12298 extension = 5; /* 2 opcode + 4 displacement */
12299 opcode[1] = opcode[0] + 0x10;
12300 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12301 where_to_put_displacement = &opcode[2];
12302 break;
12303
12304 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12305 extension = 3; /* 2 opcode + 2 displacement */
12306 opcode[1] = opcode[0] + 0x10;
12307 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12308 where_to_put_displacement = &opcode[2];
12309 break;
12310
12311 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12312 extension = 4;
12313 opcode[0] ^= 1;
12314 opcode[1] = 3;
12315 opcode[2] = 0xe9;
12316 where_to_put_displacement = &opcode[3];
12317 break;
12318
12319 default:
12320 BAD_CASE (fragP->fr_subtype);
12321 break;
12322 }
12323 }
12324
12325 /* If the size is less than four, we are sure that the operand fits,
12326 but if it's 4, the displacement could be larger
12327 than +/- 2GB. */
12328 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12329 && object_64bit
12330 && ((addressT) (displacement_from_opcode_start - extension
12331 + ((addressT) 1 << 31))
12332 > (((addressT) 2 << 31) - 1)))
12333 {
12334 as_bad_where (fragP->fr_file, fragP->fr_line,
12335 _("jump target out of range"));
12336 /* Make us emit 0. */
12337 displacement_from_opcode_start = extension;
12338 }
12339 /* Now put displacement after opcode. */
12340 md_number_to_chars ((char *) where_to_put_displacement,
12341 (valueT) (displacement_from_opcode_start - extension),
12342 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12343 fragP->fr_fix += extension;
12344 }
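/* Example of the padding emitted above (editor's note): for a
   BRANCH_PREFIX frag with default_prefix == DS_PREFIX_OPCODE and a
   2-byte padding size, the bytes 3E 3E are emitted as redundant
   segment prefixes on the following instruction, while
   BRANCH_PADDING/FUSED_JCC_PADDING frags are filled with NOPs by
   i386_generate_nops instead.  */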
12345 \f
12346 /* Apply a fixup (fixP) to segment data, once it has been determined
12347 by our caller that we have all the info we need to fix it up.
12348
12349 Parameter valP is the pointer to the value of the bits.
12350
12351 On the 386, immediates, displacements, and data pointers are all in
12352 the same (little-endian) format, so we don't need to care about which
12353 we are handling. */
12354
12355 void
12356 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12357 {
12358 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12359 valueT value = *valP;
12360
12361 #if !defined (TE_Mach)
12362 if (fixP->fx_pcrel)
12363 {
12364 switch (fixP->fx_r_type)
12365 {
12366 default:
12367 break;
12368
12369 case BFD_RELOC_64:
12370 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12371 break;
12372 case BFD_RELOC_32:
12373 case BFD_RELOC_X86_64_32S:
12374 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12375 break;
12376 case BFD_RELOC_16:
12377 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12378 break;
12379 case BFD_RELOC_8:
12380 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12381 break;
12382 }
12383 }
12384
12385 if (fixP->fx_addsy != NULL
12386 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12387 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12388 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12389 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12390 && !use_rela_relocations)
12391 {
12392 /* This is a hack. There should be a better way to handle this.
12393 This covers for the fact that bfd_install_relocation will
12394 subtract the current location (for partial_inplace, PC relative
12395 relocations); see more below. */
12396 #ifndef OBJ_AOUT
12397 if (IS_ELF
12398 #ifdef TE_PE
12399 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12400 #endif
12401 )
12402 value += fixP->fx_where + fixP->fx_frag->fr_address;
12403 #endif
12404 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12405 if (IS_ELF)
12406 {
12407 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12408
12409 if ((sym_seg == seg
12410 || (symbol_section_p (fixP->fx_addsy)
12411 && sym_seg != absolute_section))
12412 && !generic_force_reloc (fixP))
12413 {
12414 /* Yes, we add the values in twice. This is because
12415 bfd_install_relocation subtracts them out again. I think
12416 bfd_install_relocation is broken, but I don't dare change
12417 it. FIXME. */
12418 value += fixP->fx_where + fixP->fx_frag->fr_address;
12419 }
12420 }
12421 #endif
12422 #if defined (OBJ_COFF) && defined (TE_PE)
12423 /* For some reason, the PE format does not store a
12424 section address offset for a PC relative symbol. */
12425 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12426 || S_IS_WEAK (fixP->fx_addsy))
12427 value += md_pcrel_from (fixP);
12428 #endif
12429 }
12430 #if defined (OBJ_COFF) && defined (TE_PE)
12431 if (fixP->fx_addsy != NULL
12432 && S_IS_WEAK (fixP->fx_addsy)
12433 /* PR 16858: Do not modify weak function references. */
12434 && ! fixP->fx_pcrel)
12435 {
12436 #if !defined (TE_PEP)
12437 /* For x86 PE weak function symbols are neither PC-relative
12438 nor do they set S_IS_FUNCTION. So the only reliable way
12439 to detect them is to check the flags of their containing
12440 section. */
12441 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12442 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12443 ;
12444 else
12445 #endif
12446 value -= S_GET_VALUE (fixP->fx_addsy);
12447 }
12448 #endif
12449
12450 /* Fix a few things - the dynamic linker expects certain values here,
12451 and we must not disappoint it. */
12452 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12453 if (IS_ELF && fixP->fx_addsy)
12454 switch (fixP->fx_r_type)
12455 {
12456 case BFD_RELOC_386_PLT32:
12457 case BFD_RELOC_X86_64_PLT32:
12458 /* Make the jump instruction point to the address of the operand.
12459 At runtime we merely add the offset to the actual PLT entry.
12460 NB: Subtract the offset size only for jump instructions. */
12461 if (fixP->fx_pcrel)
12462 value = -4;
12463 break;
12464
12465 case BFD_RELOC_386_TLS_GD:
12466 case BFD_RELOC_386_TLS_LDM:
12467 case BFD_RELOC_386_TLS_IE_32:
12468 case BFD_RELOC_386_TLS_IE:
12469 case BFD_RELOC_386_TLS_GOTIE:
12470 case BFD_RELOC_386_TLS_GOTDESC:
12471 case BFD_RELOC_X86_64_TLSGD:
12472 case BFD_RELOC_X86_64_TLSLD:
12473 case BFD_RELOC_X86_64_GOTTPOFF:
12474 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12475 value = 0; /* Fully resolved at runtime. No addend. */
12476 /* Fallthrough */
12477 case BFD_RELOC_386_TLS_LE:
12478 case BFD_RELOC_386_TLS_LDO_32:
12479 case BFD_RELOC_386_TLS_LE_32:
12480 case BFD_RELOC_X86_64_DTPOFF32:
12481 case BFD_RELOC_X86_64_DTPOFF64:
12482 case BFD_RELOC_X86_64_TPOFF32:
12483 case BFD_RELOC_X86_64_TPOFF64:
12484 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12485 break;
12486
12487 case BFD_RELOC_386_TLS_DESC_CALL:
12488 case BFD_RELOC_X86_64_TLSDESC_CALL:
12489 value = 0; /* Fully resolved at runtime. No addend. */
12490 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12491 fixP->fx_done = 0;
12492 return;
12493
12494 case BFD_RELOC_VTABLE_INHERIT:
12495 case BFD_RELOC_VTABLE_ENTRY:
12496 fixP->fx_done = 0;
12497 return;
12498
12499 default:
12500 break;
12501 }
12502 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
12503 *valP = value;
12504 #endif /* !defined (TE_Mach) */
12505
12506 /* Are we finished with this relocation now? */
12507 if (fixP->fx_addsy == NULL)
12508 fixP->fx_done = 1;
12509 #if defined (OBJ_COFF) && defined (TE_PE)
12510 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12511 {
12512 fixP->fx_done = 0;
12513 /* Remember value for tc_gen_reloc. */
12514 fixP->fx_addnumber = value;
12515 /* Clear out the frag for now. */
12516 value = 0;
12517 }
12518 #endif
12519 else if (use_rela_relocations)
12520 {
12521 fixP->fx_no_overflow = 1;
12522 /* Remember value for tc_gen_reloc. */
12523 fixP->fx_addnumber = value;
12524 value = 0;
12525 }
12526
12527 md_number_to_chars (p, value, fixP->fx_size);
12528 }
12529 \f
12530 const char *
12531 md_atof (int type, char *litP, int *sizeP)
12532 {
12533 /* This outputs the LITTLENUMs in REVERSE order;
12534 in accord with the little-endian 386. */
12535 return ieee_md_atof (type, litP, sizeP, FALSE);
12536 }
12537 \f
12538 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12539
12540 static char *
12541 output_invalid (int c)
12542 {
12543 if (ISPRINT (c))
12544 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12545 "'%c'", c);
12546 else
12547 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12548 "(0x%x)", (unsigned char) c);
12549 return output_invalid_buf;
12550 }
12551
12552 /* Verify that @r can be used in the current context. */
12553
12554 static bfd_boolean check_register (const reg_entry *r)
12555 {
12556 if (allow_pseudo_reg)
12557 return TRUE;
12558
12559 if (operand_type_all_zero (&r->reg_type))
12560 return FALSE;
12561
12562 if ((r->reg_type.bitfield.dword
12563 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12564 || r->reg_type.bitfield.class == RegCR
12565 || r->reg_type.bitfield.class == RegDR)
12566 && !cpu_arch_flags.bitfield.cpui386)
12567 return FALSE;
12568
12569 if (r->reg_type.bitfield.class == RegTR
12570 && (flag_code == CODE_64BIT
12571 || !cpu_arch_flags.bitfield.cpui386
12572 || cpu_arch_isa_flags.bitfield.cpui586
12573 || cpu_arch_isa_flags.bitfield.cpui686))
12574 return FALSE;
12575
12576 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12577 return FALSE;
12578
12579 if (!cpu_arch_flags.bitfield.cpuavx512f)
12580 {
12581 if (r->reg_type.bitfield.zmmword
12582 || r->reg_type.bitfield.class == RegMask)
12583 return FALSE;
12584
12585 if (!cpu_arch_flags.bitfield.cpuavx)
12586 {
12587 if (r->reg_type.bitfield.ymmword)
12588 return FALSE;
12589
12590 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12591 return FALSE;
12592 }
12593 }
12594
12595 if (r->reg_type.bitfield.tmmword
12596 && (!cpu_arch_flags.bitfield.cpuamx_tile
12597 || flag_code != CODE_64BIT))
12598 return FALSE;
12599
12600 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12601 return FALSE;
12602
12603 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
12604 if (!allow_index_reg && r->reg_num == RegIZ)
12605 return FALSE;
12606
12607 /* Upper 16 vector registers are only available with VREX in 64bit
12608 mode, and require EVEX encoding. */
12609 if (r->reg_flags & RegVRex)
12610 {
12611 if (!cpu_arch_flags.bitfield.cpuavx512f
12612 || flag_code != CODE_64BIT)
12613 return FALSE;
12614
12615 if (i.vec_encoding == vex_encoding_default)
12616 i.vec_encoding = vex_encoding_evex;
12617 else if (i.vec_encoding != vex_encoding_evex)
12618 i.vec_encoding = vex_encoding_error;
12619 }
12620
12621 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12622 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12623 && flag_code != CODE_64BIT)
12624 return FALSE;
12625
12626 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12627 && !intel_syntax)
12628 return FALSE;
12629
12630 return TRUE;
12631 }
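/* For illustration (editor's note): %xmm16-%xmm31 carry RegVRex, so
   they are rejected outside 64-bit mode or without AVX512F, and
   merely naming one switches the instruction to EVEX encoding (or
   flags an error if a different encoding was already requested).  */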
12632
12633 /* REG_STRING starts *before* REGISTER_PREFIX. */
12634
12635 static const reg_entry *
12636 parse_real_register (char *reg_string, char **end_op)
12637 {
12638 char *s = reg_string;
12639 char *p;
12640 char reg_name_given[MAX_REG_NAME_SIZE + 1];
12641 const reg_entry *r;
12642
12643 /* Skip possible REGISTER_PREFIX and possible whitespace. */
12644 if (*s == REGISTER_PREFIX)
12645 ++s;
12646
12647 if (is_space_char (*s))
12648 ++s;
12649
12650 p = reg_name_given;
12651 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12652 {
12653 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12654 return (const reg_entry *) NULL;
12655 s++;
12656 }
12657
12658 /* For naked regs, make sure that we are not dealing with an identifier.
12659 This prevents confusing an identifier like `eax_var' with register
12660 `eax'. */
12661 if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12662 return (const reg_entry *) NULL;
12663
12664 *end_op = s;
12665
12666 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12667
12668 /* Handle floating point regs, allowing spaces in the (i) part. */
12669 if (r == i386_regtab /* %st is first entry of table */)
12670 {
12671 if (!cpu_arch_flags.bitfield.cpu8087
12672 && !cpu_arch_flags.bitfield.cpu287
12673 && !cpu_arch_flags.bitfield.cpu387
12674 && !allow_pseudo_reg)
12675 return (const reg_entry *) NULL;
12676
12677 if (is_space_char (*s))
12678 ++s;
12679 if (*s == '(')
12680 {
12681 ++s;
12682 if (is_space_char (*s))
12683 ++s;
12684 if (*s >= '0' && *s <= '7')
12685 {
12686 int fpr = *s - '0';
12687 ++s;
12688 if (is_space_char (*s))
12689 ++s;
12690 if (*s == ')')
12691 {
12692 *end_op = s + 1;
12693 r = (const reg_entry *) str_hash_find (reg_hash, "st(0)");
12694 know (r);
12695 return r + fpr;
12696 }
12697 }
12698 /* We have "%st(" then garbage. */
12699 return (const reg_entry *) NULL;
12700 }
12701 }
12702
12703 return r && check_register (r) ? r : NULL;
12704 }
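/* Parsing example (editor's note): "%st ( 3 )" is accepted, with
   spaces allowed around the operand number, and yields the entry for
   st(3), i.e. the "st(0)" table entry plus 3.  A name such as
   "eax_var" is not mistaken for %eax when naked registers are in
   use, thanks to the identifier check above.  */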
12705
12706 /* REG_STRING starts *before* REGISTER_PREFIX. */
12707
12708 static const reg_entry *
12709 parse_register (char *reg_string, char **end_op)
12710 {
12711 const reg_entry *r;
12712
12713 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12714 r = parse_real_register (reg_string, end_op);
12715 else
12716 r = NULL;
12717 if (!r)
12718 {
12719 char *save = input_line_pointer;
12720 char c;
12721 symbolS *symbolP;
12722
12723 input_line_pointer = reg_string;
12724 c = get_symbol_name (&reg_string);
12725 symbolP = symbol_find (reg_string);
12726 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12727 {
12728 const expressionS *e = symbol_get_value_expression (symbolP);
12729
12730 know (e->X_op == O_register);
12731 know (e->X_add_number >= 0
12732 && (valueT) e->X_add_number < i386_regtab_size);
12733 r = i386_regtab + e->X_add_number;
12734 if (!check_register (r))
12735 {
12736 as_bad (_("register '%s%s' cannot be used here"),
12737 register_prefix, r->reg_name);
12738 r = &bad_reg;
12739 }
12740 *end_op = input_line_pointer;
12741 }
12742 *input_line_pointer = c;
12743 input_line_pointer = save;
12744 }
12745 return r;
12746 }
12747
12748 int
12749 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12750 {
12751 const reg_entry *r;
12752 char *end = input_line_pointer;
12753
12754 *end = *nextcharP;
12755 r = parse_register (name, &input_line_pointer);
12756 if (r && end <= input_line_pointer)
12757 {
12758 *nextcharP = *input_line_pointer;
12759 *input_line_pointer = 0;
12760 if (r != &bad_reg)
12761 {
12762 e->X_op = O_register;
12763 e->X_add_number = r - i386_regtab;
12764 }
12765 else
12766 e->X_op = O_illegal;
12767 return 1;
12768 }
12769 input_line_pointer = end;
12770 *end = 0;
12771 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12772 }
12773
12774 void
12775 md_operand (expressionS *e)
12776 {
12777 char *end;
12778 const reg_entry *r;
12779
12780 switch (*input_line_pointer)
12781 {
12782 case REGISTER_PREFIX:
12783 r = parse_real_register (input_line_pointer, &end);
12784 if (r)
12785 {
12786 e->X_op = O_register;
12787 e->X_add_number = r - i386_regtab;
12788 input_line_pointer = end;
12789 }
12790 break;
12791
12792 case '[':
12793 gas_assert (intel_syntax);
12794 end = input_line_pointer++;
12795 expression (e);
12796 if (*input_line_pointer == ']')
12797 {
12798 ++input_line_pointer;
12799 e->X_op_symbol = make_expr_symbol (e);
12800 e->X_add_symbol = NULL;
12801 e->X_add_number = 0;
12802 e->X_op = O_index;
12803 }
12804 else
12805 {
12806 e->X_op = O_absent;
12807 input_line_pointer = end;
12808 }
12809 break;
12810 }
12811 }
12812
12813 \f
12814 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12815 const char *md_shortopts = "kVQ:sqnO::";
12816 #else
12817 const char *md_shortopts = "qnO::";
12818 #endif
12819
12820 #define OPTION_32 (OPTION_MD_BASE + 0)
12821 #define OPTION_64 (OPTION_MD_BASE + 1)
12822 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
12823 #define OPTION_MARCH (OPTION_MD_BASE + 3)
12824 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
12825 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
12826 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
12827 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
12828 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
12829 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
12830 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
12831 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
12832 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
12833 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
12834 #define OPTION_X32 (OPTION_MD_BASE + 14)
12835 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
12836 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
12837 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
12838 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
12839 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
12840 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
12841 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
12842 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
12843 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
12844 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
12845 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
12846 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
12847 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
12848 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
12849 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
12850 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
12851 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
12852 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
12853 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
12854
12855 struct option md_longopts[] =
12856 {
12857 {"32", no_argument, NULL, OPTION_32},
12858 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12859 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12860 {"64", no_argument, NULL, OPTION_64},
12861 #endif
12862 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12863 {"x32", no_argument, NULL, OPTION_X32},
12864 {"mshared", no_argument, NULL, OPTION_MSHARED},
12865 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
12866 #endif
12867 {"divide", no_argument, NULL, OPTION_DIVIDE},
12868 {"march", required_argument, NULL, OPTION_MARCH},
12869 {"mtune", required_argument, NULL, OPTION_MTUNE},
12870 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
12871 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
12872 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
12873 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
12874 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
12875 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
12876 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
12877 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
12878 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
12879 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
12880 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
12881 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
12882 # if defined (TE_PE) || defined (TE_PEP)
12883 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
12884 #endif
12885 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
12886 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
12887 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
12888 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
12889 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
12890 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
12891 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
12892 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
12893 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
12894 {"mlfence-before-indirect-branch", required_argument, NULL,
12895 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
12896 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
12897 {"mamd64", no_argument, NULL, OPTION_MAMD64},
12898 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
12899 {NULL, no_argument, NULL, 0}
12900 };
12901 size_t md_longopts_size = sizeof (md_longopts);
12902
12903 int
12904 md_parse_option (int c, const char *arg)
12905 {
12906 unsigned int j;
12907 char *arch, *next, *saved, *type;
12908
12909 switch (c)
12910 {
12911 case 'n':
12912 optimize_align_code = 0;
12913 break;
12914
12915 case 'q':
12916 quiet_warnings = 1;
12917 break;
12918
12919 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12920 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
12921 should be emitted or not. FIXME: Not implemented. */
12922 case 'Q':
12923 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
12924 return 0;
12925 break;
12926
12927 /* -V: SVR4 argument to print version ID. */
12928 case 'V':
12929 print_version_id ();
12930 break;
12931
12932 /* -k: Ignore for FreeBSD compatibility. */
12933 case 'k':
12934 break;
12935
12936 case 's':
12937 /* -s: On i386 Solaris, this tells the native assembler to use
12938 .stab instead of .stab.excl. We always use .stab anyhow. */
12939 break;
12940
12941 case OPTION_MSHARED:
12942 shared = 1;
12943 break;
12944
12945 case OPTION_X86_USED_NOTE:
12946 if (strcasecmp (arg, "yes") == 0)
12947 x86_used_note = 1;
12948 else if (strcasecmp (arg, "no") == 0)
12949 x86_used_note = 0;
12950 else
12951 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
12952 break;
12953
12954
12955 #endif
12956 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12957 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12958 case OPTION_64:
12959 {
12960 const char **list, **l;
12961
12962 list = bfd_target_list ();
12963 for (l = list; *l != NULL; l++)
12964 if (CONST_STRNEQ (*l, "elf64-x86-64")
12965 || strcmp (*l, "coff-x86-64") == 0
12966 || strcmp (*l, "pe-x86-64") == 0
12967 || strcmp (*l, "pei-x86-64") == 0
12968 || strcmp (*l, "mach-o-x86-64") == 0)
12969 {
12970 default_arch = "x86_64";
12971 break;
12972 }
12973 if (*l == NULL)
12974 as_fatal (_("no compiled in support for x86_64"));
12975 free (list);
12976 }
12977 break;
12978 #endif
12979
12980 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12981 case OPTION_X32:
12982 if (IS_ELF)
12983 {
12984 const char **list, **l;
12985
12986 list = bfd_target_list ();
12987 for (l = list; *l != NULL; l++)
12988 if (CONST_STRNEQ (*l, "elf32-x86-64"))
12989 {
12990 default_arch = "x86_64:32";
12991 break;
12992 }
12993 if (*l == NULL)
12994 as_fatal (_("no compiled in support for 32bit x86_64"));
12995 free (list);
12996 }
12997 else
12998 as_fatal (_("32bit x86_64 is only supported for ELF"));
12999 break;
13000 #endif
13001
13002 case OPTION_32:
13003 default_arch = "i386";
13004 break;
13005
13006 case OPTION_DIVIDE:
13007 #ifdef SVR4_COMMENT_CHARS
13008 {
13009 char *n, *t;
13010 const char *s;
13011
13012 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13013 t = n;
13014 for (s = i386_comment_chars; *s != '\0'; s++)
13015 if (*s != '/')
13016 *t++ = *s;
13017 *t = '\0';
13018 i386_comment_chars = n;
13019 }
13020 #endif
13021 break;
13022
13023 case OPTION_MARCH:
13024 saved = xstrdup (arg);
13025 arch = saved;
13026 /* Allow -march=+nosse. */
13027 if (*arch == '+')
13028 arch++;
13029 do
13030 {
13031 if (*arch == '.')
13032 as_fatal (_("invalid -march= option: `%s'"), arg);
13033 next = strchr (arch, '+');
13034 if (next)
13035 *next++ = '\0';
13036 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13037 {
13038 if (strcmp (arch, cpu_arch [j].name) == 0)
13039 {
13040 /* Processor. */
13041 if (! cpu_arch[j].flags.bitfield.cpui386)
13042 continue;
13043
13044 cpu_arch_name = cpu_arch[j].name;
13045 cpu_sub_arch_name = NULL;
13046 cpu_arch_flags = cpu_arch[j].flags;
13047 cpu_arch_isa = cpu_arch[j].type;
13048 cpu_arch_isa_flags = cpu_arch[j].flags;
13049 if (!cpu_arch_tune_set)
13050 {
13051 cpu_arch_tune = cpu_arch_isa;
13052 cpu_arch_tune_flags = cpu_arch_isa_flags;
13053 }
13054 break;
13055 }
13056 else if (*cpu_arch [j].name == '.'
13057 && strcmp (arch, cpu_arch [j].name + 1) == 0)
13058 {
13059 /* ISA extension. */
13060 i386_cpu_flags flags;
13061
13062 flags = cpu_flags_or (cpu_arch_flags,
13063 cpu_arch[j].flags);
13064
13065 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13066 {
13067 if (cpu_sub_arch_name)
13068 {
13069 char *name = cpu_sub_arch_name;
13070 cpu_sub_arch_name = concat (name,
13071 cpu_arch[j].name,
13072 (const char *) NULL);
13073 free (name);
13074 }
13075 else
13076 cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
13077 cpu_arch_flags = flags;
13078 cpu_arch_isa_flags = flags;
13079 }
13080 else
13081 cpu_arch_isa_flags
13082 = cpu_flags_or (cpu_arch_isa_flags,
13083 cpu_arch[j].flags);
13084 break;
13085 }
13086 }
13087
13088 if (j >= ARRAY_SIZE (cpu_arch))
13089 {
13090 /* Disable an ISA extension. */
13091 for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13092 if (strcmp (arch, cpu_noarch [j].name) == 0)
13093 {
13094 i386_cpu_flags flags;
13095
13096 flags = cpu_flags_and_not (cpu_arch_flags,
13097 cpu_noarch[j].flags);
13098 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13099 {
13100 if (cpu_sub_arch_name)
13101 {
13102 char *name = cpu_sub_arch_name;
13103 cpu_sub_arch_name = concat (name, arch,
13104 (const char *) NULL);
13105 free (name);
13106 }
13107 else
13108 cpu_sub_arch_name = xstrdup (arch);
13109 cpu_arch_flags = flags;
13110 cpu_arch_isa_flags = flags;
13111 }
13112 break;
13113 }
13114
13115 if (j >= ARRAY_SIZE (cpu_noarch))
13116 j = ARRAY_SIZE (cpu_arch);
13117 }
13118
13119 if (j >= ARRAY_SIZE (cpu_arch))
13120 as_fatal (_("invalid -march= option: `%s'"), arg);
13121
13122 arch = next;
13123 }
13124 while (next != NULL);
13125 free (saved);
13126 break;
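/* Usage example (editor's note): -march=corei7+avx first selects the
   corei7 processor entry, then ORs in the ".avx" extension flags; a
   trailing +nosse would instead AND-NOT the "nosse" cpu_noarch flags
   out of cpu_arch_flags.  */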
13127
13128 case OPTION_MTUNE:
13129 if (*arg == '.')
13130 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13131 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13132 {
13133 if (strcmp (arg, cpu_arch [j].name) == 0)
13134 {
13135 cpu_arch_tune_set = 1;
13136 cpu_arch_tune = cpu_arch [j].type;
13137 cpu_arch_tune_flags = cpu_arch[j].flags;
13138 break;
13139 }
13140 }
13141 if (j >= ARRAY_SIZE (cpu_arch))
13142 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13143 break;
13144
13145 case OPTION_MMNEMONIC:
13146 if (strcasecmp (arg, "att") == 0)
13147 intel_mnemonic = 0;
13148 else if (strcasecmp (arg, "intel") == 0)
13149 intel_mnemonic = 1;
13150 else
13151 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13152 break;
13153
13154 case OPTION_MSYNTAX:
13155 if (strcasecmp (arg, "att") == 0)
13156 intel_syntax = 0;
13157 else if (strcasecmp (arg, "intel") == 0)
13158 intel_syntax = 1;
13159 else
13160 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13161 break;
13162
13163 case OPTION_MINDEX_REG:
13164 allow_index_reg = 1;
13165 break;
13166
13167 case OPTION_MNAKED_REG:
13168 allow_naked_reg = 1;
13169 break;
13170
13171 case OPTION_MSSE2AVX:
13172 sse2avx = 1;
13173 break;
13174
13175 case OPTION_MSSE_CHECK:
13176 if (strcasecmp (arg, "error") == 0)
13177 sse_check = check_error;
13178 else if (strcasecmp (arg, "warning") == 0)
13179 sse_check = check_warning;
13180 else if (strcasecmp (arg, "none") == 0)
13181 sse_check = check_none;
13182 else
13183 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13184 break;
13185
13186 case OPTION_MOPERAND_CHECK:
13187 if (strcasecmp (arg, "error") == 0)
13188 operand_check = check_error;
13189 else if (strcasecmp (arg, "warning") == 0)
13190 operand_check = check_warning;
13191 else if (strcasecmp (arg, "none") == 0)
13192 operand_check = check_none;
13193 else
13194 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13195 break;
13196
13197 case OPTION_MAVXSCALAR:
13198 if (strcasecmp (arg, "128") == 0)
13199 avxscalar = vex128;
13200 else if (strcasecmp (arg, "256") == 0)
13201 avxscalar = vex256;
13202 else
13203 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13204 break;
13205
13206 case OPTION_MVEXWIG:
13207 if (strcmp (arg, "0") == 0)
13208 vexwig = vexw0;
13209 else if (strcmp (arg, "1") == 0)
13210 vexwig = vexw1;
13211 else
13212 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13213 break;
13214
13215 case OPTION_MADD_BND_PREFIX:
13216 add_bnd_prefix = 1;
13217 break;
13218
13219 case OPTION_MEVEXLIG:
13220 if (strcmp (arg, "128") == 0)
13221 evexlig = evexl128;
13222 else if (strcmp (arg, "256") == 0)
13223 evexlig = evexl256;
13224 else if (strcmp (arg, "512") == 0)
13225 evexlig = evexl512;
13226 else
13227 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13228 break;
13229
13230 case OPTION_MEVEXRCIG:
13231 if (strcmp (arg, "rne") == 0)
13232 evexrcig = rne;
13233 else if (strcmp (arg, "rd") == 0)
13234 evexrcig = rd;
13235 else if (strcmp (arg, "ru") == 0)
13236 evexrcig = ru;
13237 else if (strcmp (arg, "rz") == 0)
13238 evexrcig = rz;
13239 else
13240 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13241 break;
13242
13243 case OPTION_MEVEXWIG:
13244 if (strcmp (arg, "0") == 0)
13245 evexwig = evexw0;
13246 else if (strcmp (arg, "1") == 0)
13247 evexwig = evexw1;
13248 else
13249 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13250 break;
13251
13252 # if defined (TE_PE) || defined (TE_PEP)
13253 case OPTION_MBIG_OBJ:
13254 use_big_obj = 1;
13255 break;
13256 #endif
13257
13258 case OPTION_MOMIT_LOCK_PREFIX:
13259 if (strcasecmp (arg, "yes") == 0)
13260 omit_lock_prefix = 1;
13261 else if (strcasecmp (arg, "no") == 0)
13262 omit_lock_prefix = 0;
13263 else
13264 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13265 break;
13266
13267 case OPTION_MFENCE_AS_LOCK_ADD:
13268 if (strcasecmp (arg, "yes") == 0)
13269 avoid_fence = 1;
13270 else if (strcasecmp (arg, "no") == 0)
13271 avoid_fence = 0;
13272 else
13273 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13274 break;
13275
13276 case OPTION_MLFENCE_AFTER_LOAD:
13277 if (strcasecmp (arg, "yes") == 0)
13278 lfence_after_load = 1;
13279 else if (strcasecmp (arg, "no") == 0)
13280 lfence_after_load = 0;
13281 else
13282 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13283 break;
13284
13285 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13286 if (strcasecmp (arg, "all") == 0)
13287 {
13288 lfence_before_indirect_branch = lfence_branch_all;
13289 if (lfence_before_ret == lfence_before_ret_none)
13290 lfence_before_ret = lfence_before_ret_shl;
13291 }
13292 else if (strcasecmp (arg, "memory") == 0)
13293 lfence_before_indirect_branch = lfence_branch_memory;
13294 else if (strcasecmp (arg, "register") == 0)
13295 lfence_before_indirect_branch = lfence_branch_register;
13296 else if (strcasecmp (arg, "none") == 0)
13297 lfence_before_indirect_branch = lfence_branch_none;
13298 else
13299 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13300 arg);
13301 break;
13302
13303 case OPTION_MLFENCE_BEFORE_RET:
13304 if (strcasecmp (arg, "or") == 0)
13305 lfence_before_ret = lfence_before_ret_or;
13306 else if (strcasecmp (arg, "not") == 0)
13307 lfence_before_ret = lfence_before_ret_not;
13308 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13309 lfence_before_ret = lfence_before_ret_shl;
13310 else if (strcasecmp (arg, "none") == 0)
13311 lfence_before_ret = lfence_before_ret_none;
13312 else
13313 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13314 arg);
13315 break;
13316
13317 case OPTION_MRELAX_RELOCATIONS:
13318 if (strcasecmp (arg, "yes") == 0)
13319 generate_relax_relocations = 1;
13320 else if (strcasecmp (arg, "no") == 0)
13321 generate_relax_relocations = 0;
13322 else
13323 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13324 break;
13325
13326 case OPTION_MALIGN_BRANCH_BOUNDARY:
13327 {
13328 char *end;
13329 long int align = strtoul (arg, &end, 0);
13330 if (*end == '\0')
13331 {
13332 if (align == 0)
13333 {
13334 align_branch_power = 0;
13335 break;
13336 }
13337 else if (align >= 16)
13338 {
13339 int align_power;
13340 for (align_power = 0;
13341 (align & 1) == 0;
13342 align >>= 1, align_power++)
13343 continue;
13344 /* Limit alignment power to 31. */
13345 if (align == 1 && align_power < 32)
13346 {
13347 align_branch_power = align_power;
13348 break;
13349 }
13350 }
13351 }
13352 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13353 }
13354 break;
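/* Worked example (editor's note): -malign-branch-boundary=32 passes
   the checks (32 >= 16 and a power of two) and sets
   align_branch_power to 5; a value such as 24 leaves align != 1
   after shifting out the low zero bits and is rejected.  */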
13355
13356 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13357 {
13358 char *end;
13359 int align = strtoul (arg, &end, 0);
13360 /* Some processors only support 5 prefixes. */
13361 if (*end == '\0' && align >= 0 && align < 6)
13362 {
13363 align_branch_prefix_size = align;
13364 break;
13365 }
13366 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13367 arg);
13368 }
13369 break;
13370
13371 case OPTION_MALIGN_BRANCH:
13372 align_branch = 0;
13373 saved = xstrdup (arg);
13374 type = saved;
13375 do
13376 {
13377 next = strchr (type, '+');
13378 if (next)
13379 *next++ = '\0';
13380 if (strcasecmp (type, "jcc") == 0)
13381 align_branch |= align_branch_jcc_bit;
13382 else if (strcasecmp (type, "fused") == 0)
13383 align_branch |= align_branch_fused_bit;
13384 else if (strcasecmp (type, "jmp") == 0)
13385 align_branch |= align_branch_jmp_bit;
13386 else if (strcasecmp (type, "call") == 0)
13387 align_branch |= align_branch_call_bit;
13388 else if (strcasecmp (type, "ret") == 0)
13389 align_branch |= align_branch_ret_bit;
13390 else if (strcasecmp (type, "indirect") == 0)
13391 align_branch |= align_branch_indirect_bit;
13392 else
13393 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13394 type = next;
13395 }
13396 while (next != NULL);
13397 free (saved);
13398 break;
13399
13400 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13401 align_branch_power = 5;
13402 align_branch_prefix_size = 5;
13403 align_branch = (align_branch_jcc_bit
13404 | align_branch_fused_bit
13405 | align_branch_jmp_bit);
13406 break;
13407
13408 case OPTION_MAMD64:
13409 isa64 = amd64;
13410 break;
13411
13412 case OPTION_MINTEL64:
13413 isa64 = intel64;
13414 break;
13415
13416 case 'O':
13417 if (arg == NULL)
13418 {
13419 optimize = 1;
13420 /* Turn off -Os. */
13421 optimize_for_space = 0;
13422 }
13423 else if (*arg == 's')
13424 {
13425 optimize_for_space = 1;
13426 /* Turn on all encoding optimizations. */
13427 optimize = INT_MAX;
13428 }
13429 else
13430 {
13431 optimize = atoi (arg);
13432 /* Turn off -Os. */
13433 optimize_for_space = 0;
13434 }
13435 break;
13436
13437 default:
13438 return 0;
13439 }
13440 return 1;
13441 }
13442
13443 #define MESSAGE_TEMPLATE \
13444 " "
13445
13446 static char *
13447 output_message (FILE *stream, char *p, char *message, char *start,
13448 int *left_p, const char *name, int len)
13449 {
13450 int size = sizeof (MESSAGE_TEMPLATE);
13451 int left = *left_p;
13452
13453 /* Reserve 2 spaces for ", " or ",\0" */
13454 left -= len + 2;
13455
13456 /* Check if there is any room. */
13457 if (left >= 0)
13458 {
13459 if (p != start)
13460 {
13461 *p++ = ',';
13462 *p++ = ' ';
13463 }
13464 p = mempcpy (p, name, len);
13465 }
13466 else
13467 {
13468 /* Output the current message now and start a new one. */
13469 *p++ = ',';
13470 *p = '\0';
13471 fprintf (stream, "%s\n", message);
13472 p = start;
13473 left = size - (start - message) - len - 2;
13474
13475 gas_assert (left >= 0);
13476
13477 p = mempcpy (p, name, len);
13478 }
13479
13480 *left_p = left;
13481 return p;
13482 }
13483
13484 static void
13485 show_arch (FILE *stream, int ext, int check)
13486 {
13487 static char message[] = MESSAGE_TEMPLATE;
13488 char *start = message + 27;
13489 char *p;
13490 int size = sizeof (MESSAGE_TEMPLATE);
13491 int left;
13492 const char *name;
13493 int len;
13494 unsigned int j;
13495
13496 p = start;
13497 left = size - (start - message);
13498 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13499 {
13500 /* Should it be skipped? */
13501 if (cpu_arch [j].skip)
13502 continue;
13503
13504 name = cpu_arch [j].name;
13505 len = cpu_arch [j].len;
13506 if (*name == '.')
13507 {
13508 /* It is an extension. Skip if we aren't asked to show it. */
13509 if (ext)
13510 {
13511 name++;
13512 len--;
13513 }
13514 else
13515 continue;
13516 }
13517 else if (ext)
13518 {
13519 /* It is a processor.  Skip if we only show extensions.  */
13520 continue;
13521 }
13522 else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13523 {
13524 /* It is an impossible processor - skip. */
13525 continue;
13526 }
13527
13528 p = output_message (stream, p, message, start, &left, name, len);
13529 }
13530
13531 /* Display disabled extensions. */
13532 if (ext)
13533 for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13534 {
13535 name = cpu_noarch [j].name;
13536 len = cpu_noarch [j].len;
13537 p = output_message (stream, p, message, start, &left, name,
13538 len);
13539 }
13540
13541 *p = '\0';
13542 fprintf (stream, "%s\n", message);
13543 }
13544
13545 void
13546 md_show_usage (FILE *stream)
13547 {
13548 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13549 fprintf (stream, _("\
13550 -Qy, -Qn ignored\n\
13551 -V print assembler version number\n\
13552 -k ignored\n"));
13553 #endif
13554 fprintf (stream, _("\
13555 -n Do not optimize code alignment\n\
13556 -q quieten some warnings\n"));
13557 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13558 fprintf (stream, _("\
13559 -s ignored\n"));
13560 #endif
13561 #if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13562 || defined (TE_PE) || defined (TE_PEP))
13563 fprintf (stream, _("\
13564 --32/--64/--x32 generate 32bit/64bit/x32 code\n"));
13565 #endif
13566 #ifdef SVR4_COMMENT_CHARS
13567 fprintf (stream, _("\
13568 --divide do not treat `/' as a comment character\n"));
13569 #else
13570 fprintf (stream, _("\
13571 --divide ignored\n"));
13572 #endif
13573 fprintf (stream, _("\
13574 -march=CPU[,+EXTENSION...]\n\
13575 generate code for CPU and EXTENSION; CPU is one of:\n"));
13576 show_arch (stream, 0, 1);
13577 fprintf (stream, _("\
13578 EXTENSION is a combination of:\n"));
13579 show_arch (stream, 1, 0);
13580 fprintf (stream, _("\
13581 -mtune=CPU optimize for CPU; CPU is one of:\n"));
13582 show_arch (stream, 0, 0);
13583 fprintf (stream, _("\
13584 -msse2avx encode SSE instructions with VEX prefix\n"));
13585 fprintf (stream, _("\
13586 -msse-check=[none|error|warning] (default: warning)\n\
13587 check SSE instructions\n"));
13588 fprintf (stream, _("\
13589 -moperand-check=[none|error|warning] (default: warning)\n\
13590 check operand combinations for validity\n"));
13591 fprintf (stream, _("\
13592 -mavxscalar=[128|256] (default: 128)\n\
13593 encode scalar AVX instructions with specific vector\n\
13594 length\n"));
13595 fprintf (stream, _("\
13596 -mvexwig=[0|1] (default: 0)\n\
13597 encode VEX instructions with specific VEX.W value\n\
13598 for VEX.W bit ignored instructions\n"));
13599 fprintf (stream, _("\
13600 -mevexlig=[128|256|512] (default: 128)\n\
13601 encode scalar EVEX instructions with specific vector\n\
13602 length\n"));
13603 fprintf (stream, _("\
13604 -mevexwig=[0|1] (default: 0)\n\
13605 encode EVEX instructions with specific EVEX.W value\n\
13606 for EVEX.W bit ignored instructions\n"));
13607 fprintf (stream, _("\
13608 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
13609 encode EVEX instructions with specific EVEX.RC value\n\
13610 for SAE-only ignored instructions\n"));
13611 fprintf (stream, _("\
13612 -mmnemonic=[att|intel] "));
13613 if (SYSV386_COMPAT)
13614 fprintf (stream, _("(default: att)\n"));
13615 else
13616 fprintf (stream, _("(default: intel)\n"));
13617 fprintf (stream, _("\
13618 use AT&T/Intel mnemonic\n"));
13619 fprintf (stream, _("\
13620 -msyntax=[att|intel] (default: att)\n\
13621 use AT&T/Intel syntax\n"));
13622 fprintf (stream, _("\
13623 -mindex-reg support pseudo index registers\n"));
13624 fprintf (stream, _("\
13625 -mnaked-reg don't require `%%' prefix for registers\n"));
13626 fprintf (stream, _("\
13627 -madd-bnd-prefix add BND prefix for all valid branches\n"));
13628 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13629 fprintf (stream, _("\
13630 -mshared disable branch optimization for shared code\n"));
13631 fprintf (stream, _("\
13632 -mx86-used-note=[no|yes] "));
13633 if (DEFAULT_X86_USED_NOTE)
13634 fprintf (stream, _("(default: yes)\n"));
13635 else
13636 fprintf (stream, _("(default: no)\n"));
13637 fprintf (stream, _("\
13638 generate x86 used ISA and feature properties\n"));
13639 #endif
13640 #if defined (TE_PE) || defined (TE_PEP)
13641 fprintf (stream, _("\
13642 -mbig-obj generate big object files\n"));
13643 #endif
13644 fprintf (stream, _("\
13645 -momit-lock-prefix=[no|yes] (default: no)\n\
13646 strip all lock prefixes\n"));
13647 fprintf (stream, _("\
13648 -mfence-as-lock-add=[no|yes] (default: no)\n\
13649 encode lfence, mfence and sfence as\n\
13650 lock addl $0x0, (%%{re}sp)\n"));
13651 fprintf (stream, _("\
13652 -mrelax-relocations=[no|yes] "));
13653 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13654 fprintf (stream, _("(default: yes)\n"));
13655 else
13656 fprintf (stream, _("(default: no)\n"));
13657 fprintf (stream, _("\
13658 generate relax relocations\n"));
13659 fprintf (stream, _("\
13660 -malign-branch-boundary=NUM (default: 0)\n\
13661 align branches within NUM byte boundary\n"));
13662 fprintf (stream, _("\
13663 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
13664 TYPE is a combination of jcc, fused, jmp, call, ret,\n\
13665 indirect\n\
13666 specify types of branches to align\n"));
13667 fprintf (stream, _("\
13668 -malign-branch-prefix-size=NUM (default: 5)\n\
13669 align branches with NUM prefixes per instruction\n"));
13670 fprintf (stream, _("\
13671 -mbranches-within-32B-boundaries\n\
13672 align branches within 32 byte boundary\n"));
13673 fprintf (stream, _("\
13674 -mlfence-after-load=[no|yes] (default: no)\n\
13675 generate lfence after load\n"));
13676 fprintf (stream, _("\
13677 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
13678 generate lfence before indirect near branch\n"));
13679 fprintf (stream, _("\
13680 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
13681 generate lfence before ret\n"));
13682 fprintf (stream, _("\
13683 -mamd64 accept only AMD64 ISA [default]\n"));
13684 fprintf (stream, _("\
13685 -mintel64 accept only Intel64 ISA\n"));
13686 }
13687
13688 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13689 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13690 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13691
13692 /* Pick the target format to use. */
13693
13694 const char *
13695 i386_target_format (void)
13696 {
13697 if (!strncmp (default_arch, "x86_64", 6))
13698 {
13699 update_code_flag (CODE_64BIT, 1);
13700 if (default_arch[6] == '\0')
13701 x86_elf_abi = X86_64_ABI;
13702 else
13703 x86_elf_abi = X86_64_X32_ABI;
13704 }
13705 else if (!strcmp (default_arch, "i386"))
13706 update_code_flag (CODE_32BIT, 1);
13707 else if (!strcmp (default_arch, "iamcu"))
13708 {
13709 update_code_flag (CODE_32BIT, 1);
13710 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
13711 {
13712 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
13713 cpu_arch_name = "iamcu";
13714 cpu_sub_arch_name = NULL;
13715 cpu_arch_flags = iamcu_flags;
13716 cpu_arch_isa = PROCESSOR_IAMCU;
13717 cpu_arch_isa_flags = iamcu_flags;
13718 if (!cpu_arch_tune_set)
13719 {
13720 cpu_arch_tune = cpu_arch_isa;
13721 cpu_arch_tune_flags = cpu_arch_isa_flags;
13722 }
13723 }
13724 else if (cpu_arch_isa != PROCESSOR_IAMCU)
13725 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
13726 cpu_arch_name);
13727 }
13728 else
13729 as_fatal (_("unknown architecture"));
13730
13731 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
13732 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13733 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
13734 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13735
13736 switch (OUTPUT_FLAVOR)
13737 {
13738 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
13739 case bfd_target_aout_flavour:
13740 return AOUT_TARGET_FORMAT;
13741 #endif
13742 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
13743 # if defined (TE_PE) || defined (TE_PEP)
13744 case bfd_target_coff_flavour:
13745 if (flag_code == CODE_64BIT)
13746 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
13747 else
13748 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
13749 # elif defined (TE_GO32)
13750 case bfd_target_coff_flavour:
13751 return "coff-go32";
13752 # else
13753 case bfd_target_coff_flavour:
13754 return "coff-i386";
13755 # endif
13756 #endif
13757 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13758 case bfd_target_elf_flavour:
13759 {
13760 const char *format;
13761
13762 switch (x86_elf_abi)
13763 {
13764 default:
13765 format = ELF_TARGET_FORMAT;
13766 #ifndef TE_SOLARIS
13767 tls_get_addr = "___tls_get_addr";
13768 #endif
13769 break;
13770 case X86_64_ABI:
13771 use_rela_relocations = 1;
13772 object_64bit = 1;
13773 #ifndef TE_SOLARIS
13774 tls_get_addr = "__tls_get_addr";
13775 #endif
13776 format = ELF_TARGET_FORMAT64;
13777 break;
13778 case X86_64_X32_ABI:
13779 use_rela_relocations = 1;
13780 object_64bit = 1;
13781 #ifndef TE_SOLARIS
13782 tls_get_addr = "__tls_get_addr";
13783 #endif
13784 disallow_64bit_reloc = 1;
13785 format = ELF_TARGET_FORMAT32;
13786 break;
13787 }
13788 if (cpu_arch_isa == PROCESSOR_L1OM)
13789 {
13790 if (x86_elf_abi != X86_64_ABI)
13791 as_fatal (_("Intel L1OM is 64bit only"));
13792 return ELF_TARGET_L1OM_FORMAT;
13793 }
13794 else if (cpu_arch_isa == PROCESSOR_K1OM)
13795 {
13796 if (x86_elf_abi != X86_64_ABI)
13797 as_fatal (_("Intel K1OM is 64bit only"));
13798 return ELF_TARGET_K1OM_FORMAT;
13799 }
13800 else if (cpu_arch_isa == PROCESSOR_IAMCU)
13801 {
13802 if (x86_elf_abi != I386_ABI)
13803 as_fatal (_("Intel MCU is 32bit only"));
13804 return ELF_TARGET_IAMCU_FORMAT;
13805 }
13806 else
13807 return format;
13808 }
13809 #endif
13810 #if defined (OBJ_MACH_O)
13811 case bfd_target_mach_o_flavour:
13812 if (flag_code == CODE_64BIT)
13813 {
13814 use_rela_relocations = 1;
13815 object_64bit = 1;
13816 return "mach-o-x86-64";
13817 }
13818 else
13819 return "mach-o-i386";
13820 #endif
13821 default:
13822 abort ();
13823 return NULL;
13824 }
13825 }
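/* For example, assembling with --64 for an ELF target selects
   ELF_TARGET_FORMAT64 (conventionally "elf64-x86-64") and, outside
   Solaris, the "__tls_get_addr" helper; --x32 additionally sets
   disallow_64bit_reloc.  */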
13826
13827 #endif /* OBJ_MAYBE_ more than one */
13828 \f
13829 symbolS *
13830 md_undefined_symbol (char *name)
13831 {
13832 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
13833 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
13834 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
13835 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
13836 {
13837 if (!GOT_symbol)
13838 {
13839 if (symbol_find (name))
13840 as_bad (_("GOT already in symbol table"));
13841 GOT_symbol = symbol_new (name, undefined_section,
13842 &zero_address_frag, 0);
13843 }
13844 return GOT_symbol;
13845 }
13846 return 0;
13847 }
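/* For example, the 32-bit PIC idiom
     addl $_GLOBAL_OFFSET_TABLE_, %ebx
   first reaches here with _GLOBAL_OFFSET_TABLE_ undefined, creating
   GOT_symbol on the spot.  */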
13848
13849 /* Round up a section size to the appropriate boundary. */
13850
13851 valueT
13852 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
13853 {
13854 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13855 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
13856 {
13857 /* For a.out, force the section size to be aligned. If we don't do
13858 this, BFD will align it for us, but it will not write out the
13859 final bytes of the section. This may be a bug in BFD, but it is
13860 easier to fix it here since that is how the other a.out targets
13861 work. */
13862 int align;
13863
13864 align = bfd_section_alignment (segment);
13865 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
13866 }
13867 #endif
13868
13869 return size;
13870 }
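/* For example, with an alignment power of 4 a 13-byte a.out section
   is rounded up to 16: (13 + (1 << 4) - 1) & -(1 << 4) == 16.  */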
13871
13872 /* On the i386, PC-relative offsets are relative to the start of the
13873 next instruction. That is, the address of the offset, plus its
13874 size, since the offset is always the last part of the insn. */
13875
13876 long
13877 md_pcrel_from (fixS *fixP)
13878 {
13879 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13880 }
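/* For example, in a 5-byte `jmp rel32' (e9 xx xx xx xx) the fixup
   covers the last 4 bytes, so fr_address + fx_where + fx_size is the
   address of the next instruction, i.e. the base the CPU itself
   uses.  */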
13881
13882 #ifndef I386COFF
13883
13884 static void
13885 s_bss (int ignore ATTRIBUTE_UNUSED)
13886 {
13887 int temp;
13888
13889 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13890 if (IS_ELF)
13891 obj_elf_section_change_hook ();
13892 #endif
13893 temp = get_absolute_expression ();
13894 subseg_set (bss_section, (subsegT) temp);
13895 demand_empty_rest_of_line ();
13896 }
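/* For example, `.bss 2' switches assembly to subsegment 2 of the
   .bss section.  */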
13897
13898 #endif
13899
13900 /* Remember constant directive. */
13901
13902 void
13903 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13904 {
13905 if (last_insn.kind != last_insn_directive
13906 && (bfd_section_flags (now_seg) & SEC_CODE))
13907 {
13908 last_insn.seg = now_seg;
13909 last_insn.kind = last_insn_directive;
13910 last_insn.name = "constant directive";
13911 last_insn.file = as_where (&last_insn.line);
13912 if (lfence_before_ret != lfence_before_ret_none)
13913 {
13914 if (lfence_before_indirect_branch != lfence_branch_none)
13915 as_warn (_("constant directive skips -mlfence-before-ret "
13916 "and -mlfence-before-indirect-branch"));
13917 else
13918 as_warn (_("constant directive skips -mlfence-before-ret"));
13919 }
13920 else if (lfence_before_indirect_branch != lfence_branch_none)
13921 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
13922 }
13923 }
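/* For example, a `.long' emitted into a .text section is recorded
   here so that later branch padding treats the spot as data, and the
   warnings above note that any -mlfence-* protection is skipped
   there.  */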
13924
13925 void
13926 i386_validate_fix (fixS *fixp)
13927 {
13928 if (fixp->fx_subsy)
13929 {
13930 if (fixp->fx_subsy == GOT_symbol)
13931 {
13932 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
13933 {
13934 if (!object_64bit)
13935 abort ();
13936 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13937 if (fixp->fx_tcbit2)
13938 fixp->fx_r_type = (fixp->fx_tcbit
13939 ? BFD_RELOC_X86_64_REX_GOTPCRELX
13940 : BFD_RELOC_X86_64_GOTPCRELX);
13941 else
13942 #endif
13943 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
13944 }
13945 else
13946 {
13947 if (!object_64bit)
13948 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
13949 else
13950 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
13951 }
13952 fixp->fx_subsy = 0;
13953 }
13954 }
13955 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13956 else
13957 {
13958 /* NB: Commit 292676c1 resolved PLT32 relocs against local symbols
13959 to their sections.  Since a PLT32 relocation must be against a
13960 symbol, turn such PLT32 relocations into PC32 relocations.  */
13961 if (fixp->fx_addsy
13962 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
13963 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
13964 && symbol_section_p (fixp->fx_addsy))
13965 fixp->fx_r_type = BFD_RELOC_32_PCREL;
13966 if (!object_64bit)
13967 {
13968 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
13969 && fixp->fx_tcbit2)
13970 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
13971 }
13972 }
13973 #endif
13974 }
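/* For example, `call foo@PLT' whose symbol was already resolved to a
   section symbol is rewritten above from a PLT32 relocation to a
   plain PC32 one.  */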
13975
13976 arelent *
13977 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
13978 {
13979 arelent *rel;
13980 bfd_reloc_code_real_type code;
13981
13982 switch (fixp->fx_r_type)
13983 {
13984 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13985 case BFD_RELOC_SIZE32:
13986 case BFD_RELOC_SIZE64:
13987 if (S_IS_DEFINED (fixp->fx_addsy)
13988 && !S_IS_EXTERNAL (fixp->fx_addsy))
13989 {
13990 /* Resolve a size relocation against a local symbol to the
13991 symbol's size plus the addend.  */
13992 valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
13993 if (fixp->fx_r_type == BFD_RELOC_SIZE32
13994 && !fits_in_unsigned_long (value))
13995 as_bad_where (fixp->fx_file, fixp->fx_line,
13996 _("symbol size computation overflow"));
13997 fixp->fx_addsy = NULL;
13998 fixp->fx_subsy = NULL;
13999 md_apply_fix (fixp, (valueT *) &value, NULL);
14000 return NULL;
14001 }
14002 #endif
14003 /* Fall through. */
14004
14005 case BFD_RELOC_X86_64_PLT32:
14006 case BFD_RELOC_X86_64_GOT32:
14007 case BFD_RELOC_X86_64_GOTPCREL:
14008 case BFD_RELOC_X86_64_GOTPCRELX:
14009 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14010 case BFD_RELOC_386_PLT32:
14011 case BFD_RELOC_386_GOT32:
14012 case BFD_RELOC_386_GOT32X:
14013 case BFD_RELOC_386_GOTOFF:
14014 case BFD_RELOC_386_GOTPC:
14015 case BFD_RELOC_386_TLS_GD:
14016 case BFD_RELOC_386_TLS_LDM:
14017 case BFD_RELOC_386_TLS_LDO_32:
14018 case BFD_RELOC_386_TLS_IE_32:
14019 case BFD_RELOC_386_TLS_IE:
14020 case BFD_RELOC_386_TLS_GOTIE:
14021 case BFD_RELOC_386_TLS_LE_32:
14022 case BFD_RELOC_386_TLS_LE:
14023 case BFD_RELOC_386_TLS_GOTDESC:
14024 case BFD_RELOC_386_TLS_DESC_CALL:
14025 case BFD_RELOC_X86_64_TLSGD:
14026 case BFD_RELOC_X86_64_TLSLD:
14027 case BFD_RELOC_X86_64_DTPOFF32:
14028 case BFD_RELOC_X86_64_DTPOFF64:
14029 case BFD_RELOC_X86_64_GOTTPOFF:
14030 case BFD_RELOC_X86_64_TPOFF32:
14031 case BFD_RELOC_X86_64_TPOFF64:
14032 case BFD_RELOC_X86_64_GOTOFF64:
14033 case BFD_RELOC_X86_64_GOTPC32:
14034 case BFD_RELOC_X86_64_GOT64:
14035 case BFD_RELOC_X86_64_GOTPCREL64:
14036 case BFD_RELOC_X86_64_GOTPC64:
14037 case BFD_RELOC_X86_64_GOTPLT64:
14038 case BFD_RELOC_X86_64_PLTOFF64:
14039 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14040 case BFD_RELOC_X86_64_TLSDESC_CALL:
14041 case BFD_RELOC_RVA:
14042 case BFD_RELOC_VTABLE_ENTRY:
14043 case BFD_RELOC_VTABLE_INHERIT:
14044 #ifdef TE_PE
14045 case BFD_RELOC_32_SECREL:
14046 #endif
14047 code = fixp->fx_r_type;
14048 break;
14049 case BFD_RELOC_X86_64_32S:
14050 if (!fixp->fx_pcrel)
14051 {
14052 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
14053 code = fixp->fx_r_type;
14054 break;
14055 }
14056 /* Fall through. */
14057 default:
14058 if (fixp->fx_pcrel)
14059 {
14060 switch (fixp->fx_size)
14061 {
14062 default:
14063 as_bad_where (fixp->fx_file, fixp->fx_line,
14064 _("can not do %d byte pc-relative relocation"),
14065 fixp->fx_size);
14066 code = BFD_RELOC_32_PCREL;
14067 break;
14068 case 1: code = BFD_RELOC_8_PCREL; break;
14069 case 2: code = BFD_RELOC_16_PCREL; break;
14070 case 4: code = BFD_RELOC_32_PCREL; break;
14071 #ifdef BFD64
14072 case 8: code = BFD_RELOC_64_PCREL; break;
14073 #endif
14074 }
14075 }
14076 else
14077 {
14078 switch (fixp->fx_size)
14079 {
14080 default:
14081 as_bad_where (fixp->fx_file, fixp->fx_line,
14082 _("can not do %d byte relocation"),
14083 fixp->fx_size);
14084 code = BFD_RELOC_32;
14085 break;
14086 case 1: code = BFD_RELOC_8; break;
14087 case 2: code = BFD_RELOC_16; break;
14088 case 4: code = BFD_RELOC_32; break;
14089 #ifdef BFD64
14090 case 8: code = BFD_RELOC_64; break;
14091 #endif
14092 }
14093 }
14094 break;
14095 }
14096
14097 if ((code == BFD_RELOC_32
14098 || code == BFD_RELOC_32_PCREL
14099 || code == BFD_RELOC_X86_64_32S)
14100 && GOT_symbol
14101 && fixp->fx_addsy == GOT_symbol)
14102 {
14103 if (!object_64bit)
14104 code = BFD_RELOC_386_GOTPC;
14105 else
14106 code = BFD_RELOC_X86_64_GOTPC32;
14107 }
14108 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14109 && GOT_symbol
14110 && fixp->fx_addsy == GOT_symbol)
14111 {
14112 code = BFD_RELOC_X86_64_GOTPC64;
14113 }
14114
14115 rel = XNEW (arelent);
14116 rel->sym_ptr_ptr = XNEW (asymbol *);
14117 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14118
14119 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14120
14121 if (!use_rela_relocations)
14122 {
14123 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14124 vtable entry to be used in the relocation's section offset. */
14125 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14126 rel->address = fixp->fx_offset;
14127 #if defined (OBJ_COFF) && defined (TE_PE)
14128 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14129 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14130 else
14131 #endif
14132 rel->addend = 0;
14133 }
14134 /* Use Rela relocations in 64-bit mode.  */
14135 else
14136 {
14137 if (disallow_64bit_reloc)
14138 switch (code)
14139 {
14140 case BFD_RELOC_X86_64_DTPOFF64:
14141 case BFD_RELOC_X86_64_TPOFF64:
14142 case BFD_RELOC_64_PCREL:
14143 case BFD_RELOC_X86_64_GOTOFF64:
14144 case BFD_RELOC_X86_64_GOT64:
14145 case BFD_RELOC_X86_64_GOTPCREL64:
14146 case BFD_RELOC_X86_64_GOTPC64:
14147 case BFD_RELOC_X86_64_GOTPLT64:
14148 case BFD_RELOC_X86_64_PLTOFF64:
14149 as_bad_where (fixp->fx_file, fixp->fx_line,
14150 _("cannot represent relocation type %s in x32 mode"),
14151 bfd_get_reloc_code_name (code));
14152 break;
14153 default:
14154 break;
14155 }
14156
14157 if (!fixp->fx_pcrel)
14158 rel->addend = fixp->fx_offset;
14159 else
14160 switch (code)
14161 {
14162 case BFD_RELOC_X86_64_PLT32:
14163 case BFD_RELOC_X86_64_GOT32:
14164 case BFD_RELOC_X86_64_GOTPCREL:
14165 case BFD_RELOC_X86_64_GOTPCRELX:
14166 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14167 case BFD_RELOC_X86_64_TLSGD:
14168 case BFD_RELOC_X86_64_TLSLD:
14169 case BFD_RELOC_X86_64_GOTTPOFF:
14170 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14171 case BFD_RELOC_X86_64_TLSDESC_CALL:
14172 rel->addend = fixp->fx_offset - fixp->fx_size;
14173 break;
14174 default:
14175 rel->addend = (section->vma
14176 - fixp->fx_size
14177 + fixp->fx_addnumber
14178 + md_pcrel_from (fixp));
14179 break;
14180 }
14181 }
14182
14183 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14184 if (rel->howto == NULL)
14185 {
14186 as_bad_where (fixp->fx_file, fixp->fx_line,
14187 _("cannot represent relocation type %s"),
14188 bfd_get_reloc_code_name (code));
14189 /* Set howto to a garbage value so that we can keep going. */
14190 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14191 gas_assert (rel->howto != NULL);
14192 }
14193
14194 return rel;
14195 }
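/* For example, `movq foo@GOTPCREL(%rip), %rax' in x86-64 ELF output
   produces a GOTPCREL-class relocation whose Rela addend is
   fx_offset - fx_size, i.e. the customary -4.  */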
14196
14197 #include "tc-i386-intel.c"
14198
14199 void
14200 tc_x86_parse_to_dw2regnum (expressionS *exp)
14201 {
14202 int saved_naked_reg;
14203 char saved_register_dot;
14204
14205 saved_naked_reg = allow_naked_reg;
14206 allow_naked_reg = 1;
14207 saved_register_dot = register_chars['.'];
14208 register_chars['.'] = '.';
14209 allow_pseudo_reg = 1;
14210 expression_and_evaluate (exp);
14211 allow_pseudo_reg = 0;
14212 register_chars['.'] = saved_register_dot;
14213 allow_naked_reg = saved_naked_reg;
14214
14215 if (exp->X_op == O_register && exp->X_add_number >= 0)
14216 {
14217 if ((addressT) exp->X_add_number < i386_regtab_size)
14218 {
14219 exp->X_op = O_constant;
14220 exp->X_add_number = i386_regtab[exp->X_add_number]
14221 .dw2_regnum[flag_code >> 1];
14222 }
14223 else
14224 exp->X_op = O_illegal;
14225 }
14226 }
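/* For example, parsing "esp" in 32-bit mode yields DWARF register 4
   and "rsp" in 64-bit mode yields 7, per the dw2_regnum tables in
   i386_regtab.  */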
14227
14228 void
14229 tc_x86_frame_initial_instructions (void)
14230 {
14231 static unsigned int sp_regno[2];
14232
14233 if (!sp_regno[flag_code >> 1])
14234 {
14235 char *saved_input = input_line_pointer;
14236 char sp[][4] = {"esp", "rsp"};
14237 expressionS exp;
14238
14239 input_line_pointer = sp[flag_code >> 1];
14240 tc_x86_parse_to_dw2regnum (&exp);
14241 gas_assert (exp.X_op == O_constant);
14242 sp_regno[flag_code >> 1] = exp.X_add_number;
14243 input_line_pointer = saved_input;
14244 }
14245
14246 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14247 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14248 }
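/* With x86_cie_data_alignment being -4 for 32-bit and -8 for 64-bit
   output, the two calls above amount to: CFA = %esp + 4 (resp.
   %rsp + 8), return address saved at CFA - 4 (resp. CFA - 8).  */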
14249
14250 int
14251 x86_dwarf2_addr_size (void)
14252 {
14253 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14254 if (x86_elf_abi == X86_64_X32_ABI)
14255 return 4;
14256 #endif
14257 return bfd_arch_bits_per_address (stdoutput) / 8;
14258 }
14259
14260 int
14261 i386_elf_section_type (const char *str, size_t len)
14262 {
14263 if (flag_code == CODE_64BIT
14264 && len == sizeof ("unwind") - 1
14265 && strncmp (str, "unwind", 6) == 0)
14266 return SHT_X86_64_UNWIND;
14267
14268 return -1;
14269 }
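/* This lets 64-bit code name the unwind section type directly, e.g.:
     .section .eh_frame,"a",@unwind  */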
14270
14271 #ifdef TE_SOLARIS
14272 void
14273 i386_solaris_fix_up_eh_frame (segT sec)
14274 {
14275 if (flag_code == CODE_64BIT)
14276 elf_section_type (sec) = SHT_X86_64_UNWIND;
14277 }
14278 #endif
14279
14280 #ifdef TE_PE
14281 void
14282 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14283 {
14284 expressionS exp;
14285
14286 exp.X_op = O_secrel;
14287 exp.X_add_symbol = symbol;
14288 exp.X_add_number = 0;
14289 emit_expr (&exp, size);
14290 }
14291 #endif
14292
14293 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14294 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
14295
14296 bfd_vma
14297 x86_64_section_letter (int letter, const char **ptr_msg)
14298 {
14299 if (flag_code == CODE_64BIT)
14300 {
14301 if (letter == 'l')
14302 return SHF_X86_64_LARGE;
14303
14304 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14305 }
14306 else
14307 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14308 return -1;
14309 }
14310
14311 bfd_vma
14312 x86_64_section_word (char *str, size_t len)
14313 {
14314 if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
14315 return SHF_X86_64_LARGE;
14316
14317 return -1;
14318 }
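/* For example, in 64-bit mode the `l' flag in
     .section .lrodata,"al"
   marks the section SHF_X86_64_LARGE; x86_64_section_word accepts the
   spelled-out "large" in the same role.  */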
14319
14320 static void
14321 handle_large_common (int small ATTRIBUTE_UNUSED)
14322 {
14323 if (flag_code != CODE_64BIT)
14324 {
14325 s_comm_internal (0, elf_common_parse);
14326 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14327 }
14328 else
14329 {
14330 static segT lbss_section;
14331 asection *saved_com_section_ptr = elf_com_section_ptr;
14332 asection *saved_bss_section = bss_section;
14333
14334 if (lbss_section == NULL)
14335 {
14336 flagword applicable;
14337 segT seg = now_seg;
14338 subsegT subseg = now_subseg;
14339
14340 /* The .lbss section is for local .largecomm symbols. */
14341 lbss_section = subseg_new (".lbss", 0);
14342 applicable = bfd_applicable_section_flags (stdoutput);
14343 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14344 seg_info (lbss_section)->bss = 1;
14345
14346 subseg_set (seg, subseg);
14347 }
14348
14349 elf_com_section_ptr = &_bfd_elf_large_com_section;
14350 bss_section = lbss_section;
14351
14352 s_comm_internal (0, elf_common_parse);
14353
14354 elf_com_section_ptr = saved_com_section_ptr;
14355 bss_section = saved_bss_section;
14356 }
14357 }
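/* For example:
     .largecomm big_table, 65536, 32
   defines a 64 KiB, 32-byte-aligned symbol against the large common
   section, with local definitions landing in .lbss.  */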
14358 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */