x86: Add support for Intel HRESET instruction
[deliverable/binutils-gdb.git] / gas / config / tc-i386.c
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2020 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "elf/x86-64.h"
34 #include "opcodes/i386-init.h"
35
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #else
39 #ifdef HAVE_SYS_PARAM_H
40 #include <sys/param.h>
41 #endif
42 #ifndef INT_MAX
43 #define INT_MAX (int) (((unsigned) (-1)) >> 1)
44 #endif
45 #endif
46
/* If non-zero, infer a missing address-size prefix from operand sizes;
   targets may pre-define this to override the default.  */
#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

/* Architecture assumed when none is specified on the command line.  */
#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */

/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX  'q'
/* Intel Syntax.  Use a non-ascii letter since it never appears
   in instructions.  */
#define LONG_DOUBLE_MNEM_SUFFIX '\1'

#define END_OF_INSN '\0'

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding
99
/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix (REX); kept as a plain int of flag bits.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;

/* x86 arch names, types and features; one entry per -march= value.  */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len;		/* arch string length */
  enum processor_type type;	/* arch type */
  i386_cpu_flags flags;		/* cpu feature flags */
  unsigned int skip;		/* show_arch should skip this. */
}
arch_entry;

/* Used to turn off indicated flags (the ".no*" sub-arch names).  */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len;		/* arch string length */
  i386_cpu_flags flags;		/* cpu feature flags */
}
noarch_entry;
154
/* Forward declarations for the parsing and encoding helpers defined
   later in this file.  */
static void update_code_flag (int, int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (char *, char **);
static char *parse_insn (char *, char *);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (int, int);
static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static void optimize_disp (void);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (void);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const seg_entry *build_modrm_byte (void);
static void output_insn (void);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifndef I386COFF
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };
216
/* This struct describes rounding control and SAE in the instruction.  */
struct RC_Operation
{
  enum rc_type
    {
      rne = 0,	/* round to nearest even */
      rd,	/* round down (toward -inf) */
      ru,	/* round up (toward +inf) */
      rz,	/* round toward zero */
      saeonly	/* suppress-all-exceptions without explicit rounding */
    } type;
  /* The operand this rounding/SAE annotation is attached to.  */
  int operand;
};

static struct RC_Operation rc_op;

/* The struct describes masking, applied to OPERAND in the instruction.
   MASK is a pointer to the corresponding mask register.  ZEROING tells
   whether merging or zeroing mask is used.  */
struct Mask_Operation
{
  const reg_entry *mask;
  unsigned int zeroing;
  /* The operand where this operation is associated.  */
  int operand;
};

static struct Mask_Operation mask_op;

/* The struct describes broadcasting, applied to OPERAND.  FACTOR is
   broadcast factor.  */
struct Broadcast_Operation
{
  /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
  int type;

  /* Index of broadcasted operand.  */
  int operand;

  /* Number of bytes to broadcast.  */
  int bytes;
};

static struct Broadcast_Operation broadcast_op;

/* VEX prefix.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;
271
/* 'md_assemble ()' gathers together information and puts it into a
   i386_insn.  */

/* One operand: a displacement expression, an immediate expression, or
   a register, depending on the corresponding types[] entry.  */
union i386_op
  {
    expressionS *disps;
    expressionS *imms;
    const reg_entry *regs;
  };

/* Reasons why the current insn could not be assembled; stored in
   i386_insn.error for diagnostics.  */
enum i386_error
  {
    operand_size_mismatch,
    operand_type_mismatch,
    register_type_mismatch,
    number_of_operands_mismatch,
    invalid_instruction_suffix,
    bad_imm4,
    unsupported_with_intel_mnemonic,
    unsupported_syntax,
    unsupported,
    invalid_sib_address,
    invalid_vsib_address,
    invalid_vector_register_set,
    invalid_tmm_register_set,
    unsupported_vector_index_register,
    unsupported_broadcast,
    broadcast_needed,
    unsupported_masking,
    mask_not_on_destination,
    no_default_mask,
    unsupported_rc_sae,
    rc_sae_operand_not_last_imm,
    invalid_register_operand,
  };
307
/* The fully parsed representation of one instruction, filled in by
   md_assemble () and its helpers and consumed by output_insn ().  */
struct _i386_insn
  {
    /* TM holds the template for the insn we're currently assembling.  */
    insn_template tm;

    /* SUFFIX holds the instruction size suffix for byte, word, dword
       or qword, if given.  */
    char suffix;

    /* OPERANDS gives the number of given operands.  */
    unsigned int operands;

    /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
       of given register, displacement, memory operands and immediate
       operands.  */
    unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

    /* TYPES [i] is the type (see above #defines) which tells us how to
       use OP[i] for the corresponding operand.  */
    i386_operand_type types[MAX_OPERANDS];

    /* Displacement expression, immediate expression, or register for each
       operand.  */
    union i386_op op[MAX_OPERANDS];

    /* Flags for operands.  */
    unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem 2

    /* Relocation type for operand */
    enum bfd_reloc_code_real reloc[MAX_OPERANDS];

    /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
       the base index byte below.  */
    const reg_entry *base_reg;
    const reg_entry *index_reg;
    unsigned int log2_scale_factor;

    /* SEG gives the seg_entries of this insn.  They are zero unless
       explicit segment overrides are given.  */
    const seg_entry *seg[2];

    /* Copied first memory operand string, for re-checking.  */
    char *memop1_string;

    /* PREFIX holds all the given prefix opcodes (usually null).
       PREFIXES is the number of prefix opcodes.  */
    unsigned int prefixes;
    unsigned char prefix[MAX_PREFIXES];

    /* Register is in low 3 bits of opcode.  */
    bfd_boolean short_form;

    /* The operand to a branch insn indicates an absolute branch.  */
    bfd_boolean jumpabsolute;

    /* Extended states.  Bit flags; YMM/ZMM deliberately include the
       narrower states they imply.  */
    enum
      {
	/* Use MMX state.  */
	xstate_mmx = 1 << 0,
	/* Use XMM state.  */
	xstate_xmm = 1 << 1,
	/* Use YMM state.  */
	xstate_ymm = 1 << 2 | xstate_xmm,
	/* Use ZMM state.  */
	xstate_zmm = 1 << 3 | xstate_ymm,
	/* Use TMM state.  */
	xstate_tmm = 1 << 4,
	/* Use MASK state.  */
	xstate_mask = 1 << 5
      } xstate;

    /* Has GOTPC or TLS relocation.  */
    bfd_boolean has_gotpc_tls_reloc;

    /* RM and SIB are the modrm byte and the sib byte where the
       addressing modes of this insn are encoded.  */
    modrm_byte rm;
    rex_byte rex;
    rex_byte vrex;
    sib_byte sib;
    vex_prefix vex;

    /* Masking attributes.  */
    struct Mask_Operation *mask;

    /* Rounding control and SAE attributes.  */
    struct RC_Operation *rounding;

    /* Broadcasting attributes.  */
    struct Broadcast_Operation *broadcast;

    /* Compressed disp8*N attribute.  */
    unsigned int memshift;

    /* Prefer load or store in encoding.  */
    enum
      {
	dir_encoding_default = 0,
	dir_encoding_load,
	dir_encoding_store,
	dir_encoding_swap
      } dir_encoding;

    /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
    enum
      {
	disp_encoding_default = 0,
	disp_encoding_8bit,
	disp_encoding_16bit,
	disp_encoding_32bit
      } disp_encoding;

    /* Prefer the REX byte in encoding.  */
    bfd_boolean rex_encoding;

    /* Disable instruction size optimization.  */
    bfd_boolean no_optimize;

    /* How to encode vector instructions.  */
    enum
      {
	vex_encoding_default = 0,
	vex_encoding_vex,
	vex_encoding_vex3,
	vex_encoding_evex,
	vex_encoding_error
      } vec_encoding;

    /* REP prefix.  */
    const char *rep_prefix;

    /* HLE prefix.  */
    const char *hle_prefix;

    /* Have BND prefix.  */
    const char *bnd_prefix;

    /* Have NOTRACK prefix.  */
    const char *notrack_prefix;

    /* Error message.  */
    enum i386_error error;
  };

typedef struct _i386_insn i386_insn;
456
/* Link RC type with corresponding string, that'll be looked for in
   asm.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

/* Mapping of the EVEX rounding-control annotations accepted in source
   (e.g. "{rn-sae}") to their rc_type values.  */
static const struct RC_name RC_NamesTable[] =
{
  {  rne, STRING_COMMA_LEN ("rn-sae") },
  {  rd,  STRING_COMMA_LEN ("rd-sae") },
  {  ru,  STRING_COMMA_LEN ("ru-sae") },
  {  rz,  STRING_COMMA_LEN ("rz-sae") },
  {  saeonly,  STRING_COMMA_LEN ("sae") },
};

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-([{}"
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;
485
#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
     && !defined (TE_GNU)				\
     && !defined (TE_LINUX)				\
     && !defined (TE_FreeBSD)				\
     && !defined (TE_DragonFly)				\
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or    0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxX";

/* Tables for lexical analysis; each is a 256-entry membership table
   indexed by character, initialized elsewhere in this file.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];
static char identifier_chars[256];
static char digit_chars[256];

/* Lexical macros.  */
#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')
#define is_identifier_char(x) (identifier_chars[(unsigned char) x])
#define is_digit_char(x) (digit_chars[(unsigned char) x])

/* All non-digit non-letter characters that may occur in an operand.  */
static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
553
/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static const templates *current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on; -1 when not parsing an operand.  */
static int this_operand = -1;

/* We support three different code modes.  FLAG_CODE variable is used
   to distinguish these.  */

enum flag_code {
	CODE_32BIT,
	CODE_16BIT,
	CODE_64BIT };

static enum flag_code flag_code;
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;
#endif
606
/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;

/* Records whether the item previously emitted was an ordinary insn,
   a directive such as .byte, or a prefix, together with where it was
   emitted.  */
static struct
  {
    segT seg;
    const char *file;
    const char *name;
    unsigned int line;
    enum last_insn_kind
      {
	last_insn_other = 0,
	last_insn_directive,
	last_insn_prefix
      } kind;
  } last_insn;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
/* Note: only operand_check has the initializer; sse_check deliberately
   defaults to check_none (0).  */
sse_check, operand_check = check_warning;
693
/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

/* Mask of branch types to align; defaults to jcc, fused jcc and jmp.  */
static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and, /* test/cmp */
    mf_cmp_alu_cmp,  /* add/sub/cmp */
    mf_cmp_incdec    /* inc/dec */
  };

/* The maximum padding size for fused jcc.  CMP like instruction can
   be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;
752
/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU feature flags of cpu we are generating instructions for.  */
static i386_cpu_flags cpu_arch_tune_flags;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static unsigned int no_cond_jump_promotion = 0;
808
/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;
852
/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* Pack a relax TYPE and SIZE into one relax_substateT: the type in the
   upper bits, the size in the low two bits.  */
#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
891
/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals adds 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals adds 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};
938
939 static const arch_entry cpu_arch[] =
940 {
941 /* Do not replace the first two entries - i386_target_format()
942 relies on them being there in this order. */
943 { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
944 CPU_GENERIC32_FLAGS, 0 },
945 { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
946 CPU_GENERIC64_FLAGS, 0 },
947 { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
948 CPU_NONE_FLAGS, 0 },
949 { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
950 CPU_I186_FLAGS, 0 },
951 { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
952 CPU_I286_FLAGS, 0 },
953 { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
954 CPU_I386_FLAGS, 0 },
955 { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
956 CPU_I486_FLAGS, 0 },
957 { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
958 CPU_I586_FLAGS, 0 },
959 { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
960 CPU_I686_FLAGS, 0 },
961 { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
962 CPU_I586_FLAGS, 0 },
963 { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
964 CPU_PENTIUMPRO_FLAGS, 0 },
965 { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
966 CPU_P2_FLAGS, 0 },
967 { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
968 CPU_P3_FLAGS, 0 },
969 { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
970 CPU_P4_FLAGS, 0 },
971 { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
972 CPU_CORE_FLAGS, 0 },
973 { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
974 CPU_NOCONA_FLAGS, 0 },
975 { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
976 CPU_CORE_FLAGS, 1 },
977 { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
978 CPU_CORE_FLAGS, 0 },
979 { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
980 CPU_CORE2_FLAGS, 1 },
981 { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
982 CPU_CORE2_FLAGS, 0 },
983 { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
984 CPU_COREI7_FLAGS, 0 },
985 { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
986 CPU_L1OM_FLAGS, 0 },
987 { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
988 CPU_K1OM_FLAGS, 0 },
989 { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
990 CPU_IAMCU_FLAGS, 0 },
991 { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
992 CPU_K6_FLAGS, 0 },
993 { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
994 CPU_K6_2_FLAGS, 0 },
995 { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
996 CPU_ATHLON_FLAGS, 0 },
997 { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
998 CPU_K8_FLAGS, 1 },
999 { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
1000 CPU_K8_FLAGS, 0 },
1001 { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
1002 CPU_K8_FLAGS, 0 },
1003 { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
1004 CPU_AMDFAM10_FLAGS, 0 },
1005 { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
1006 CPU_BDVER1_FLAGS, 0 },
1007 { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
1008 CPU_BDVER2_FLAGS, 0 },
1009 { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
1010 CPU_BDVER3_FLAGS, 0 },
1011 { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1012 CPU_BDVER4_FLAGS, 0 },
1013 { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1014 CPU_ZNVER1_FLAGS, 0 },
1015 { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1016 CPU_ZNVER2_FLAGS, 0 },
1017 { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1018 CPU_BTVER1_FLAGS, 0 },
1019 { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1020 CPU_BTVER2_FLAGS, 0 },
1021 { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1022 CPU_8087_FLAGS, 0 },
1023 { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1024 CPU_287_FLAGS, 0 },
1025 { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1026 CPU_387_FLAGS, 0 },
1027 { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1028 CPU_687_FLAGS, 0 },
1029 { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1030 CPU_CMOV_FLAGS, 0 },
1031 { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1032 CPU_FXSR_FLAGS, 0 },
1033 { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1034 CPU_MMX_FLAGS, 0 },
1035 { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1036 CPU_SSE_FLAGS, 0 },
1037 { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1038 CPU_SSE2_FLAGS, 0 },
1039 { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1040 CPU_SSE3_FLAGS, 0 },
1041 { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1042 CPU_SSE4A_FLAGS, 0 },
1043 { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1044 CPU_SSSE3_FLAGS, 0 },
1045 { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1046 CPU_SSE4_1_FLAGS, 0 },
1047 { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1048 CPU_SSE4_2_FLAGS, 0 },
1049 { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1050 CPU_SSE4_2_FLAGS, 0 },
1051 { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1052 CPU_AVX_FLAGS, 0 },
1053 { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1054 CPU_AVX2_FLAGS, 0 },
1055 { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1056 CPU_AVX512F_FLAGS, 0 },
1057 { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1058 CPU_AVX512CD_FLAGS, 0 },
1059 { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1060 CPU_AVX512ER_FLAGS, 0 },
1061 { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1062 CPU_AVX512PF_FLAGS, 0 },
1063 { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1064 CPU_AVX512DQ_FLAGS, 0 },
1065 { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1066 CPU_AVX512BW_FLAGS, 0 },
1067 { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1068 CPU_AVX512VL_FLAGS, 0 },
1069 { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1070 CPU_VMX_FLAGS, 0 },
1071 { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1072 CPU_VMFUNC_FLAGS, 0 },
1073 { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1074 CPU_SMX_FLAGS, 0 },
1075 { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1076 CPU_XSAVE_FLAGS, 0 },
1077 { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1078 CPU_XSAVEOPT_FLAGS, 0 },
1079 { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1080 CPU_XSAVEC_FLAGS, 0 },
1081 { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1082 CPU_XSAVES_FLAGS, 0 },
1083 { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1084 CPU_AES_FLAGS, 0 },
1085 { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1086 CPU_PCLMUL_FLAGS, 0 },
1087 { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1088 CPU_PCLMUL_FLAGS, 1 },
1089 { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1090 CPU_FSGSBASE_FLAGS, 0 },
1091 { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1092 CPU_RDRND_FLAGS, 0 },
1093 { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1094 CPU_F16C_FLAGS, 0 },
1095 { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1096 CPU_BMI2_FLAGS, 0 },
1097 { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1098 CPU_FMA_FLAGS, 0 },
1099 { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1100 CPU_FMA4_FLAGS, 0 },
1101 { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1102 CPU_XOP_FLAGS, 0 },
1103 { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1104 CPU_LWP_FLAGS, 0 },
1105 { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1106 CPU_MOVBE_FLAGS, 0 },
1107 { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1108 CPU_CX16_FLAGS, 0 },
1109 { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1110 CPU_EPT_FLAGS, 0 },
1111 { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1112 CPU_LZCNT_FLAGS, 0 },
1113 { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1114 CPU_POPCNT_FLAGS, 0 },
1115 { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1116 CPU_HLE_FLAGS, 0 },
1117 { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1118 CPU_RTM_FLAGS, 0 },
1119 { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1120 CPU_INVPCID_FLAGS, 0 },
1121 { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1122 CPU_CLFLUSH_FLAGS, 0 },
1123 { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1124 CPU_NOP_FLAGS, 0 },
1125 { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1126 CPU_SYSCALL_FLAGS, 0 },
1127 { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1128 CPU_RDTSCP_FLAGS, 0 },
1129 { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1130 CPU_3DNOW_FLAGS, 0 },
1131 { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1132 CPU_3DNOWA_FLAGS, 0 },
1133 { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1134 CPU_PADLOCK_FLAGS, 0 },
1135 { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1136 CPU_SVME_FLAGS, 1 },
1137 { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1138 CPU_SVME_FLAGS, 0 },
1139 { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1140 CPU_SSE4A_FLAGS, 0 },
1141 { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1142 CPU_ABM_FLAGS, 0 },
1143 { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1144 CPU_BMI_FLAGS, 0 },
1145 { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1146 CPU_TBM_FLAGS, 0 },
1147 { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1148 CPU_ADX_FLAGS, 0 },
1149 { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1150 CPU_RDSEED_FLAGS, 0 },
1151 { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1152 CPU_PRFCHW_FLAGS, 0 },
1153 { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1154 CPU_SMAP_FLAGS, 0 },
1155 { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1156 CPU_MPX_FLAGS, 0 },
1157 { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1158 CPU_SHA_FLAGS, 0 },
1159 { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1160 CPU_CLFLUSHOPT_FLAGS, 0 },
1161 { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1162 CPU_PREFETCHWT1_FLAGS, 0 },
1163 { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1164 CPU_SE1_FLAGS, 0 },
1165 { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1166 CPU_CLWB_FLAGS, 0 },
1167 { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1168 CPU_AVX512IFMA_FLAGS, 0 },
1169 { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1170 CPU_AVX512VBMI_FLAGS, 0 },
1171 { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1172 CPU_AVX512_4FMAPS_FLAGS, 0 },
1173 { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1174 CPU_AVX512_4VNNIW_FLAGS, 0 },
1175 { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1176 CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1177 { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1178 CPU_AVX512_VBMI2_FLAGS, 0 },
1179 { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1180 CPU_AVX512_VNNI_FLAGS, 0 },
1181 { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1182 CPU_AVX512_BITALG_FLAGS, 0 },
1183 { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1184 CPU_CLZERO_FLAGS, 0 },
1185 { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1186 CPU_MWAITX_FLAGS, 0 },
1187 { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1188 CPU_OSPKE_FLAGS, 0 },
1189 { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1190 CPU_RDPID_FLAGS, 0 },
1191 { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1192 CPU_PTWRITE_FLAGS, 0 },
1193 { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1194 CPU_IBT_FLAGS, 0 },
1195 { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1196 CPU_SHSTK_FLAGS, 0 },
1197 { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1198 CPU_GFNI_FLAGS, 0 },
1199 { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1200 CPU_VAES_FLAGS, 0 },
1201 { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1202 CPU_VPCLMULQDQ_FLAGS, 0 },
1203 { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1204 CPU_WBNOINVD_FLAGS, 0 },
1205 { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1206 CPU_PCONFIG_FLAGS, 0 },
1207 { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1208 CPU_WAITPKG_FLAGS, 0 },
1209 { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1210 CPU_CLDEMOTE_FLAGS, 0 },
1211 { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1212 CPU_AMX_INT8_FLAGS, 0 },
1213 { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1214 CPU_AMX_BF16_FLAGS, 0 },
1215 { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1216 CPU_AMX_TILE_FLAGS, 0 },
1217 { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1218 CPU_MOVDIRI_FLAGS, 0 },
1219 { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1220 CPU_MOVDIR64B_FLAGS, 0 },
1221 { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1222 CPU_AVX512_BF16_FLAGS, 0 },
1223 { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1224 CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1225 { STRING_COMMA_LEN (".tdx"), PROCESSOR_UNKNOWN,
1226 CPU_TDX_FLAGS, 0 },
1227 { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1228 CPU_ENQCMD_FLAGS, 0 },
1229 { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1230 CPU_SERIALIZE_FLAGS, 0 },
1231 { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1232 CPU_RDPRU_FLAGS, 0 },
1233 { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1234 CPU_MCOMMIT_FLAGS, 0 },
1235 { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1236 CPU_SEV_ES_FLAGS, 0 },
1237 { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1238 CPU_TSXLDTRK_FLAGS, 0 },
1239 { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
1240 CPU_KL_FLAGS, 0 },
1241 { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
1242 CPU_WIDEKL_FLAGS, 0 },
1243 { STRING_COMMA_LEN (".uintr"), PROCESSOR_UNKNOWN,
1244 CPU_UINTR_FLAGS, 0 },
1245 { STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN,
1246 CPU_HRESET_FLAGS, 0 },
1247 };
1248
/* Table of ".arch" sub-directive names of the form "noFEATURE", each
   paired with the set of CPU feature flags covering that feature and
   everything depending on it (the generated CPU_ANY_*_FLAGS sets).
   NOTE(review): presumably consumed by set_cpu_arch to turn individual
   ISA extensions back off -- confirm against the .arch handler.  */
static const noarch_entry cpu_noarch[] =
{
  { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
  { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
  { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
  { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
  { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
  { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
  { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
  { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
  { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
  { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
  { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
  { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
  { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
  { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
  { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
  { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
  { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
  { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
  { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
  { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
  { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
  { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
  { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
  { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
  { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
  { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
  { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
  { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
  { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
  { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
  { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
  { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
  { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
  { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
  { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
  { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
  { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
  { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
  { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
  { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
  { STRING_COMMA_LEN ("noavx512_vp2intersect"),
    CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
  { STRING_COMMA_LEN ("notdx"), CPU_ANY_TDX_FLAGS },
  { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
  { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
  { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
  { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
  { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
  { STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS },
  { STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS },
};
1302
1303 #ifdef I386COFF
1304 /* Like s_lcomm_internal in gas/read.c but the alignment string
1305 is allowed to be optional. */
1306
1307 static symbolS *
1308 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1309 {
1310 addressT align = 0;
1311
1312 SKIP_WHITESPACE ();
1313
1314 if (needs_align
1315 && *input_line_pointer == ',')
1316 {
1317 align = parse_align (needs_align - 1);
1318
1319 if (align == (addressT) -1)
1320 return NULL;
1321 }
1322 else
1323 {
1324 if (size >= 8)
1325 align = 3;
1326 else if (size >= 4)
1327 align = 2;
1328 else if (size >= 2)
1329 align = 1;
1330 else
1331 align = 0;
1332 }
1333
1334 bss_alloc (symbolP, size, align);
1335 return symbolP;
1336 }
1337
/* Handler for the COFF .lcomm pseudo-op; NEEDS_ALIGN is the argument
   from md_pseudo_table.  NOTE(review): the value is doubled here,
   presumably to match s_comm_internal's encoding of its needs_align
   argument -- confirm against s_comm_internal in gas/read.c.  */
static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
1343 #endif
1344
/* x86-specific pseudo-ops: maps directive names to handler functions
   and the integer argument each handler receives.  Terminated by a
   null entry.  */
const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
#endif
  {0, 0, 0}
};
1391
/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table mapping instruction mnemonics to their templates.  */
static htab_t op_hash;

/* Hash table mapping register names to their register entries.  */
static htab_t reg_hash;
1400 \f
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};		/* leal 0(%esi,1),%esi	*/
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};		/* lea 0W(%si),%si	*/
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8	       */
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32	       */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32	       */
/* 32-bit NOPs patterns.  Index N-1 holds the N-byte pattern; there is
   no 5-byte entry.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  Index N-1 holds the N-byte pattern.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns, using the multi-byte 0F 1F NOP
   where available.  Index N-1 holds the N-byte pattern.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1466
/* Emit COUNT bytes of NOPs at WHERE, drawn from the pattern table PATT
   (index N-1 holds the N-byte pattern), using NOP instructions of at
   most MAX_SINGLE_NOP_SIZE bytes each.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  int remainder;
  int pos = 0;
  const unsigned char *chunk;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  /* Pick the largest available NOP no bigger than the requested size;
     step down one slot when the table has a hole at that size.  */
  chunk = patt[max_single_nop_size - 1];
  if (chunk == NULL)
    chunk = patt[--max_single_nop_size - 1];

  /* Emit full-size NOPs first, leaving the remainder for the end.  */
  remainder = count % max_single_nop_size;
  for (count -= remainder; pos < count; pos += max_single_nop_size)
    memcpy (where + pos, chunk, max_single_nop_size);

  if (remainder)
    {
      chunk = patt[remainder - 1];
      if (chunk != NULL)
	memcpy (where + pos, chunk, remainder);
      else
	{
	  /* No pattern of exactly the remaining size; use the next
	     smaller one followed by a single one-byte NOP.  */
	  remainder--;
	  chunk = patt[remainder - 1];
	  memcpy (where + pos, chunk, remainder);
	  where[pos + remainder] = *patt[0];
	}
    }
}
1518
1519 static INLINE int
1520 fits_in_imm7 (offsetT num)
1521 {
1522 return (num & 0x7f) == num;
1523 }
1524
1525 static INLINE int
1526 fits_in_imm31 (offsetT num)
1527 {
1528 return (num & 0x7fffffff) == num;
1529 }
1530
/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT (0 means "no explicit limit").  FRAGP
   selects the applicable frag kind and carries the per-frag ISA/tune
   information used to pick a NOP pattern table.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      /* Any other frag kind gets no NOP padding.  */
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit. When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.   */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
	  switch (cpu_arch_tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_L1OM:
	    case PROCESSOR_K1OM:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      patt = f32_patt;
	      break;
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	    case PROCESSOR_GENERIC32:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_L1OM:
	    case PROCESSOR_K1OM:
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_GENERIC64:
	      patt = alt_patt;
	      break;
	    }
	}

      if (patt == f32_patt)
	{
	  max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  /* When the fill would take too many NOP instructions, emit a jump
     over the padding instead, then only fill the jumped-over bytes.  */
  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
1744
1745 static INLINE int
1746 operand_type_all_zero (const union i386_operand_type *x)
1747 {
1748 switch (ARRAY_SIZE(x->array))
1749 {
1750 case 3:
1751 if (x->array[2])
1752 return 0;
1753 /* Fall through. */
1754 case 2:
1755 if (x->array[1])
1756 return 0;
1757 /* Fall through. */
1758 case 1:
1759 return !x->array[0];
1760 default:
1761 abort ();
1762 }
1763 }
1764
1765 static INLINE void
1766 operand_type_set (union i386_operand_type *x, unsigned int v)
1767 {
1768 switch (ARRAY_SIZE(x->array))
1769 {
1770 case 3:
1771 x->array[2] = v;
1772 /* Fall through. */
1773 case 2:
1774 x->array[1] = v;
1775 /* Fall through. */
1776 case 1:
1777 x->array[0] = v;
1778 /* Fall through. */
1779 break;
1780 default:
1781 abort ();
1782 }
1783
1784 x->bitfield.class = ClassNone;
1785 x->bitfield.instance = InstanceNone;
1786 }
1787
1788 static INLINE int
1789 operand_type_equal (const union i386_operand_type *x,
1790 const union i386_operand_type *y)
1791 {
1792 switch (ARRAY_SIZE(x->array))
1793 {
1794 case 3:
1795 if (x->array[2] != y->array[2])
1796 return 0;
1797 /* Fall through. */
1798 case 2:
1799 if (x->array[1] != y->array[1])
1800 return 0;
1801 /* Fall through. */
1802 case 1:
1803 return x->array[0] == y->array[0];
1804 break;
1805 default:
1806 abort ();
1807 }
1808 }
1809
1810 static INLINE int
1811 cpu_flags_all_zero (const union i386_cpu_flags *x)
1812 {
1813 switch (ARRAY_SIZE(x->array))
1814 {
1815 case 4:
1816 if (x->array[3])
1817 return 0;
1818 /* Fall through. */
1819 case 3:
1820 if (x->array[2])
1821 return 0;
1822 /* Fall through. */
1823 case 2:
1824 if (x->array[1])
1825 return 0;
1826 /* Fall through. */
1827 case 1:
1828 return !x->array[0];
1829 default:
1830 abort ();
1831 }
1832 }
1833
1834 static INLINE int
1835 cpu_flags_equal (const union i386_cpu_flags *x,
1836 const union i386_cpu_flags *y)
1837 {
1838 switch (ARRAY_SIZE(x->array))
1839 {
1840 case 4:
1841 if (x->array[3] != y->array[3])
1842 return 0;
1843 /* Fall through. */
1844 case 3:
1845 if (x->array[2] != y->array[2])
1846 return 0;
1847 /* Fall through. */
1848 case 2:
1849 if (x->array[1] != y->array[1])
1850 return 0;
1851 /* Fall through. */
1852 case 1:
1853 return x->array[0] == y->array[0];
1854 break;
1855 default:
1856 abort ();
1857 }
1858 }
1859
1860 static INLINE int
1861 cpu_flags_check_cpu64 (i386_cpu_flags f)
1862 {
1863 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1864 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1865 }
1866
1867 static INLINE i386_cpu_flags
1868 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1869 {
1870 switch (ARRAY_SIZE (x.array))
1871 {
1872 case 4:
1873 x.array [3] &= y.array [3];
1874 /* Fall through. */
1875 case 3:
1876 x.array [2] &= y.array [2];
1877 /* Fall through. */
1878 case 2:
1879 x.array [1] &= y.array [1];
1880 /* Fall through. */
1881 case 1:
1882 x.array [0] &= y.array [0];
1883 break;
1884 default:
1885 abort ();
1886 }
1887 return x;
1888 }
1889
1890 static INLINE i386_cpu_flags
1891 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1892 {
1893 switch (ARRAY_SIZE (x.array))
1894 {
1895 case 4:
1896 x.array [3] |= y.array [3];
1897 /* Fall through. */
1898 case 3:
1899 x.array [2] |= y.array [2];
1900 /* Fall through. */
1901 case 2:
1902 x.array [1] |= y.array [1];
1903 /* Fall through. */
1904 case 1:
1905 x.array [0] |= y.array [0];
1906 break;
1907 default:
1908 abort ();
1909 }
1910 return x;
1911 }
1912
1913 static INLINE i386_cpu_flags
1914 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1915 {
1916 switch (ARRAY_SIZE (x.array))
1917 {
1918 case 4:
1919 x.array [3] &= ~y.array [3];
1920 /* Fall through. */
1921 case 3:
1922 x.array [2] &= ~y.array [2];
1923 /* Fall through. */
1924 case 2:
1925 x.array [1] &= ~y.array [1];
1926 /* Fall through. */
1927 case 1:
1928 x.array [0] &= ~y.array [0];
1929 break;
1930 default:
1931 abort ();
1932 }
1933 return x;
1934 }
1935
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

/* Bit 0: the template's ISA requirements are met by the enabled arch
   flags.  Bit 1: the template is usable in the current code mode
   (see cpu_flags_check_cpu64).  */
#define CPU_FLAGS_ARCH_MATCH		0x1
#define CPU_FLAGS_64BIT_MATCH		0x2

#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)

/* Return CPU flags match bits for template T: a combination of
   CPU_FLAGS_ARCH_MATCH and CPU_FLAGS_64BIT_MATCH as defined above.  */

static int
cpu_flags_match (const insn_template *t)
{
  i386_cpu_flags x = t->cpu_flags;
  int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;

  /* The mode bits were consumed above; drop them before comparing
     the remaining feature requirements.  */
  x.bitfield.cpu64 = 0;
  x.bitfield.cpuno64 = 0;

  if (cpu_flags_all_zero (&x))
    {
      /* This instruction is available on all archs.  */
      match |= CPU_FLAGS_ARCH_MATCH;
    }
  else
    {
      /* This instruction is available only on some archs.  */
      i386_cpu_flags cpu = cpu_arch_flags;

      /* AVX512VL is no standalone feature - match it and then strip it.  */
      if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
	return match;
      x.bitfield.cpuavx512vl = 0;

      cpu = cpu_flags_and (x, cpu);
      if (!cpu_flags_all_zero (&cpu))
	{
	  if (x.bitfield.cpuavx)
	    {
	      /* We need to check a few extra flags with AVX.  */
	      if (cpu.bitfield.cpuavx
		  && (!t->opcode_modifier.sse2avx
		      || (sse2avx && !i.prefix[DATA_PREFIX]))
		  && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
		  && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
		match |= CPU_FLAGS_ARCH_MATCH;
	    }
	  else if (x.bitfield.cpuavx512f)
	    {
	      /* We need to check a few extra flags with AVX512F.  */
	      if (cpu.bitfield.cpuavx512f
		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
		  && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
		match |= CPU_FLAGS_ARCH_MATCH;
	    }
	  else
	    match |= CPU_FLAGS_ARCH_MATCH;
	}
    }
  return match;
}
1999
2000 static INLINE i386_operand_type
2001 operand_type_and (i386_operand_type x, i386_operand_type y)
2002 {
2003 if (x.bitfield.class != y.bitfield.class)
2004 x.bitfield.class = ClassNone;
2005 if (x.bitfield.instance != y.bitfield.instance)
2006 x.bitfield.instance = InstanceNone;
2007
2008 switch (ARRAY_SIZE (x.array))
2009 {
2010 case 3:
2011 x.array [2] &= y.array [2];
2012 /* Fall through. */
2013 case 2:
2014 x.array [1] &= y.array [1];
2015 /* Fall through. */
2016 case 1:
2017 x.array [0] &= y.array [0];
2018 break;
2019 default:
2020 abort ();
2021 }
2022 return x;
2023 }
2024
2025 static INLINE i386_operand_type
2026 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2027 {
2028 gas_assert (y.bitfield.class == ClassNone);
2029 gas_assert (y.bitfield.instance == InstanceNone);
2030
2031 switch (ARRAY_SIZE (x.array))
2032 {
2033 case 3:
2034 x.array [2] &= ~y.array [2];
2035 /* Fall through. */
2036 case 2:
2037 x.array [1] &= ~y.array [1];
2038 /* Fall through. */
2039 case 1:
2040 x.array [0] &= ~y.array [0];
2041 break;
2042 default:
2043 abort ();
2044 }
2045 return x;
2046 }
2047
2048 static INLINE i386_operand_type
2049 operand_type_or (i386_operand_type x, i386_operand_type y)
2050 {
2051 gas_assert (x.bitfield.class == ClassNone ||
2052 y.bitfield.class == ClassNone ||
2053 x.bitfield.class == y.bitfield.class);
2054 gas_assert (x.bitfield.instance == InstanceNone ||
2055 y.bitfield.instance == InstanceNone ||
2056 x.bitfield.instance == y.bitfield.instance);
2057
2058 switch (ARRAY_SIZE (x.array))
2059 {
2060 case 3:
2061 x.array [2] |= y.array [2];
2062 /* Fall through. */
2063 case 2:
2064 x.array [1] |= y.array [1];
2065 /* Fall through. */
2066 case 1:
2067 x.array [0] |= y.array [0];
2068 break;
2069 default:
2070 abort ();
2071 }
2072 return x;
2073 }
2074
2075 static INLINE i386_operand_type
2076 operand_type_xor (i386_operand_type x, i386_operand_type y)
2077 {
2078 gas_assert (y.bitfield.class == ClassNone);
2079 gas_assert (y.bitfield.instance == InstanceNone);
2080
2081 switch (ARRAY_SIZE (x.array))
2082 {
2083 case 3:
2084 x.array [2] ^= y.array [2];
2085 /* Fall through. */
2086 case 2:
2087 x.array [1] ^= y.array [1];
2088 /* Fall through. */
2089 case 1:
2090 x.array [0] ^= y.array [0];
2091 break;
2092 default:
2093 abort ();
2094 }
2095 return x;
2096 }
2097
/* Commonly used operand type masks, initialized from the generated
   OPERAND_TYPE_* templates (see opcodes/i386-init.h).  */
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2115
/* Operand categories recognized by operand_type_check ().  */
enum operand_type
{
  reg,
  imm,
  disp,
  anymem	/* Displacement and/or base/index addressing form.  */
};
2123
2124 static INLINE int
2125 operand_type_check (i386_operand_type t, enum operand_type c)
2126 {
2127 switch (c)
2128 {
2129 case reg:
2130 return t.bitfield.class == Reg;
2131
2132 case imm:
2133 return (t.bitfield.imm8
2134 || t.bitfield.imm8s
2135 || t.bitfield.imm16
2136 || t.bitfield.imm32
2137 || t.bitfield.imm32s
2138 || t.bitfield.imm64);
2139
2140 case disp:
2141 return (t.bitfield.disp8
2142 || t.bitfield.disp16
2143 || t.bitfield.disp32
2144 || t.bitfield.disp32s
2145 || t.bitfield.disp64);
2146
2147 case anymem:
2148 return (t.bitfield.disp8
2149 || t.bitfield.disp16
2150 || t.bitfield.disp32
2151 || t.bitfield.disp32s
2152 || t.bitfield.disp64
2153 || t.bitfield.baseindex);
2154
2155 default:
2156 abort ();
2157 }
2158
2159 return 0;
2160 }
2161
2162 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2163 between operand GIVEN and opeand WANTED for instruction template T. */
2164
2165 static INLINE int
2166 match_operand_size (const insn_template *t, unsigned int wanted,
2167 unsigned int given)
2168 {
2169 return !((i.types[given].bitfield.byte
2170 && !t->operand_types[wanted].bitfield.byte)
2171 || (i.types[given].bitfield.word
2172 && !t->operand_types[wanted].bitfield.word)
2173 || (i.types[given].bitfield.dword
2174 && !t->operand_types[wanted].bitfield.dword)
2175 || (i.types[given].bitfield.qword
2176 && !t->operand_types[wanted].bitfield.qword)
2177 || (i.types[given].bitfield.tbyte
2178 && !t->operand_types[wanted].bitfield.tbyte));
2179 }
2180
2181 /* Return 1 if there is no conflict in SIMD register between operand
2182 GIVEN and opeand WANTED for instruction template T. */
2183
2184 static INLINE int
2185 match_simd_size (const insn_template *t, unsigned int wanted,
2186 unsigned int given)
2187 {
2188 return !((i.types[given].bitfield.xmmword
2189 && !t->operand_types[wanted].bitfield.xmmword)
2190 || (i.types[given].bitfield.ymmword
2191 && !t->operand_types[wanted].bitfield.ymmword)
2192 || (i.types[given].bitfield.zmmword
2193 && !t->operand_types[wanted].bitfield.zmmword)
2194 || (i.types[given].bitfield.tmmword
2195 && !t->operand_types[wanted].bitfield.tmmword));
2196 }
2197
/* Return 1 if there is no conflict in any size between operand GIVEN
   and operand WANTED for instruction template T.  */

static INLINE int
match_mem_size (const insn_template *t, unsigned int wanted,
		unsigned int given)
{
  return (match_operand_size (t, wanted, given)
	  /* An unspecified size on the given operand only matches a
	     template operand that also leaves the size open, except when
	     broadcasting (where the size comes from the broadcast).  */
	  && !((i.types[given].bitfield.unspecified
		&& !i.broadcast
		&& !t->operand_types[wanted].bitfield.unspecified)
	       || (i.types[given].bitfield.fword
		   && !t->operand_types[wanted].bitfield.fword)
	       /* For scalar opcode templates to allow register and memory
		  operands at the same time, some special casing is needed
		  here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
		  down-conversion vpmov*.  */
	       || ((t->operand_types[wanted].bitfield.class == RegSIMD
		    && t->operand_types[wanted].bitfield.byte
		       + t->operand_types[wanted].bitfield.word
		       + t->operand_types[wanted].bitfield.dword
		       + t->operand_types[wanted].bitfield.qword
		       > !!t->opcode_modifier.broadcast)
		   /* Scalar template operand: reject vector-sized memory.  */
		   ? (i.types[given].bitfield.xmmword
		      || i.types[given].bitfield.ymmword
		      || i.types[given].bitfield.zmmword)
		   /* Otherwise the plain SIMD width check applies.  */
		   : !match_simd_size(t, wanted, given))));
}
2226
/* Return value has MATCH_STRAIGHT set if there is no size conflict on any
   operands for instruction template T, and it has MATCH_REVERSE set if there
   is no size conflict on any operands for the template with operands reversed
   (and the template allows for reversing in the first place).  */

#define MATCH_STRAIGHT 1
#define MATCH_REVERSE 2

static INLINE unsigned int
operand_size_match (const insn_template *t)
{
  unsigned int j, match = MATCH_STRAIGHT;

  /* Don't check non-absolute jump instructions.  */
  if (t->opcode_modifier.jump
      && t->opcode_modifier.jump != JUMP_ABSOLUTE)
    return match;

  /* Check memory and accumulator operand size.  */
  for (j = 0; j < i.operands; j++)
    {
      /* Templates accepting any size need no check for non-register
	 operands.  */
      if (i.types[j].bitfield.class != Reg
	  && i.types[j].bitfield.class != RegSIMD
	  && t->opcode_modifier.anysize)
	continue;

      if (t->operand_types[j].bitfield.class == Reg
	  && !match_operand_size (t, j, j))
	{
	  match = 0;
	  break;
	}

      if (t->operand_types[j].bitfield.class == RegSIMD
	  && !match_simd_size (t, j, j))
	{
	  match = 0;
	  break;
	}

      /* Accumulator operands must satisfy both GPR and SIMD sizing.  */
      if (t->operand_types[j].bitfield.instance == Accum
	  && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
	{
	  match = 0;
	  break;
	}

      if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
	{
	  match = 0;
	  break;
	}
    }

  /* If the template is not reversible (no D modifier), the straight
     result is final; the mismatch label is also the exit path for the
     reverse checks below.  */
  if (!t->opcode_modifier.d)
    {
    mismatch:
      if (!match)
	i.error = operand_size_mismatch;
      return match;
    }

  /* Check reverse.  */
  gas_assert (i.operands >= 2 && i.operands <= 3);

  for (j = 0; j < i.operands; j++)
    {
      /* Template operand J is checked against the mirrored given
	 operand.  */
      unsigned int given = i.operands - j - 1;

      if (t->operand_types[j].bitfield.class == Reg
	  && !match_operand_size (t, j, given))
	goto mismatch;

      if (t->operand_types[j].bitfield.class == RegSIMD
	  && !match_simd_size (t, j, given))
	goto mismatch;

      if (t->operand_types[j].bitfield.instance == Accum
	  && (!match_operand_size (t, j, given)
	      || !match_simd_size (t, j, given)))
	goto mismatch;

      if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
	goto mismatch;
    }

  return match | MATCH_REVERSE;
}
2315
2316 static INLINE int
2317 operand_type_match (i386_operand_type overlap,
2318 i386_operand_type given)
2319 {
2320 i386_operand_type temp = overlap;
2321
2322 temp.bitfield.unspecified = 0;
2323 temp.bitfield.byte = 0;
2324 temp.bitfield.word = 0;
2325 temp.bitfield.dword = 0;
2326 temp.bitfield.fword = 0;
2327 temp.bitfield.qword = 0;
2328 temp.bitfield.tbyte = 0;
2329 temp.bitfield.xmmword = 0;
2330 temp.bitfield.ymmword = 0;
2331 temp.bitfield.zmmword = 0;
2332 temp.bitfield.tmmword = 0;
2333 if (operand_type_all_zero (&temp))
2334 goto mismatch;
2335
2336 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2337 return 1;
2338
2339 mismatch:
2340 i.error = operand_type_mismatch;
2341 return 0;
2342 }
2343
/* If given types g0 and g1 are registers they must be of the same type
   unless the expected operand type register overlap is null.
   Some Intel syntax memory operand size checking also happens here.  */

static INLINE int
operand_type_register_match (i386_operand_type g0,
			     i386_operand_type t0,
			     i386_operand_type g1,
			     i386_operand_type t1)
{
  /* Ignore g0 unless it is a register, or a sized memory operand being
     matched against a register template operand (Intel syntax).  */
  if (g0.bitfield.class != Reg
      && g0.bitfield.class != RegSIMD
      && (!operand_type_check (g0, anymem)
	  || g0.bitfield.unspecified
	  || (t0.bitfield.class != Reg
	      && t0.bitfield.class != RegSIMD)))
    return 1;

  /* Same for g1.  */
  if (g1.bitfield.class != Reg
      && g1.bitfield.class != RegSIMD
      && (!operand_type_check (g1, anymem)
	  || g1.bitfield.unspecified
	  || (t1.bitfield.class != Reg
	      && t1.bitfield.class != RegSIMD)))
    return 1;

  /* Both operands have identical size sets: fine.  */
  if (g0.bitfield.byte == g1.bitfield.byte
      && g0.bitfield.word == g1.bitfield.word
      && g0.bitfield.dword == g1.bitfield.dword
      && g0.bitfield.qword == g1.bitfield.qword
      && g0.bitfield.xmmword == g1.bitfield.xmmword
      && g0.bitfield.ymmword == g1.bitfield.ymmword
      && g0.bitfield.zmmword == g1.bitfield.zmmword)
    return 1;

  /* Sizes differ, but the templates share no common size either, so the
     difference cannot matter for this template pair.  */
  if (!(t0.bitfield.byte & t1.bitfield.byte)
      && !(t0.bitfield.word & t1.bitfield.word)
      && !(t0.bitfield.dword & t1.bitfield.dword)
      && !(t0.bitfield.qword & t1.bitfield.qword)
      && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
      && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
      && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
    return 1;

  i.error = register_type_mismatch;

  return 0;
}
2392
2393 static INLINE unsigned int
2394 register_number (const reg_entry *r)
2395 {
2396 unsigned int nr = r->reg_num;
2397
2398 if (r->reg_flags & RegRex)
2399 nr += 8;
2400
2401 if (r->reg_flags & RegVRex)
2402 nr += 16;
2403
2404 return nr;
2405 }
2406
2407 static INLINE unsigned int
2408 mode_from_disp_size (i386_operand_type t)
2409 {
2410 if (t.bitfield.disp8)
2411 return 1;
2412 else if (t.bitfield.disp16
2413 || t.bitfield.disp32
2414 || t.bitfield.disp32s)
2415 return 2;
2416 else
2417 return 0;
2418 }
2419
2420 static INLINE int
2421 fits_in_signed_byte (addressT num)
2422 {
2423 return num + 0x80 <= 0xff;
2424 }
2425
2426 static INLINE int
2427 fits_in_unsigned_byte (addressT num)
2428 {
2429 return num <= 0xff;
2430 }
2431
2432 static INLINE int
2433 fits_in_unsigned_word (addressT num)
2434 {
2435 return num <= 0xffff;
2436 }
2437
2438 static INLINE int
2439 fits_in_signed_word (addressT num)
2440 {
2441 return num + 0x8000 <= 0xffff;
2442 }
2443
2444 static INLINE int
2445 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2446 {
2447 #ifndef BFD64
2448 return 1;
2449 #else
2450 return num + 0x80000000 <= 0xffffffff;
2451 #endif
2452 } /* fits_in_signed_long() */
2453
2454 static INLINE int
2455 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2456 {
2457 #ifndef BFD64
2458 return 1;
2459 #else
2460 return num <= 0xffffffff;
2461 #endif
2462 } /* fits_in_unsigned_long() */
2463
2464 static INLINE int
2465 fits_in_disp8 (offsetT num)
2466 {
2467 int shift = i.memshift;
2468 unsigned int mask;
2469
2470 if (shift == -1)
2471 abort ();
2472
2473 mask = (1 << shift) - 1;
2474
2475 /* Return 0 if NUM isn't properly aligned. */
2476 if ((num & mask))
2477 return 0;
2478
2479 /* Check if NUM will fit in 8bit after shift. */
2480 return fits_in_signed_byte (num >> shift);
2481 }
2482
2483 static INLINE int
2484 fits_in_imm4 (offsetT num)
2485 {
2486 return (num & 0xf) == num;
2487 }
2488
2489 static i386_operand_type
2490 smallest_imm_type (offsetT num)
2491 {
2492 i386_operand_type t;
2493
2494 operand_type_set (&t, 0);
2495 t.bitfield.imm64 = 1;
2496
2497 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2498 {
2499 /* This code is disabled on the 486 because all the Imm1 forms
2500 in the opcode table are slower on the i486. They're the
2501 versions with the implicitly specified single-position
2502 displacement, which has another syntax if you really want to
2503 use that form. */
2504 t.bitfield.imm1 = 1;
2505 t.bitfield.imm8 = 1;
2506 t.bitfield.imm8s = 1;
2507 t.bitfield.imm16 = 1;
2508 t.bitfield.imm32 = 1;
2509 t.bitfield.imm32s = 1;
2510 }
2511 else if (fits_in_signed_byte (num))
2512 {
2513 t.bitfield.imm8 = 1;
2514 t.bitfield.imm8s = 1;
2515 t.bitfield.imm16 = 1;
2516 t.bitfield.imm32 = 1;
2517 t.bitfield.imm32s = 1;
2518 }
2519 else if (fits_in_unsigned_byte (num))
2520 {
2521 t.bitfield.imm8 = 1;
2522 t.bitfield.imm16 = 1;
2523 t.bitfield.imm32 = 1;
2524 t.bitfield.imm32s = 1;
2525 }
2526 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2527 {
2528 t.bitfield.imm16 = 1;
2529 t.bitfield.imm32 = 1;
2530 t.bitfield.imm32s = 1;
2531 }
2532 else if (fits_in_signed_long (num))
2533 {
2534 t.bitfield.imm32 = 1;
2535 t.bitfield.imm32s = 1;
2536 }
2537 else if (fits_in_unsigned_long (num))
2538 t.bitfield.imm32 = 1;
2539
2540 return t;
2541 }
2542
2543 static offsetT
2544 offset_in_range (offsetT val, int size)
2545 {
2546 addressT mask;
2547
2548 switch (size)
2549 {
2550 case 1: mask = ((addressT) 1 << 8) - 1; break;
2551 case 2: mask = ((addressT) 1 << 16) - 1; break;
2552 case 4: mask = ((addressT) 2 << 31) - 1; break;
2553 #ifdef BFD64
2554 case 8: mask = ((addressT) 2 << 63) - 1; break;
2555 #endif
2556 default: abort ();
2557 }
2558
2559 if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2560 {
2561 char buf1[40], buf2[40];
2562
2563 sprint_value (buf1, val);
2564 sprint_value (buf2, val & mask);
2565 as_warn (_("%s shortened to %s"), buf1, buf2);
2566 }
2567 return val & mask;
2568 }
2569
/* Classification of a newly added instruction prefix, as returned by
   add_prefix ().  PREFIX_EXIST must be 0: add_prefix () tests it for
   falseness.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,
  PREFIX_LOCK,
  PREFIX_REP,
  PREFIX_DS,
  PREFIX_OTHER
};
2578
/* Returns
   a. PREFIX_EXIST if attempting to add a prefix where one from the
   same class already exists.
   b. PREFIX_LOCK if lock prefix is added.
   c. PREFIX_REP if rep/repne prefix is added.
   d. PREFIX_DS if ds prefix is added.
   e. PREFIX_OTHER if other prefix is added.
 */

static enum PREFIX_GROUP
add_prefix (unsigned int prefix)
{
  enum PREFIX_GROUP ret = PREFIX_OTHER;
  unsigned int q;

  /* REX prefixes (0x40-0x4f) are only meaningful in 64-bit mode; the
     individual W/R/X/B bits may be OR-ed together, but repeating one
     is an error.  */
  if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
      && flag_code == CODE_64BIT)
    {
      if ((i.prefix[REX_PREFIX] & prefix & REX_W)
	  || (i.prefix[REX_PREFIX] & prefix & REX_R)
	  || (i.prefix[REX_PREFIX] & prefix & REX_X)
	  || (i.prefix[REX_PREFIX] & prefix & REX_B))
	ret = PREFIX_EXIST;
      q = REX_PREFIX;
    }
  else
    {
      /* Map the prefix byte to its slot Q in i.prefix[].  */
      switch (prefix)
	{
	default:
	  abort ();

	case DS_PREFIX_OPCODE:
	  ret = PREFIX_DS;
	  /* Fall through.  */
	case CS_PREFIX_OPCODE:
	case ES_PREFIX_OPCODE:
	case FS_PREFIX_OPCODE:
	case GS_PREFIX_OPCODE:
	case SS_PREFIX_OPCODE:
	  q = SEG_PREFIX;
	  break;

	case REPNE_PREFIX_OPCODE:
	case REPE_PREFIX_OPCODE:
	  q = REP_PREFIX;
	  ret = PREFIX_REP;
	  break;

	case LOCK_PREFIX_OPCODE:
	  q = LOCK_PREFIX;
	  ret = PREFIX_LOCK;
	  break;

	case FWAIT_OPCODE:
	  q = WAIT_PREFIX;
	  break;

	case ADDR_PREFIX_OPCODE:
	  q = ADDR_PREFIX;
	  break;

	case DATA_PREFIX_OPCODE:
	  q = DATA_PREFIX;
	  break;
	}
      /* Non-REX prefixes may not repeat within their slot at all.  */
      if (i.prefix[q] != 0)
	ret = PREFIX_EXIST;
    }

  /* Note: relies on PREFIX_EXIST being 0, so "if (ret)" means success.  */
  if (ret)
    {
      if (!i.prefix[q])
	++i.prefixes;
      i.prefix[q] |= prefix;
    }
  else
    as_bad (_("same type of prefix used twice"));

  return ret;
}
2660
2661 static void
2662 update_code_flag (int value, int check)
2663 {
2664 PRINTF_LIKE ((*as_error));
2665
2666 flag_code = (enum flag_code) value;
2667 if (flag_code == CODE_64BIT)
2668 {
2669 cpu_arch_flags.bitfield.cpu64 = 1;
2670 cpu_arch_flags.bitfield.cpuno64 = 0;
2671 }
2672 else
2673 {
2674 cpu_arch_flags.bitfield.cpu64 = 0;
2675 cpu_arch_flags.bitfield.cpuno64 = 1;
2676 }
2677 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2678 {
2679 if (check)
2680 as_error = as_fatal;
2681 else
2682 as_error = as_bad;
2683 (*as_error) (_("64bit mode not supported on `%s'."),
2684 cpu_arch_name ? cpu_arch_name : default_arch);
2685 }
2686 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2687 {
2688 if (check)
2689 as_error = as_fatal;
2690 else
2691 as_error = as_bad;
2692 (*as_error) (_("32bit mode not supported on `%s'."),
2693 cpu_arch_name ? cpu_arch_name : default_arch);
2694 }
2695 stackop_size = '\0';
2696 }
2697
/* Pseudo-op handler for .code16/.code32/.code64: switch mode VALUE,
   diagnosing unsupported modes non-fatally.  */
static void
set_code_flag (int value)
{
  update_code_flag (value, 0);
}
2703
/* Handler for .code16gcc: 16-bit mode with 32-bit default stack
   operand size, as emitted by gcc.  Only CODE_16BIT is valid here.  */
static void
set_16bit_gcc_code_flag (int new_code_flag)
{
  flag_code = (enum flag_code) new_code_flag;
  if (flag_code != CODE_16BIT)
    abort ();
  cpu_arch_flags.bitfield.cpu64 = 0;
  cpu_arch_flags.bitfield.cpuno64 = 1;
  /* Stack operations default to 32-bit operand size.  */
  stackop_size = LONG_MNEM_SUFFIX;
}
2714
/* Handler for .intel_syntax / .att_syntax (SYNTAX_FLAG non-zero for
   Intel).  An optional "prefix"/"noprefix" argument selects whether
   registers require a '%' prefix.  */
static void
set_intel_syntax (int syntax_flag)
{
  /* Find out if register prefixing is specified.  */
  int ask_naked_reg = 0;

  SKIP_WHITESPACE ();
  if (!is_end_of_line[(unsigned char) *input_line_pointer])
    {
      char *string;
      int e = get_symbol_name (&string);

      if (strcmp (string, "prefix") == 0)
	ask_naked_reg = 1;
      else if (strcmp (string, "noprefix") == 0)
	ask_naked_reg = -1;
      else
	as_bad (_("bad argument to syntax directive."));
      (void) restore_line_pointer (e);
    }
  demand_empty_rest_of_line ();

  intel_syntax = syntax_flag;

  /* Without an explicit argument, allow naked registers in Intel mode
     only when symbol names carry a distinguishing leading character.  */
  if (ask_naked_reg == 0)
    allow_naked_reg = (intel_syntax
		       && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
  else
    allow_naked_reg = (ask_naked_reg < 0);

  expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);

  /* Adjust the lexer tables to the chosen syntax.  */
  identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
  identifier_chars['$'] = intel_syntax ? '$' : 0;
  register_prefix = allow_naked_reg ? "" : "%";
}
2751
/* Handler for .intel_mnemonic / .att_mnemonic.  */
static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2757
/* Handler for .allow_index_reg / .disallow_index_reg.  */
static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2763
2764 static void
2765 set_check (int what)
2766 {
2767 enum check_kind *kind;
2768 const char *str;
2769
2770 if (what)
2771 {
2772 kind = &operand_check;
2773 str = "operand";
2774 }
2775 else
2776 {
2777 kind = &sse_check;
2778 str = "sse";
2779 }
2780
2781 SKIP_WHITESPACE ();
2782
2783 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2784 {
2785 char *string;
2786 int e = get_symbol_name (&string);
2787
2788 if (strcmp (string, "none") == 0)
2789 *kind = check_none;
2790 else if (strcmp (string, "warning") == 0)
2791 *kind = check_warning;
2792 else if (strcmp (string, "error") == 0)
2793 *kind = check_error;
2794 else
2795 as_bad (_("bad argument to %s_check directive."), str);
2796 (void) restore_line_pointer (e);
2797 }
2798 else
2799 as_bad (_("missing argument for %s_check directive"), str);
2800
2801 demand_empty_rest_of_line ();
2802 }
2803
/* Diagnose enabling extension NAME (with flags NEW_FLAG) when it is
   incompatible with the configured target (IAMCU/L1OM/K1OM).  */
static void
check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
			   i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  static const char *arch;

  /* Intel L1OM is only supported on ELF.  */
  if (!IS_ELF)
    return;

  if (!arch)
    {
      /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
	 use default_arch.  */
      arch = cpu_arch_name;
      if (!arch)
	arch = default_arch;
    }

  /* If we are targeting Intel MCU, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
      || new_flag.bitfield.cpuiamcu)
    return;

  /* If we are targeting Intel L1OM, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
      || new_flag.bitfield.cpul1om)
    return;

  /* If we are targeting Intel K1OM, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
      || new_flag.bitfield.cpuk1om)
    return;

  as_bad (_("`%s' is not supported on `%s'"), name, arch);
#endif
}
2842
/* Handler for the .arch directive: select a base architecture
   ("i686", ...), enable an extension (".avx2", ...), or disable one
   (".noavx2"), optionally followed by ",jumps"/",nojumps".  */
static void
set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
{
  SKIP_WHITESPACE ();

  if (!is_end_of_line[(unsigned char) *input_line_pointer])
    {
      char *string;
      int e = get_symbol_name (&string);
      unsigned int j;
      i386_cpu_flags flags;

      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	{
	  if (strcmp (string, cpu_arch[j].name) == 0)
	    {
	      check_cpu_arch_compatible (string, cpu_arch[j].flags);

	      /* Names not starting with '.' select a base architecture,
		 replacing the current flag set entirely.  */
	      if (*string != '.')
		{
		  cpu_arch_name = cpu_arch[j].name;
		  cpu_sub_arch_name = NULL;
		  cpu_arch_flags = cpu_arch[j].flags;
		  if (flag_code == CODE_64BIT)
		    {
		      cpu_arch_flags.bitfield.cpu64 = 1;
		      cpu_arch_flags.bitfield.cpuno64 = 0;
		    }
		  else
		    {
		      cpu_arch_flags.bitfield.cpu64 = 0;
		      cpu_arch_flags.bitfield.cpuno64 = 1;
		    }
		  cpu_arch_isa = cpu_arch[j].type;
		  cpu_arch_isa_flags = cpu_arch[j].flags;
		  if (!cpu_arch_tune_set)
		    {
		      cpu_arch_tune = cpu_arch_isa;
		      cpu_arch_tune_flags = cpu_arch_isa_flags;
		    }
		  break;
		}

	      /* '.ext' names extend the current flag set.  */
	      flags = cpu_flags_or (cpu_arch_flags,
				    cpu_arch[j].flags);

	      if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		{
		  /* Something new was enabled: record the extension name
		     (appended to any earlier ones) and the new flags.  */
		  if (cpu_sub_arch_name)
		    {
		      char *name = cpu_sub_arch_name;
		      cpu_sub_arch_name = concat (name,
						  cpu_arch[j].name,
						  (const char *) NULL);
		      free (name);
		    }
		  else
		    cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
		  cpu_arch_flags = flags;
		  cpu_arch_isa_flags = flags;
		}
	      else
		cpu_arch_isa_flags
		  = cpu_flags_or (cpu_arch_isa_flags,
				  cpu_arch[j].flags);
	      (void) restore_line_pointer (e);
	      demand_empty_rest_of_line ();
	      return;
	    }
	}

      if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
	{
	  /* Disable an ISA extension.  */
	  for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
	    if (strcmp (string + 1, cpu_noarch [j].name) == 0)
	      {
		flags = cpu_flags_and_not (cpu_arch_flags,
					   cpu_noarch[j].flags);
		if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		  {
		    if (cpu_sub_arch_name)
		      {
			char *name = cpu_sub_arch_name;
			cpu_sub_arch_name = concat (name, string,
						    (const char *) NULL);
			free (name);
		      }
		    else
		      cpu_sub_arch_name = xstrdup (string);
		    cpu_arch_flags = flags;
		    cpu_arch_isa_flags = flags;
		  }
		(void) restore_line_pointer (e);
		demand_empty_rest_of_line ();
		return;
	      }

	  j = ARRAY_SIZE (cpu_arch);
	}

      if (j >= ARRAY_SIZE (cpu_arch))
	as_bad (_("no such architecture: `%s'"), string);

      /* NOTE(review): restores the delimiter byte directly instead of
	 restore_line_pointer () as used elsewhere — confirm intentional.  */
      *input_line_pointer = e;
    }
  else
    as_bad (_("missing cpu architecture"));

  /* Parse the optional ",jumps"/",nojumps" modifier.  */
  no_cond_jump_promotion = 0;
  if (*input_line_pointer == ','
      && !is_end_of_line[(unsigned char) input_line_pointer[1]])
    {
      char *string;
      char e;

      ++input_line_pointer;
      e = get_symbol_name (&string);

      if (strcmp (string, "nojumps") == 0)
	no_cond_jump_promotion = 1;
      else if (strcmp (string, "jumps") == 0)
	;
      else
	as_bad (_("no such architecture modifier: `%s'"), string);

      (void) restore_line_pointer (e);
    }

  demand_empty_rest_of_line ();
}
2974
2975 enum bfd_architecture
2976 i386_arch (void)
2977 {
2978 if (cpu_arch_isa == PROCESSOR_L1OM)
2979 {
2980 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2981 || flag_code != CODE_64BIT)
2982 as_fatal (_("Intel L1OM is 64bit ELF only"));
2983 return bfd_arch_l1om;
2984 }
2985 else if (cpu_arch_isa == PROCESSOR_K1OM)
2986 {
2987 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2988 || flag_code != CODE_64BIT)
2989 as_fatal (_("Intel K1OM is 64bit ELF only"));
2990 return bfd_arch_k1om;
2991 }
2992 else if (cpu_arch_isa == PROCESSOR_IAMCU)
2993 {
2994 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2995 || flag_code == CODE_64BIT)
2996 as_fatal (_("Intel MCU is 32bit ELF only"));
2997 return bfd_arch_iamcu;
2998 }
2999 else
3000 return bfd_arch_i386;
3001 }
3002
3003 unsigned long
3004 i386_mach (void)
3005 {
3006 if (!strncmp (default_arch, "x86_64", 6))
3007 {
3008 if (cpu_arch_isa == PROCESSOR_L1OM)
3009 {
3010 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3011 || default_arch[6] != '\0')
3012 as_fatal (_("Intel L1OM is 64bit ELF only"));
3013 return bfd_mach_l1om;
3014 }
3015 else if (cpu_arch_isa == PROCESSOR_K1OM)
3016 {
3017 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3018 || default_arch[6] != '\0')
3019 as_fatal (_("Intel K1OM is 64bit ELF only"));
3020 return bfd_mach_k1om;
3021 }
3022 else if (default_arch[6] == '\0')
3023 return bfd_mach_x86_64;
3024 else
3025 return bfd_mach_x64_32;
3026 }
3027 else if (!strcmp (default_arch, "i386")
3028 || !strcmp (default_arch, "iamcu"))
3029 {
3030 if (cpu_arch_isa == PROCESSOR_IAMCU)
3031 {
3032 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3033 as_fatal (_("Intel MCU is 32bit ELF only"));
3034 return bfd_mach_i386_iamcu;
3035 }
3036 else
3037 return bfd_mach_i386_i386;
3038 }
3039 else
3040 as_fatal (_("unknown architecture"));
3041 }
3042 \f
/* One-time target initialization: build the opcode and register hash
   tables, the lexical classification tables, and the DWARF/CFI
   parameters for the selected mode.  */
void
md_begin (void)
{
  /* Support pseudo prefixes like {disp32}.  */
  lex_type ['{'] = LEX_BEGIN_NAME;

  /* Initialize op_hash hash table.  */
  op_hash = str_htab_create ();

  {
    const insn_template *optab;
    templates *core_optab;

    /* Setup for loop.  */
    optab = i386_optab;
    core_optab = XNEW (templates);
    core_optab->start = optab;

    /* The opcode table is sorted so templates with the same mnemonic
       are adjacent; each maximal run is entered as one hash entry.  */
    while (1)
      {
	++optab;
	if (optab->name == NULL
	    || strcmp (optab->name, (optab - 1)->name) != 0)
	  {
	    /* different name --> ship out current template list;
	       add to hash table; & begin anew.  */
	    core_optab->end = optab;
	    if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
	      as_fatal (_("duplicate %s"), (optab - 1)->name);

	    if (optab->name == NULL)
	      break;
	    core_optab = XNEW (templates);
	    core_optab->start = optab;
	  }
      }
  }

  /* Initialize reg_hash hash table.  */
  reg_hash = str_htab_create ();
  {
    const reg_entry *regtab;
    unsigned int regtab_size = i386_regtab_size;

    for (regtab = i386_regtab; regtab_size--; regtab++)
      if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
	as_fatal (_("duplicate %s"), regtab->reg_name);
  }

  /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
  {
    int c;
    char *p;

    for (c = 0; c < 256; c++)
      {
	if (ISDIGIT (c))
	  {
	    digit_chars[c] = c;
	    mnemonic_chars[c] = c;
	    register_chars[c] = c;
	    operand_chars[c] = c;
	  }
	else if (ISLOWER (c))
	  {
	    mnemonic_chars[c] = c;
	    register_chars[c] = c;
	    operand_chars[c] = c;
	  }
	else if (ISUPPER (c))
	  {
	    /* Mnemonics and registers are matched case-insensitively by
	       folding upper case to lower here.  */
	    mnemonic_chars[c] = TOLOWER (c);
	    register_chars[c] = mnemonic_chars[c];
	    operand_chars[c] = c;
	  }
	else if (c == '{' || c == '}')
	  {
	    mnemonic_chars[c] = c;
	    operand_chars[c] = c;
	  }
#ifdef SVR4_COMMENT_CHARS
	else if (c == '\\' && strchr (i386_comment_chars, '/'))
	  operand_chars[c] = c;
#endif

	if (ISALPHA (c) || ISDIGIT (c))
	  identifier_chars[c] = c;
	else if (c >= 128)
	  {
	    identifier_chars[c] = c;
	    operand_chars[c] = c;
	  }
      }

#ifdef LEX_AT
    identifier_chars['@'] = '@';
#endif
#ifdef LEX_QM
    identifier_chars['?'] = '?';
    operand_chars['?'] = '?';
#endif
    digit_chars['-'] = '-';
    mnemonic_chars['_'] = '_';
    mnemonic_chars['-'] = '-';
    mnemonic_chars['.'] = '.';
    identifier_chars['_'] = '_';
    identifier_chars['.'] = '.';

    for (p = operand_special_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
  }

  /* DWARF return-address column and CIE data alignment per mode.  */
  if (flag_code == CODE_64BIT)
    {
#if defined (OBJ_COFF) && defined (TE_PE)
      x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
				  ? 32 : 16);
#else
      x86_dwarf2_return_column = 16;
#endif
      x86_cie_data_alignment = -8;
    }
  else
    {
      x86_dwarf2_return_column = 8;
      x86_cie_data_alignment = -4;
    }

  /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
     can be turned into BRANCH_PREFIX frag.  */
  if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
    abort ();
}
3176
/* Emit hash table statistics to FILE (for as --statistics).  */
void
i386_print_statistics (FILE *file)
{
  htab_print_statistics (file, "i386 opcode", op_hash);
  htab_print_statistics (file, "i386 register", reg_hash);
}
3183 \f
3184 #ifdef DEBUG386
3185
3186 /* Debugging routines for md_assemble. */
3187 static void pte (insn_template *);
3188 static void pt (i386_operand_type);
3189 static void pe (expressionS *);
3190 static void ps (symbolS *);
3191
3192 static void
3193 pi (const char *line, i386_insn *x)
3194 {
3195 unsigned int j;
3196
3197 fprintf (stdout, "%s: template ", line);
3198 pte (&x->tm);
3199 fprintf (stdout, " address: base %s index %s scale %x\n",
3200 x->base_reg ? x->base_reg->reg_name : "none",
3201 x->index_reg ? x->index_reg->reg_name : "none",
3202 x->log2_scale_factor);
3203 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3204 x->rm.mode, x->rm.reg, x->rm.regmem);
3205 fprintf (stdout, " sib: base %x index %x scale %x\n",
3206 x->sib.base, x->sib.index, x->sib.scale);
3207 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3208 (x->rex & REX_W) != 0,
3209 (x->rex & REX_R) != 0,
3210 (x->rex & REX_X) != 0,
3211 (x->rex & REX_B) != 0);
3212 for (j = 0; j < x->operands; j++)
3213 {
3214 fprintf (stdout, " #%d: ", j + 1);
3215 pt (x->types[j]);
3216 fprintf (stdout, "\n");
3217 if (x->types[j].bitfield.class == Reg
3218 || x->types[j].bitfield.class == RegMMX
3219 || x->types[j].bitfield.class == RegSIMD
3220 || x->types[j].bitfield.class == RegMask
3221 || x->types[j].bitfield.class == SReg
3222 || x->types[j].bitfield.class == RegCR
3223 || x->types[j].bitfield.class == RegDR
3224 || x->types[j].bitfield.class == RegTR
3225 || x->types[j].bitfield.class == RegBND)
3226 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3227 if (operand_type_check (x->types[j], imm))
3228 pe (x->op[j].imms);
3229 if (operand_type_check (x->types[j], disp))
3230 pe (x->op[j].disps);
3231 }
3232 }
3233
3234 static void
3235 pte (insn_template *t)
3236 {
3237 unsigned int j;
3238 fprintf (stdout, " %d operands ", t->operands);
3239 fprintf (stdout, "opcode %x ", t->base_opcode);
3240 if (t->extension_opcode != None)
3241 fprintf (stdout, "ext %x ", t->extension_opcode);
3242 if (t->opcode_modifier.d)
3243 fprintf (stdout, "D");
3244 if (t->opcode_modifier.w)
3245 fprintf (stdout, "W");
3246 fprintf (stdout, "\n");
3247 for (j = 0; j < t->operands; j++)
3248 {
3249 fprintf (stdout, " #%d type ", j + 1);
3250 pt (t->operand_types[j]);
3251 fprintf (stdout, "\n");
3252 }
3253 }
3254
3255 static void
3256 pe (expressionS *e)
3257 {
3258 fprintf (stdout, " operation %d\n", e->X_op);
3259 fprintf (stdout, " add_number %ld (%lx)\n",
3260 (long) e->X_add_number, (long) e->X_add_number);
3261 if (e->X_add_symbol)
3262 {
3263 fprintf (stdout, " add_symbol ");
3264 ps (e->X_add_symbol);
3265 fprintf (stdout, "\n");
3266 }
3267 if (e->X_op_symbol)
3268 {
3269 fprintf (stdout, " op_symbol ");
3270 ps (e->X_op_symbol);
3271 fprintf (stdout, "\n");
3272 }
3273 }
3274
3275 static void
3276 ps (symbolS *s)
3277 {
3278 fprintf (stdout, "%s type %s%s",
3279 S_GET_NAME (s),
3280 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3281 segment_name (S_GET_SEGMENT (s)));
3282 }
3283
3284 static struct type_name
3285 {
3286 i386_operand_type mask;
3287 const char *name;
3288 }
3289 const type_names[] =
3290 {
3291 { OPERAND_TYPE_REG8, "r8" },
3292 { OPERAND_TYPE_REG16, "r16" },
3293 { OPERAND_TYPE_REG32, "r32" },
3294 { OPERAND_TYPE_REG64, "r64" },
3295 { OPERAND_TYPE_ACC8, "acc8" },
3296 { OPERAND_TYPE_ACC16, "acc16" },
3297 { OPERAND_TYPE_ACC32, "acc32" },
3298 { OPERAND_TYPE_ACC64, "acc64" },
3299 { OPERAND_TYPE_IMM8, "i8" },
3300 { OPERAND_TYPE_IMM8, "i8s" },
3301 { OPERAND_TYPE_IMM16, "i16" },
3302 { OPERAND_TYPE_IMM32, "i32" },
3303 { OPERAND_TYPE_IMM32S, "i32s" },
3304 { OPERAND_TYPE_IMM64, "i64" },
3305 { OPERAND_TYPE_IMM1, "i1" },
3306 { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3307 { OPERAND_TYPE_DISP8, "d8" },
3308 { OPERAND_TYPE_DISP16, "d16" },
3309 { OPERAND_TYPE_DISP32, "d32" },
3310 { OPERAND_TYPE_DISP32S, "d32s" },
3311 { OPERAND_TYPE_DISP64, "d64" },
3312 { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3313 { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3314 { OPERAND_TYPE_CONTROL, "control reg" },
3315 { OPERAND_TYPE_TEST, "test reg" },
3316 { OPERAND_TYPE_DEBUG, "debug reg" },
3317 { OPERAND_TYPE_FLOATREG, "FReg" },
3318 { OPERAND_TYPE_FLOATACC, "FAcc" },
3319 { OPERAND_TYPE_SREG, "SReg" },
3320 { OPERAND_TYPE_REGMMX, "rMMX" },
3321 { OPERAND_TYPE_REGXMM, "rXMM" },
3322 { OPERAND_TYPE_REGYMM, "rYMM" },
3323 { OPERAND_TYPE_REGZMM, "rZMM" },
3324 { OPERAND_TYPE_REGTMM, "rTMM" },
3325 { OPERAND_TYPE_REGMASK, "Mask reg" },
3326 };
3327
3328 static void
3329 pt (i386_operand_type t)
3330 {
3331 unsigned int j;
3332 i386_operand_type a;
3333
3334 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3335 {
3336 a = operand_type_and (t, type_names[j].mask);
3337 if (operand_type_equal (&a, &type_names[j].mask))
3338 fprintf (stdout, "%s, ", type_names[j].name);
3339 }
3340 fflush (stdout);
3341 }
3342
3343 #endif /* DEBUG386 */
3344 \f
/* Map a fixup onto a BFD relocation code.

   SIZE is the field width in bytes (1, 2, 4 or 8).  PCREL is non-zero
   for a pc-relative field.  SIGN describes the field's signedness:
   > 0 signed, 0 unsigned, < 0 "don't care".  OTHER, when not NO_RELOC,
   is a relocation already selected by operand parsing (e.g. @GOTPCREL)
   which is widened to its 64-bit variant and cross-checked here.
   Returns the chosen code, or NO_RELOC after reporting an error.  */

static bfd_reloc_code_real_type
reloc (unsigned int size,
       int pcrel,
       int sign,
       bfd_reloc_code_real_type other)
{
  if (other != NO_RELOC)
    {
      reloc_howto_type *rel;

      if (size == 8)
	switch (other)
	  {
	    /* The first three cases return immediately: those codes
	       bypass the howto size/sign/pcrel checks below.  The
	       remaining cases only widen OTHER and fall through.  */
	  case BFD_RELOC_X86_64_GOT32:
	    return BFD_RELOC_X86_64_GOT64;
	    break;
	  case BFD_RELOC_X86_64_GOTPLT64:
	    return BFD_RELOC_X86_64_GOTPLT64;
	    break;
	  case BFD_RELOC_X86_64_PLTOFF64:
	    return BFD_RELOC_X86_64_PLTOFF64;
	    break;
	  case BFD_RELOC_X86_64_GOTPC32:
	    other = BFD_RELOC_X86_64_GOTPC64;
	    break;
	  case BFD_RELOC_X86_64_GOTPCREL:
	    other = BFD_RELOC_X86_64_GOTPCREL64;
	    break;
	  case BFD_RELOC_X86_64_TPOFF32:
	    other = BFD_RELOC_X86_64_TPOFF64;
	    break;
	  case BFD_RELOC_X86_64_DTPOFF32:
	    other = BFD_RELOC_X86_64_DTPOFF64;
	    break;
	  default:
	    break;
	  }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      if (other == BFD_RELOC_SIZE32)
	{
	  if (size == 8)
	    other = BFD_RELOC_SIZE64;
	  /* @SIZE relocations describe a symbol's size; a pc-relative
	     form does not exist.  */
	  if (pcrel)
	    {
	      as_bad (_("there are no pc-relative size relocations"));
	      return NO_RELOC;
	    }
	}
#endif

      /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.
	 Force "don't care" so the signedness check below is skipped.  */
      if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
	sign = -1;

      /* Validate the parsed relocation against its howto entry: the
	 field sizes must match, pc-relativeness must agree, and the
	 overflow-complaint style must not contradict SIGN.  */
      rel = bfd_reloc_type_lookup (stdoutput, other);
      if (!rel)
	as_bad (_("unknown relocation (%u)"), other);
      else if (size != bfd_get_reloc_size (rel))
	as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
		bfd_get_reloc_size (rel),
		size);
      else if (pcrel && !rel->pc_relative)
	as_bad (_("non-pc-relative relocation for pc-relative field"));
      else if ((rel->complain_on_overflow == complain_overflow_signed
		&& !sign)
	       || (rel->complain_on_overflow == complain_overflow_unsigned
		   && sign > 0))
	as_bad (_("relocated field and relocation type differ in signedness"));
      else
	return other;
      return NO_RELOC;
    }

  /* No explicit relocation: pick a generic one from SIZE/PCREL/SIGN.  */
  if (pcrel)
    {
      if (!sign)
	as_bad (_("there are no unsigned pc-relative relocations"));
      switch (size)
	{
	case 1: return BFD_RELOC_8_PCREL;
	case 2: return BFD_RELOC_16_PCREL;
	case 4: return BFD_RELOC_32_PCREL;
	case 8: return BFD_RELOC_64_PCREL;
	}
      as_bad (_("cannot do %u byte pc-relative relocation"), size);
    }
  else
    {
      if (sign > 0)
	switch (size)
	  {
	  case 4: return BFD_RELOC_X86_64_32S;
	  }
      else
	switch (size)
	  {
	  case 1: return BFD_RELOC_8;
	  case 2: return BFD_RELOC_16;
	  case 4: return BFD_RELOC_32;
	  case 8: return BFD_RELOC_64;
	  }
      as_bad (_("cannot do %s %u byte relocation"),
	      sign > 0 ? "signed" : "unsigned", size);
    }

  return NO_RELOC;
}
3453
3454 /* Here we decide which fixups can be adjusted to make them relative to
3455 the beginning of the section instead of the symbol. Basically we need
3456 to make sure that the dynamic relocations are done correctly, so in
3457 some cases we force the original symbol to be used. */
3458
3459 int
3460 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3461 {
3462 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3463 if (!IS_ELF)
3464 return 1;
3465
3466 /* Don't adjust pc-relative references to merge sections in 64-bit
3467 mode. */
3468 if (use_rela_relocations
3469 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3470 && fixP->fx_pcrel)
3471 return 0;
3472
3473 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3474 and changed later by validate_fix. */
3475 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3476 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3477 return 0;
3478
3479 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3480 for size relocations. */
3481 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3482 || fixP->fx_r_type == BFD_RELOC_SIZE64
3483 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3484 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3485 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3486 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3487 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3488 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3489 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3490 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3491 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3492 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3493 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3494 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3495 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3496 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3497 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3498 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3499 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3500 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3501 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3502 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3503 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3504 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3505 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3506 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3507 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3508 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3509 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3510 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3511 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3512 return 0;
3513 #endif
3514 return 1;
3515 }
3516
/* Classify mnemonic MNEMONIC for Intel-syntax FP operand handling.
   Returns 0 for non-math ops (including fxsave/fxrstor), 1 for
   ordinary FP math ops, 2 for integer FP ops (fi...), and 3 for
   control/state ops (fldcw, fnsave, frstor, fstcw, ...).

   Note that the value returned is meaningful only for opcodes with
   (memory) operands, hence the code here is free to improperly handle
   opcodes that have no operands (for better performance and smaller
   code).  */

static int
intel_float_operand (const char *mnemonic)
{
  char c;

  if (mnemonic[0] != 'f')
    return 0;			/* non-math */

  /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
     the fs segment override prefix not currently handled because no
     call path can make opcodes without operands get here.  */
  c = mnemonic[1];
  if (c == 'i')
    return 2;			/* integer op */
  if (c == 'l'
      && mnemonic[2] == 'd'
      && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
    return 3;			/* fldcw/fldenv */
  if (c == 'n' && mnemonic[2] != 'o' /* fnop */)
    return 3;			/* non-waiting control op */
  if (c == 'r' && mnemonic[2] == 's')
    return 3;			/* frstor/frstpm */
  if (c == 's')
    {
      if (mnemonic[2] == 'a')
	return 3;		/* fsave */
      if (mnemonic[2] == 't'
	  && (mnemonic[3] == 'c'	/* fstcw */
	      || mnemonic[3] == 'd'	/* fstdw */
	      || mnemonic[3] == 'e'	/* fstenv */
	      || mnemonic[3] == 's'))	/* fsts[gw] */
	return 3;
    }
  if (c == 'x' && (mnemonic[2] == 'r' || mnemonic[2] == 's'))
    return 0;			/* fxsave/fxrstor are not really math ops */

  return 1;
}
3569
/* Build the VEX prefix into i.vex (2-byte 0xc5 or 3-byte 0xc4/0x8f
   form) from template T and the global instruction state `i'.  May
   swap register operands — and flip the direction bit or fall through
   to the next template — so that the shorter 2-byte form becomes
   usable.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;	/* Inverted VEX.vvvv field.  */
  unsigned int implied_prefix;		/* VEX.pp: 0/66/F3/F2.  */
  unsigned int vector_length;		/* VEX.L bit.  */
  unsigned int w;			/* VEX.W bit.  */

  /* Check register specifier.  VEX.vvvv holds the one's complement of
     the register number.  */
  if (i.vex.register_specifier)
    {
      register_specifier =
	~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  The 2-byte form cannot
     encode REX.B, so turn the lone REX.B into REX.R by exchanging the
     modrm reg and r/m fields.  */
  if (i.reg_operands > 1
      && i.vec_encoding != vex_encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_modifier.opcodeprefix == VEX0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg = i.operands - 1;
      union i386_op temp_op;
      i386_operand_type temp_type;

      /* Swap first and last operands.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[0];
      i.types[0] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[0];
      i.op[0] = temp_op;

      gas_assert (i.rm.mode == 3);

      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      if (i.tm.opcode_modifier.d)
	/* Flip the direction in the opcode itself.  NOTE(review): the
	   0xee/0x6e test appears to distinguish the SIMD int-move
	   opcodes (0x6e/0x7e) from FP-move ones — confirm against
	   i386-opc.tbl.  */
	i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
			    ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
	i.tm = t[1];
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  The rm operand
     and VEX.vvvv are exchanged so REX.B can be dropped.  */
  if (i.reg_operands >= 3
      && i.vec_encoding != vex_encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      unsigned int xchg = i.operands - i.reg_operands;
      union i386_op temp_op;
      i386_operand_type temp_type;

      gas_assert (i.tm.opcode_modifier.opcodeprefix == VEX0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
				      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      /* Swap the two source operands.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[xchg + 1];
      i.types[xchg + 1] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[xchg + 1];
      i.op[xchg + 1] = temp_op;

      /* Exchange the old (REX-extended) rm register with the vvvv
	 register; adjust the reg_entry pointer accordingly.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;		/* Command-line policy for scalar insns.  */
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
	 operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
	if (t->operand_types[op].bitfield.xmmword
	    && t->operand_types[op].bitfield.ymmword
	    && i.types[op].bitfield.ymmword)
	  {
	    vector_length = 1;
	    break;
	  }
    }

  /* Derive VEX.pp from the mandatory-prefix byte embedded above the
     primary opcode byte(s).  */
  switch ((i.tm.base_opcode >> (i.tm.opcode_length << 3)) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  Requires W=0, the 0F map, and
     no REX bits other than (possibly) REX.R.  */
  if (w == 0
      && i.vec_encoding != vex_encoding_vex3
      && i.tm.opcode_modifier.opcodeprefix == VEX0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
			| register_specifier << 3
			| vector_length << 2
			| implied_prefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      unsigned int m;

      i.vex.length = 3;

      /* Select escape byte and mmmmm map field.  */
      switch (i.tm.opcode_modifier.opcodeprefix)
	{
	case VEX0F:
	  m = 0x1;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case VEX0F38:
	  m = 0x2;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case VEX0F3A:
	  m = 0x3;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case XOP08:
	  m = 0x8;
	  i.vex.bytes[0] = 0x8f;
	  break;
	case XOP09:
	  m = 0x9;
	  i.vex.bytes[0] = 0x8f;
	  break;
	case XOP0A:
	  m = 0xa;
	  i.vex.bytes[0] = 0x8f;
	  break;
	default:
	  abort ();
	}

      /* The high 3 bits of the second VEX byte are 1's compliment
	 of RXB bits from REX.  */
      i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

      i.vex.bytes[2] = (w << 7
			| register_specifier << 3
			| vector_length << 2
			| implied_prefix);
    }
}
3776
3777 static INLINE bfd_boolean
3778 is_evex_encoding (const insn_template *t)
3779 {
3780 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3781 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3782 || t->opcode_modifier.sae;
3783 }
3784
3785 static INLINE bfd_boolean
3786 is_any_vex_encoding (const insn_template *t)
3787 {
3788 return t->opcode_modifier.vex || is_evex_encoding (t);
3789 }
3790
/* Build the 4-byte EVEX prefix into i.vex from the global instruction
   state `i'.  Also resolves a dynamic (EVEXDYN) vector length from the
   operands, and consumes the VREX bits used for upper-16 registers.  */

static void
build_evex_prefix (void)
{
  unsigned int register_specifier;	/* Inverted EVEX.vvvv field.  */
  unsigned int implied_prefix;		/* EVEX.pp: 0/66/F3/F2.  */
  unsigned int m, w;			/* mmmm map field; EVEX.W bit.  */
  rex_byte vrex_used = 0;		/* VREX bits consumed here.  */

  /* Check register specifier.  */
  if (i.vex.register_specifier)
    {
      gas_assert ((i.vrex & REX_X) == 0);

      register_specifier = i.vex.register_specifier->reg_num;
      if ((i.vex.register_specifier->reg_flags & RegRex))
	register_specifier += 8;
      /* The upper 16 registers are encoded in the fourth byte of the
	 EVEX prefix.  (EVEX.V' is stored inverted: set the bit when the
	 register is NOT in the upper bank.)  */
      if (!(i.vex.register_specifier->reg_flags & RegVRex))
	i.vex.bytes[3] = 0x8;
      register_specifier = ~register_specifier & 0xf;
    }
  else
    {
      register_specifier = 0xf;

      /* Encode upper 16 vector index register in the fourth byte of
	 the EVEX prefix.  */
      if (!(i.vrex & REX_X))
	i.vex.bytes[3] = 0x8;
      else
	vrex_used |= REX_X;
    }

  /* Derive EVEX.pp from the mandatory-prefix byte above the opcode.  */
  switch ((i.tm.base_opcode >> 8) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* 4 byte EVEX prefix.  */
  i.vex.length = 4;
  i.vex.bytes[0] = 0x62;

  /* mmmm bits.  */
  switch (i.tm.opcode_modifier.opcodeprefix)
    {
    case VEX0F:
      m = 1;
      break;
    case VEX0F38:
      m = 2;
      break;
    case VEX0F3A:
      m = 3;
      break;
    default:
      abort ();
      break;
    }

  /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
     bits from REX.  */
  i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

  /* The fifth bit of the second EVEX byte is 1's compliment of the
     REX_R bit in VREX.  */
  if (!(i.vrex & REX_R))
    i.vex.bytes[1] |= 0x10;
  else
    vrex_used |= REX_R;

  if ((i.reg_operands + i.imm_operands) == i.operands)
    {
      /* When all operands are registers, the REX_X bit in REX is not
	 used.  We reuse it to encode the upper 16 registers, which is
	 indicated by the REX_B bit in VREX.  The REX_X bit is encoded
	 as 1's compliment.  */
      if ((i.vrex & REX_B))
	{
	  vrex_used |= REX_B;
	  i.vex.bytes[1] &= ~0x40;
	}
    }

  /* EVEX instructions shouldn't need the REX prefix.  */
  i.vrex &= ~vrex_used;
  gas_assert (i.vrex == 0);

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;

  /* Encode the U bit.  */
  implied_prefix |= 0x4;

  /* The third byte of the EVEX prefix.  */
  i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);

  /* The fourth byte of the EVEX prefix.  */
  /* The zeroing-masking bit.  */
  if (i.mask && i.mask->zeroing)
    i.vex.bytes[3] |= 0x80;

  /* Don't always set the broadcast bit if there is no RC.  */
  if (!i.rounding)
    {
      /* Encode the vector length.  */
      unsigned int vec_length;

      if (!i.tm.opcode_modifier.evex
	  || i.tm.opcode_modifier.evex == EVEXDYN)
	{
	  unsigned int op;

	  /* Determine vector length from the last multi-length vector
	     operand.  */
	  for (op = i.operands; op--;)
	    if (i.tm.operand_types[op].bitfield.xmmword
		+ i.tm.operand_types[op].bitfield.ymmword
		+ i.tm.operand_types[op].bitfield.zmmword > 1)
	      {
		if (i.types[op].bitfield.zmmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX512;
		    break;
		  }
		else if (i.types[op].bitfield.ymmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX256;
		    break;
		  }
		else if (i.types[op].bitfield.xmmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX128;
		    break;
		  }
		else if (i.broadcast && (int) op == i.broadcast->operand)
		  {
		    /* For a broadcast, the length follows the total
		       broadcast size in bytes.  */
		    switch (i.broadcast->bytes)
		      {
		      case 64:
			i.tm.opcode_modifier.evex = EVEX512;
			break;
		      case 32:
			i.tm.opcode_modifier.evex = EVEX256;
			break;
		      case 16:
			i.tm.opcode_modifier.evex = EVEX128;
			break;
		      default:
			abort ();
		      }
		    break;
		  }
	      }

	  /* OP is unsigned: if the loop ran to completion without
	     finding a length it wrapped around, which must not
	     happen.  */
	  if (op >= MAX_OPERANDS)
	    abort ();
	}

      switch (i.tm.opcode_modifier.evex)
	{
	case EVEXLIG: /* LL' is ignored */
	  vec_length = evexlig << 5;
	  break;
	case EVEX128:
	  vec_length = 0 << 5;
	  break;
	case EVEX256:
	  vec_length = 1 << 5;
	  break;
	case EVEX512:
	  vec_length = 2 << 5;
	  break;
	default:
	  abort ();
	  break;
	}
      i.vex.bytes[3] |= vec_length;
      /* Encode the broadcast bit.  */
      if (i.broadcast)
	i.vex.bytes[3] |= 0x10;
    }
  else
    {
      /* With rounding control the L'L field carries the RC (or the
	 default RC for SAE-only), and bit 4 (b) is set.  */
      if (i.rounding->type != saeonly)
	i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
      else
	i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
    }

  /* The opmask register selecting the write mask.  */
  if (i.mask && i.mask->mask)
    i.vex.bytes[3] |= i.mask->mask->reg_num;
}
4004
/* Fake an immediate operand out of tm.extension_opcode.  */

static void
process_immext (void)
{
  expressionS *exp;

  /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
     which is coded in the same place as an 8-bit immediate field
     would be.  Here we fake an 8-bit immediate operand from the
     opcode suffix stored in tm.extension_opcode.

     AVX instructions also use this encoding, for some of
     3 argument instructions.  */

  gas_assert (i.imm_operands <= 1
	      && (i.operands <= 2
		  || (is_any_vex_encoding (&i.tm)
		      && i.operands <= 4)));

  /* Append a constant imm8 operand holding the suffix value, then
     clear the extension opcode so it is not also emitted through
     ModRM.reg.  */
  exp = &im_expressions[i.imm_operands++];
  i.op[i.operands].imms = exp;
  i.types[i.operands] = imm8;
  i.operands++;
  exp->X_op = O_constant;
  exp->X_add_number = i.tm.extension_opcode;
  i.tm.extension_opcode = None;
}
4031
4032
4033 static int
4034 check_hle (void)
4035 {
4036 switch (i.tm.opcode_modifier.hleprefixok)
4037 {
4038 default:
4039 abort ();
4040 case HLEPrefixNone:
4041 as_bad (_("invalid instruction `%s' after `%s'"),
4042 i.tm.name, i.hle_prefix);
4043 return 0;
4044 case HLEPrefixLock:
4045 if (i.prefix[LOCK_PREFIX])
4046 return 1;
4047 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4048 return 0;
4049 case HLEPrefixAny:
4050 return 1;
4051 case HLEPrefixRelease:
4052 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4053 {
4054 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4055 i.tm.name);
4056 return 0;
4057 }
4058 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4059 {
4060 as_bad (_("memory destination needed for instruction `%s'"
4061 " after `xrelease'"), i.tm.name);
4062 return 0;
4063 }
4064 return 1;
4065 }
4066 }
4067
/* Try the shortest encoding by shortening operand size.  Rewrites the
   selected template and operand types in place; each branch below
   corresponds to one optimization (see its comment block).  */

static void
optimize_encoding (void)
{
  unsigned int j;

  if (optimize_for_space
      && !is_any_vex_encoding (&i.tm)
      && i.reg_operands == 1
      && i.imm_operands == 1
      && !i.types[1].bitfield.byte
      && i.op[0].imms->X_op == O_constant
      && fits_in_imm7 (i.op[0].imms->X_add_number)
      && (i.tm.base_opcode == 0xa8
	  || (i.tm.base_opcode == 0xf6
	      && i.tm.extension_opcode == 0x0)))
    {
      /* Optimize: -Os:
	   test $imm7, %r64/%r32/%r16  ->  test $imm7, %r8
       */
      unsigned int base_regnum = i.op[1].regs->reg_num;
      /* Outside 64-bit mode only %al..%bl have byte forms.  */
      if (flag_code == CODE_64BIT || base_regnum < 4)
	{
	  i.types[1].bitfield.byte = 1;
	  /* Ignore the suffix.  */
	  i.suffix = 0;
	  /* Convert to byte registers.  NOTE(review): the offsets 16/
	     32/48 (+8) index backwards through the register table from
	     the word/dword/qword entry to its byte counterpart — they
	     depend on i386_regtab layout; confirm against i386-reg.tbl.  */
	  if (i.types[1].bitfield.word)
	    j = 16;
	  else if (i.types[1].bitfield.dword)
	    j = 32;
	  else
	    j = 48;
	  if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
	    j += 8;
	  i.op[1].regs -= j;
	}
    }
  else if (flag_code == CODE_64BIT
	   && !is_any_vex_encoding (&i.tm)
	   && ((i.types[1].bitfield.qword
		&& i.reg_operands == 1
		&& i.imm_operands == 1
		&& i.op[0].imms->X_op == O_constant
		&& ((i.tm.base_opcode == 0xb8
		     && i.tm.extension_opcode == None
		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
			&& ((i.tm.base_opcode == 0x24
			     || i.tm.base_opcode == 0xa8)
			    || (i.tm.base_opcode == 0x80
				&& i.tm.extension_opcode == 0x4)
			    || ((i.tm.base_opcode == 0xf6
				 || (i.tm.base_opcode | 1) == 0xc7)
				&& i.tm.extension_opcode == 0x0)))
		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
			&& i.tm.base_opcode == 0x83
			&& i.tm.extension_opcode == 0x4)))
	       || (i.types[0].bitfield.qword
		   && ((i.reg_operands == 2
			&& i.op[0].regs == i.op[1].regs
			&& (i.tm.base_opcode == 0x30
			    || i.tm.base_opcode == 0x28))
		       || (i.reg_operands == 1
			   && i.operands == 1
			   && i.tm.base_opcode == 0x30)))))
    {
      /* Optimize: -O:
	   andq $imm31, %r64   -> andl $imm31, %r32
	   andq $imm7, %r64    -> andl $imm7, %r32
	   testq $imm31, %r64  -> testl $imm31, %r32
	   xorq %r64, %r64     -> xorl %r32, %r32
	   subq %r64, %r64     -> subl %r32, %r32
	   movq $imm31, %r64   -> movl $imm31, %r32
	   movq $imm32, %r64   -> movl $imm32, %r32
       */
      /* The 32-bit form zero-extends, so REX.W can be dropped.  */
      i.tm.opcode_modifier.norex64 = 1;
      if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
	{
	  /* Handle
	       movq $imm31, %r64   -> movl $imm31, %r32
	       movq $imm32, %r64   -> movl $imm32, %r32
	   */
	  i.tm.operand_types[0].bitfield.imm32 = 1;
	  i.tm.operand_types[0].bitfield.imm32s = 0;
	  i.tm.operand_types[0].bitfield.imm64 = 0;
	  i.types[0].bitfield.imm32 = 1;
	  i.types[0].bitfield.imm32s = 0;
	  i.types[0].bitfield.imm64 = 0;
	  i.types[1].bitfield.dword = 1;
	  i.types[1].bitfield.qword = 0;
	  if ((i.tm.base_opcode | 1) == 0xc7)
	    {
	      /* Handle
		   movq $imm31, %r64   -> movl $imm31, %r32
	       */
	      i.tm.base_opcode = 0xb8;
	      i.tm.extension_opcode = None;
	      i.tm.opcode_modifier.w = 0;
	      i.tm.opcode_modifier.modrm = 0;
	    }
	}
    }
  else if (optimize > 1
	   && !optimize_for_space
	   && !is_any_vex_encoding (&i.tm)
	   && i.reg_operands == 2
	   && i.op[0].regs == i.op[1].regs
	   && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
	       || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
	   && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
    {
      /* Optimize: -O2:
	   andb %rN, %rN  -> testb %rN, %rN
	   andw %rN, %rN  -> testw %rN, %rN
	   andq %rN, %rN  -> testq %rN, %rN
	   orb %rN, %rN   -> testb %rN, %rN
	   orw %rN, %rN   -> testw %rN, %rN
	   orq %rN, %rN   -> testq %rN, %rN

	 and outside of 64-bit mode

	   andl %rN, %rN  -> testl %rN, %rN
	   orl %rN, %rN   -> testl %rN, %rN
       */
      i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
    }
  else if (i.reg_operands == 3
	   && i.op[0].regs == i.op[1].regs
	   && !i.types[2].bitfield.xmmword
	   && (i.tm.opcode_modifier.vex
	       || ((!i.mask || i.mask->zeroing)
		   && !i.rounding
		   && is_evex_encoding (&i.tm)
		   && (i.vec_encoding != vex_encoding_evex
		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
		       || i.tm.cpu_flags.bitfield.cpuavx512vl
		       || (i.tm.operand_types[2].bitfield.zmmword
			   && i.types[2].bitfield.ymmword))))
	   && ((i.tm.base_opcode == 0x55
		|| i.tm.base_opcode == 0x6655
		|| i.tm.base_opcode == 0x66df
		|| i.tm.base_opcode == 0x57
		|| i.tm.base_opcode == 0x6657
		|| i.tm.base_opcode == 0x66ef
		|| i.tm.base_opcode == 0x66f8
		|| i.tm.base_opcode == 0x66f9
		|| i.tm.base_opcode == 0x66fa
		|| i.tm.base_opcode == 0x66fb
		|| i.tm.base_opcode == 0x42
		|| i.tm.base_opcode == 0x6642
		|| i.tm.base_opcode == 0x47
		|| i.tm.base_opcode == 0x6647)
	       && i.tm.extension_opcode == None))
    {
      /* Optimize: -O1:
	   VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
	   vpsubq and vpsubw:
	     EVEX VOP %zmmM, %zmmM, %zmmN
	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	     EVEX VOP %ymmM, %ymmM, %ymmN
	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	     VEX VOP %ymmM, %ymmM, %ymmN
	       -> VEX VOP %xmmM, %xmmM, %xmmN
	   VOP, one of vpandn and vpxor:
	     VEX VOP %ymmM, %ymmM, %ymmN
	       -> VEX VOP %xmmM, %xmmM, %xmmN
	   VOP, one of vpandnd and vpandnq:
	     EVEX VOP %zmmM, %zmmM, %zmmN
	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	     EVEX VOP %ymmM, %ymmM, %ymmN
	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	   VOP, one of vpxord and vpxorq:
	     EVEX VOP %zmmM, %zmmM, %zmmN
	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	     EVEX VOP %ymmM, %ymmM, %ymmN
	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
	   VOP, one of kxord and kxorq:
	     VEX VOP %kM, %kM, %kN
	       -> VEX kxorw %kM, %kM, %kN
	   VOP, one of kandnd and kandnq:
	     VEX VOP %kM, %kM, %kN
	       -> VEX kandnw %kM, %kM, %kN
       */
      if (is_evex_encoding (&i.tm))
	{
	  if (i.vec_encoding != vex_encoding_evex)
	    {
	      /* Demote to a 128-bit VEX encoding.  */
	      i.tm.opcode_modifier.vex = VEX128;
	      i.tm.opcode_modifier.vexw = VEXW0;
	      i.tm.opcode_modifier.evex = 0;
	    }
	  else if (optimize > 1)
	    i.tm.opcode_modifier.evex = EVEX128;
	  else
	    return;
	}
      else if (i.tm.operand_types[0].bitfield.class == RegMask)
	{
	  /* kxord/kandnq etc.: drop the 66 prefix byte to get the
	     word-sized mask op.  */
	  i.tm.base_opcode &= 0xff;
	  i.tm.opcode_modifier.vexw = VEXW0;
	}
      else
	i.tm.opcode_modifier.vex = VEX128;

      if (i.tm.opcode_modifier.vex)
	for (j = 0; j < 3; j++)
	  {
	    i.types[j].bitfield.xmmword = 1;
	    i.types[j].bitfield.ymmword = 0;
	  }
    }
  else if (i.vec_encoding != vex_encoding_evex
	   && !i.types[0].bitfield.zmmword
	   && !i.types[1].bitfield.zmmword
	   && !i.mask
	   && !i.broadcast
	   && is_evex_encoding (&i.tm)
	   && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
	       || (i.tm.base_opcode & ~4) == 0x66db
	       || (i.tm.base_opcode & ~4) == 0x66eb)
	   && i.tm.extension_opcode == None)
    {
      /* Optimize: -O1:
	   VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
	   vmovdqu32 and vmovdqu64:
	     EVEX VOP %xmmM, %xmmN
	       -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
	     EVEX VOP %ymmM, %ymmN
	       -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
	     EVEX VOP %xmmM, mem
	       -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
	     EVEX VOP %ymmM, mem
	       -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
	     EVEX VOP mem, %xmmN
	       -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
	     EVEX VOP mem, %ymmN
	       -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
	   VOP, one of vpand, vpandn, vpor, vpxor:
	     EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
	       -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
	     EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
	       -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
	     EVEX VOP{d,q} mem, %xmmM, %xmmN
	       -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
	     EVEX VOP{d,q} mem, %ymmM, %ymmN
	       -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
       */
      for (j = 0; j < i.operands; j++)
	if (operand_type_check (i.types[j], disp)
	    && i.op[j].disps->X_op == O_constant)
	  {
	    /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
	       has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
	       bytes, we choose EVEX Disp8 over VEX Disp32.  */
	    int evex_disp8, vex_disp8;
	    unsigned int memshift = i.memshift;
	    offsetT n = i.op[j].disps->X_add_number;

	    evex_disp8 = fits_in_disp8 (n);
	    i.memshift = 0;
	    vex_disp8 = fits_in_disp8 (n);
	    if (evex_disp8 != vex_disp8)
	      {
		/* Keep the EVEX encoding; restore the scaled shift.  */
		i.memshift = memshift;
		return;
	      }

	    i.types[j].bitfield.disp8 = vex_disp8;
	    break;
	  }
      /* vmovdqu16/vmovdqu8 (F2 prefix) have no VEX form; use the
	 F3-prefixed vmovdqu instead.  */
      if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
	i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
      i.tm.opcode_modifier.vex
	= i.types[0].bitfield.ymmword ? VEX256 : VEX128;
      i.tm.opcode_modifier.vexw = VEXW0;
      /* VPAND, VPOR, and VPXOR are commutative.  */
      if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
	i.tm.opcode_modifier.commutative = 1;
      i.tm.opcode_modifier.evex = 0;
      i.tm.opcode_modifier.masking = 0;
      i.tm.opcode_modifier.broadcast = 0;
      i.tm.opcode_modifier.disp8memshift = 0;
      i.memshift = 0;
      /* J still indexes the displacement operand found above, if any;
	 recompute Disp8 now that the memshift is gone.  */
      if (j < i.operands)
	i.types[j].bitfield.disp8
	  = fits_in_disp8 (i.op[j].disps->X_add_number);
    }
}
4366
/* Return non-zero if the instruction currently being assembled (in the
   global I) may load from memory.  Used by insert_lfence_after () to
   decide where -mlfence-after-load must emit an LFENCE.  Legacy insns
   with implicit memory operands are matched purely by opcode; for the
   rest the destination operand's type is inspected.  */

static int
load_insn_p (void)
{
  unsigned int dest;
  int any_vex_p = is_any_vex_encoding (&i.tm);
  /* Low bit forced on so that opcode pairs differing only in bit 0
     (byte vs. word/dword/qword forms) compare equal below.  */
  unsigned int base_opcode = i.tm.base_opcode | 1;

  if (!any_vex_p)
    {
      /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
	 prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
	 bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote.  */
      if (i.tm.opcode_modifier.anysize)
	return 0;

      /* pop, popf, popa.  Always load from the stack.  */
      if (strcmp (i.tm.name, "pop") == 0
	  || i.tm.base_opcode == 0x9d
	  || i.tm.base_opcode == 0x61)
	return 1;

      /* movs, cmps, lods, scas.  */
      if ((i.tm.base_opcode | 0xb) == 0xaf)
	return 1;

      /* outs, xlatb.  */
      if (base_opcode == 0x6f
	  || i.tm.base_opcode == 0xd7)
	return 1;
      /* NB: For AMD-specific insns with implicit memory operands,
	 they're intentionally not covered.  */
    }

  /* No memory operand.  */
  if (!i.mem_operands)
    return 0;

  if (any_vex_p)
    {
      /* vldmxcsr.  */
      if (i.tm.base_opcode == 0xae
	  && i.tm.opcode_modifier.vex
	  && i.tm.opcode_modifier.opcodeprefix == VEX0F
	  && i.tm.extension_opcode == 2)
	return 1;
    }
  else
    {
      /* test, not, neg, mul, imul, div, idiv (0xf6/0xf7 group; /1 is
	 the unused encoding slot).  */
      if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
	  && i.tm.extension_opcode != 1)
	return 1;

      /* inc, dec.  */
      if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
	return 1;

      /* add, or, adc, sbb, and, sub, xor, cmp (immediate group).  */
      if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
	return 1;

      /* bt, bts, btr, btc (0x0fba group, /4../7).  */
      if (i.tm.base_opcode == 0xfba
	  && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
	return 1;

      /* rol, ror, rcl, rcr, shl/sal, shr, sar (/6 is unused).  */
      if ((base_opcode == 0xc1
	   || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
	  && i.tm.extension_opcode != 6)
	return 1;

      /* cmpxchg8b, cmpxchg16b, xrstors.  */
      if (i.tm.base_opcode == 0xfc7
	  && i.tm.opcode_modifier.opcodeprefix == 0
	  && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
	return 1;

      /* fxrstor, ldmxcsr, xrstor (0x0fae group).  */
      if (i.tm.base_opcode == 0xfae
	  && (i.tm.extension_opcode == 1
	      || i.tm.extension_opcode == 2
	      || i.tm.extension_opcode == 5))
	return 1;

      /* lgdt, lidt, lmsw (0x0f01 group).  */
      if (i.tm.base_opcode == 0xf01
	  && (i.tm.extension_opcode == 2
	      || i.tm.extension_opcode == 3
	      || i.tm.extension_opcode == 6))
	return 1;

      /* vmptrld */
      if (i.tm.base_opcode == 0xfc7
	  && i.tm.opcode_modifier.opcodeprefix == 0
	  && i.tm.extension_opcode == 6)
	return 1;

      /* Check for x87 instructions.  All load except the store-only
	 forms skipped below.  */
      if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
	{
	  /* Skip fst, fstp, fstenv, fstcw.  */
	  if (i.tm.base_opcode == 0xd9
	      && (i.tm.extension_opcode == 2
		  || i.tm.extension_opcode == 3
		  || i.tm.extension_opcode == 6
		  || i.tm.extension_opcode == 7))
	    return 0;

	  /* Skip fisttp, fist, fistp, fstp.  */
	  if (i.tm.base_opcode == 0xdb
	      && (i.tm.extension_opcode == 1
		  || i.tm.extension_opcode == 2
		  || i.tm.extension_opcode == 3
		  || i.tm.extension_opcode == 7))
	    return 0;

	  /* Skip fisttp, fst, fstp, fsave, fstsw.  */
	  if (i.tm.base_opcode == 0xdd
	      && (i.tm.extension_opcode == 1
		  || i.tm.extension_opcode == 2
		  || i.tm.extension_opcode == 3
		  || i.tm.extension_opcode == 6
		  || i.tm.extension_opcode == 7))
	    return 0;

	  /* Skip fisttp, fist, fistp, fbstp, fistp.  */
	  if (i.tm.base_opcode == 0xdf
	      && (i.tm.extension_opcode == 1
		  || i.tm.extension_opcode == 2
		  || i.tm.extension_opcode == 3
		  || i.tm.extension_opcode == 6
		  || i.tm.extension_opcode == 7))
	    return 0;

	  return 1;
	}
    }

  dest = i.operands - 1;

  /* Check fake imm8 operand and 3 source operands.  The real
     destination then is the operand before the imm8.  */
  if ((i.tm.opcode_modifier.immext
       || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
      && i.types[dest].bitfield.imm8)
    dest--;

  /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd  */
  if (!any_vex_p
      && (base_opcode == 0x1
	  || base_opcode == 0x9
	  || base_opcode == 0x11
	  || base_opcode == 0x19
	  || base_opcode == 0x21
	  || base_opcode == 0x29
	  || base_opcode == 0x31
	  || base_opcode == 0x39
	  || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
	  || base_opcode == 0xfc1))
    return 1;

  /* Check for load instruction: the destination is a register (or an
     implicit accumulator), so the memory operand must be a source.  */
  return (i.types[dest].bitfield.class != ClassNone
	  || i.types[dest].bitfield.instance == Accum);
}
4534
4535 /* Output lfence, 0xfaee8, after instruction. */
4536
4537 static void
4538 insert_lfence_after (void)
4539 {
4540 if (lfence_after_load && load_insn_p ())
4541 {
4542 /* There are also two REP string instructions that require
4543 special treatment. Specifically, the compare string (CMPS)
4544 and scan string (SCAS) instructions set EFLAGS in a manner
4545 that depends on the data being compared/scanned. When used
4546 with a REP prefix, the number of iterations may therefore
4547 vary depending on this data. If the data is a program secret
4548 chosen by the adversary using an LVI method,
4549 then this data-dependent behavior may leak some aspect
4550 of the secret. */
4551 if (((i.tm.base_opcode | 0x1) == 0xa7
4552 || (i.tm.base_opcode | 0x1) == 0xaf)
4553 && i.prefix[REP_PREFIX])
4554 {
4555 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4556 i.tm.name);
4557 }
4558 char *p = frag_more (3);
4559 *p++ = 0xf;
4560 *p++ = 0xae;
4561 *p = 0xe8;
4562 }
4563 }
4564
/* Output lfence (bytes 0x0f 0xae 0xe8) before the instruction about to
   be emitted, for -mlfence-before-indirect-branch and, together with a
   dummy or/not/shl on the return-address stack slot, for
   -mlfence-before-ret.  */

static void
insert_lfence_before (void)
{
  char *p;

  /* Only legacy-encoded insns are hardened here; VEX/EVEX encodings
     are left untouched.  */
  if (is_any_vex_encoding (&i.tm))
    return;

  /* 0xff /2 is indirect call, 0xff /4 is indirect jmp.  */
  if (i.tm.base_opcode == 0xff
      && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
    {
      /* Insert lfence before indirect branch if needed.  */

      if (lfence_before_indirect_branch == lfence_branch_none)
	return;

      if (i.operands != 1)
	abort ();

      if (i.reg_operands == 1)
	{
	  /* Indirect branch via register.  Don't insert lfence with
	     -mlfence-after-load=yes.  */
	  if (lfence_after_load
	      || lfence_before_indirect_branch == lfence_branch_memory)
	    return;
	}
      else if (i.mem_operands == 1
	       && lfence_before_indirect_branch != lfence_branch_register)
	{
	  as_warn (_("indirect `%s` with memory operand should be avoided"),
		   i.tm.name);
	  return;
	}
      else
	return;

      /* If the previous insn in this section was a prefix (see
	 last_insn bookkeeping in md_assemble), we must not separate
	 it from its insn with an lfence; warn and skip instead.  */
      if (last_insn.kind != last_insn_other
	  && last_insn.seg == now_seg)
	{
	  as_warn_where (last_insn.file, last_insn.line,
			 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
			 last_insn.name, i.tm.name);
	  return;
	}

      p = frag_more (3);
      *p++ = 0xf;
      *p++ = 0xae;
      *p = 0xe8;
      return;
    }

  /* Output or/not/shl and lfence before near ret (0xc2 = ret imm16,
     0xc3 = ret).  */
  if (lfence_before_ret != lfence_before_ret_none
      && (i.tm.base_opcode == 0xc2
	  || i.tm.base_opcode == 0xc3))
    {
      /* Same prefix-insn constraint as for indirect branches above.  */
      if (last_insn.kind != last_insn_other
	  && last_insn.seg == now_seg)
	{
	  as_warn_where (last_insn.file, last_insn.line,
			 _("`%s` skips -mlfence-before-ret on `%s`"),
			 last_insn.name, i.tm.name);
	  return;
	}

      /* Near ret ignores operand size override under CPU64; use a
	 REX.W prefix there instead of 0x66.  */
      char prefix = flag_code == CODE_64BIT
		    ? 0x48
		    : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;

      if (lfence_before_ret == lfence_before_ret_not)
	{
	  /* not: 0xf71424, may add prefix
	     for operand size override or 64-bit code.  Emitted twice
	     so the stack slot's original value is restored.  */
	  p = frag_more ((prefix ? 2 : 0) + 6 + 3);
	  if (prefix)
	    *p++ = prefix;
	  *p++ = 0xf7;
	  *p++ = 0x14;
	  *p++ = 0x24;
	  if (prefix)
	    *p++ = prefix;
	  *p++ = 0xf7;
	  *p++ = 0x14;
	  *p++ = 0x24;
	}
      else
	{
	  p = frag_more ((prefix ? 1 : 0) + 4 + 3);
	  if (prefix)
	    *p++ = prefix;
	  if (lfence_before_ret == lfence_before_ret_or)
	    {
	      /* or: 0x830c2400, may add prefix
		 for operand size override or 64-bit code.  */
	      *p++ = 0x83;
	      *p++ = 0x0c;
	    }
	  else
	    {
	      /* shl: 0xc1242400, may add prefix
		 for operand size override or 64-bit code.  */
	      *p++ = 0xc1;
	      *p++ = 0x24;
	    }

	  /* Shared ModRM-tail/immediate bytes of the or/shl forms.  */
	  *p++ = 0x24;
	  *p++ = 0x0;
	}

      /* The lfence itself.  */
      *p++ = 0xf;
      *p++ = 0xae;
      *p = 0xe8;
    }
}
4684
/* This is the guts of the machine-dependent assembler.  LINE points to a
   machine dependent instruction.  This function is supposed to emit
   the frags/bytes it assembles to.  The pipeline is: reset the global
   insn state I, parse mnemonic and operands, pick a matching template,
   run prefix/operand legality checks, build any VEX/EVEX or REX
   prefix, then emit via output_insn ().  Errors are reported with
   as_bad/as_warn and abort assembly of this insn by returning.  */

void
md_assemble (char *line)
{
  unsigned int j;
  char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
  const insn_template *t;

  /* Initialize globals.  */
  memset (&i, '\0', sizeof (i));
  for (j = 0; j < MAX_OPERANDS; j++)
    i.reloc[j] = NO_RELOC;
  memset (disp_expressions, '\0', sizeof (disp_expressions));
  memset (im_expressions, '\0', sizeof (im_expressions));
  save_stack_p = save_stack;

  /* First parse an instruction mnemonic & call i386_operand for the operands.
     We assume that the scrubber has arranged it so that line[0] is the valid
     start of a (possibly prefixed) mnemonic.  */

  line = parse_insn (line, mnemonic);
  if (line == NULL)
    return;
  mnem_suffix = i.suffix;

  line = parse_operands (line, mnemonic);
  this_operand = -1;
  xfree (i.memop1_string);
  i.memop1_string = NULL;
  if (line == NULL)
    return;

  /* Now we've parsed the mnemonic into a set of templates, and have the
     operands at hand.  */

  /* All Intel opcodes have reversed operands except for "bound", "enter",
     "monitor*", "mwait*", "tpause", and "umwait".  We also don't reverse
     intersegment "jmp" and "call" instructions with 2 immediate operands so
     that the immediate segment precedes the offset, as it does when in AT&T
     mode.  */
  if (intel_syntax
      && i.operands > 1
      && (strcmp (mnemonic, "bound") != 0)
      && (strcmp (mnemonic, "invlpga") != 0)
      && (strncmp (mnemonic, "monitor", 7) != 0)
      && (strncmp (mnemonic, "mwait", 5) != 0)
      && (strcmp (mnemonic, "tpause") != 0)
      && (strcmp (mnemonic, "umwait") != 0)
      && !(operand_type_check (i.types[0], imm)
	   && operand_type_check (i.types[1], imm)))
    swap_operands ();

  /* The order of the immediates should be reversed
     for 2 immediates extrq and insertq instructions */
  if (i.imm_operands == 2
      && (strcmp (mnemonic, "extrq") == 0
	  || strcmp (mnemonic, "insertq") == 0))
      swap_2_operands (0, 1);

  if (i.imm_operands)
    optimize_imm ();

  /* Don't optimize displacement for movabs since it only takes 64bit
     displacement.  */
  if (i.disp_operands
      && i.disp_encoding != disp_encoding_32bit
      && (flag_code != CODE_64BIT
	  || strcmp (mnemonic, "movabs") != 0))
    optimize_disp ();

  /* Next, we find a template that matches the given insn,
     making sure the overlap of the given operands types is consistent
     with the template operand types.  */

  if (!(t = match_template (mnem_suffix)))
    return;

  /* -msse-check: diagnose use of pre-AVX SIMD insns.  */
  if (sse_check != check_none
      && !i.tm.opcode_modifier.noavx
      && !i.tm.cpu_flags.bitfield.cpuavx
      && !i.tm.cpu_flags.bitfield.cpuavx512f
      && (i.tm.cpu_flags.bitfield.cpusse
	  || i.tm.cpu_flags.bitfield.cpusse2
	  || i.tm.cpu_flags.bitfield.cpusse3
	  || i.tm.cpu_flags.bitfield.cpussse3
	  || i.tm.cpu_flags.bitfield.cpusse4_1
	  || i.tm.cpu_flags.bitfield.cpusse4_2
	  || i.tm.cpu_flags.bitfield.cpupclmul
	  || i.tm.cpu_flags.bitfield.cpuaes
	  || i.tm.cpu_flags.bitfield.cpusha
	  || i.tm.cpu_flags.bitfield.cpugfni))
    {
      (sse_check == check_warning
       ? as_warn
       : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
    }

  if (i.tm.opcode_modifier.fwait)
    if (!add_prefix (FWAIT_OPCODE))
      return;

  /* Check if REP prefix is OK.  */
  if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
    {
      as_bad (_("invalid instruction `%s' after `%s'"),
	      i.tm.name, i.rep_prefix);
      return;
    }

  /* Check for lock without a lockable instruction.  Destination operand
     must be memory unless it is xchg (0x86).  */
  if (i.prefix[LOCK_PREFIX]
      && (!i.tm.opcode_modifier.islockable
	  || i.mem_operands == 0
	  || (i.tm.base_opcode != 0x86
	      && !(i.flags[i.operands - 1] & Operand_Mem))))
    {
      as_bad (_("expecting lockable instruction after `lock'"));
      return;
    }

  /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
  if (i.prefix[DATA_PREFIX]
      && (is_any_vex_encoding (&i.tm)
	  || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
	  || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
    {
      as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
      return;
    }

  /* Check if HLE prefix is OK.  */
  if (i.hle_prefix && !check_hle ())
    return;

  /* Check BND prefix.  */
  if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
    as_bad (_("expecting valid branch instruction after `bnd'"));

  /* Check NOTRACK prefix.  */
  if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
    as_bad (_("expecting indirect branch instruction after `notrack'"));

  /* MPX insns have address-size restrictions per mode.  */
  if (i.tm.cpu_flags.bitfield.cpumpx)
    {
      if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
	as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
      else if (flag_code != CODE_16BIT
	       ? i.prefix[ADDR_PREFIX]
	       : i.mem_operands && !i.prefix[ADDR_PREFIX])
	as_bad (_("16-bit address isn't allowed in MPX instructions"));
    }

  /* Insert BND prefix.  */
  if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
    {
      if (!i.prefix[BND_PREFIX])
	add_prefix (BND_PREFIX_OPCODE);
      else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
	{
	  as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
	  i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
	}
    }

  /* Check string instruction segment overrides.  */
  if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
    {
      gas_assert (i.mem_operands);
      if (!check_string ())
	return;
      i.disp_operands = 0;
    }

  if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
    optimize_encoding ();

  if (!process_suffix ())
    return;

  /* Update operand types and check extended states.  */
  for (j = 0; j < i.operands; j++)
    {
      i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
      switch (i.types[j].bitfield.class)
	{
	default:
	  break;
	case RegMMX:
	  i.xstate |= xstate_mmx;
	  break;
	case RegMask:
	  i.xstate |= xstate_mask;
	  break;
	case RegSIMD:
	  if (i.types[j].bitfield.tmmword)
	    i.xstate |= xstate_tmm;
	  else if (i.types[j].bitfield.zmmword)
	    i.xstate |= xstate_zmm;
	  else if (i.types[j].bitfield.ymmword)
	    i.xstate |= xstate_ymm;
	  else if (i.types[j].bitfield.xmmword)
	    i.xstate |= xstate_xmm;
	  break;
	}
    }

  /* Make still unresolved immediate matches conform to size of immediate
     given in i.suffix.  */
  if (!finalize_imm ())
    return;

  if (i.types[0].bitfield.imm1)
    i.imm_operands = 0;	/* kludge for shift insns.  */

  /* We only need to check those implicit registers for instructions
     with 3 operands or less.  */
  if (i.operands <= 3)
    for (j = 0; j < i.operands; j++)
      if (i.types[j].bitfield.instance != InstanceNone
	  && !i.types[j].bitfield.xmmword)
	i.reg_operands--;

  /* For insns with operands there are more diddles to do to the opcode.  */
  if (i.operands)
    {
      if (!process_operands ())
	return;
    }
  else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
    {
      /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
      as_warn (_("translating to `%sp'"), i.tm.name);
    }

  if (is_any_vex_encoding (&i.tm))
    {
      if (!cpu_arch_flags.bitfield.cpui286)
	{
	  as_bad (_("instruction `%s' isn't supported outside of protected mode."),
		  i.tm.name);
	  return;
	}

      /* Check for explicit REX prefix.  */
      if (i.prefix[REX_PREFIX] || i.rex_encoding)
	{
	  as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
	  return;
	}

      if (i.tm.opcode_modifier.vex)
	build_vex_prefix (t);
      else
	build_evex_prefix ();

      /* The individual REX.RXBW bits got consumed.  */
      i.rex &= REX_OPCODE;
    }

  /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
     instructions may define INT_OPCODE as well, so avoid this corner
     case for those instructions that use MODRM.  */
  if (i.tm.base_opcode == INT_OPCODE
      && !i.tm.opcode_modifier.modrm
      && i.op[0].imms->X_add_number == 3)
    {
      i.tm.base_opcode = INT3_OPCODE;
      i.imm_operands = 0;
    }

  if ((i.tm.opcode_modifier.jump == JUMP
       || i.tm.opcode_modifier.jump == JUMP_BYTE
       || i.tm.opcode_modifier.jump == JUMP_DWORD)
      && i.op[0].disps->X_op == O_constant)
    {
      /* Convert "jmp constant" (and "call constant") to a jump (call) to
	 the absolute address given by the constant.  Since ix86 jumps and
	 calls are pc relative, we need to generate a reloc.  */
      i.op[0].disps->X_add_symbol = &abs_symbol;
      i.op[0].disps->X_op = O_symbol;
    }

  /* For 8 bit registers we need an empty rex prefix.  Also if the
     instruction already has a prefix, we need to convert old
     registers to new ones.  */

  if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
       && (i.op[0].regs->reg_flags & RegRex64) != 0)
      || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
	  && (i.op[1].regs->reg_flags & RegRex64) != 0)
      || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
	   || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
	  && i.rex != 0))
    {
      int x;

      i.rex |= REX_OPCODE;
      for (x = 0; x < 2; x++)
	{
	  /* Look for 8 bit operand that uses old registers.  */
	  if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
	      && (i.op[x].regs->reg_flags & RegRex64) == 0)
	    {
	      gas_assert (!(i.op[x].regs->reg_flags & RegRex));
	      /* In case it is "hi" register, give up.  */
	      if (i.op[x].regs->reg_num > 3)
		as_bad (_("can't encode register '%s%s' in an "
			  "instruction requiring REX prefix."),
			register_prefix, i.op[x].regs->reg_name);

	      /* Otherwise it is equivalent to the extended register.
		 Since the encoding doesn't change this is merely
		 cosmetic cleanup for debug output.  */

	      i.op[x].regs = i.op[x].regs + 8;
	    }
	}
    }

  if (i.rex == 0 && i.rex_encoding)
    {
      /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
	 that uses legacy register.  If it is "hi" register, don't add
	 the REX_OPCODE byte.  */
      int x;
      for (x = 0; x < 2; x++)
	if (i.types[x].bitfield.class == Reg
	    && i.types[x].bitfield.byte
	    && (i.op[x].regs->reg_flags & RegRex64) == 0
	    && i.op[x].regs->reg_num > 3)
	  {
	    gas_assert (!(i.op[x].regs->reg_flags & RegRex));
	    i.rex_encoding = FALSE;
	    break;
	  }

      if (i.rex_encoding)
	i.rex = REX_OPCODE;
    }

  if (i.rex != 0)
    add_prefix (REX_OPCODE | i.rex);

  insert_lfence_before ();

  /* We are ready to output the insn.  */
  output_insn ();

  insert_lfence_after ();

  /* Record this insn so insert_lfence_before () can later tell whether
     the immediately preceding insn was a prefix.  */
  last_insn.seg = now_seg;

  if (i.tm.opcode_modifier.isprefix)
    {
      last_insn.kind = last_insn_prefix;
      last_insn.name = i.tm.name;
      last_insn.file = as_where (&last_insn.line);
    }
  else
    last_insn.kind = last_insn_other;
}
5050
/* Parse the (possibly prefixed) instruction mnemonic at LINE into
   MNEMONIC and the global insn state I, leaving CURRENT_TEMPLATES
   pointing at the candidate templates for the mnemonic.  Consumes any
   number of leading (pseudo-)prefixes and an optional ",pt"/",pn"
   branch hint.  Returns a pointer just past what was consumed on
   success, or NULL after reporting an error.  */

static char *
parse_insn (char *line, char *mnemonic)
{
  char *l = line;
  char *token_start = l;
  char *mnem_p;
  int supported;
  const insn_template *t;
  char *dot_p = NULL;

  /* Loop: each iteration scans one token; prefixes are consumed and
     the loop continues, a non-prefix mnemonic terminates it.  */
  while (1)
    {
      mnem_p = mnemonic;
      /* Copy mnemonic characters, remembering the last '.' for the
	 deprecated ".s"/".d8"/".d32" encoding suffixes below.  */
      while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
	{
	  if (*mnem_p == '.')
	    dot_p = mnem_p;
	  mnem_p++;
	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
	    {
	      as_bad (_("no such instruction: `%s'"), token_start);
	      return NULL;
	    }
	  l++;
	}
      if (!is_space_char (*l)
	  && *l != END_OF_INSN
	  && (intel_syntax
	      || (*l != PREFIX_SEPARATOR
		  && *l != ',')))
	{
	  as_bad (_("invalid character %s in mnemonic"),
		  output_invalid (*l));
	  return NULL;
	}
      if (token_start == l)
	{
	  if (!intel_syntax && *l == PREFIX_SEPARATOR)
	    as_bad (_("expecting prefix; got nothing"));
	  else
	    as_bad (_("expecting mnemonic; got nothing"));
	  return NULL;
	}

      /* Look up instruction (or prefix) via hash table.  */
      current_templates = (const templates *) str_hash_find (op_hash, mnemonic);

      /* A prefix only counts as such when something follows it on the
	 line; a trailing prefix name is treated as the insn itself.  */
      if (*l != END_OF_INSN
	  && (!is_space_char (*l) || l[1] != END_OF_INSN)
	  && current_templates
	  && current_templates->start->opcode_modifier.isprefix)
	{
	  if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
	    {
	      as_bad ((flag_code != CODE_64BIT
		       ? _("`%s' is only supported in 64-bit mode")
		       : _("`%s' is not supported in 64-bit mode")),
		      current_templates->start->name);
	      return NULL;
	    }
	  /* If we are in 16-bit mode, do not allow addr16 or data16.
	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
	  if ((current_templates->start->opcode_modifier.size == SIZE16
	       || current_templates->start->opcode_modifier.size == SIZE32)
	      && flag_code != CODE_64BIT
	      && ((current_templates->start->opcode_modifier.size == SIZE32)
		  ^ (flag_code == CODE_16BIT)))
	    {
	      as_bad (_("redundant %s prefix"),
		      current_templates->start->name);
	      return NULL;
	    }
	  /* Zero opcode length marks a pseudo prefix like {disp8},
	     which only changes encoding state, emitting no bytes.  */
	  if (current_templates->start->opcode_length == 0)
	    {
	      /* Handle pseudo prefixes.  */
	      switch (current_templates->start->base_opcode)
		{
		case Prefix_Disp8:
		  /* {disp8} */
		  i.disp_encoding = disp_encoding_8bit;
		  break;
		case Prefix_Disp16:
		  /* {disp16} */
		  i.disp_encoding = disp_encoding_16bit;
		  break;
		case Prefix_Disp32:
		  /* {disp32} */
		  i.disp_encoding = disp_encoding_32bit;
		  break;
		case Prefix_Load:
		  /* {load} */
		  i.dir_encoding = dir_encoding_load;
		  break;
		case Prefix_Store:
		  /* {store} */
		  i.dir_encoding = dir_encoding_store;
		  break;
		case Prefix_VEX:
		  /* {vex} */
		  i.vec_encoding = vex_encoding_vex;
		  break;
		case Prefix_VEX3:
		  /* {vex3} */
		  i.vec_encoding = vex_encoding_vex3;
		  break;
		case Prefix_EVEX:
		  /* {evex} */
		  i.vec_encoding = vex_encoding_evex;
		  break;
		case Prefix_REX:
		  /* {rex} */
		  i.rex_encoding = TRUE;
		  break;
		case Prefix_NoOptimize:
		  /* {nooptimize} */
		  i.no_optimize = TRUE;
		  break;
		default:
		  abort ();
		}
	    }
	  else
	    {
	      /* Add prefix, checking for repeated prefixes.  */
	      switch (add_prefix (current_templates->start->base_opcode))
		{
		case PREFIX_EXIST:
		  return NULL;
		case PREFIX_DS:
		  if (current_templates->start->cpu_flags.bitfield.cpuibt)
		    i.notrack_prefix = current_templates->start->name;
		  break;
		case PREFIX_REP:
		  if (current_templates->start->cpu_flags.bitfield.cpuhle)
		    i.hle_prefix = current_templates->start->name;
		  else if (current_templates->start->cpu_flags.bitfield.cpumpx)
		    i.bnd_prefix = current_templates->start->name;
		  else
		    i.rep_prefix = current_templates->start->name;
		  break;
		default:
		  break;
		}
	    }
	  /* Skip past PREFIX_SEPARATOR and reset token_start.  */
	  token_start = ++l;
	}
      else
	break;
    }

  if (!current_templates)
    {
      /* Deprecated functionality (new code should use pseudo-prefixes instead):
	 Check if we should swap operand or force 32bit displacement in
	 encoding.  */
      if (mnem_p - 2 == dot_p && dot_p[1] == 's')
	i.dir_encoding = dir_encoding_swap;
      else if (mnem_p - 3 == dot_p
	       && dot_p[1] == 'd'
	       && dot_p[2] == '8')
	i.disp_encoding = disp_encoding_8bit;
      else if (mnem_p - 4 == dot_p
	       && dot_p[1] == 'd'
	       && dot_p[2] == '3'
	       && dot_p[3] == '2')
	i.disp_encoding = disp_encoding_32bit;
      else
	goto check_suffix;
      mnem_p = dot_p;
      *dot_p = '\0';
      current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
    }

  if (!current_templates)
    {
    check_suffix:
      if (mnem_p > mnemonic)
	{
	  /* See if we can get a match by trimming off a suffix.  */
	  switch (mnem_p[-1])
	    {
	    case WORD_MNEM_SUFFIX:
	      if (intel_syntax && (intel_float_operand (mnemonic) & 2))
		i.suffix = SHORT_MNEM_SUFFIX;
	      else
		/* Fall through.  */
	    case BYTE_MNEM_SUFFIX:
	    case QWORD_MNEM_SUFFIX:
	      i.suffix = mnem_p[-1];
	      mnem_p[-1] = '\0';
	      current_templates
		= (const templates *) str_hash_find (op_hash, mnemonic);
	      break;
	    case SHORT_MNEM_SUFFIX:
	    case LONG_MNEM_SUFFIX:
	      if (!intel_syntax)
		{
		  i.suffix = mnem_p[-1];
		  mnem_p[-1] = '\0';
		  current_templates
		    = (const templates *) str_hash_find (op_hash, mnemonic);
		}
	      break;

	      /* Intel Syntax.  */
	    case 'd':
	      if (intel_syntax)
		{
		  if (intel_float_operand (mnemonic) == 1)
		    i.suffix = SHORT_MNEM_SUFFIX;
		  else
		    i.suffix = LONG_MNEM_SUFFIX;
		  mnem_p[-1] = '\0';
		  current_templates
		    = (const templates *) str_hash_find (op_hash, mnemonic);
		}
	      break;
	    }
	}

      if (!current_templates)
	{
	  as_bad (_("no such instruction: `%s'"), token_start);
	  return NULL;
	}
    }

  if (current_templates->start->opcode_modifier.jump == JUMP
      || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
    {
      /* Check for a branch hint.  We allow ",pt" and ",pn" for
	 predict taken and predict not taken respectively.
	 I'm not sure that branch hints actually do anything on loop
	 and jcxz insns (JumpByte) for current Pentium4 chips.  They
	 may work in the future and it doesn't hurt to accept them
	 now.  */
      if (l[0] == ',' && l[1] == 'p')
	{
	  if (l[2] == 't')
	    {
	      if (!add_prefix (DS_PREFIX_OPCODE))
		return NULL;
	      l += 3;
	    }
	  else if (l[2] == 'n')
	    {
	      if (!add_prefix (CS_PREFIX_OPCODE))
		return NULL;
	      l += 3;
	    }
	}
    }
  /* Any other comma loses.  */
  if (*l == ',')
    {
      as_bad (_("invalid character %s in mnemonic"),
	      output_invalid (*l));
      return NULL;
    }

  /* Check if instruction is supported on specified architecture.  */
  supported = 0;
  for (t = current_templates->start; t < current_templates->end; ++t)
    {
      supported |= cpu_flags_match (t);
      if (supported == CPU_FLAGS_PERFECT_MATCH)
	{
	  if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
	    as_warn (_("use .code16 to ensure correct addressing mode"));

	  return l;
	}
    }

  /* No template matched the active CPU flags; pick the most helpful
     diagnostic.  */
  if (!(supported & CPU_FLAGS_64BIT_MATCH))
    as_bad (flag_code == CODE_64BIT
	    ? _("`%s' is not supported in 64-bit mode")
	    : _("`%s' is only supported in 64-bit mode"),
	    current_templates->start->name);
  else
    as_bad (_("`%s' is not supported on `%s%s'"),
	    current_templates->start->name,
	    cpu_arch_name ? cpu_arch_name : default_arch,
	    cpu_sub_arch_name ? cpu_sub_arch_name : "");

  return NULL;
}
5339
5340 static char *
5341 parse_operands (char *l, const char *mnemonic)
5342 {
5343 char *token_start;
5344
5345 /* 1 if operand is pending after ','. */
5346 unsigned int expecting_operand = 0;
5347
5348 /* Non-zero if operand parens not balanced. */
5349 unsigned int paren_not_balanced;
5350
5351 while (*l != END_OF_INSN)
5352 {
5353 /* Skip optional white space before operand. */
5354 if (is_space_char (*l))
5355 ++l;
5356 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5357 {
5358 as_bad (_("invalid character %s before operand %d"),
5359 output_invalid (*l),
5360 i.operands + 1);
5361 return NULL;
5362 }
5363 token_start = l; /* After white space. */
5364 paren_not_balanced = 0;
5365 while (paren_not_balanced || *l != ',')
5366 {
5367 if (*l == END_OF_INSN)
5368 {
5369 if (paren_not_balanced)
5370 {
5371 if (!intel_syntax)
5372 as_bad (_("unbalanced parenthesis in operand %d."),
5373 i.operands + 1);
5374 else
5375 as_bad (_("unbalanced brackets in operand %d."),
5376 i.operands + 1);
5377 return NULL;
5378 }
5379 else
5380 break; /* we are done */
5381 }
5382 else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
5383 {
5384 as_bad (_("invalid character %s in operand %d"),
5385 output_invalid (*l),
5386 i.operands + 1);
5387 return NULL;
5388 }
5389 if (!intel_syntax)
5390 {
5391 if (*l == '(')
5392 ++paren_not_balanced;
5393 if (*l == ')')
5394 --paren_not_balanced;
5395 }
5396 else
5397 {
5398 if (*l == '[')
5399 ++paren_not_balanced;
5400 if (*l == ']')
5401 --paren_not_balanced;
5402 }
5403 l++;
5404 }
5405 if (l != token_start)
5406 { /* Yes, we've read in another operand. */
5407 unsigned int operand_ok;
5408 this_operand = i.operands++;
5409 if (i.operands > MAX_OPERANDS)
5410 {
5411 as_bad (_("spurious operands; (%d operands/instruction max)"),
5412 MAX_OPERANDS);
5413 return NULL;
5414 }
5415 i.types[this_operand].bitfield.unspecified = 1;
5416 /* Now parse operand adding info to 'i' as we go along. */
5417 END_STRING_AND_SAVE (l);
5418
5419 if (i.mem_operands > 1)
5420 {
5421 as_bad (_("too many memory references for `%s'"),
5422 mnemonic);
5423 return 0;
5424 }
5425
5426 if (intel_syntax)
5427 operand_ok =
5428 i386_intel_operand (token_start,
5429 intel_float_operand (mnemonic));
5430 else
5431 operand_ok = i386_att_operand (token_start);
5432
5433 RESTORE_END_STRING (l);
5434 if (!operand_ok)
5435 return NULL;
5436 }
5437 else
5438 {
5439 if (expecting_operand)
5440 {
5441 expecting_operand_after_comma:
5442 as_bad (_("expecting operand after ','; got nothing"));
5443 return NULL;
5444 }
5445 if (*l == ',')
5446 {
5447 as_bad (_("expecting operand before ','; got nothing"));
5448 return NULL;
5449 }
5450 }
5451
5452 /* Now *l must be either ',' or END_OF_INSN. */
5453 if (*l == ',')
5454 {
5455 if (*++l == END_OF_INSN)
5456 {
5457 /* Just skip it, if it's \n complain. */
5458 goto expecting_operand_after_comma;
5459 }
5460 expecting_operand = 1;
5461 }
5462 }
5463 return l;
5464 }
5465
5466 static void
5467 swap_2_operands (int xchg1, int xchg2)
5468 {
5469 union i386_op temp_op;
5470 i386_operand_type temp_type;
5471 unsigned int temp_flags;
5472 enum bfd_reloc_code_real temp_reloc;
5473
5474 temp_type = i.types[xchg2];
5475 i.types[xchg2] = i.types[xchg1];
5476 i.types[xchg1] = temp_type;
5477
5478 temp_flags = i.flags[xchg2];
5479 i.flags[xchg2] = i.flags[xchg1];
5480 i.flags[xchg1] = temp_flags;
5481
5482 temp_op = i.op[xchg2];
5483 i.op[xchg2] = i.op[xchg1];
5484 i.op[xchg1] = temp_op;
5485
5486 temp_reloc = i.reloc[xchg2];
5487 i.reloc[xchg2] = i.reloc[xchg1];
5488 i.reloc[xchg1] = temp_reloc;
5489
5490 if (i.mask)
5491 {
5492 if (i.mask->operand == xchg1)
5493 i.mask->operand = xchg2;
5494 else if (i.mask->operand == xchg2)
5495 i.mask->operand = xchg1;
5496 }
5497 if (i.broadcast)
5498 {
5499 if (i.broadcast->operand == xchg1)
5500 i.broadcast->operand = xchg2;
5501 else if (i.broadcast->operand == xchg2)
5502 i.broadcast->operand = xchg1;
5503 }
5504 if (i.rounding)
5505 {
5506 if (i.rounding->operand == xchg1)
5507 i.rounding->operand = xchg2;
5508 else if (i.rounding->operand == xchg2)
5509 i.rounding->operand = xchg1;
5510 }
5511 }
5512
5513 static void
5514 swap_operands (void)
5515 {
5516 switch (i.operands)
5517 {
5518 case 5:
5519 case 4:
5520 swap_2_operands (1, i.operands - 2);
5521 /* Fall through. */
5522 case 3:
5523 case 2:
5524 swap_2_operands (0, i.operands - 1);
5525 break;
5526 default:
5527 abort ();
5528 }
5529
5530 if (i.mem_operands == 2)
5531 {
5532 const seg_entry *temp_seg;
5533 temp_seg = i.seg[0];
5534 i.seg[0] = i.seg[1];
5535 i.seg[1] = temp_seg;
5536 }
5537 }
5538
/* Try to ensure constant immediates are represented in the smallest
   opcode possible.  Derives an operand-size hint (suffix) from the
   insn suffix, the register operands, or the code/prefix mode, then
   for each immediate operand widens the set of Imm<N> types it may
   match and sign-narrows constants in place.  */
static void
optimize_imm (void)
{
  char guess_suffix = 0;	/* Operand size hint; 0 if unknown.  */
  int op;

  if (i.suffix)
    guess_suffix = i.suffix;
  else if (i.reg_operands)
    {
      /* Figure out a suffix from the last register operand specified.
	 We can't do this properly yet, i.e. excluding special register
	 instances, but the following works for instructions with
	 immediates.  In any case, we can't set i.suffix yet.  */
      for (op = i.operands; --op >= 0;)
	if (i.types[op].bitfield.class != Reg)
	  continue;
	else if (i.types[op].bitfield.byte)
	  {
	    guess_suffix = BYTE_MNEM_SUFFIX;
	    break;
	  }
	else if (i.types[op].bitfield.word)
	  {
	    guess_suffix = WORD_MNEM_SUFFIX;
	    break;
	  }
	else if (i.types[op].bitfield.dword)
	  {
	    guess_suffix = LONG_MNEM_SUFFIX;
	    break;
	  }
	else if (i.types[op].bitfield.qword)
	  {
	    guess_suffix = QWORD_MNEM_SUFFIX;
	    break;
	  }
    }
  else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
    /* Operand size is 16 bits in 16-bit mode without a data-size
       prefix, and in other modes with one (the XOR covers both).  */
    guess_suffix = WORD_MNEM_SUFFIX;

  for (op = i.operands; --op >= 0;)
    if (operand_type_check (i.types[op], imm))
      {
	switch (i.op[op].imms->X_op)
	  {
	  case O_constant:
	    /* If a suffix is given, this operand may be shortened.  */
	    switch (guess_suffix)
	      {
	      case LONG_MNEM_SUFFIX:
		i.types[op].bitfield.imm32 = 1;
		i.types[op].bitfield.imm64 = 1;
		break;
	      case WORD_MNEM_SUFFIX:
		i.types[op].bitfield.imm16 = 1;
		i.types[op].bitfield.imm32 = 1;
		i.types[op].bitfield.imm32s = 1;
		i.types[op].bitfield.imm64 = 1;
		break;
	      case BYTE_MNEM_SUFFIX:
		i.types[op].bitfield.imm8 = 1;
		i.types[op].bitfield.imm8s = 1;
		i.types[op].bitfield.imm16 = 1;
		i.types[op].bitfield.imm32 = 1;
		i.types[op].bitfield.imm32s = 1;
		i.types[op].bitfield.imm64 = 1;
		break;
	      }

	    /* If this operand is at most 16 bits, convert it
	       to a signed 16 bit number before trying to see
	       whether it will fit in an even smaller size.
	       This allows a 16-bit operand such as $0xffe0 to
	       be recognised as within Imm8S range.  */
	    if ((i.types[op].bitfield.imm16)
		&& (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
	      {
		/* XOR/subtract is the branch-free sign extension of
		   the low 16 bits.  */
		i.op[op].imms->X_add_number =
		  (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
	      }
#ifdef BFD64
	    /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
	    if ((i.types[op].bitfield.imm32)
		&& ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
		    == 0))
	      {
		/* Sign-extend the low 32 bits the same way.  */
		i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
						^ ((offsetT) 1 << 31))
					       - ((offsetT) 1 << 31));
	      }
#endif
	    /* Widen the matchable types by whatever the (possibly
	       narrowed) constant now fits in.  */
	    i.types[op]
	      = operand_type_or (i.types[op],
				 smallest_imm_type (i.op[op].imms->X_add_number));

	    /* We must avoid matching of Imm32 templates when 64bit
	       only immediate is available.  */
	    if (guess_suffix == QWORD_MNEM_SUFFIX)
	      i.types[op].bitfield.imm32 = 0;
	    break;

	  case O_absent:
	  case O_register:
	    abort ();

	    /* Symbols and expressions.  */
	  default:
	    /* Convert symbolic operand to proper sizes for matching, but don't
	       prevent matching a set of insns that only supports sizes other
	       than those matching the insn suffix.  */
	    {
	      i386_operand_type mask, allowed;
	      const insn_template *t;

	      operand_type_set (&mask, 0);
	      operand_type_set (&allowed, 0);

	      /* Collect the union of Imm<N> types any candidate
		 template accepts for this operand.  */
	      for (t = current_templates->start;
		   t < current_templates->end;
		   ++t)
		{
		  allowed = operand_type_or (allowed, t->operand_types[op]);
		  allowed = operand_type_and (allowed, anyimm);
		}
	      switch (guess_suffix)
		{
		case QWORD_MNEM_SUFFIX:
		  mask.bitfield.imm64 = 1;
		  mask.bitfield.imm32s = 1;
		  break;
		case LONG_MNEM_SUFFIX:
		  mask.bitfield.imm32 = 1;
		  break;
		case WORD_MNEM_SUFFIX:
		  mask.bitfield.imm16 = 1;
		  break;
		case BYTE_MNEM_SUFFIX:
		  mask.bitfield.imm8 = 1;
		  break;
		default:
		  break;
		}
	      /* Only restrict the operand's types when at least one
		 template would still match afterwards.  */
	      allowed = operand_type_and (mask, allowed);
	      if (!operand_type_all_zero (&allowed))
		i.types[op] = operand_type_and (i.types[op], mask);
	    }
	    break;
	  }
      }
}
5692
/* Try to use the smallest displacement type too.  For each
   displacement operand: sign-narrow small constants in place and
   widen/narrow the matchable Disp<N> set, drop a zero displacement
   entirely when a base/index makes it redundant, and for TLS
   descriptor calls emit the marker relocation up front.  */
static void
optimize_disp (void)
{
  int op;

  for (op = i.operands; --op >= 0;)
    if (operand_type_check (i.types[op], disp))
      {
	if (i.op[op].disps->X_op == O_constant)
	  {
	    offsetT op_disp = i.op[op].disps->X_add_number;

	    if (i.types[op].bitfield.disp16
		&& (op_disp & ~(offsetT) 0xffff) == 0)
	      {
		/* If this operand is at most 16 bits, convert
		   to a signed 16 bit number and don't use 64bit
		   displacement.  */
		op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
		i.types[op].bitfield.disp64 = 0;
	      }
#ifdef BFD64
	    /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
	    if (i.types[op].bitfield.disp32
		&& (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
	      {
		/* If this operand is at most 32 bits, convert
		   to a signed 32 bit number and don't use 64bit
		   displacement.  */
		op_disp &= (((offsetT) 2 << 31) - 1);
		op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
		i.types[op].bitfield.disp64 = 0;
	      }
#endif
	    if (!op_disp && i.types[op].bitfield.baseindex)
	      {
		/* A zero displacement with a base/index register can
		   be dropped altogether.  */
		i.types[op].bitfield.disp8 = 0;
		i.types[op].bitfield.disp16 = 0;
		i.types[op].bitfield.disp32 = 0;
		i.types[op].bitfield.disp32s = 0;
		i.types[op].bitfield.disp64 = 0;
		i.op[op].disps = 0;
		i.disp_operands--;
	      }
	    else if (flag_code == CODE_64BIT)
	      {
		/* In 64-bit mode prefer the sign-extended 32-bit form;
		   with an address-size prefix a zero-extended 32-bit
		   displacement is also usable.  */
		if (fits_in_signed_long (op_disp))
		  {
		    i.types[op].bitfield.disp64 = 0;
		    i.types[op].bitfield.disp32s = 1;
		  }
		if (i.prefix[ADDR_PREFIX]
		    && fits_in_unsigned_long (op_disp))
		  i.types[op].bitfield.disp32 = 1;
	      }
	    if ((i.types[op].bitfield.disp32
		 || i.types[op].bitfield.disp32s
		 || i.types[op].bitfield.disp16)
		&& fits_in_disp8 (op_disp))
	      i.types[op].bitfield.disp8 = 1;
	  }
	else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
		 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
	  {
	    /* TLS descriptor calls only carry a marker relocation;
	       emit it here and strip all displacement types.  */
	    fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
			 i.op[op].disps, 0, i.reloc[op]);
	    i.types[op].bitfield.disp8 = 0;
	    i.types[op].bitfield.disp16 = 0;
	    i.types[op].bitfield.disp32 = 0;
	    i.types[op].bitfield.disp32s = 0;
	    i.types[op].bitfield.disp64 = 0;
	  }
	else
	  /* We only support 64bit displacement on constants.  */
	  i.types[op].bitfield.disp64 = 0;
      }
}
5771
5772 /* Return 1 if there is a match in broadcast bytes between operand
5773 GIVEN and instruction template T. */
5774
5775 static INLINE int
5776 match_broadcast_size (const insn_template *t, unsigned int given)
5777 {
5778 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5779 && i.types[given].bitfield.byte)
5780 || (t->opcode_modifier.broadcast == WORD_BROADCAST
5781 && i.types[given].bitfield.word)
5782 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5783 && i.types[given].bitfield.dword)
5784 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5785 && i.types[given].bitfield.qword));
5786 }
5787
/* Check if operands are valid for the instruction.  Validates vector
   specific constraints against template T: implicit AVX512VL needs,
   (V)SIB index requirements and register distinctness, AMX TMM
   register distinctness, broadcast applicability, masking form,
   rounding/SAE placement, XOP Imm4, and compressed Disp8 scaling.
   Returns 0 if everything is fine; otherwise returns 1 with i.error
   describing the failure.  */

static int
check_VecOperands (const insn_template *t)
{
  unsigned int op;
  i386_cpu_flags cpu;

  /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
     any one operand are implicitly requiring AVX512VL support if the actual
     operand size is YMMword or XMMword.  Since this function runs after
     template matching, there's no need to check for YMMword/XMMword in
     the template.  */
  cpu = cpu_flags_and (t->cpu_flags, avx512);
  if (!cpu_flags_all_zero (&cpu)
      && !t->cpu_flags.bitfield.cpuavx512vl
      && !cpu_arch_flags.bitfield.cpuavx512vl)
    {
      for (op = 0; op < t->operands; ++op)
	{
	  if (t->operand_types[op].bitfield.zmmword
	      && (i.types[op].bitfield.ymmword
		  || i.types[op].bitfield.xmmword))
	    {
	      i.error = unsupported;
	      return 1;
	    }
	}
    }

  /* Without VSIB byte, we can't have a vector register for index.  */
  if (!t->opcode_modifier.sib
      && i.index_reg
      && (i.index_reg->reg_type.bitfield.xmmword
	  || i.index_reg->reg_type.bitfield.ymmword
	  || i.index_reg->reg_type.bitfield.zmmword))
    {
      i.error = unsupported_vector_index_register;
      return 1;
    }

  /* Check if default mask is allowed.  */
  if (t->opcode_modifier.nodefmask
      && (!i.mask || i.mask->mask->reg_num == 0))
    {
      i.error = no_default_mask;
      return 1;
    }

  /* For VSIB byte, we need a vector register for index, and all vector
     registers must be distinct.  */
  if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
    {
      if (!i.index_reg
	  || !((t->opcode_modifier.sib == VECSIB128
		&& i.index_reg->reg_type.bitfield.xmmword)
	       || (t->opcode_modifier.sib == VECSIB256
		   && i.index_reg->reg_type.bitfield.ymmword)
	       || (t->opcode_modifier.sib == VECSIB512
		   && i.index_reg->reg_type.bitfield.zmmword)))
	{
	  i.error = invalid_vsib_address;
	  return 1;
	}

      gas_assert (i.reg_operands == 2 || i.mask);
      if (i.reg_operands == 2 && !i.mask)
	{
	  /* AVX2 gathers: mask, index and destination registers
	     should all be distinct.  */
	  gas_assert (i.types[0].bitfield.class == RegSIMD);
	  gas_assert (i.types[0].bitfield.xmmword
		      || i.types[0].bitfield.ymmword);
	  gas_assert (i.types[2].bitfield.class == RegSIMD);
	  gas_assert (i.types[2].bitfield.xmmword
		      || i.types[2].bitfield.ymmword);
	  if (operand_check == check_none)
	    return 0;
	  if (register_number (i.op[0].regs)
	      != register_number (i.index_reg)
	      && register_number (i.op[2].regs)
	      != register_number (i.index_reg)
	      && register_number (i.op[0].regs)
	      != register_number (i.op[2].regs))
	    return 0;
	  if (operand_check == check_error)
	    {
	      i.error = invalid_vector_register_set;
	      return 1;
	    }
	  as_warn (_("mask, index, and destination registers should be distinct"));
	}
      else if (i.reg_operands == 1 && i.mask)
	{
	  /* AVX512 gathers/scatters: index and destination registers
	     should be distinct.  */
	  if (i.types[1].bitfield.class == RegSIMD
	      && (i.types[1].bitfield.xmmword
		  || i.types[1].bitfield.ymmword
		  || i.types[1].bitfield.zmmword)
	      && (register_number (i.op[1].regs)
		  == register_number (i.index_reg)))
	    {
	      if (operand_check == check_error)
		{
		  i.error = invalid_vector_register_set;
		  return 1;
		}
	      if (operand_check != check_none)
		as_warn (_("index and destination registers should be distinct"));
	    }
	}
    }

  /* For AMX instructions with three tmmword operands, all tmmword operands
     must be distinct.  */
  if (t->operand_types[0].bitfield.tmmword
      && i.reg_operands == 3)
    {
      if (register_number (i.op[0].regs)
	  == register_number (i.op[1].regs)
	  || register_number (i.op[0].regs)
	  == register_number (i.op[2].regs)
	  || register_number (i.op[1].regs)
	  == register_number (i.op[2].regs))
	{
	  i.error = invalid_tmm_register_set;
	  return 1;
	}
    }

  /* Check if broadcast is supported by the instruction and is applied
     to the memory operand.  */
  if (i.broadcast)
    {
      i386_operand_type type, overlap;

      /* Check if specified broadcast is supported in this instruction,
	 and its broadcast bytes match the memory operand.  */
      op = i.broadcast->operand;
      if (!t->opcode_modifier.broadcast
	  || !(i.flags[op] & Operand_Mem)
	  || (!i.types[op].bitfield.unspecified
	      && !match_broadcast_size (t, op)))
	{
	bad_broadcast:
	  i.error = unsupported_broadcast;
	  return 1;
	}

      /* Total broadcast size = element size (from the template's
	 broadcast kind) times the 1toN multiplier.  */
      i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
			    * i.broadcast->type);
      operand_type_set (&type, 0);
      switch (i.broadcast->bytes)
	{
	case 2:
	  type.bitfield.word = 1;
	  break;
	case 4:
	  type.bitfield.dword = 1;
	  break;
	case 8:
	  type.bitfield.qword = 1;
	  break;
	case 16:
	  type.bitfield.xmmword = 1;
	  break;
	case 32:
	  type.bitfield.ymmword = 1;
	  break;
	case 64:
	  type.bitfield.zmmword = 1;
	  break;
	default:
	  goto bad_broadcast;
	}

      overlap = operand_type_and (type, t->operand_types[op]);
      /* For a multi-element-size SIMD register template, only the
	 vector-width bits are meaningful for the overlap check.  */
      if (t->operand_types[op].bitfield.class == RegSIMD
	  && t->operand_types[op].bitfield.byte
	     + t->operand_types[op].bitfield.word
	     + t->operand_types[op].bitfield.dword
	     + t->operand_types[op].bitfield.qword > 1)
	{
	  overlap.bitfield.xmmword = 0;
	  overlap.bitfield.ymmword = 0;
	  overlap.bitfield.zmmword = 0;
	}
      if (operand_type_all_zero (&overlap))
	goto bad_broadcast;

      if (t->opcode_modifier.checkregsize)
	{
	  unsigned int j;

	  /* The broadcast operand acts as a memory operand of the
	     total broadcast size when matching register sizes.  */
	  type.bitfield.baseindex = 1;
	  for (j = 0; j < i.operands; ++j)
	    {
	      if (j != op
		  && !operand_type_register_match(i.types[j],
						  t->operand_types[j],
						  type,
						  t->operand_types[op]))
		goto bad_broadcast;
	    }
	}
    }
  /* If broadcast is supported in this instruction, we need to check if
     operand of one-element size isn't specified without broadcast.  */
  else if (t->opcode_modifier.broadcast && i.mem_operands)
    {
      /* Find memory operand.  */
      for (op = 0; op < i.operands; op++)
	if (i.flags[op] & Operand_Mem)
	  break;
      gas_assert (op < i.operands);
      /* Check size of the memory operand.  */
      if (match_broadcast_size (t, op))
	{
	  i.error = broadcast_needed;
	  return 1;
	}
    }
  else
    op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */

  /* Check if requested masking is supported.  */
  if (i.mask)
    {
      switch (t->opcode_modifier.masking)
	{
	case BOTH_MASKING:
	  break;
	case MERGING_MASKING:
	  if (i.mask->zeroing)
	    {
	      /* NB: the "case 0:" label sits inside this if so that a
		 template without masking support (masking == 0) jumps
		 straight to the error, sharing this code.  */
	    case 0:
	      i.error = unsupported_masking;
	      return 1;
	    }
	  break;
	case DYNAMIC_MASKING:
	  /* Memory destinations allow only merging masking.  */
	  if (i.mask->zeroing && i.mem_operands)
	    {
	      /* Find memory operand.  */
	      for (op = 0; op < i.operands; op++)
		if (i.flags[op] & Operand_Mem)
		  break;
	      gas_assert (op < i.operands);
	      if (op == i.operands - 1)
		{
		  i.error = unsupported_masking;
		  return 1;
		}
	    }
	  break;
	default:
	  abort ();
	}
    }

  /* Check if masking is applied to dest operand.  */
  if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
    {
      i.error = mask_not_on_destination;
      return 1;
    }

  /* Check RC/SAE.  */
  if (i.rounding)
    {
      if (!t->opcode_modifier.sae
	  || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
	{
	  i.error = unsupported_rc_sae;
	  return 1;
	}
      /* If the instruction has several immediate operands and one of
	 them is rounding, the rounding operand should be the last
	 immediate operand.  */
      if (i.imm_operands > 1
	  && i.rounding->operand != (int) (i.imm_operands - 1))
	{
	  i.error = rc_sae_operand_not_last_imm;
	  return 1;
	}
    }

  /* Check the special Imm4 cases; must be the first operand.  */
  if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
    {
      if (i.op[0].imms->X_op != O_constant
	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
	{
	  i.error = bad_imm4;
	  return 1;
	}

      /* Turn off Imm<N> so that update_imm won't complain.  */
      operand_type_set (&i.types[0], 0);
    }

  /* Check vector Disp8 operand.  */
  if (t->opcode_modifier.disp8memshift
      && i.disp_encoding != disp_encoding_32bit)
    {
      if (i.broadcast)
	i.memshift = t->opcode_modifier.broadcast - 1;
      else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
	i.memshift = t->opcode_modifier.disp8memshift;
      else
	{
	  /* Shift depends on vector length (VL): derive it from the
	     memory operand's template/actual size, or failing that
	     from a SIMD register operand.  */
	  const i386_operand_type *type = NULL;

	  i.memshift = 0;
	  for (op = 0; op < i.operands; op++)
	    if (i.flags[op] & Operand_Mem)
	      {
		if (t->opcode_modifier.evex == EVEXLIG)
		  i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
		else if (t->operand_types[op].bitfield.xmmword
			 + t->operand_types[op].bitfield.ymmword
			 + t->operand_types[op].bitfield.zmmword <= 1)
		  type = &t->operand_types[op];
		else if (!i.types[op].bitfield.unspecified)
		  type = &i.types[op];
	      }
	    else if (i.types[op].bitfield.class == RegSIMD
		     && t->opcode_modifier.evex != EVEXLIG)
	      {
		if (i.types[op].bitfield.zmmword)
		  i.memshift = 6;
		else if (i.types[op].bitfield.ymmword && i.memshift < 5)
		  i.memshift = 5;
		else if (i.types[op].bitfield.xmmword && i.memshift < 4)
		  i.memshift = 4;
	      }

	  if (type)
	    {
	      if (type->bitfield.zmmword)
		i.memshift = 6;
	      else if (type->bitfield.ymmword)
		i.memshift = 5;
	      else if (type->bitfield.xmmword)
		i.memshift = 4;
	    }

	  /* For the check in fits_in_disp8().  */
	  if (i.memshift == 0)
	    i.memshift = -1;
	}

      /* Flag each constant displacement that fits a scaled Disp8;
	 note the early return keeps the computed i.memshift for use
	 when the displacement is actually encoded.  */
      for (op = 0; op < i.operands; op++)
	if (operand_type_check (i.types[op], disp)
	    && i.op[op].disps->X_op == O_constant)
	  {
	    if (fits_in_disp8 (i.op[op].disps->X_add_number))
	      {
		i.types[op].bitfield.disp8 = 1;
		return 0;
	      }
	    i.types[op].bitfield.disp8 = 0;
	  }
    }

  i.memshift = 0;

  return 0;
}
6155
6156 /* Check if encoding requirements are met by the instruction. */
6157
6158 static int
6159 VEX_check_encoding (const insn_template *t)
6160 {
6161 if (i.vec_encoding == vex_encoding_error)
6162 {
6163 i.error = unsupported;
6164 return 1;
6165 }
6166
6167 if (i.vec_encoding == vex_encoding_evex)
6168 {
6169 /* This instruction must be encoded with EVEX prefix. */
6170 if (!is_evex_encoding (t))
6171 {
6172 i.error = unsupported;
6173 return 1;
6174 }
6175 return 0;
6176 }
6177
6178 if (!t->opcode_modifier.vex)
6179 {
6180 /* This instruction template doesn't have VEX prefix. */
6181 if (i.vec_encoding != vex_encoding_default)
6182 {
6183 i.error = unsupported;
6184 return 1;
6185 }
6186 return 0;
6187 }
6188
6189 return 0;
6190 }
6191
6192 static const insn_template *
6193 match_template (char mnem_suffix)
6194 {
6195 /* Points to template once we've found it. */
6196 const insn_template *t;
6197 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6198 i386_operand_type overlap4;
6199 unsigned int found_reverse_match;
6200 i386_opcode_modifier suffix_check;
6201 i386_operand_type operand_types [MAX_OPERANDS];
6202 int addr_prefix_disp;
6203 unsigned int j, size_match, check_register;
6204 enum i386_error specific_error = 0;
6205
6206 #if MAX_OPERANDS != 5
6207 # error "MAX_OPERANDS must be 5."
6208 #endif
6209
6210 found_reverse_match = 0;
6211 addr_prefix_disp = -1;
6212
6213 /* Prepare for mnemonic suffix check. */
6214 memset (&suffix_check, 0, sizeof (suffix_check));
6215 switch (mnem_suffix)
6216 {
6217 case BYTE_MNEM_SUFFIX:
6218 suffix_check.no_bsuf = 1;
6219 break;
6220 case WORD_MNEM_SUFFIX:
6221 suffix_check.no_wsuf = 1;
6222 break;
6223 case SHORT_MNEM_SUFFIX:
6224 suffix_check.no_ssuf = 1;
6225 break;
6226 case LONG_MNEM_SUFFIX:
6227 suffix_check.no_lsuf = 1;
6228 break;
6229 case QWORD_MNEM_SUFFIX:
6230 suffix_check.no_qsuf = 1;
6231 break;
6232 default:
6233 /* NB: In Intel syntax, normally we can check for memory operand
6234 size when there is no mnemonic suffix. But jmp and call have
6235 2 different encodings with Dword memory operand size, one with
6236 No_ldSuf and the other without. i.suffix is set to
6237 LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf. */
6238 if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6239 suffix_check.no_ldsuf = 1;
6240 }
6241
6242 /* Must have right number of operands. */
6243 i.error = number_of_operands_mismatch;
6244
6245 for (t = current_templates->start; t < current_templates->end; t++)
6246 {
6247 addr_prefix_disp = -1;
6248 found_reverse_match = 0;
6249
6250 if (i.operands != t->operands)
6251 continue;
6252
6253 /* Check processor support. */
6254 i.error = unsupported;
6255 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6256 continue;
6257
6258 /* Check AT&T mnemonic. */
6259 i.error = unsupported_with_intel_mnemonic;
6260 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6261 continue;
6262
6263 /* Check AT&T/Intel syntax. */
6264 i.error = unsupported_syntax;
6265 if ((intel_syntax && t->opcode_modifier.attsyntax)
6266 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6267 continue;
6268
6269 /* Check Intel64/AMD64 ISA. */
6270 switch (isa64)
6271 {
6272 default:
6273 /* Default: Don't accept Intel64. */
6274 if (t->opcode_modifier.isa64 == INTEL64)
6275 continue;
6276 break;
6277 case amd64:
6278 /* -mamd64: Don't accept Intel64 and Intel64 only. */
6279 if (t->opcode_modifier.isa64 >= INTEL64)
6280 continue;
6281 break;
6282 case intel64:
6283 /* -mintel64: Don't accept AMD64. */
6284 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6285 continue;
6286 break;
6287 }
6288
6289 /* Check the suffix. */
6290 i.error = invalid_instruction_suffix;
6291 if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6292 || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6293 || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6294 || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6295 || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6296 || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6297 continue;
6298
6299 size_match = operand_size_match (t);
6300 if (!size_match)
6301 continue;
6302
6303 /* This is intentionally not
6304
6305 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6306
6307 as the case of a missing * on the operand is accepted (perhaps with
6308 a warning, issued further down). */
6309 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6310 {
6311 i.error = operand_type_mismatch;
6312 continue;
6313 }
6314
6315 for (j = 0; j < MAX_OPERANDS; j++)
6316 operand_types[j] = t->operand_types[j];
6317
6318 /* In general, don't allow
6319 - 64-bit operands outside of 64-bit mode,
6320 - 32-bit operands on pre-386. */
6321 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6322 if (((i.suffix == QWORD_MNEM_SUFFIX
6323 && flag_code != CODE_64BIT
6324 && !(t->base_opcode == 0xfc7
6325 && i.tm.opcode_modifier.opcodeprefix == 0
6326 && t->extension_opcode == 1) /* cmpxchg8b */)
6327 || (i.suffix == LONG_MNEM_SUFFIX
6328 && !cpu_arch_flags.bitfield.cpui386))
6329 && (intel_syntax
6330 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6331 && !intel_float_operand (t->name))
6332 : intel_float_operand (t->name) != 2)
6333 && (t->operands == i.imm_operands
6334 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6335 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6336 && operand_types[i.imm_operands].bitfield.class != RegMask)
6337 || (operand_types[j].bitfield.class != RegMMX
6338 && operand_types[j].bitfield.class != RegSIMD
6339 && operand_types[j].bitfield.class != RegMask))
6340 && !t->opcode_modifier.sib)
6341 continue;
6342
6343 /* Do not verify operands when there are none. */
6344 if (!t->operands)
6345 {
6346 if (VEX_check_encoding (t))
6347 {
6348 specific_error = i.error;
6349 continue;
6350 }
6351
6352 /* We've found a match; break out of loop. */
6353 break;
6354 }
6355
6356 if (!t->opcode_modifier.jump
6357 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6358 {
6359 /* There should be only one Disp operand. */
6360 for (j = 0; j < MAX_OPERANDS; j++)
6361 if (operand_type_check (operand_types[j], disp))
6362 break;
6363 if (j < MAX_OPERANDS)
6364 {
6365 bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
6366
6367 addr_prefix_disp = j;
6368
6369 /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6370 operand into Disp32/Disp32/Disp16/Disp32 operand. */
6371 switch (flag_code)
6372 {
6373 case CODE_16BIT:
6374 override = !override;
6375 /* Fall through. */
6376 case CODE_32BIT:
6377 if (operand_types[j].bitfield.disp32
6378 && operand_types[j].bitfield.disp16)
6379 {
6380 operand_types[j].bitfield.disp16 = override;
6381 operand_types[j].bitfield.disp32 = !override;
6382 }
6383 operand_types[j].bitfield.disp32s = 0;
6384 operand_types[j].bitfield.disp64 = 0;
6385 break;
6386
6387 case CODE_64BIT:
6388 if (operand_types[j].bitfield.disp32s
6389 || operand_types[j].bitfield.disp64)
6390 {
6391 operand_types[j].bitfield.disp64 &= !override;
6392 operand_types[j].bitfield.disp32s &= !override;
6393 operand_types[j].bitfield.disp32 = override;
6394 }
6395 operand_types[j].bitfield.disp16 = 0;
6396 break;
6397 }
6398 }
6399 }
6400
6401 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6402 if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
6403 continue;
6404
6405 /* We check register size if needed. */
6406 if (t->opcode_modifier.checkregsize)
6407 {
6408 check_register = (1 << t->operands) - 1;
6409 if (i.broadcast)
6410 check_register &= ~(1 << i.broadcast->operand);
6411 }
6412 else
6413 check_register = 0;
6414
6415 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6416 switch (t->operands)
6417 {
6418 case 1:
6419 if (!operand_type_match (overlap0, i.types[0]))
6420 continue;
6421 break;
6422 case 2:
6423 /* xchg %eax, %eax is a special case. It is an alias for nop
6424 only in 32bit mode and we can use opcode 0x90. In 64bit
6425 mode, we can't use 0x90 for xchg %eax, %eax since it should
6426 zero-extend %eax to %rax. */
6427 if (flag_code == CODE_64BIT
6428 && t->base_opcode == 0x90
6429 && i.types[0].bitfield.instance == Accum
6430 && i.types[0].bitfield.dword
6431 && i.types[1].bitfield.instance == Accum
6432 && i.types[1].bitfield.dword)
6433 continue;
6434 /* xrelease mov %eax, <disp> is another special case. It must not
6435 match the accumulator-only encoding of mov. */
6436 if (flag_code != CODE_64BIT
6437 && i.hle_prefix
6438 && t->base_opcode == 0xa0
6439 && i.types[0].bitfield.instance == Accum
6440 && (i.flags[1] & Operand_Mem))
6441 continue;
6442 /* Fall through. */
6443
6444 case 3:
6445 if (!(size_match & MATCH_STRAIGHT))
6446 goto check_reverse;
6447 /* Reverse direction of operands if swapping is possible in the first
6448 place (operands need to be symmetric) and
6449 - the load form is requested, and the template is a store form,
6450 - the store form is requested, and the template is a load form,
6451 - the non-default (swapped) form is requested. */
6452 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6453 if (t->opcode_modifier.d && i.reg_operands == i.operands
6454 && !operand_type_all_zero (&overlap1))
6455 switch (i.dir_encoding)
6456 {
6457 case dir_encoding_load:
6458 if (operand_type_check (operand_types[i.operands - 1], anymem)
6459 || t->opcode_modifier.regmem)
6460 goto check_reverse;
6461 break;
6462
6463 case dir_encoding_store:
6464 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6465 && !t->opcode_modifier.regmem)
6466 goto check_reverse;
6467 break;
6468
6469 case dir_encoding_swap:
6470 goto check_reverse;
6471
6472 case dir_encoding_default:
6473 break;
6474 }
6475 /* If we want store form, we skip the current load. */
6476 if ((i.dir_encoding == dir_encoding_store
6477 || i.dir_encoding == dir_encoding_swap)
6478 && i.mem_operands == 0
6479 && t->opcode_modifier.load)
6480 continue;
6481 /* Fall through. */
6482 case 4:
6483 case 5:
6484 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6485 if (!operand_type_match (overlap0, i.types[0])
6486 || !operand_type_match (overlap1, i.types[1])
6487 || ((check_register & 3) == 3
6488 && !operand_type_register_match (i.types[0],
6489 operand_types[0],
6490 i.types[1],
6491 operand_types[1])))
6492 {
6493 /* Check if other direction is valid ... */
6494 if (!t->opcode_modifier.d)
6495 continue;
6496
6497 check_reverse:
6498 if (!(size_match & MATCH_REVERSE))
6499 continue;
6500 /* Try reversing direction of operands. */
6501 overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6502 overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6503 if (!operand_type_match (overlap0, i.types[0])
6504 || !operand_type_match (overlap1, i.types[i.operands - 1])
6505 || (check_register
6506 && !operand_type_register_match (i.types[0],
6507 operand_types[i.operands - 1],
6508 i.types[i.operands - 1],
6509 operand_types[0])))
6510 {
6511 /* Does not match either direction. */
6512 continue;
6513 }
6514 /* found_reverse_match holds which of D or FloatR
6515 we've found. */
6516 if (!t->opcode_modifier.d)
6517 found_reverse_match = 0;
6518 else if (operand_types[0].bitfield.tbyte)
6519 found_reverse_match = Opcode_FloatD;
6520 else if (operand_types[0].bitfield.xmmword
6521 || operand_types[i.operands - 1].bitfield.xmmword
6522 || operand_types[0].bitfield.class == RegMMX
6523 || operand_types[i.operands - 1].bitfield.class == RegMMX
6524 || is_any_vex_encoding(t))
6525 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6526 ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6527 else
6528 found_reverse_match = Opcode_D;
6529 if (t->opcode_modifier.floatr)
6530 found_reverse_match |= Opcode_FloatR;
6531 }
6532 else
6533 {
6534 /* Found a forward 2 operand match here. */
6535 switch (t->operands)
6536 {
6537 case 5:
6538 overlap4 = operand_type_and (i.types[4],
6539 operand_types[4]);
6540 /* Fall through. */
6541 case 4:
6542 overlap3 = operand_type_and (i.types[3],
6543 operand_types[3]);
6544 /* Fall through. */
6545 case 3:
6546 overlap2 = operand_type_and (i.types[2],
6547 operand_types[2]);
6548 break;
6549 }
6550
6551 switch (t->operands)
6552 {
6553 case 5:
6554 if (!operand_type_match (overlap4, i.types[4])
6555 || !operand_type_register_match (i.types[3],
6556 operand_types[3],
6557 i.types[4],
6558 operand_types[4]))
6559 continue;
6560 /* Fall through. */
6561 case 4:
6562 if (!operand_type_match (overlap3, i.types[3])
6563 || ((check_register & 0xa) == 0xa
6564 && !operand_type_register_match (i.types[1],
6565 operand_types[1],
6566 i.types[3],
6567 operand_types[3]))
6568 || ((check_register & 0xc) == 0xc
6569 && !operand_type_register_match (i.types[2],
6570 operand_types[2],
6571 i.types[3],
6572 operand_types[3])))
6573 continue;
6574 /* Fall through. */
6575 case 3:
6576 /* Here we make use of the fact that there are no
6577 reverse match 3 operand instructions. */
6578 if (!operand_type_match (overlap2, i.types[2])
6579 || ((check_register & 5) == 5
6580 && !operand_type_register_match (i.types[0],
6581 operand_types[0],
6582 i.types[2],
6583 operand_types[2]))
6584 || ((check_register & 6) == 6
6585 && !operand_type_register_match (i.types[1],
6586 operand_types[1],
6587 i.types[2],
6588 operand_types[2])))
6589 continue;
6590 break;
6591 }
6592 }
6593 /* Found either forward/reverse 2, 3 or 4 operand match here:
6594 slip through to break. */
6595 }
6596
6597 /* Check if vector operands are valid. */
6598 if (check_VecOperands (t))
6599 {
6600 specific_error = i.error;
6601 continue;
6602 }
6603
6604 /* Check if VEX/EVEX encoding requirements can be satisfied. */
6605 if (VEX_check_encoding (t))
6606 {
6607 specific_error = i.error;
6608 continue;
6609 }
6610
6611 /* We've found a match; break out of loop. */
6612 break;
6613 }
6614
6615 if (t == current_templates->end)
6616 {
6617 /* We found no match. */
6618 const char *err_msg;
6619 switch (specific_error ? specific_error : i.error)
6620 {
6621 default:
6622 abort ();
6623 case operand_size_mismatch:
6624 err_msg = _("operand size mismatch");
6625 break;
6626 case operand_type_mismatch:
6627 err_msg = _("operand type mismatch");
6628 break;
6629 case register_type_mismatch:
6630 err_msg = _("register type mismatch");
6631 break;
6632 case number_of_operands_mismatch:
6633 err_msg = _("number of operands mismatch");
6634 break;
6635 case invalid_instruction_suffix:
6636 err_msg = _("invalid instruction suffix");
6637 break;
6638 case bad_imm4:
6639 err_msg = _("constant doesn't fit in 4 bits");
6640 break;
6641 case unsupported_with_intel_mnemonic:
6642 err_msg = _("unsupported with Intel mnemonic");
6643 break;
6644 case unsupported_syntax:
6645 err_msg = _("unsupported syntax");
6646 break;
6647 case unsupported:
6648 as_bad (_("unsupported instruction `%s'"),
6649 current_templates->start->name);
6650 return NULL;
6651 case invalid_sib_address:
6652 err_msg = _("invalid SIB address");
6653 break;
6654 case invalid_vsib_address:
6655 err_msg = _("invalid VSIB address");
6656 break;
6657 case invalid_vector_register_set:
6658 err_msg = _("mask, index, and destination registers must be distinct");
6659 break;
6660 case invalid_tmm_register_set:
6661 err_msg = _("all tmm registers must be distinct");
6662 break;
6663 case unsupported_vector_index_register:
6664 err_msg = _("unsupported vector index register");
6665 break;
6666 case unsupported_broadcast:
6667 err_msg = _("unsupported broadcast");
6668 break;
6669 case broadcast_needed:
6670 err_msg = _("broadcast is needed for operand of such type");
6671 break;
6672 case unsupported_masking:
6673 err_msg = _("unsupported masking");
6674 break;
6675 case mask_not_on_destination:
6676 err_msg = _("mask not on destination operand");
6677 break;
6678 case no_default_mask:
6679 err_msg = _("default mask isn't allowed");
6680 break;
6681 case unsupported_rc_sae:
6682 err_msg = _("unsupported static rounding/sae");
6683 break;
6684 case rc_sae_operand_not_last_imm:
6685 if (intel_syntax)
6686 err_msg = _("RC/SAE operand must precede immediate operands");
6687 else
6688 err_msg = _("RC/SAE operand must follow immediate operands");
6689 break;
6690 case invalid_register_operand:
6691 err_msg = _("invalid register operand");
6692 break;
6693 }
6694 as_bad (_("%s for `%s'"), err_msg,
6695 current_templates->start->name);
6696 return NULL;
6697 }
6698
6699 if (!quiet_warnings)
6700 {
6701 if (!intel_syntax
6702 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6703 as_warn (_("indirect %s without `*'"), t->name);
6704
6705 if (t->opcode_modifier.isprefix
6706 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6707 {
6708 /* Warn them that a data or address size prefix doesn't
6709 affect assembly of the next line of code. */
6710 as_warn (_("stand-alone `%s' prefix"), t->name);
6711 }
6712 }
6713
6714 /* Copy the template we found. */
6715 i.tm = *t;
6716
6717 if (addr_prefix_disp != -1)
6718 i.tm.operand_types[addr_prefix_disp]
6719 = operand_types[addr_prefix_disp];
6720
6721 if (found_reverse_match)
6722 {
6723 /* If we found a reverse match we must alter the opcode direction
6724 bit and clear/flip the regmem modifier one. found_reverse_match
6725 holds bits to change (different for int & float insns). */
6726
6727 i.tm.base_opcode ^= found_reverse_match;
6728
6729 i.tm.operand_types[0] = operand_types[i.operands - 1];
6730 i.tm.operand_types[i.operands - 1] = operand_types[0];
6731
6732 /* Certain SIMD insns have their load forms specified in the opcode
6733 table, and hence we need to _set_ RegMem instead of clearing it.
6734 We need to avoid setting the bit though on insns like KMOVW. */
6735 i.tm.opcode_modifier.regmem
6736 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6737 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6738 && !i.tm.opcode_modifier.regmem;
6739 }
6740
6741 return t;
6742 }
6743
6744 static int
6745 check_string (void)
6746 {
6747 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6748 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6749
6750 if (i.seg[op] != NULL && i.seg[op] != &es)
6751 {
6752 as_bad (_("`%s' operand %u must use `%ses' segment"),
6753 i.tm.name,
6754 intel_syntax ? i.tm.operands - es_op : es_op + 1,
6755 register_prefix);
6756 return 0;
6757 }
6758
6759 /* There's only ever one segment override allowed per instruction.
6760 This instruction possibly has a legal segment override on the
6761 second operand, so copy the segment to where non-string
6762 instructions store it, allowing common code. */
6763 i.seg[op] = i.seg[1];
6764
6765 return 1;
6766 }
6767
/* Derive (or validate) the operand-size suffix for the instruction in
   `i' — from the template, the explicit mnemonic suffix, or the GPR
   operands — and adjust i.tm.base_opcode, size prefixes and i.rex to
   match.  Returns 1 on success, 0 after issuing a diagnostic.  */
static int
process_suffix (void)
{
  bfd_boolean is_crc32 = FALSE;

  /* If matched instruction specifies an explicit instruction mnemonic
     suffix, use it.  */
  if (i.tm.opcode_modifier.size == SIZE16)
    i.suffix = WORD_MNEM_SUFFIX;
  else if (i.tm.opcode_modifier.size == SIZE32)
    i.suffix = LONG_MNEM_SUFFIX;
  else if (i.tm.opcode_modifier.size == SIZE64)
    i.suffix = QWORD_MNEM_SUFFIX;
  else if (i.reg_operands
	   && (i.operands > 1 || i.types[0].bitfield.class == Reg)
	   && !i.tm.opcode_modifier.addrprefixopreg)
    {
      unsigned int numop = i.operands;
      /* CRC32 */
      is_crc32 = (i.tm.base_opcode == 0xf38f0
		  && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);

      /* movsx/movzx want only their source operand considered here, for the
	 ambiguity checking below.  The suffix will be replaced afterwards
	 to represent the destination (register).  */
      if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
	  || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
	--i.operands;

      /* crc32 needs REX.W set regardless of suffix / source operand size.  */
      if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
	i.rex |= REX_W;

      /* If there's no instruction mnemonic suffix we try to invent one
	 based on GPR operands.  */
      if (!i.suffix)
	{
	  /* We take i.suffix from the last register operand specified,
	     Destination register type is more significant than source
	     register type.  crc32 in SSE4.2 prefers source register
	     type.  */
	  unsigned int op = is_crc32 ? 1 : i.operands;

	  while (op--)
	    if (i.tm.operand_types[op].bitfield.instance == InstanceNone
		|| i.tm.operand_types[op].bitfield.instance == Accum)
	      {
		if (i.types[op].bitfield.class != Reg)
		  continue;
		if (i.types[op].bitfield.byte)
		  i.suffix = BYTE_MNEM_SUFFIX;
		else if (i.types[op].bitfield.word)
		  i.suffix = WORD_MNEM_SUFFIX;
		else if (i.types[op].bitfield.dword)
		  i.suffix = LONG_MNEM_SUFFIX;
		else if (i.types[op].bitfield.qword)
		  i.suffix = QWORD_MNEM_SUFFIX;
		else
		  continue;
		break;
	      }

	  /* As an exception, movsx/movzx silently default to a byte source
	     in AT&T mode.  */
	  if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
	      && !i.suffix && !intel_syntax)
	    i.suffix = BYTE_MNEM_SUFFIX;
	}
      else if (i.suffix == BYTE_MNEM_SUFFIX)
	{
	  /* An explicit suffix must be consistent with the register
	     operands; Intel IGNORESIZE templates drop it instead.  */
	  if (intel_syntax
	      && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
	      && i.tm.opcode_modifier.no_bsuf)
	    i.suffix = 0;
	  else if (!check_byte_reg ())
	    return 0;
	}
      else if (i.suffix == LONG_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
	      && i.tm.opcode_modifier.no_lsuf
	      && !i.tm.opcode_modifier.todword
	      && !i.tm.opcode_modifier.toqword)
	    i.suffix = 0;
	  else if (!check_long_reg ())
	    return 0;
	}
      else if (i.suffix == QWORD_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
	      && i.tm.opcode_modifier.no_qsuf
	      && !i.tm.opcode_modifier.todword
	      && !i.tm.opcode_modifier.toqword)
	    i.suffix = 0;
	  else if (!check_qword_reg ())
	    return 0;
	}
      else if (i.suffix == WORD_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
	      && i.tm.opcode_modifier.no_wsuf)
	    i.suffix = 0;
	  else if (!check_word_reg ())
	    return 0;
	}
      else if (intel_syntax
	       && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
	/* Do nothing if the instruction is going to ignore the prefix.  */
	;
      else
	abort ();

      /* Undo the movsx/movzx change done above.  */
      i.operands = numop;
    }
  else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
	   && !i.suffix)
    {
      i.suffix = stackop_size;
      if (stackop_size == LONG_MNEM_SUFFIX)
	{
	  /* stackop_size is set to LONG_MNEM_SUFFIX for the
	     .code16gcc directive to support 16-bit mode with
	     32-bit address.  For IRET without a suffix, generate
	     16-bit IRET (opcode 0xcf) to return from an interrupt
	     handler.  */
	  if (i.tm.base_opcode == 0xcf)
	    {
	      i.suffix = WORD_MNEM_SUFFIX;
	      as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
	    }
	  /* Warn about changed behavior for segment register push/pop.  */
	  else if ((i.tm.base_opcode | 1) == 0x07)
	    as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
		     i.tm.name);
	}
    }
  else if (!i.suffix
	   && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
	       || i.tm.opcode_modifier.jump == JUMP_BYTE
	       || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
	       || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
		   && i.tm.extension_opcode <= 3)))
    {
      /* Suffix-less branches and [ls][gi]dt default to the natural
	 operand size of the current code mode.  */
      switch (flag_code)
	{
	case CODE_64BIT:
	  if (!i.tm.opcode_modifier.no_qsuf)
	    {
	      if (i.tm.opcode_modifier.jump == JUMP_BYTE
		  || i.tm.opcode_modifier.no_lsuf)
		i.suffix = QWORD_MNEM_SUFFIX;
	      break;
	    }
	  /* Fall through.  */
	case CODE_32BIT:
	  if (!i.tm.opcode_modifier.no_lsuf)
	    i.suffix = LONG_MNEM_SUFFIX;
	  break;
	case CODE_16BIT:
	  if (!i.tm.opcode_modifier.no_wsuf)
	    i.suffix = WORD_MNEM_SUFFIX;
	  break;
	}
    }

  /* Still no suffix: check whether the insn's operand size is
     ambiguous, and either diagnose or pick a default.  */
  if (!i.suffix
      && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
	  /* Also cover lret/retf/iret in 64-bit mode.  */
	  || (flag_code == CODE_64BIT
	      && !i.tm.opcode_modifier.no_lsuf
	      && !i.tm.opcode_modifier.no_qsuf))
      && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
      /* Explicit sizing prefixes are assumed to disambiguate insns.  */
      && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
      /* Accept FLDENV et al without suffix.  */
      && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
    {
      /* One bit per suffix the template would accept; more than one
	 set bit below means the size is ambiguous.  */
      unsigned int suffixes, evex = 0;

      suffixes = !i.tm.opcode_modifier.no_bsuf;
      if (!i.tm.opcode_modifier.no_wsuf)
	suffixes |= 1 << 1;
      if (!i.tm.opcode_modifier.no_lsuf)
	suffixes |= 1 << 2;
      if (!i.tm.opcode_modifier.no_ldsuf)
	suffixes |= 1 << 3;
      if (!i.tm.opcode_modifier.no_ssuf)
	suffixes |= 1 << 4;
      if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
	suffixes |= 1 << 5;

      /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
	 also suitable for AT&T syntax mode, it was requested that this be
	 restricted to just Intel syntax.  */
      if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
	{
	  unsigned int op;

	  for (op = 0; op < i.tm.operands; ++op)
	    {
	      if (is_evex_encoding (&i.tm)
		  && !cpu_arch_flags.bitfield.cpuavx512vl)
		{
		  /* Without AVX512VL only 512-bit forms exist; drop the
		     narrower size bits from the template.  */
		  if (i.tm.operand_types[op].bitfield.ymmword)
		    i.tm.operand_types[op].bitfield.xmmword = 0;
		  if (i.tm.operand_types[op].bitfield.zmmword)
		    i.tm.operand_types[op].bitfield.ymmword = 0;
		  if (!i.tm.opcode_modifier.evex
		      || i.tm.opcode_modifier.evex == EVEXDYN)
		    i.tm.opcode_modifier.evex = EVEX512;
		}

	      if (i.tm.operand_types[op].bitfield.xmmword
		  + i.tm.operand_types[op].bitfield.ymmword
		  + i.tm.operand_types[op].bitfield.zmmword < 2)
		continue;

	      /* Any properly sized operand disambiguates the insn.  */
	      if (i.types[op].bitfield.xmmword
		  || i.types[op].bitfield.ymmword
		  || i.types[op].bitfield.zmmword)
		{
		  suffixes &= ~(7 << 6);
		  evex = 0;
		  break;
		}

	      if ((i.flags[op] & Operand_Mem)
		  && i.tm.operand_types[op].bitfield.unspecified)
		{
		  /* Bits 6-8 track xmm/ymm/zmm memory-operand sizes.  */
		  if (i.tm.operand_types[op].bitfield.xmmword)
		    suffixes |= 1 << 6;
		  if (i.tm.operand_types[op].bitfield.ymmword)
		    suffixes |= 1 << 7;
		  if (i.tm.operand_types[op].bitfield.zmmword)
		    suffixes |= 1 << 8;
		  if (is_evex_encoding (&i.tm))
		    evex = EVEX512;
		}
	    }
	}

      /* Are multiple suffixes / operand sizes allowed?  */
      if (suffixes & (suffixes - 1))
	{
	  if (intel_syntax
	      && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
		  || operand_check == check_error))
	    {
	      as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
	      return 0;
	    }
	  if (operand_check == check_error)
	    {
	      as_bad (_("no instruction mnemonic suffix given and "
			"no register operands; can't size `%s'"), i.tm.name);
	      return 0;
	    }
	  if (operand_check == check_warning)
	    as_warn (_("%s; using default for `%s'"),
		       intel_syntax
		       ? _("ambiguous operand size")
		       : _("no instruction mnemonic suffix given and "
			   "no register operands"),
		       i.tm.name);

	  if (i.tm.opcode_modifier.floatmf)
	    i.suffix = SHORT_MNEM_SUFFIX;
	  else if ((i.tm.base_opcode | 8) == 0xfbe
		   || (i.tm.base_opcode == 0x63
		       && i.tm.cpu_flags.bitfield.cpu64))
	    /* handled below */;
	  else if (evex)
	    i.tm.opcode_modifier.evex = evex;
	  else if (flag_code == CODE_16BIT)
	    i.suffix = WORD_MNEM_SUFFIX;
	  else if (!i.tm.opcode_modifier.no_lsuf)
	    i.suffix = LONG_MNEM_SUFFIX;
	  else
	    i.suffix = QWORD_MNEM_SUFFIX;
	}
    }

  /* movsx/movzx ((base_opcode | 8) == 0xfbe) and movsxd (0x63 in
     64-bit mode) get their suffix replaced to describe the
     destination register below.  */
  if ((i.tm.base_opcode | 8) == 0xfbe
      || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
    {
      /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
	 In AT&T syntax, if there is no suffix (warned about above), the default
	 will be byte extension.  */
      if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
	i.tm.base_opcode |= 1;

      /* For further processing, the suffix should represent the destination
	 (register).  This is already the case when one was used with
	 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
	 no suffix to begin with.  */
      if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
	{
	  if (i.types[1].bitfield.word)
	    i.suffix = WORD_MNEM_SUFFIX;
	  else if (i.types[1].bitfield.qword)
	    i.suffix = QWORD_MNEM_SUFFIX;
	  else
	    i.suffix = LONG_MNEM_SUFFIX;

	  i.tm.opcode_modifier.w = 0;
	}
    }

  if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
    i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
		   != (i.tm.operand_types[1].bitfield.class == Reg);

  /* Change the opcode based on the operand size given by i.suffix.  */
  switch (i.suffix)
    {
    /* Size floating point instruction.  */
    case LONG_MNEM_SUFFIX:
      if (i.tm.opcode_modifier.floatmf)
	{
	  i.tm.base_opcode ^= 4;
	  break;
	}
      /* fall through */
    case WORD_MNEM_SUFFIX:
    case QWORD_MNEM_SUFFIX:
      /* It's not a byte, select word/dword operation.  */
      if (i.tm.opcode_modifier.w)
	{
	  if (i.short_form)
	    i.tm.base_opcode |= 8;
	  else
	    i.tm.base_opcode |= 1;
	}
      /* fall through */
    case SHORT_MNEM_SUFFIX:
      /* Now select between word & dword operations via the operand
	 size prefix, except for instructions that will ignore this
	 prefix anyway.  */
      if (i.suffix != QWORD_MNEM_SUFFIX
	  && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
	  && !i.tm.opcode_modifier.floatmf
	  && !is_any_vex_encoding (&i.tm)
	  && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
	      || (flag_code == CODE_64BIT
		  && i.tm.opcode_modifier.jump == JUMP_BYTE)))
	{
	  unsigned int prefix = DATA_PREFIX_OPCODE;

	  if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
	    prefix = ADDR_PREFIX_OPCODE;

	  if (!add_prefix (prefix))
	    return 0;
	}

      /* Set mode64 for an operand.  */
      if (i.suffix == QWORD_MNEM_SUFFIX
	  && flag_code == CODE_64BIT
	  && !i.tm.opcode_modifier.norex64
	  && !i.tm.opcode_modifier.vexw
	  /* Special case for xchg %rax,%rax.  It is NOP and doesn't
	     need rex64.  */
	  && ! (i.operands == 2
		&& i.tm.base_opcode == 0x90
		&& i.tm.extension_opcode == None
		&& i.types[0].bitfield.instance == Accum
		&& i.types[0].bitfield.qword
		&& i.types[1].bitfield.instance == Accum
		&& i.types[1].bitfield.qword))
	i.rex |= REX_W;

      break;

    case 0:
      /* Select word/dword/qword operation with explict data sizing prefix
	 when there are no suitable register operands.  */
      if (i.tm.opcode_modifier.w
	  && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
	  && (!i.reg_operands
	      || (i.reg_operands == 1
		      /* ShiftCount */
		  && (i.tm.operand_types[0].bitfield.instance == RegC
		      /* InOutPortReg */
		      || i.tm.operand_types[0].bitfield.instance == RegD
		      || i.tm.operand_types[1].bitfield.instance == RegD
		      /* CRC32 */
		      || is_crc32))))
	i.tm.base_opcode |= 1;
      break;
    }

  if (i.tm.opcode_modifier.addrprefixopreg)
    {
      gas_assert (!i.suffix);
      gas_assert (i.reg_operands);

      if (i.tm.operand_types[0].bitfield.instance == Accum
	  || i.operands == 1)
	{
	  /* The address size override prefix changes the size of the
	     first operand.  */
	  if (flag_code == CODE_64BIT
	      && i.op[0].regs->reg_type.bitfield.word)
	    {
	      as_bad (_("16-bit addressing unavailable for `%s'"),
		      i.tm.name);
	      return 0;
	    }

	  if ((flag_code == CODE_32BIT
	       ? i.op[0].regs->reg_type.bitfield.word
	       : i.op[0].regs->reg_type.bitfield.dword)
	      && !add_prefix (ADDR_PREFIX_OPCODE))
	    return 0;
	}
      else
	{
	  /* Check invalid register operand when the address size override
	     prefix changes the size of register operands.  */
	  unsigned int op;
	  enum { need_word, need_dword, need_qword } need;

	  /* Check the register operand for the address size prefix if
	     the memory operand has no real registers, like symbol, DISP
	     or symbol(%rip).  */
	  if (i.mem_operands == 1
	      && i.reg_operands == 1
	      && i.operands == 2
	      && i.types[1].bitfield.class == Reg
	      && (flag_code == CODE_32BIT
		  ? i.op[1].regs->reg_type.bitfield.word
		  : i.op[1].regs->reg_type.bitfield.dword)
	      && ((i.base_reg == NULL && i.index_reg == NULL)
		  || (i.base_reg
		      && i.base_reg->reg_num == RegIP
		      && i.base_reg->reg_type.bitfield.qword))
	      && !add_prefix (ADDR_PREFIX_OPCODE))
	    return 0;

	  if (flag_code == CODE_32BIT)
	    need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
	  else if (i.prefix[ADDR_PREFIX])
	    need = need_dword;
	  else
	    need = flag_code == CODE_64BIT ? need_qword : need_word;

	  /* All register operands must have the size selected by the
	     (possibly overridden) address size.  */
	  for (op = 0; op < i.operands; op++)
	    {
	      if (i.types[op].bitfield.class != Reg)
		continue;

	      switch (need)
		{
		case need_word:
		  if (i.op[op].regs->reg_type.bitfield.word)
		    continue;
		  break;
		case need_dword:
		  if (i.op[op].regs->reg_type.bitfield.dword)
		    continue;
		  break;
		case need_qword:
		  if (i.op[op].regs->reg_type.bitfield.qword)
		    continue;
		  break;
		}

	      as_bad (_("invalid register operand size for `%s'"),
		      i.tm.name);
	      return 0;
	    }
	}
    }

  return 1;
}
7249
7250 static int
7251 check_byte_reg (void)
7252 {
7253 int op;
7254
7255 for (op = i.operands; --op >= 0;)
7256 {
7257 /* Skip non-register operands. */
7258 if (i.types[op].bitfield.class != Reg)
7259 continue;
7260
7261 /* If this is an eight bit register, it's OK. If it's the 16 or
7262 32 bit version of an eight bit register, we will just use the
7263 low portion, and that's OK too. */
7264 if (i.types[op].bitfield.byte)
7265 continue;
7266
7267 /* I/O port address operands are OK too. */
7268 if (i.tm.operand_types[op].bitfield.instance == RegD
7269 && i.tm.operand_types[op].bitfield.word)
7270 continue;
7271
7272 /* crc32 only wants its source operand checked here. */
7273 if (i.tm.base_opcode == 0xf38f0
7274 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7275 && op != 0)
7276 continue;
7277
7278 /* Any other register is bad. */
7279 as_bad (_("`%s%s' not allowed with `%s%c'"),
7280 register_prefix, i.op[op].regs->reg_name,
7281 i.tm.name, i.suffix);
7282 return 0;
7283 }
7284 return 1;
7285 }
7286
7287 static int
7288 check_long_reg (void)
7289 {
7290 int op;
7291
7292 for (op = i.operands; --op >= 0;)
7293 /* Skip non-register operands. */
7294 if (i.types[op].bitfield.class != Reg)
7295 continue;
7296 /* Reject eight bit registers, except where the template requires
7297 them. (eg. movzb) */
7298 else if (i.types[op].bitfield.byte
7299 && (i.tm.operand_types[op].bitfield.class == Reg
7300 || i.tm.operand_types[op].bitfield.instance == Accum)
7301 && (i.tm.operand_types[op].bitfield.word
7302 || i.tm.operand_types[op].bitfield.dword))
7303 {
7304 as_bad (_("`%s%s' not allowed with `%s%c'"),
7305 register_prefix,
7306 i.op[op].regs->reg_name,
7307 i.tm.name,
7308 i.suffix);
7309 return 0;
7310 }
7311 /* Error if the e prefix on a general reg is missing. */
7312 else if (i.types[op].bitfield.word
7313 && (i.tm.operand_types[op].bitfield.class == Reg
7314 || i.tm.operand_types[op].bitfield.instance == Accum)
7315 && i.tm.operand_types[op].bitfield.dword)
7316 {
7317 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7318 register_prefix, i.op[op].regs->reg_name,
7319 i.suffix);
7320 return 0;
7321 }
7322 /* Warn if the r prefix on a general reg is present. */
7323 else if (i.types[op].bitfield.qword
7324 && (i.tm.operand_types[op].bitfield.class == Reg
7325 || i.tm.operand_types[op].bitfield.instance == Accum)
7326 && i.tm.operand_types[op].bitfield.dword)
7327 {
7328 if (intel_syntax
7329 && i.tm.opcode_modifier.toqword
7330 && i.types[0].bitfield.class != RegSIMD)
7331 {
7332 /* Convert to QWORD. We want REX byte. */
7333 i.suffix = QWORD_MNEM_SUFFIX;
7334 }
7335 else
7336 {
7337 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7338 register_prefix, i.op[op].regs->reg_name,
7339 i.suffix);
7340 return 0;
7341 }
7342 }
7343 return 1;
7344 }
7345
7346 static int
7347 check_qword_reg (void)
7348 {
7349 int op;
7350
7351 for (op = i.operands; --op >= 0; )
7352 /* Skip non-register operands. */
7353 if (i.types[op].bitfield.class != Reg)
7354 continue;
7355 /* Reject eight bit registers, except where the template requires
7356 them. (eg. movzb) */
7357 else if (i.types[op].bitfield.byte
7358 && (i.tm.operand_types[op].bitfield.class == Reg
7359 || i.tm.operand_types[op].bitfield.instance == Accum)
7360 && (i.tm.operand_types[op].bitfield.word
7361 || i.tm.operand_types[op].bitfield.dword))
7362 {
7363 as_bad (_("`%s%s' not allowed with `%s%c'"),
7364 register_prefix,
7365 i.op[op].regs->reg_name,
7366 i.tm.name,
7367 i.suffix);
7368 return 0;
7369 }
7370 /* Warn if the r prefix on a general reg is missing. */
7371 else if ((i.types[op].bitfield.word
7372 || i.types[op].bitfield.dword)
7373 && (i.tm.operand_types[op].bitfield.class == Reg
7374 || i.tm.operand_types[op].bitfield.instance == Accum)
7375 && i.tm.operand_types[op].bitfield.qword)
7376 {
7377 /* Prohibit these changes in the 64bit mode, since the
7378 lowering is more complicated. */
7379 if (intel_syntax
7380 && i.tm.opcode_modifier.todword
7381 && i.types[0].bitfield.class != RegSIMD)
7382 {
7383 /* Convert to DWORD. We don't want REX byte. */
7384 i.suffix = LONG_MNEM_SUFFIX;
7385 }
7386 else
7387 {
7388 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7389 register_prefix, i.op[op].regs->reg_name,
7390 i.suffix);
7391 return 0;
7392 }
7393 }
7394 return 1;
7395 }
7396
7397 static int
7398 check_word_reg (void)
7399 {
7400 int op;
7401 for (op = i.operands; --op >= 0;)
7402 /* Skip non-register operands. */
7403 if (i.types[op].bitfield.class != Reg)
7404 continue;
7405 /* Reject eight bit registers, except where the template requires
7406 them. (eg. movzb) */
7407 else if (i.types[op].bitfield.byte
7408 && (i.tm.operand_types[op].bitfield.class == Reg
7409 || i.tm.operand_types[op].bitfield.instance == Accum)
7410 && (i.tm.operand_types[op].bitfield.word
7411 || i.tm.operand_types[op].bitfield.dword))
7412 {
7413 as_bad (_("`%s%s' not allowed with `%s%c'"),
7414 register_prefix,
7415 i.op[op].regs->reg_name,
7416 i.tm.name,
7417 i.suffix);
7418 return 0;
7419 }
7420 /* Error if the e or r prefix on a general reg is present. */
7421 else if ((i.types[op].bitfield.dword
7422 || i.types[op].bitfield.qword)
7423 && (i.tm.operand_types[op].bitfield.class == Reg
7424 || i.tm.operand_types[op].bitfield.instance == Accum)
7425 && i.tm.operand_types[op].bitfield.word)
7426 {
7427 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7428 register_prefix, i.op[op].regs->reg_name,
7429 i.suffix);
7430 return 0;
7431 }
7432 return 1;
7433 }
7434
/* Narrow the set of still-possible immediate sizes recorded in
   i.types[J] down to a single canonical one, using the mnemonic suffix
   or the data size / REX.W prefixes when the operand would otherwise
   remain ambiguous.  Returns 1 on success, 0 after diagnosing an
   undecidable immediate size.  */
static int
update_imm (unsigned int j)
{
  i386_operand_type overlap = i.types[j];
  /* Only act when the operand permits some immediate and more than one
     canonical immediate type is still possible.  */
  if ((overlap.bitfield.imm8
       || overlap.bitfield.imm8s
       || overlap.bitfield.imm16
       || overlap.bitfield.imm32
       || overlap.bitfield.imm32s
       || overlap.bitfield.imm64)
      && !operand_type_equal (&overlap, &imm8)
      && !operand_type_equal (&overlap, &imm8s)
      && !operand_type_equal (&overlap, &imm16)
      && !operand_type_equal (&overlap, &imm32)
      && !operand_type_equal (&overlap, &imm32s)
      && !operand_type_equal (&overlap, &imm64))
    {
      if (i.suffix)
	{
	  /* The explicit suffix selects the immediate width.  */
	  i386_operand_type temp;

	  operand_type_set (&temp, 0);
	  if (i.suffix == BYTE_MNEM_SUFFIX)
	    {
	      temp.bitfield.imm8 = overlap.bitfield.imm8;
	      temp.bitfield.imm8s = overlap.bitfield.imm8s;
	    }
	  else if (i.suffix == WORD_MNEM_SUFFIX)
	    temp.bitfield.imm16 = overlap.bitfield.imm16;
	  else if (i.suffix == QWORD_MNEM_SUFFIX)
	    {
	      temp.bitfield.imm64 = overlap.bitfield.imm64;
	      temp.bitfield.imm32s = overlap.bitfield.imm32s;
	    }
	  else
	    temp.bitfield.imm32 = overlap.bitfield.imm32;
	  overlap = temp;
	}
      else if (operand_type_equal (&overlap, &imm16_32_32s)
	       || operand_type_equal (&overlap, &imm16_32)
	       || operand_type_equal (&overlap, &imm16_32s))
	{
	  /* Without a suffix, pick between 16- and 32-bit immediates
	     from the current code size and any data size prefix.  */
	  if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
	    overlap = imm16;
	  else
	    overlap = imm32s;
	}
      else if (i.prefix[REX_PREFIX] & REX_W)
	overlap = operand_type_and (overlap, imm32s);
      else if (i.prefix[DATA_PREFIX])
	overlap = operand_type_and (overlap,
				    flag_code != CODE_16BIT ? imm16 : imm32);
      /* If the above still didn't reduce the operand to exactly one
	 canonical immediate type, the size is undecidable.  */
      if (!operand_type_equal (&overlap, &imm8)
	  && !operand_type_equal (&overlap, &imm8s)
	  && !operand_type_equal (&overlap, &imm16)
	  && !operand_type_equal (&overlap, &imm32)
	  && !operand_type_equal (&overlap, &imm32s)
	  && !operand_type_equal (&overlap, &imm64))
	{
	  as_bad (_("no instruction mnemonic suffix given; "
		    "can't determine immediate size"));
	  return 0;
	}
    }
  i.types[j] = overlap;

  return 1;
}
7503
7504 static int
7505 finalize_imm (void)
7506 {
7507 unsigned int j, n;
7508
7509 /* Update the first 2 immediate operands. */
7510 n = i.operands > 2 ? 2 : i.operands;
7511 if (n)
7512 {
7513 for (j = 0; j < n; j++)
7514 if (update_imm (j) == 0)
7515 return 0;
7516
7517 /* The 3rd operand can't be immediate operand. */
7518 gas_assert (operand_type_check (i.types[2], imm) == 0);
7519 }
7520
7521 return 1;
7522 }
7523
7524 static int
7525 process_operands (void)
7526 {
7527 /* Default segment register this instruction will use for memory
7528 accesses. 0 means unknown. This is only for optimizing out
7529 unnecessary segment overrides. */
7530 const seg_entry *default_seg = 0;
7531
7532 if (i.tm.opcode_modifier.sse2avx)
7533 {
7534 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7535 need converting. */
7536 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7537 i.prefix[REX_PREFIX] = 0;
7538 i.rex_encoding = 0;
7539 }
7540 /* ImmExt should be processed after SSE2AVX. */
7541 else if (i.tm.opcode_modifier.immext)
7542 process_immext ();
7543
7544 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7545 {
7546 unsigned int dupl = i.operands;
7547 unsigned int dest = dupl - 1;
7548 unsigned int j;
7549
7550 /* The destination must be an xmm register. */
7551 gas_assert (i.reg_operands
7552 && MAX_OPERANDS > dupl
7553 && operand_type_equal (&i.types[dest], &regxmm));
7554
7555 if (i.tm.operand_types[0].bitfield.instance == Accum
7556 && i.tm.operand_types[0].bitfield.xmmword)
7557 {
7558 if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7559 {
7560 /* Keep xmm0 for instructions with VEX prefix and 3
7561 sources. */
7562 i.tm.operand_types[0].bitfield.instance = InstanceNone;
7563 i.tm.operand_types[0].bitfield.class = RegSIMD;
7564 goto duplicate;
7565 }
7566 else
7567 {
7568 /* We remove the first xmm0 and keep the number of
7569 operands unchanged, which in fact duplicates the
7570 destination. */
7571 for (j = 1; j < i.operands; j++)
7572 {
7573 i.op[j - 1] = i.op[j];
7574 i.types[j - 1] = i.types[j];
7575 i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7576 i.flags[j - 1] = i.flags[j];
7577 }
7578 }
7579 }
7580 else if (i.tm.opcode_modifier.implicit1stxmm0)
7581 {
7582 gas_assert ((MAX_OPERANDS - 1) > dupl
7583 && (i.tm.opcode_modifier.vexsources
7584 == VEX3SOURCES));
7585
7586 /* Add the implicit xmm0 for instructions with VEX prefix
7587 and 3 sources. */
7588 for (j = i.operands; j > 0; j--)
7589 {
7590 i.op[j] = i.op[j - 1];
7591 i.types[j] = i.types[j - 1];
7592 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7593 i.flags[j] = i.flags[j - 1];
7594 }
7595 i.op[0].regs
7596 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7597 i.types[0] = regxmm;
7598 i.tm.operand_types[0] = regxmm;
7599
7600 i.operands += 2;
7601 i.reg_operands += 2;
7602 i.tm.operands += 2;
7603
7604 dupl++;
7605 dest++;
7606 i.op[dupl] = i.op[dest];
7607 i.types[dupl] = i.types[dest];
7608 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7609 i.flags[dupl] = i.flags[dest];
7610 }
7611 else
7612 {
7613 duplicate:
7614 i.operands++;
7615 i.reg_operands++;
7616 i.tm.operands++;
7617
7618 i.op[dupl] = i.op[dest];
7619 i.types[dupl] = i.types[dest];
7620 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7621 i.flags[dupl] = i.flags[dest];
7622 }
7623
7624 if (i.tm.opcode_modifier.immext)
7625 process_immext ();
7626 }
7627 else if (i.tm.operand_types[0].bitfield.instance == Accum
7628 && i.tm.operand_types[0].bitfield.xmmword)
7629 {
7630 unsigned int j;
7631
7632 for (j = 1; j < i.operands; j++)
7633 {
7634 i.op[j - 1] = i.op[j];
7635 i.types[j - 1] = i.types[j];
7636
7637 /* We need to adjust fields in i.tm since they are used by
7638 build_modrm_byte. */
7639 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7640
7641 i.flags[j - 1] = i.flags[j];
7642 }
7643
7644 i.operands--;
7645 i.reg_operands--;
7646 i.tm.operands--;
7647 }
7648 else if (i.tm.opcode_modifier.implicitquadgroup)
7649 {
7650 unsigned int regnum, first_reg_in_group, last_reg_in_group;
7651
7652 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7653 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7654 regnum = register_number (i.op[1].regs);
7655 first_reg_in_group = regnum & ~3;
7656 last_reg_in_group = first_reg_in_group + 3;
7657 if (regnum != first_reg_in_group)
7658 as_warn (_("source register `%s%s' implicitly denotes"
7659 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7660 register_prefix, i.op[1].regs->reg_name,
7661 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7662 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7663 i.tm.name);
7664 }
7665 else if (i.tm.opcode_modifier.regkludge)
7666 {
7667 /* The imul $imm, %reg instruction is converted into
7668 imul $imm, %reg, %reg, and the clr %reg instruction
7669 is converted into xor %reg, %reg. */
7670
7671 unsigned int first_reg_op;
7672
7673 if (operand_type_check (i.types[0], reg))
7674 first_reg_op = 0;
7675 else
7676 first_reg_op = 1;
7677 /* Pretend we saw the extra register operand. */
7678 gas_assert (i.reg_operands == 1
7679 && i.op[first_reg_op + 1].regs == 0);
7680 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7681 i.types[first_reg_op + 1] = i.types[first_reg_op];
7682 i.operands++;
7683 i.reg_operands++;
7684 }
7685
7686 if (i.tm.opcode_modifier.modrm)
7687 {
7688 /* The opcode is completed (modulo i.tm.extension_opcode which
7689 must be put into the modrm byte). Now, we make the modrm and
7690 index base bytes based on all the info we've collected. */
7691
7692 default_seg = build_modrm_byte ();
7693 }
7694 else if (i.types[0].bitfield.class == SReg)
7695 {
7696 if (flag_code != CODE_64BIT
7697 ? i.tm.base_opcode == POP_SEG_SHORT
7698 && i.op[0].regs->reg_num == 1
7699 : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7700 && i.op[0].regs->reg_num < 4)
7701 {
7702 as_bad (_("you can't `%s %s%s'"),
7703 i.tm.name, register_prefix, i.op[0].regs->reg_name);
7704 return 0;
7705 }
7706 if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
7707 {
7708 i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7709 i.tm.opcode_length = 2;
7710 }
7711 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7712 }
7713 else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7714 {
7715 default_seg = &ds;
7716 }
7717 else if (i.tm.opcode_modifier.isstring)
7718 {
7719 /* For the string instructions that allow a segment override
7720 on one of their operands, the default segment is ds. */
7721 default_seg = &ds;
7722 }
7723 else if (i.short_form)
7724 {
7725 /* The register or float register operand is in operand
7726 0 or 1. */
7727 unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7728
7729 /* Register goes in low 3 bits of opcode. */
7730 i.tm.base_opcode |= i.op[op].regs->reg_num;
7731 if ((i.op[op].regs->reg_flags & RegRex) != 0)
7732 i.rex |= REX_B;
7733 if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7734 {
7735 /* Warn about some common errors, but press on regardless.
7736 The first case can be generated by gcc (<= 2.8.1). */
7737 if (i.operands == 2)
7738 {
7739 /* Reversed arguments on faddp, fsubp, etc. */
7740 as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7741 register_prefix, i.op[!intel_syntax].regs->reg_name,
7742 register_prefix, i.op[intel_syntax].regs->reg_name);
7743 }
7744 else
7745 {
7746 /* Extraneous `l' suffix on fp insn. */
7747 as_warn (_("translating to `%s %s%s'"), i.tm.name,
7748 register_prefix, i.op[0].regs->reg_name);
7749 }
7750 }
7751 }
7752
7753 if ((i.seg[0] || i.prefix[SEG_PREFIX])
7754 && i.tm.base_opcode == 0x8d /* lea */
7755 && !is_any_vex_encoding(&i.tm))
7756 {
7757 if (!quiet_warnings)
7758 as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7759 if (optimize)
7760 {
7761 i.seg[0] = NULL;
7762 i.prefix[SEG_PREFIX] = 0;
7763 }
7764 }
7765
7766 /* If a segment was explicitly specified, and the specified segment
7767 is neither the default nor the one already recorded from a prefix,
7768 use an opcode prefix to select it. If we never figured out what
7769 the default segment is, then default_seg will be zero at this
7770 point, and the specified segment prefix will always be used. */
7771 if (i.seg[0]
7772 && i.seg[0] != default_seg
7773 && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
7774 {
7775 if (!add_prefix (i.seg[0]->seg_prefix))
7776 return 0;
7777 }
7778 return 1;
7779 }
7780
7781 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7782 bfd_boolean do_sse2avx)
7783 {
7784 if (r->reg_flags & RegRex)
7785 {
7786 if (i.rex & rex_bit)
7787 as_bad (_("same type of prefix used twice"));
7788 i.rex |= rex_bit;
7789 }
7790 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
7791 {
7792 gas_assert (i.vex.register_specifier == r);
7793 i.vex.register_specifier += 8;
7794 }
7795
7796 if (r->reg_flags & RegVRex)
7797 i.vrex |= rex_bit;
7798 }
7799
/* Encode the register/memory operands of the current insn (global `i')
   into the ModRM byte, the SIB byte, the REX/VREX bits and the VEX
   register specifier.  Returns the default segment implied by the
   addressing form (e.g. &ss for %bp/%esp-based), or 0 when no memory
   form fixed one; the caller uses this to decide whether an explicit
   segment prefix is needed.  */
static const seg_entry *
build_modrm_byte (void)
{
  const seg_entry *default_seg = 0;
  unsigned int source, dest;
  int vex_3_sources;

  vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
  if (vex_3_sources)
    {
      unsigned int nds, reg_slot;
      expressionS *exp;

      dest = i.operands - 1;
      nds = dest - 1;

      /* There are 2 kinds of instructions:
	 1. 5 operands: 4 register operands or 3 register operands
	 plus 1 memory operand plus one Imm4 operand, VexXDS, and
	 VexW0 or VexW1.  The destination must be either XMM, YMM or
	 ZMM register.
	 2. 4 operands: 4 register operands or 3 register operands
	 plus 1 memory operand, with VexXDS.  */
      gas_assert ((i.reg_operands == 4
		   || (i.reg_operands == 3 && i.mem_operands == 1))
		  && i.tm.opcode_modifier.vexvvvv == VEXXDS
		  && i.tm.opcode_modifier.vexw
		  && i.tm.operand_types[dest].bitfield.class == RegSIMD);

      /* If VexW1 is set, the first non-immediate operand is the source and
	 the second non-immediate one is encoded in the immediate operand.  */
      if (i.tm.opcode_modifier.vexw == VEXW1)
	{
	  source = i.imm_operands;
	  reg_slot = i.imm_operands + 1;
	}
      else
	{
	  source = i.imm_operands + 1;
	  reg_slot = i.imm_operands;
	}

      if (i.imm_operands == 0)
	{
	  /* When there is no immediate operand, generate an 8bit
	     immediate operand to encode the first operand.  The register
	     number goes in the high nibble (reg << 4) of that imm8.  */
	  exp = &im_expressions[i.imm_operands++];
	  i.op[i.operands].imms = exp;
	  i.types[i.operands] = imm8;
	  i.operands++;

	  gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
	  exp->X_op = O_constant;
	  exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
	  gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
	}
      else
	{
	  gas_assert (i.imm_operands == 1);
	  gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
	  gas_assert (!i.tm.opcode_modifier.immext);

	  /* Turn on Imm8 again so that output_imm will generate it.  */
	  i.types[0].bitfield.imm8 = 1;

	  /* Merge the register number into the high nibble of the
	     existing imm4 operand.  */
	  gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
	  i.op[0].imms->X_add_number
	    |= register_number (i.op[reg_slot].regs) << 4;
	  gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
	}

      /* The second-to-last register operand goes into VEX.vvvv.  */
      gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
      i.vex.register_specifier = i.op[nds].regs;
    }
  else
    source = dest = 0;

  /* i.reg_operands MUST be the number of real register operands;
     implicit registers do not count.  If there are 3 register
     operands, it must be a instruction with VexNDS.  For a
     instruction with VexNDD, the destination register is encoded
     in VEX prefix.  If there are 4 register operands, it must be
     a instruction with VEX prefix and 3 sources.  */
  if (i.mem_operands == 0
      && ((i.reg_operands == 2
	   && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
	  || (i.reg_operands == 3
	      && i.tm.opcode_modifier.vexvvvv == VEXXDS)
	  || (i.reg_operands == 4 && vex_3_sources)))
    {
      /* Register-only forms: pick which operand index is the source,
	 based on the total operand count and where immediates sit.  */
      switch (i.operands)
	{
	case 2:
	  source = 0;
	  break;
	case 3:
	  /* When there are 3 operands, one of them may be immediate,
	     which may be the first or the last operand.  Otherwise,
	     the first operand must be shift count register (cl) or it
	     is an instruction with VexNDS.  */
	  gas_assert (i.imm_operands == 1
		      || (i.imm_operands == 0
			  && (i.tm.opcode_modifier.vexvvvv == VEXXDS
			      || (i.types[0].bitfield.instance == RegC
				  && i.types[0].bitfield.byte))));
	  if (operand_type_check (i.types[0], imm)
	      || (i.types[0].bitfield.instance == RegC
		  && i.types[0].bitfield.byte))
	    source = 1;
	  else
	    source = 0;
	  break;
	case 4:
	  /* When there are 4 operands, the first two must be 8bit
	     immediate operands. The source operand will be the 3rd
	     one.

	     For instructions with VexNDS, if the first operand
	     an imm8, the source operand is the 2nd one.  If the last
	     operand is imm8, the source operand is the first one.  */
	  gas_assert ((i.imm_operands == 2
		       && i.types[0].bitfield.imm8
		       && i.types[1].bitfield.imm8)
		      || (i.tm.opcode_modifier.vexvvvv == VEXXDS
			  && i.imm_operands == 1
			  && (i.types[0].bitfield.imm8
			      || i.types[i.operands - 1].bitfield.imm8
			      || i.rounding)));
	  if (i.imm_operands == 2)
	    source = 2;
	  else
	    {
	      if (i.types[0].bitfield.imm8)
		source = 1;
	      else
		source = 0;
	    }
	  break;
	case 5:
	  if (is_evex_encoding (&i.tm))
	    {
	      /* For EVEX instructions, when there are 5 operands, the
		 first one must be immediate operand.  If the second one
		 is immediate operand, the source operand is the 3th
		 one.  If the last one is immediate operand, the source
		 operand is the 2nd one.  */
	      gas_assert (i.imm_operands == 2
			  && i.tm.opcode_modifier.sae
			  && operand_type_check (i.types[0], imm));
	      if (operand_type_check (i.types[1], imm))
		source = 2;
	      else if (operand_type_check (i.types[4], imm))
		source = 1;
	      else
		abort ();
	    }
	  break;
	default:
	  abort ();
	}

      if (!vex_3_sources)
	{
	  dest = source + 1;

	  /* RC/SAE operand could be between DEST and SRC.  That happens
	     when one operand is GPR and the other one is XMM/YMM/ZMM
	     register.  */
	  if (i.rounding && i.rounding->operand == (int) dest)
	    dest++;

	  if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
	    {
	      /* For instructions with VexNDS, the register-only source
		 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
		 register.  It is encoded in VEX prefix.  */

	      i386_operand_type op;
	      unsigned int vvvv;

	      /* Swap two source operands if needed.  */
	      if (i.tm.opcode_modifier.swapsources)
		{
		  vvvv = source;
		  source = dest;
		}
	      else
		vvvv = dest;

	      op = i.tm.operand_types[vvvv];
	      if ((dest + 1) >= i.operands
		  || ((op.bitfield.class != Reg
		       || (!op.bitfield.dword && !op.bitfield.qword))
		      && op.bitfield.class != RegSIMD
		      && !operand_type_equal (&op, &regmask)))
		abort ();
	      i.vex.register_specifier = i.op[vvvv].regs;
	      dest++;
	    }
	}

      i.rm.mode = 3;
      /* One of the register operands will be encoded in the i.rm.reg
	 field, the other in the combined i.rm.mode and i.rm.regmem
	 fields.  If no form of this instruction supports a memory
	 destination operand, then we assume the source operand may
	 sometimes be a memory operand and so we need to store the
	 destination in the i.rm.reg field.  */
      if (!i.tm.opcode_modifier.regmem
	  && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
	{
	  i.rm.reg = i.op[dest].regs->reg_num;
	  i.rm.regmem = i.op[source].regs->reg_num;
	  set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
	  set_rex_vrex (i.op[source].regs, REX_B, FALSE);
	}
      else
	{
	  i.rm.reg = i.op[source].regs->reg_num;
	  i.rm.regmem = i.op[dest].regs->reg_num;
	  set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
	  set_rex_vrex (i.op[source].regs, REX_R, FALSE);
	}
      if (flag_code != CODE_64BIT && (i.rex & REX_R))
	{
	  /* Outside 64-bit mode REX.R can only have come from %cr8-style
	     control registers; it is expressed with a LOCK prefix instead
	     of a REX prefix.  */
	  if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
	    abort ();
	  i.rex &= ~REX_R;
	  add_prefix (LOCK_PREFIX_OPCODE);
	}
    }
  else
    {			/* If it's not 2 reg operands...  */
      unsigned int mem;

      if (i.mem_operands)
	{
	  unsigned int fake_zero_displacement = 0;
	  unsigned int op;

	  /* Locate the (single) memory operand.  */
	  for (op = 0; op < i.operands; op++)
	    if (i.flags[op] & Operand_Mem)
	      break;
	  gas_assert (op < i.operands);

	  if (i.tm.opcode_modifier.sib)
	    {
	      /* The index register of VSIB shouldn't be RegIZ.  */
	      if (i.tm.opcode_modifier.sib != SIBMEM
		  && i.index_reg->reg_num == RegIZ)
		abort ();

	      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
	      if (!i.base_reg)
		{
		  i.sib.base = NO_BASE_REGISTER;
		  i.sib.scale = i.log2_scale_factor;
		  i.types[op].bitfield.disp8 = 0;
		  i.types[op].bitfield.disp16 = 0;
		  i.types[op].bitfield.disp64 = 0;
		  if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
		    {
		      /* Must be 32 bit */
		      i.types[op].bitfield.disp32 = 1;
		      i.types[op].bitfield.disp32s = 0;
		    }
		  else
		    {
		      i.types[op].bitfield.disp32 = 0;
		      i.types[op].bitfield.disp32s = 1;
		    }
		}

	      /* Since the mandatory SIB always has index register, so
		 the code logic remains unchanged. The non-mandatory SIB
		 without index register is allowed and will be handled
		 later.  */
	      if (i.index_reg)
		{
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  set_rex_vrex (i.index_reg, REX_X, FALSE);
		}
	    }

	  default_seg = &ds;

	  if (i.base_reg == 0)
	    {
	      i.rm.mode = 0;
	      if (!i.disp_operands)
		fake_zero_displacement = 1;
	      if (i.index_reg == 0)
		{
		  i386_operand_type newdisp;

		  /* Both check for VSIB and mandatory non-vector SIB.  */
		  gas_assert (!i.tm.opcode_modifier.sib
			      || i.tm.opcode_modifier.sib == SIBMEM);
		  /* Operand is just <disp> */
		  if (flag_code == CODE_64BIT)
		    {
		      /* 64bit mode overwrites the 32bit absolute
			 addressing by RIP relative addressing and
			 absolute addressing is encoded by one of the
			 redundant SIB forms.  */
		      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		      i.sib.base = NO_BASE_REGISTER;
		      i.sib.index = NO_INDEX_REGISTER;
		      newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
		    }
		  else if ((flag_code == CODE_16BIT)
			   ^ (i.prefix[ADDR_PREFIX] != 0))
		    {
		      i.rm.regmem = NO_BASE_REGISTER_16;
		      newdisp = disp16;
		    }
		  else
		    {
		      i.rm.regmem = NO_BASE_REGISTER;
		      newdisp = disp32;
		    }
		  i.types[op] = operand_type_and_not (i.types[op], anydisp);
		  i.types[op] = operand_type_or (i.types[op], newdisp);
		}
	      else if (!i.tm.opcode_modifier.sib)
		{
		  /* !i.base_reg && i.index_reg */
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  i.sib.base = NO_BASE_REGISTER;
		  i.sib.scale = i.log2_scale_factor;
		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		  i.types[op].bitfield.disp8 = 0;
		  i.types[op].bitfield.disp16 = 0;
		  i.types[op].bitfield.disp64 = 0;
		  if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
		    {
		      /* Must be 32 bit */
		      i.types[op].bitfield.disp32 = 1;
		      i.types[op].bitfield.disp32s = 0;
		    }
		  else
		    {
		      i.types[op].bitfield.disp32 = 0;
		      i.types[op].bitfield.disp32s = 1;
		    }
		  if ((i.index_reg->reg_flags & RegRex) != 0)
		    i.rex |= REX_X;
		}
	    }
	  /* RIP addressing for 64bit mode.  */
	  else if (i.base_reg->reg_num == RegIP)
	    {
	      gas_assert (!i.tm.opcode_modifier.sib);
	      i.rm.regmem = NO_BASE_REGISTER;
	      i.types[op].bitfield.disp8 = 0;
	      i.types[op].bitfield.disp16 = 0;
	      i.types[op].bitfield.disp32 = 0;
	      i.types[op].bitfield.disp32s = 1;
	      i.types[op].bitfield.disp64 = 0;
	      i.flags[op] |= Operand_PCrel;
	      if (! i.disp_operands)
		fake_zero_displacement = 1;
	    }
	  else if (i.base_reg->reg_type.bitfield.word)
	    {
	      /* 16-bit addressing forms: the base/index combination maps
		 directly onto the 3-bit r/m encoding.  */
	      gas_assert (!i.tm.opcode_modifier.sib);
	      switch (i.base_reg->reg_num)
		{
		case 3: /* (%bx)  */
		  if (i.index_reg == 0)
		    i.rm.regmem = 7;
		  else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
		    i.rm.regmem = i.index_reg->reg_num - 6;
		  break;
		case 5: /* (%bp)  */
		  default_seg = &ss;
		  if (i.index_reg == 0)
		    {
		      i.rm.regmem = 6;
		      if (operand_type_check (i.types[op], disp) == 0)
			{
			  /* fake (%bp) into 0(%bp)  */
			  if (i.disp_encoding == disp_encoding_16bit)
			    i.types[op].bitfield.disp16 = 1;
			  else
			    i.types[op].bitfield.disp8 = 1;
			  fake_zero_displacement = 1;
			}
		    }
		  else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
		    i.rm.regmem = i.index_reg->reg_num - 6 + 2;
		  break;
		default: /* (%si) -> 4 or (%di) -> 5 */
		  i.rm.regmem = i.base_reg->reg_num - 6 + 4;
		}
	      if (!fake_zero_displacement
		  && !i.disp_operands
		  && i.disp_encoding)
		{
		  fake_zero_displacement = 1;
		  if (i.disp_encoding == disp_encoding_8bit)
		    i.types[op].bitfield.disp8 = 1;
		  else
		    i.types[op].bitfield.disp16 = 1;
		}
	      i.rm.mode = mode_from_disp_size (i.types[op]);
	    }
	  else /* i.base_reg and 32/64 bit mode  */
	    {
	      if (flag_code == CODE_64BIT
		  && operand_type_check (i.types[op], disp))
		{
		  i.types[op].bitfield.disp16 = 0;
		  i.types[op].bitfield.disp64 = 0;
		  if (i.prefix[ADDR_PREFIX] == 0)
		    {
		      i.types[op].bitfield.disp32 = 0;
		      i.types[op].bitfield.disp32s = 1;
		    }
		  else
		    {
		      i.types[op].bitfield.disp32 = 1;
		      i.types[op].bitfield.disp32s = 0;
		    }
		}

	      if (!i.tm.opcode_modifier.sib)
		i.rm.regmem = i.base_reg->reg_num;
	      if ((i.base_reg->reg_flags & RegRex) != 0)
		i.rex |= REX_B;
	      i.sib.base = i.base_reg->reg_num;
	      /* x86-64 ignores REX prefix bit here to avoid decoder
		 complications.  */
	      if (!(i.base_reg->reg_flags & RegRex)
		  && (i.base_reg->reg_num == EBP_REG_NUM
		      || i.base_reg->reg_num == ESP_REG_NUM))
		default_seg = &ss;
	      if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
		{
		  /* mod=00 with base 5 means disp32, so a bare (%ebp)
		     needs an explicit zero displacement.  */
		  fake_zero_displacement = 1;
		  if (i.disp_encoding == disp_encoding_32bit)
		    i.types[op].bitfield.disp32 = 1;
		  else
		    i.types[op].bitfield.disp8 = 1;
		}
	      i.sib.scale = i.log2_scale_factor;
	      if (i.index_reg == 0)
		{
		  /* Only check for VSIB. */
		  gas_assert (i.tm.opcode_modifier.sib != VECSIB128
			      && i.tm.opcode_modifier.sib != VECSIB256
			      && i.tm.opcode_modifier.sib != VECSIB512);

		  /* <disp>(%esp) becomes two byte modrm with no index
		     register.  We've already stored the code for esp
		     in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
		     Any base register besides %esp will not use the
		     extra modrm byte.  */
		  i.sib.index = NO_INDEX_REGISTER;
		}
	      else if (!i.tm.opcode_modifier.sib)
		{
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		  if ((i.index_reg->reg_flags & RegRex) != 0)
		    i.rex |= REX_X;
		}

	      if (i.disp_operands
		  && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
		      || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
		i.rm.mode = 0;
	      else
		{
		  if (!fake_zero_displacement
		      && !i.disp_operands
		      && i.disp_encoding)
		    {
		      fake_zero_displacement = 1;
		      if (i.disp_encoding == disp_encoding_8bit)
			i.types[op].bitfield.disp8 = 1;
		      else
			i.types[op].bitfield.disp32 = 1;
		    }
		  i.rm.mode = mode_from_disp_size (i.types[op]);
		}
	    }

	  if (fake_zero_displacement)
	    {
	      /* Fakes a zero displacement assuming that i.types[op]
		 holds the correct displacement size.  */
	      expressionS *exp;

	      gas_assert (i.op[op].disps == 0);
	      exp = &disp_expressions[i.disp_operands++];
	      i.op[op].disps = exp;
	      exp->X_op = O_constant;
	      exp->X_add_number = 0;
	      exp->X_add_symbol = (symbolS *) 0;
	      exp->X_op_symbol = (symbolS *) 0;
	    }

	  mem = op;
	}
      else
	mem = ~0;

      if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
	{
	  if (operand_type_check (i.types[0], imm))
	    i.vex.register_specifier = NULL;
	  else
	    {
	      /* VEX.vvvv encodes one of the sources when the first
		 operand is not an immediate.  */
	      if (i.tm.opcode_modifier.vexw == VEXW0)
		i.vex.register_specifier = i.op[0].regs;
	      else
		i.vex.register_specifier = i.op[1].regs;
	    }

	  /* Destination is a XMM register encoded in the ModRM.reg
	     and VEX.R bit.  */
	  i.rm.reg = i.op[2].regs->reg_num;
	  if ((i.op[2].regs->reg_flags & RegRex) != 0)
	    i.rex |= REX_R;

	  /* ModRM.rm and VEX.B encodes the other source.  */
	  if (!i.mem_operands)
	    {
	      i.rm.mode = 3;

	      if (i.tm.opcode_modifier.vexw == VEXW0)
		i.rm.regmem = i.op[1].regs->reg_num;
	      else
		i.rm.regmem = i.op[0].regs->reg_num;

	      if ((i.op[1].regs->reg_flags & RegRex) != 0)
		i.rex |= REX_B;
	    }
	}
      else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
	{
	  i.vex.register_specifier = i.op[2].regs;
	  if (!i.mem_operands)
	    {
	      i.rm.mode = 3;
	      i.rm.regmem = i.op[1].regs->reg_num;
	      if ((i.op[1].regs->reg_flags & RegRex) != 0)
		i.rex |= REX_B;
	    }
	}
      /* Fill in i.rm.reg or i.rm.regmem field with register operand
	 (if any) based on i.tm.extension_opcode.  Again, we must be
	 careful to make sure that segment/control/debug/test/MMX
	 registers are coded into the i.rm.reg field.  */
      else if (i.reg_operands)
	{
	  unsigned int op;
	  unsigned int vex_reg = ~0;

	  for (op = 0; op < i.operands; op++)
	    if (i.types[op].bitfield.class == Reg
		|| i.types[op].bitfield.class == RegBND
		|| i.types[op].bitfield.class == RegMask
		|| i.types[op].bitfield.class == SReg
		|| i.types[op].bitfield.class == RegCR
		|| i.types[op].bitfield.class == RegDR
		|| i.types[op].bitfield.class == RegTR
		|| i.types[op].bitfield.class == RegSIMD
		|| i.types[op].bitfield.class == RegMMX)
	      break;

	  if (vex_3_sources)
	    op = dest;
	  else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
	    {
	      /* For instructions with VexNDS, the register-only
		 source operand is encoded in VEX prefix.  */
	      gas_assert (mem != (unsigned int) ~0);

	      if (op > mem)
		{
		  vex_reg = op++;
		  gas_assert (op < i.operands);
		}
	      else
		{
		  /* Check register-only source operand when two source
		     operands are swapped.  */
		  if (!i.tm.operand_types[op].bitfield.baseindex
		      && i.tm.operand_types[op + 1].bitfield.baseindex)
		    {
		      vex_reg = op;
		      op += 2;
		      gas_assert (mem == (vex_reg + 1)
				  && op < i.operands);
		    }
		  else
		    {
		      vex_reg = op + 1;
		      gas_assert (vex_reg < i.operands);
		    }
		}
	    }
	  else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
	    {
	      /* For instructions with VexNDD, the register destination
		 is encoded in VEX prefix.  */
	      if (i.mem_operands == 0)
		{
		  /* There is no memory operand.  */
		  gas_assert ((op + 2) == i.operands);
		  vex_reg = op + 1;
		}
	      else
		{
		  /* There are only 2 non-immediate operands.  */
		  gas_assert (op < i.imm_operands + 2
			      && i.operands == i.imm_operands + 2);
		  vex_reg = i.imm_operands + 1;
		}
	    }
	  else
	    gas_assert (op < i.operands);

	  if (vex_reg != (unsigned int) ~0)
	    {
	      /* Sanity-check the class of the VEX.vvvv operand: only a
		 32/64-bit GPR, a SIMD register or a mask register may
		 be encoded there.  */
	      i386_operand_type *type = &i.tm.operand_types[vex_reg];

	      if ((type->bitfield.class != Reg
		   || (!type->bitfield.dword && !type->bitfield.qword))
		  && type->bitfield.class != RegSIMD
		  && !operand_type_equal (type, &regmask))
		abort ();

	      i.vex.register_specifier = i.op[vex_reg].regs;
	    }

	  /* Don't set OP operand twice.  */
	  if (vex_reg != op)
	    {
	      /* If there is an extension opcode to put here, the
		 register number must be put into the regmem field.  */
	      if (i.tm.extension_opcode != None)
		{
		  i.rm.regmem = i.op[op].regs->reg_num;
		  set_rex_vrex (i.op[op].regs, REX_B,
				i.tm.opcode_modifier.sse2avx);
		}
	      else
		{
		  i.rm.reg = i.op[op].regs->reg_num;
		  set_rex_vrex (i.op[op].regs, REX_R,
				i.tm.opcode_modifier.sse2avx);
		}
	    }

	  /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
	     must set it to 3 to indicate this is a register operand
	     in the regmem field.  */
	  if (!i.mem_operands)
	    i.rm.mode = 3;
	}

      /* Fill in i.rm.reg field with extension opcode (if any).  */
      if (i.tm.extension_opcode != None)
	i.rm.reg = i.tm.extension_opcode;
    }
  return default_seg;
}
8481
8482 static INLINE void
8483 frag_opcode_byte (unsigned char byte)
8484 {
8485 if (now_seg != absolute_section)
8486 FRAG_APPEND_1_CHAR (byte);
8487 else
8488 ++abs_section_offset;
8489 }
8490
8491 static unsigned int
8492 flip_code16 (unsigned int code16)
8493 {
8494 gas_assert (i.tm.operands == 1);
8495
8496 return !(i.prefix[REX_PREFIX] & REX_W)
8497 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8498 || i.tm.operand_types[0].bitfield.disp32s
8499 : i.tm.operand_types[0].bitfield.disp16)
8500 ? CODE16 : 0;
8501 }
8502
/* Emit a relaxable (conditional or unconditional) branch: write the
   prefixes and first opcode byte into the fixed part of the frag, then
   hand the rest over to the relaxation machinery via frag_var, which
   will later pick the final displacement size in md_convert_frag.  */
static void
output_branch (void)
{
  char *p;
  int size;
  int code16;
  int prefix;
  relax_substateT subtype;
  symbolS *sym;
  offsetT off;

  /* Relaxation needs real frags; the absolute section has none.  */
  if (now_seg == absolute_section)
    {
      as_bad (_("relaxable branches not supported in absolute section"));
      return;
    }

  code16 = flag_code == CODE_16BIT ? CODE16 : 0;
  /* {disp32} forces the large form right away; otherwise start SMALL
     and let relaxation grow it.  */
  size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;

  /* Count the prefix bytes that will precede the opcode, consuming
     them from i.prefixes as we go.  */
  prefix = 0;
  if (i.prefix[DATA_PREFIX] != 0)
    {
      prefix = 1;
      i.prefixes -= 1;
      code16 ^= flip_code16(code16);
    }
  /* Pentium4 branch hints.  */
  if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
      || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
    {
      prefix++;
      i.prefixes--;
    }
  if (i.prefix[REX_PREFIX] != 0)
    {
      prefix++;
      i.prefixes--;
    }

  /* BND prefixed jump.  */
  if (i.prefix[BND_PREFIX] != 0)
    {
      prefix++;
      i.prefixes--;
    }

  if (i.prefixes != 0)
    as_warn (_("skipping prefixes on `%s'"), i.tm.name);

  /* It's always a symbol; End frag & setup for relax.
     Make sure there is enough room in this frag for the largest
     instruction we may generate in md_convert_frag.  This is 2
     bytes for the opcode and room for the prefix and largest
     displacement.  */
  frag_grow (prefix + 2 + 4);
  /* Prefix and 1 opcode byte go in fr_fix.  */
  p = frag_more (prefix + 1);
  if (i.prefix[DATA_PREFIX] != 0)
    *p++ = DATA_PREFIX_OPCODE;
  if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
      || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
    *p++ = i.prefix[SEG_PREFIX];
  if (i.prefix[BND_PREFIX] != 0)
    *p++ = BND_PREFIX_OPCODE;
  if (i.prefix[REX_PREFIX] != 0)
    *p++ = i.prefix[REX_PREFIX];
  *p = i.tm.base_opcode;

  /* Pick the relax state: unconditional jump, i386+ conditional jump
     (can grow to a long jcc), or pre-386 conditional (needs an
     inverted-jump-around sequence when out of range).  */
  if ((unsigned char) *p == JUMP_PC_RELATIVE)
    subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
  else if (cpu_arch_flags.bitfield.cpui386)
    subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
  else
    subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
  subtype |= code16;

  sym = i.op[0].disps->X_add_symbol;
  off = i.op[0].disps->X_add_number;

  if (i.op[0].disps->X_op != O_constant
      && i.op[0].disps->X_op != O_symbol)
    {
      /* Handle complex expressions.  */
      sym = make_expr_symbol (i.op[0].disps);
      off = 0;
    }

  /* 1 possible extra opcode + 4 byte displacement go in var part.
     Pass reloc in fr_var.  */
  frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
}
8595
8596 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8597 /* Return TRUE iff PLT32 relocation should be used for branching to
8598 symbol S. */
8599
8600 static bfd_boolean
8601 need_plt32_p (symbolS *s)
8602 {
8603 /* PLT32 relocation is ELF only. */
8604 if (!IS_ELF)
8605 return FALSE;
8606
8607 #ifdef TE_SOLARIS
8608 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8609 krtld support it. */
8610 return FALSE;
8611 #endif
8612
8613 /* Since there is no need to prepare for PLT branch on x86-64, we
8614 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8615 be used as a marker for 32-bit PC-relative branches. */
8616 if (!object_64bit)
8617 return FALSE;
8618
8619 /* Weak or undefined symbol need PLT32 relocation. */
8620 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8621 return TRUE;
8622
8623 /* Non-global symbol doesn't need PLT32 relocation. */
8624 if (! S_IS_EXTERNAL (s))
8625 return FALSE;
8626
8627 /* Other global symbols need PLT32 relocation. NB: Symbol with
8628 non-default visibilities are treated as normal global symbol
8629 so that PLT32 relocation can be used as a marker for 32-bit
8630 PC-relative branches. It is useful for linker relaxation. */
8631 return TRUE;
8632 }
8633 #endif
8634
/* Emit a non-relaxable jump (loop/jecxz, or a jump whose displacement
   size is already fixed): write prefixes, the opcode and a fix-up for
   the displacement.  Contrast with output_branch, which goes through
   the relaxation machinery.  */
static void
output_jump (void)
{
  char *p;
  int size;
  fixS *fixP;
  bfd_reloc_code_real_type jump_reloc = i.reloc[0];

  if (i.tm.opcode_modifier.jump == JUMP_BYTE)
    {
      /* This is a loop or jecxz type instruction.  */
      size = 1;
      if (i.prefix[ADDR_PREFIX] != 0)
	{
	  frag_opcode_byte (ADDR_PREFIX_OPCODE);
	  i.prefixes -= 1;
	}
      /* Pentium4 branch hints.  */
      if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
	  || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
	{
	  frag_opcode_byte (i.prefix[SEG_PREFIX]);
	  i.prefixes--;
	}
    }
  else
    {
      int code16;

      code16 = 0;
      if (flag_code == CODE_16BIT)
	code16 = CODE16;

      /* A data-size prefix may flip the displacement between 2 and 4
	 bytes; flip_code16 checks what the template supports.  */
      if (i.prefix[DATA_PREFIX] != 0)
	{
	  frag_opcode_byte (DATA_PREFIX_OPCODE);
	  i.prefixes -= 1;
	  code16 ^= flip_code16(code16);
	}

      size = 4;
      if (code16)
	size = 2;
    }

  /* BND prefixed jump.  */
  if (i.prefix[BND_PREFIX] != 0)
    {
      frag_opcode_byte (i.prefix[BND_PREFIX]);
      i.prefixes -= 1;
    }

  if (i.prefix[REX_PREFIX] != 0)
    {
      frag_opcode_byte (i.prefix[REX_PREFIX]);
      i.prefixes -= 1;
    }

  if (i.prefixes != 0)
    as_warn (_("skipping prefixes on `%s'"), i.tm.name);

  /* In the absolute section only the size is accounted for; no bytes
     or fix-ups are emitted.  */
  if (now_seg == absolute_section)
    {
      abs_section_offset += i.tm.opcode_length + size;
      return;
    }

  p = frag_more (i.tm.opcode_length + size);
  switch (i.tm.opcode_length)
    {
    case 2:
      *p++ = i.tm.base_opcode >> 8;
      /* Fall through.  */
    case 1:
      *p++ = i.tm.base_opcode;
      break;
    default:
      abort ();
    }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Upgrade a plain PC32 to PLT32 where it serves as the 32-bit
     PC-relative branch marker (see need_plt32_p).  */
  if (size == 4
      && jump_reloc == NO_RELOC
      && need_plt32_p (i.op[0].disps->X_add_symbol))
    jump_reloc = BFD_RELOC_X86_64_PLT32;
#endif

  jump_reloc = reloc (size, 1, 1, jump_reloc);

  fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
		      i.op[0].disps, 1, jump_reloc);

  /* All jumps handled here are signed, but don't use a signed limit
     check for 32 and 16 bit jumps as we want to allow wrap around at
     4G and 64k respectively.  */
  if (size == 1)
    fixP->fx_signed = 1;
}
8733
8734 static void
8735 output_interseg_jump (void)
8736 {
8737 char *p;
8738 int size;
8739 int prefix;
8740 int code16;
8741
8742 code16 = 0;
8743 if (flag_code == CODE_16BIT)
8744 code16 = CODE16;
8745
8746 prefix = 0;
8747 if (i.prefix[DATA_PREFIX] != 0)
8748 {
8749 prefix = 1;
8750 i.prefixes -= 1;
8751 code16 ^= CODE16;
8752 }
8753
8754 gas_assert (!i.prefix[REX_PREFIX]);
8755
8756 size = 4;
8757 if (code16)
8758 size = 2;
8759
8760 if (i.prefixes != 0)
8761 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8762
8763 if (now_seg == absolute_section)
8764 {
8765 abs_section_offset += prefix + 1 + 2 + size;
8766 return;
8767 }
8768
8769 /* 1 opcode; 2 segment; offset */
8770 p = frag_more (prefix + 1 + 2 + size);
8771
8772 if (i.prefix[DATA_PREFIX] != 0)
8773 *p++ = DATA_PREFIX_OPCODE;
8774
8775 if (i.prefix[REX_PREFIX] != 0)
8776 *p++ = i.prefix[REX_PREFIX];
8777
8778 *p++ = i.tm.base_opcode;
8779 if (i.op[1].imms->X_op == O_constant)
8780 {
8781 offsetT n = i.op[1].imms->X_add_number;
8782
8783 if (size == 2
8784 && !fits_in_unsigned_word (n)
8785 && !fits_in_signed_word (n))
8786 {
8787 as_bad (_("16-bit jump out of range"));
8788 return;
8789 }
8790 md_number_to_chars (p, n, size);
8791 }
8792 else
8793 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8794 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8795
8796 p += size;
8797 if (i.op[0].imms->X_op == O_constant)
8798 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
8799 else
8800 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
8801 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
8802 }
8803
8804 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* End-of-assembly hook: emit a .note.gnu.property section recording
   which x86 ISA levels and hardware features the assembled code used
   (accumulated in x86_isa_1_used / x86_feature_2_used).  Does nothing
   unless assembling ELF with -mx86-used-note enabled.  */

void
x86_cleanup (void)
{
  char *p;
  asection *seg = now_seg;
  subsegT subseg = now_subseg;
  asection *sec;
  unsigned int alignment, align_size_1;
  unsigned int isa_1_descsz, feature_2_descsz, descsz;
  unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
  unsigned int padding;

  if (!IS_ELF || !x86_used_note)
    return;

  /* Baseline x86 is always marked as used.  */
  x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;

  /* The .note.gnu.property section layout:

     Field	Length		Contents
     ----	----		----
     n_namsz	4		4
     n_descsz	4		The note descriptor size
     n_type	4		NT_GNU_PROPERTY_TYPE_0
     n_name	4		"GNU"
     n_desc	n_descsz	The program property array
     ....	....		....
   */

  /* Create the .note.gnu.property section.  */
  sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
  bfd_set_section_flags (sec,
			 (SEC_ALLOC
			  | SEC_LOAD
			  | SEC_DATA
			  | SEC_HAS_CONTENTS
			  | SEC_READONLY));

  /* Properties are 8-byte aligned (mask 7, 2^3) for ELFCLASS64 and
     4-byte aligned (mask 3, 2^2) otherwise.  */
  if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
    {
      align_size_1 = 7;
      alignment = 3;
    }
  else
    {
      align_size_1 = 3;
      alignment = 2;
    }

  bfd_set_section_alignment (sec, alignment);
  elf_section_type (sec) = SHT_NOTE;

  /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
     + 4-byte data  */
  isa_1_descsz_raw = 4 + 4 + 4;
  /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
  isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;

  feature_2_descsz_raw = isa_1_descsz;
  /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
     + 4-byte data  */
  feature_2_descsz_raw += 4 + 4 + 4;
  /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
  feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
		      & ~align_size_1);

  descsz = feature_2_descsz;
  /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
  p = frag_more (4 + 4 + 4 + 4 + descsz);

  /* Write n_namsz.  */
  md_number_to_chars (p, (valueT) 4, 4);

  /* Write n_descsz.  */
  md_number_to_chars (p + 4, (valueT) descsz, 4);

  /* Write n_type.  */
  md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);

  /* Write n_name ("GNU" including its NUL terminator).  */
  memcpy (p + 4 * 3, "GNU", 4);

  /* Write 4-byte type.  */
  md_number_to_chars (p + 4 * 4,
		      (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);

  /* Write 4-byte data size.  */
  md_number_to_chars (p + 4 * 5, (valueT) 4, 4);

  /* Write 4-byte data.  */
  md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);

  /* Zero out paddings.  */
  padding = isa_1_descsz - isa_1_descsz_raw;
  if (padding)
    memset (p + 4 * 7, 0, padding);

  /* Write 4-byte type.  */
  md_number_to_chars (p + isa_1_descsz + 4 * 4,
		      (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);

  /* Write 4-byte data size.  */
  md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);

  /* Write 4-byte data.  */
  md_number_to_chars (p + isa_1_descsz + 4 * 6,
		      (valueT) x86_feature_2_used, 4);

  /* Zero out paddings.  */
  padding = feature_2_descsz - feature_2_descsz_raw;
  if (padding)
    memset (p + isa_1_descsz + 4 * 7, 0, padding);

  /* We probably can't restore the current segment, for there likely
     isn't one yet...  */
  if (seg && subseg)
    subseg_set (seg, subseg);
}
8923 #endif
8924
8925 static unsigned int
8926 encoding_length (const fragS *start_frag, offsetT start_off,
8927 const char *frag_now_ptr)
8928 {
8929 unsigned int len = 0;
8930
8931 if (start_frag != frag_now)
8932 {
8933 const fragS *fr = start_frag;
8934
8935 do {
8936 len += fr->fr_fix;
8937 fr = fr->fr_next;
8938 } while (fr && fr != frag_now);
8939 }
8940
8941 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8942 }
8943
/* Return 1 for test, and, cmp, add, sub, inc and dec which may
   be macro-fused with conditional jumps.  On success, *MF_CMP_P is set
   to the macro-fusion class of the instruction.
   NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
   or is one of the following format:

    cmp m, imm
    add m, imm
    sub m, imm
    test m, imm
    and m, imm
    inc m
    dec m

   it is unfusible.  */

static int
maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
{
  /* No RIP address.  */
  if (i.base_reg && i.base_reg->reg_num == RegIP)
    return 0;

  /* No VEX/EVEX encoding.  */
  if (is_any_vex_encoding (&i.tm))
    return 0;

  /* add, sub without add/sub m, imm.  Covers opcodes 0x00-0x05 (add),
     0x28-0x2d (sub), and the 0x80-0x83 immediate group with /0 (add)
     or /5 (sub).  */
  if (i.tm.base_opcode <= 5
      || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
      || ((i.tm.base_opcode | 3) == 0x83
	  && (i.tm.extension_opcode == 0x5
	      || i.tm.extension_opcode == 0x0)))
    {
      *mf_cmp_p = mf_cmp_alu_cmp;
      return !(i.mem_operands && i.imm_operands);
    }

  /* and without and m, imm.  Opcodes 0x20-0x25 plus the immediate
     group with /4.  */
  if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
      || ((i.tm.base_opcode | 3) == 0x83
	  && i.tm.extension_opcode == 0x4))
    {
      *mf_cmp_p = mf_cmp_test_and;
      return !(i.mem_operands && i.imm_operands);
    }

  /* test without test m imm.  Opcodes 0x84/0x85, 0xa8/0xa9, and
     0xf6/0xf7 with /0.  */
  if ((i.tm.base_opcode | 1) == 0x85
      || (i.tm.base_opcode | 1) == 0xa9
      || ((i.tm.base_opcode | 1) == 0xf7
	  && i.tm.extension_opcode == 0))
    {
      *mf_cmp_p = mf_cmp_test_and;
      return !(i.mem_operands && i.imm_operands);
    }

  /* cmp without cmp m, imm.  Opcodes 0x38-0x3d plus the immediate
     group with /7.  */
  if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
      || ((i.tm.base_opcode | 3) == 0x83
	  && (i.tm.extension_opcode == 0x7)))
    {
      *mf_cmp_p = mf_cmp_alu_cmp;
      return !(i.mem_operands && i.imm_operands);
    }

  /* inc, dec without inc/dec m.  Short-form 0x40-0x4f (32-bit only)
     plus 0xfe/0xff with /0 or /1.  */
  if ((i.tm.cpu_flags.bitfield.cpuno64
       && (i.tm.base_opcode | 0xf) == 0x4f)
      || ((i.tm.base_opcode | 1) == 0xff
	  && i.tm.extension_opcode <= 0x1))
    {
      *mf_cmp_p = mf_cmp_incdec;
      return !i.mem_operands;
    }

  return 0;
}
9021
9022 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9023
9024 static int
9025 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9026 {
9027 /* NB: Don't work with COND_JUMP86 without i386. */
9028 if (!align_branch_power
9029 || now_seg == absolute_section
9030 || !cpu_arch_flags.bitfield.cpui386
9031 || !(align_branch & align_branch_fused_bit))
9032 return 0;
9033
9034 if (maybe_fused_with_jcc_p (mf_cmp_p))
9035 {
9036 if (last_insn.kind == last_insn_other
9037 || last_insn.seg != now_seg)
9038 return 1;
9039 if (flag_debug)
9040 as_warn_where (last_insn.file, last_insn.line,
9041 _("`%s` skips -malign-branch-boundary on `%s`"),
9042 last_insn.name, i.tm.name);
9043 }
9044
9045 return 0;
9046 }
9047
9048 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9049
9050 static int
9051 add_branch_prefix_frag_p (void)
9052 {
9053 /* NB: Don't work with COND_JUMP86 without i386. Don't add prefix
9054 to PadLock instructions since they include prefixes in opcode. */
9055 if (!align_branch_power
9056 || !align_branch_prefix_size
9057 || now_seg == absolute_section
9058 || i.tm.cpu_flags.bitfield.cpupadlock
9059 || !cpu_arch_flags.bitfield.cpui386)
9060 return 0;
9061
9062 /* Don't add prefix if it is a prefix or there is no operand in case
9063 that segment prefix is special. */
9064 if (!i.operands || i.tm.opcode_modifier.isprefix)
9065 return 0;
9066
9067 if (last_insn.kind == last_insn_other
9068 || last_insn.seg != now_seg)
9069 return 1;
9070
9071 if (flag_debug)
9072 as_warn_where (last_insn.file, last_insn.line,
9073 _("`%s` skips -malign-branch-boundary on `%s`"),
9074 last_insn.name, i.tm.name);
9075
9076 return 0;
9077 }
9078
/* Return 1 if a BRANCH_PADDING frag should be generated for the current
   instruction.  On success *BRANCH_P is set to the kind of branch
   (jmp/jcc/ret/call/indirect) and, for jcc, *MF_JCC_P to its
   macro-fusion group.  */

static int
add_branch_padding_frag_p (enum align_branch_kind *branch_p,
			   enum mf_jcc_kind *mf_jcc_p)
{
  int add_padding;

  /* NB: Don't work with COND_JUMP86 without i386.  */
  if (!align_branch_power
      || now_seg == absolute_section
      || !cpu_arch_flags.bitfield.cpui386)
    return 0;

  add_padding = 0;

  /* Check for jcc and direct jmp.  */
  if (i.tm.opcode_modifier.jump == JUMP)
    {
      if (i.tm.base_opcode == JUMP_PC_RELATIVE)
	{
	  *branch_p = align_branch_jmp;
	  add_padding = align_branch & align_branch_jmp_bit;
	}
      else
	{
	  /* Because J<cc> and JN<cc> share same group in macro-fusible table,
	     ignore the lowest bit.  */
	  *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
	  *branch_p = align_branch_jcc;
	  if ((align_branch & align_branch_jcc_bit))
	    add_padding = 1;
	}
    }
  else if (is_any_vex_encoding (&i.tm))
    return 0;
  else if ((i.tm.base_opcode | 1) == 0xc3)
    {
      /* Near ret.  */
      *branch_p = align_branch_ret;
      if ((align_branch & align_branch_ret_bit))
	add_padding = 1;
    }
  else
    {
      /* Check for indirect jmp, direct and indirect calls.  */
      if (i.tm.base_opcode == 0xe8)
	{
	  /* Direct call.  */
	  *branch_p = align_branch_call;
	  if ((align_branch & align_branch_call_bit))
	    add_padding = 1;
	}
      else if (i.tm.base_opcode == 0xff
	       && (i.tm.extension_opcode == 2
		   || i.tm.extension_opcode == 4))
	{
	  /* Indirect call and jmp.  */
	  *branch_p = align_branch_indirect;
	  if ((align_branch & align_branch_indirect_bit))
	    add_padding = 1;
	}

      if (add_padding
	  && i.disp_operands
	  && tls_get_addr
	  && (i.op[0].disps->X_op == O_symbol
	      || (i.op[0].disps->X_op == O_subtract
		  && i.op[0].disps->X_op_symbol == GOT_symbol)))
	{
	  symbolS *s = i.op[0].disps->X_add_symbol;
	  /* No padding to call to global or undefined tls_get_addr.  */
	  if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
	      && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
	    return 0;
	}
    }

  /* A preceding directive in this segment suppresses padding here.  */
  if (add_padding
      && last_insn.kind != last_insn_other
      && last_insn.seg == now_seg)
    {
      if (flag_debug)
	as_warn_where (last_insn.file, last_insn.line,
		       _("`%s` skips -malign-branch-boundary on `%s`"),
		       last_insn.name, i.tm.name);
      return 0;
    }

  return add_padding;
}
9170
/* Output the instruction described by the global template `i': record
   GNU property ISA/feature bits (ELF), set up branch-alignment padding
   frags if requested, then emit prefixes, opcode, ModRM/SIB,
   displacement and immediate bytes, finally checking the 15-byte
   encoding limit.  */

static void
output_insn (void)
{
  fragS *insn_start_frag;
  offsetT insn_start_off;
  fragS *fragP = NULL;
  enum align_branch_kind branch = align_branch_none;
  /* The initializer is arbitrary just to avoid uninitialized error.
     it's actually either assigned in add_branch_padding_frag_p
     or never be used.  */
  enum mf_jcc_kind mf_jcc = mf_jcc_jo;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Accumulate the ISA level and feature bits this instruction implies
     for the .note.gnu.property section (see x86_cleanup).  */
  if (IS_ELF && x86_used_note && now_seg != absolute_section)
    {
      if ((i.xstate & xstate_tmm) == xstate_tmm
	  || i.tm.cpu_flags.bitfield.cpuamx_tile)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;

      if (i.tm.cpu_flags.bitfield.cpusse3
	  || i.tm.cpu_flags.bitfield.cpussse3
	  || i.tm.cpu_flags.bitfield.cpusse4_1
	  || i.tm.cpu_flags.bitfield.cpusse4_2
	  || i.tm.cpu_flags.bitfield.cpucx16
	  || i.tm.cpu_flags.bitfield.cpupopcnt
	  /* LAHF-SAHF insns in 64-bit mode.  */
	  || (flag_code == CODE_64BIT
	      && (i.tm.base_opcode | 1) == 0x9f))
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
      if (i.tm.cpu_flags.bitfield.cpuavx
	  || i.tm.cpu_flags.bitfield.cpuavx2
	  /* Any VEX encoded insns except for CpuAVX512F, CpuAVX512BW,
	     CpuAVX512DQ, LWP, TBM and AMX.  */
	  || (i.tm.opcode_modifier.vex
	      && !i.tm.cpu_flags.bitfield.cpuavx512f
	      && !i.tm.cpu_flags.bitfield.cpuavx512bw
	      && !i.tm.cpu_flags.bitfield.cpuavx512dq
	      && !i.tm.cpu_flags.bitfield.cpulwp
	      && !i.tm.cpu_flags.bitfield.cputbm
	      && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
	  || i.tm.cpu_flags.bitfield.cpuf16c
	  || i.tm.cpu_flags.bitfield.cpufma
	  || i.tm.cpu_flags.bitfield.cpulzcnt
	  || i.tm.cpu_flags.bitfield.cpumovbe
	  || i.tm.cpu_flags.bitfield.cpuxsave
	  || i.tm.cpu_flags.bitfield.cpuxsavec
	  || i.tm.cpu_flags.bitfield.cpuxsaveopt
	  || i.tm.cpu_flags.bitfield.cpuxsaves)
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
      if (i.tm.cpu_flags.bitfield.cpuavx512f
	  || i.tm.cpu_flags.bitfield.cpuavx512bw
	  || i.tm.cpu_flags.bitfield.cpuavx512dq
	  || i.tm.cpu_flags.bitfield.cpuavx512vl
	  /* Any EVEX encoded insns except for AVX512ER, AVX512PF and
	     VNNIW.  */
	  || (i.tm.opcode_modifier.evex
	      && !i.tm.cpu_flags.bitfield.cpuavx512er
	      && !i.tm.cpu_flags.bitfield.cpuavx512pf
	      && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;

      if (i.tm.cpu_flags.bitfield.cpu8087
	  || i.tm.cpu_flags.bitfield.cpu287
	  || i.tm.cpu_flags.bitfield.cpu387
	  || i.tm.cpu_flags.bitfield.cpu687
	  || i.tm.cpu_flags.bitfield.cpufisttp)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
      if ((i.xstate & xstate_mmx)
	  || i.tm.base_opcode == 0xf77 /* emms */
	  || i.tm.base_opcode == 0xf0e /* femms */)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
      /* A vector index register implies use of the matching vector
	 register class.  */
      if (i.index_reg)
	{
	  if (i.index_reg->reg_type.bitfield.zmmword)
	    i.xstate |= xstate_zmm;
	  else if (i.index_reg->reg_type.bitfield.ymmword)
	    i.xstate |= xstate_ymm;
	  else if (i.index_reg->reg_type.bitfield.xmmword)
	    i.xstate |= xstate_xmm;
	}
      if ((i.xstate & xstate_xmm)
	  || i.tm.cpu_flags.bitfield.cpuwidekl
	  || i.tm.cpu_flags.bitfield.cpukl)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
      if ((i.xstate & xstate_ymm) == xstate_ymm)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
      if ((i.xstate & xstate_zmm) == xstate_zmm)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
      if (i.mask || (i.xstate & xstate_mask) == xstate_mask)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
      if (i.tm.cpu_flags.bitfield.cpufxsr)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
      if (i.tm.cpu_flags.bitfield.cpuxsave)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
      if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
      if (i.tm.cpu_flags.bitfield.cpuxsavec)
	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
    }
#endif

  /* Tie dwarf2 debug info to the address at the start of the insn.
     We can't do this after the insn has been output as the current
     frag may have been closed off.  eg. by frag_var.  */
  dwarf2_emit_insn (0);

  insn_start_frag = frag_now;
  insn_start_off = frag_now_fix ();

  if (add_branch_padding_frag_p (&branch, &mf_jcc))
    {
      char *p;
      /* Branch can be 8 bytes.  Leave some room for prefixes.  */
      unsigned int max_branch_padding_size = 14;

      /* Align section to boundary.  */
      record_alignment (now_seg, align_branch_power);

      /* Make room for padding.  */
      frag_grow (max_branch_padding_size);

      /* Start of the padding.  */
      p = frag_more (0);

      fragP = frag_now;

      frag_var (rs_machine_dependent, max_branch_padding_size, 0,
		ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
		NULL, 0, p);

      fragP->tc_frag_data.mf_type = mf_jcc;
      fragP->tc_frag_data.branch_type = branch;
      fragP->tc_frag_data.max_bytes = max_branch_padding_size;
    }

  /* Output jumps.  */
  if (i.tm.opcode_modifier.jump == JUMP)
    output_branch ();
  else if (i.tm.opcode_modifier.jump == JUMP_BYTE
	   || i.tm.opcode_modifier.jump == JUMP_DWORD)
    output_jump ();
  else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
    output_interseg_jump ();
  else
    {
      /* Output normal instructions here.  */
      char *p;
      unsigned char *q;
      unsigned int j;
      enum mf_cmp_kind mf_cmp;

      if (avoid_fence
	  && (i.tm.base_opcode == 0xfaee8
	      || i.tm.base_opcode == 0xfaef0
	      || i.tm.base_opcode == 0xfaef8))
	{
	  /* Encode lfence, mfence, and sfence as
	     f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
	  if (now_seg != absolute_section)
	    {
	      offsetT val = 0x240483f0ULL;

	      p = frag_more (5);
	      md_number_to_chars (p, val, 5);
	    }
	  else
	    abs_section_offset += 5;
	  return;
	}

      /* Some processors fail on LOCK prefix.  This option makes
	 assembler ignore LOCK prefix and serves as a workaround.  */
      if (omit_lock_prefix)
	{
	  if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
	    return;
	  i.prefix[LOCK_PREFIX] = 0;
	}

      if (branch)
	/* Skip if this is a branch.  */
	;
      else if (add_fused_jcc_padding_frag_p (&mf_cmp))
	{
	  /* Make room for padding.  */
	  frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
	  p = frag_more (0);

	  fragP = frag_now;

	  frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
		    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
		    NULL, 0, p);

	  fragP->tc_frag_data.mf_type = mf_cmp;
	  fragP->tc_frag_data.branch_type = align_branch_fused;
	  fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
	}
      else if (add_branch_prefix_frag_p ())
	{
	  unsigned int max_prefix_size = align_branch_prefix_size;

	  /* Make room for padding.  */
	  frag_grow (max_prefix_size);
	  p = frag_more (0);

	  fragP = frag_now;

	  frag_var (rs_machine_dependent, max_prefix_size, 0,
		    ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
		    NULL, 0, p);

	  fragP->tc_frag_data.max_bytes = max_prefix_size;
	}

      /* Since the VEX/EVEX prefix contains the implicit prefix, we
	 don't need the explicit prefix.  */
      if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
	{
	  switch (i.tm.opcode_modifier.opcodeprefix)
	    {
	    case PREFIX_0X66:
	      add_prefix (0x66);
	      break;
	    case PREFIX_0XF2:
	      add_prefix (0xf2);
	      break;
	    case PREFIX_0XF3:
	      if (!i.tm.cpu_flags.bitfield.cpupadlock
		  || (i.prefix[REP_PREFIX] != 0xf3))
		add_prefix (0xf3);
	      break;
	    case PREFIX_NONE:
	      switch (i.tm.opcode_length)
		{
		case 3:
		case 2:
		case 1:
		  break;
		case 0:
		  /* Check for pseudo prefixes.  */
		  as_bad_where (insn_start_frag->fr_file,
				insn_start_frag->fr_line,
				_("pseudo prefix without instruction"));
		  return;
		default:
		  abort ();
		}
	      break;
	    default:
	      abort ();
	    }

#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
	     R_X86_64_GOTTPOFF relocation so that linker can safely
	     perform IE->LE optimization.  A dummy REX_OPCODE prefix
	     is also needed for lea with R_X86_64_GOTPC32_TLSDESC
	     relocation for GDesc -> IE/LE optimization.  */
	  if (x86_elf_abi == X86_64_X32_ABI
	      && i.operands == 2
	      && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
		  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
	      && i.prefix[REX_PREFIX] == 0)
	    add_prefix (REX_OPCODE);
#endif

	  /* The prefix bytes.  */
	  for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
	    if (*q)
	      frag_opcode_byte (*q);
	}
      else
	{
	  /* VEX/EVEX encoded: only segment and address size override
	     prefixes may be emitted explicitly.  */
	  for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
	    if (*q)
	      switch (j)
		{
		case SEG_PREFIX:
		case ADDR_PREFIX:
		  frag_opcode_byte (*q);
		  break;
		default:
		  /* There should be no other prefixes for instructions
		     with VEX prefix.  */
		  abort ();
		}

	  /* For EVEX instructions i.vrex should become 0 after
	     build_evex_prefix.  For VEX instructions upper 16 registers
	     aren't available, so VREX should be 0.  */
	  if (i.vrex)
	    abort ();
	  /* Now the VEX prefix.  */
	  if (now_seg != absolute_section)
	    {
	      p = frag_more (i.vex.length);
	      for (j = 0; j < i.vex.length; j++)
		p[j] = i.vex.bytes[j];
	    }
	  else
	    abs_section_offset += i.vex.length;
	}

      /* Now the opcode; be careful about word order here!  */
      if (now_seg == absolute_section)
	abs_section_offset += i.tm.opcode_length;
      else if (i.tm.opcode_length == 1)
	{
	  FRAG_APPEND_1_CHAR (i.tm.base_opcode);
	}
      else
	{
	  switch (i.tm.opcode_length)
	    {
	    case 4:
	      p = frag_more (4);
	      *p++ = (i.tm.base_opcode >> 24) & 0xff;
	      *p++ = (i.tm.base_opcode >> 16) & 0xff;
	      break;
	    case 3:
	      p = frag_more (3);
	      *p++ = (i.tm.base_opcode >> 16) & 0xff;
	      break;
	    case 2:
	      p = frag_more (2);
	      break;
	    default:
	      abort ();
	      break;
	    }

	  /* Put out high byte first: can't use md_number_to_chars!  */
	  *p++ = (i.tm.base_opcode >> 8) & 0xff;
	  *p = i.tm.base_opcode & 0xff;
	}

      /* Now the modrm byte and sib byte (if present).  */
      if (i.tm.opcode_modifier.modrm)
	{
	  frag_opcode_byte ((i.rm.regmem << 0)
			    | (i.rm.reg << 3)
			    | (i.rm.mode << 6));
	  /* If i.rm.regmem == ESP (4)
	     && i.rm.mode != (Register mode)
	     && not 16 bit
	     ==> need second modrm byte.  */
	  if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
	      && i.rm.mode != 3
	      && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
	    frag_opcode_byte ((i.sib.base << 0)
			      | (i.sib.index << 3)
			      | (i.sib.scale << 6));
	}

      if (i.disp_operands)
	output_disp (insn_start_frag, insn_start_off);

      if (i.imm_operands)
	output_imm (insn_start_frag, insn_start_off);

      /*
       * frag_now_fix () returning plain abs_section_offset when we're in the
       * absolute section, and abs_section_offset not getting updated as data
       * gets added to the frag breaks the logic below.
       */
      if (now_seg != absolute_section)
	{
	  /* Enforce the architectural 15-byte limit and size any pending
	     padding frag for the prefixes it may still absorb.  */
	  j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
	  if (j > 15)
	    as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
		     j);
	  else if (fragP)
	    {
	      /* NB: Don't add prefix with GOTPC relocation since
		 output_disp() above depends on the fixed encoding
		 length.  Can't add prefix with TLS relocation since
		 it breaks TLS linker optimization.  */
	      unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
	      /* Prefix count on the current instruction.  */
	      unsigned int count = i.vex.length;
	      unsigned int k;
	      for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
		/* REX byte is encoded in VEX/EVEX prefix.  */
		if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
		  count++;

	      /* Count prefixes for extended opcode maps.  */
	      if (!i.vex.length)
		switch (i.tm.opcode_length)
		  {
		  case 3:
		    if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
		      {
			count++;
			switch ((i.tm.base_opcode >> 8) & 0xff)
			  {
			  case 0x38:
			  case 0x3a:
			    count++;
			    break;
			  default:
			    break;
			  }
		      }
		    break;
		  case 2:
		    if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
		      count++;
		    break;
		  case 1:
		    break;
		  default:
		    abort ();
		  }

	      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
		  == BRANCH_PREFIX)
		{
		  /* Set the maximum prefix size in BRANCH_PREFIX
		     frag.  */
		  if (fragP->tc_frag_data.max_bytes > max)
		    fragP->tc_frag_data.max_bytes = max;
		  if (fragP->tc_frag_data.max_bytes > count)
		    fragP->tc_frag_data.max_bytes -= count;
		  else
		    fragP->tc_frag_data.max_bytes = 0;
		}
	      else
		{
		  /* Remember the maximum prefix size in FUSED_JCC_PADDING
		     frag.  */
		  unsigned int max_prefix_size;
		  if (align_branch_prefix_size > max)
		    max_prefix_size = max;
		  else
		    max_prefix_size = align_branch_prefix_size;
		  if (max_prefix_size > count)
		    fragP->tc_frag_data.max_prefix_length
		      = max_prefix_size - count;
		}

	      /* Use existing segment prefix if possible.  Use CS
		 segment prefix in 64-bit mode.  In 32-bit mode, use SS
		 segment prefix with ESP/EBP base register and use DS
		 segment prefix without ESP/EBP base register.  */
	      if (i.prefix[SEG_PREFIX])
		fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
	      else if (flag_code == CODE_64BIT)
		fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
	      else if (i.base_reg
		       && (i.base_reg->reg_num == 4
			   || i.base_reg->reg_num == 5))
		fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
	      else
		fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
	    }
	}
    }

  /* NB: Don't work with COND_JUMP86 without i386.  */
  if (align_branch_power
      && now_seg != absolute_section
      && cpu_arch_flags.bitfield.cpui386)
    {
      /* Terminate each frag so that we can add prefix and check for
	 fused jcc.  */
      frag_wane (frag_now);
      frag_new (0);
    }

#ifdef DEBUG386
  if (flag_debug)
    {
      pi ("" /*line*/, &i);
    }
#endif /* DEBUG386 */
}
9649
9650 /* Return the size of the displacement operand N. */
9651
9652 static int
9653 disp_size (unsigned int n)
9654 {
9655 int size = 4;
9656
9657 if (i.types[n].bitfield.disp64)
9658 size = 8;
9659 else if (i.types[n].bitfield.disp8)
9660 size = 1;
9661 else if (i.types[n].bitfield.disp16)
9662 size = 2;
9663 return size;
9664 }
9665
9666 /* Return the size of the immediate operand N. */
9667
9668 static int
9669 imm_size (unsigned int n)
9670 {
9671 int size = 4;
9672 if (i.types[n].bitfield.imm64)
9673 size = 8;
9674 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9675 size = 1;
9676 else if (i.types[n].bitfield.imm16)
9677 size = 2;
9678 return size;
9679 }
9680
/* Emit the displacement bytes (or fixups) for every displacement
   operand of the current instruction.  INSN_START_FRAG/INSN_START_OFF
   mark where the instruction's encoding began, for GOTPC offset
   adjustment.  */

static void
output_disp (fragS *insn_start_frag, offsetT insn_start_off)
{
  char *p;
  unsigned int n;

  for (n = 0; n < i.operands; n++)
    {
      if (operand_type_check (i.types[n], disp))
	{
	  int size = disp_size (n);

	  if (now_seg == absolute_section)
	    abs_section_offset += size;
	  else if (i.op[n].disps->X_op == O_constant)
	    {
	      offsetT val = i.op[n].disps->X_add_number;

	      /* A 1-byte displacement is stored scaled down by
		 i.memshift (compressed disp8 form).  */
	      val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
				     size);
	      p = frag_more (size);
	      md_number_to_chars (p, val, size);
	    }
	  else
	    {
	      enum bfd_reloc_code_real reloc_type;
	      int sign = i.types[n].bitfield.disp32s;
	      int pcrel = (i.flags[n] & Operand_PCrel) != 0;
	      fixS *fixP;

	      /* We can't have 8 bit displacement here.  */
	      gas_assert (!i.types[n].bitfield.disp8);

	      /* The PC relative address is computed relative
		 to the instruction boundary, so in case immediate
		 fields follows, we need to adjust the value.  */
	      if (pcrel && i.imm_operands)
		{
		  unsigned int n1;
		  int sz = 0;

		  for (n1 = 0; n1 < i.operands; n1++)
		    if (operand_type_check (i.types[n1], imm))
		      {
			/* Only one immediate is allowed for PC
			   relative address.  */
			gas_assert (sz == 0);
			sz = imm_size (n1);
			i.op[n].disps->X_add_number -= sz;
		      }
		  /* We should find the immediate.  */
		  gas_assert (sz != 0);
		}

	      p = frag_more (size);
	      reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
	      /* Expressions referencing _GLOBAL_OFFSET_TABLE_ get the
		 dedicated GOTPC relocations instead.  */
	      if (GOT_symbol
		  && GOT_symbol == i.op[n].disps->X_add_symbol
		  && (((reloc_type == BFD_RELOC_32
			|| reloc_type == BFD_RELOC_X86_64_32S
			|| (reloc_type == BFD_RELOC_64
			    && object_64bit))
		       && (i.op[n].disps->X_op == O_symbol
			   || (i.op[n].disps->X_op == O_add
			       && ((symbol_get_value_expression
				    (i.op[n].disps->X_op_symbol)->X_op)
				   == O_subtract))))
		      || reloc_type == BFD_RELOC_32_PCREL))
		{
		  if (!object_64bit)
		    {
		      reloc_type = BFD_RELOC_386_GOTPC;
		      i.has_gotpc_tls_reloc = TRUE;
		      i.op[n].imms->X_add_number +=
			encoding_length (insn_start_frag, insn_start_off, p);
		    }
		  else if (reloc_type == BFD_RELOC_64)
		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
		  else
		    /* Don't do the adjustment for x86-64, as there
		       the pcrel addressing is relative to the _next_
		       insn, and that is taken care of in other code.  */
		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
		}
	      else if (align_branch_power)
		{
		  /* TLS relocations must not gain padding prefixes;
		     flag them so output_insn suppresses prefix room.  */
		  switch (reloc_type)
		    {
		    case BFD_RELOC_386_TLS_GD:
		    case BFD_RELOC_386_TLS_LDM:
		    case BFD_RELOC_386_TLS_IE:
		    case BFD_RELOC_386_TLS_IE_32:
		    case BFD_RELOC_386_TLS_GOTIE:
		    case BFD_RELOC_386_TLS_GOTDESC:
		    case BFD_RELOC_386_TLS_DESC_CALL:
		    case BFD_RELOC_X86_64_TLSGD:
		    case BFD_RELOC_X86_64_TLSLD:
		    case BFD_RELOC_X86_64_GOTTPOFF:
		    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
		    case BFD_RELOC_X86_64_TLSDESC_CALL:
		      i.has_gotpc_tls_reloc = TRUE;
		      /* Fall through.  */
		    default:
		      break;
		    }
		}
	      fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
				  size, i.op[n].disps, pcrel,
				  reloc_type);
	      /* Check for "call/jmp *mem", "mov mem, %reg",
		 "test %reg, mem" and "binop mem, %reg" where binop
		 is one of adc, add, and, cmp, or, sbb, sub, xor
		 instructions without data prefix.  Always generate
		 R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
	      if (i.prefix[DATA_PREFIX] == 0
		  && (generate_relax_relocations
		      || (!object_64bit
			  && i.rm.mode == 0
			  && i.rm.regmem == 5))
		  && (i.rm.mode == 2
		      || (i.rm.mode == 0 && i.rm.regmem == 5))
		  && !is_any_vex_encoding(&i.tm)
		  && ((i.operands == 1
		       && i.tm.base_opcode == 0xff
		       && (i.rm.reg == 2 || i.rm.reg == 4))
		      || (i.operands == 2
			  && (i.tm.base_opcode == 0x8b
			      || i.tm.base_opcode == 0x85
			      || (i.tm.base_opcode & ~0x38) == 0x03))))
		{
		  if (object_64bit)
		    {
		      fixP->fx_tcbit = i.rex != 0;
		      if (i.base_reg
			  && (i.base_reg->reg_num == RegIP))
			fixP->fx_tcbit2 = 1;
		    }
		  else
		    fixP->fx_tcbit2 = 1;
		}
	    }
	}
    }
}
9824
/* Emit the immediate operand(s) of the current instruction (global `i')
   into the frag chain.  Constant immediates are written directly;
   symbolic ones get a fixup, with _GLOBAL_OFFSET_TABLE_ references
   converted to the appropriate GOTPC relocation.  INSN_START_FRAG /
   INSN_START_OFF locate the start of the instruction so the GOTPC
   adjustment below can account for the bytes already emitted.  */

static void
output_imm (fragS *insn_start_frag, offsetT insn_start_off)
{
  char *p;
  unsigned int n;

  for (n = 0; n < i.operands; n++)
    {
      /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
      if (i.rounding && (int) n == i.rounding->operand)
	continue;

      if (operand_type_check (i.types[n], imm))
	{
	  int size = imm_size (n);

	  if (now_seg == absolute_section)
	    abs_section_offset += size;
	  else if (i.op[n].imms->X_op == O_constant)
	    {
	      offsetT val;

	      val = offset_in_range (i.op[n].imms->X_add_number,
				     size);
	      p = frag_more (size);
	      md_number_to_chars (p, val, size);
	    }
	  else
	    {
	      /* Not absolute_section.
		 Need a 32-bit fixup (don't support 8bit
		 non-absolute imms).  Try to support other
		 sizes ...  */
	      enum bfd_reloc_code_real reloc_type;
	      int sign;

	      /* Imm32S with a quad suffix (or with no suffix where an
		 L suffix isn't allowed) is sign-extended by the CPU,
		 so ask reloc () for the signed variant.  */
	      if (i.types[n].bitfield.imm32s
		  && (i.suffix == QWORD_MNEM_SUFFIX
		      || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
		sign = 1;
	      else
		sign = 0;

	      p = frag_more (size);
	      reloc_type = reloc (size, 0, sign, i.reloc[n]);

	      /* This is tough to explain.  We end up with this one if we
	       * have operands that look like
	       * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
	       * obtain the absolute address of the GOT, and it is strongly
	       * preferable from a performance point of view to avoid using
	       * a runtime relocation for this.  The actual sequence of
	       * instructions often look something like:
	       *
	       *	call	.L66
	       * .L66:
	       *	popl	%ebx
	       *	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
	       *
	       * The call and pop essentially return the absolute address
	       * of the label .L66 and store it in %ebx.  The linker itself
	       * will ultimately change the first operand of the addl so
	       * that %ebx points to the GOT, but to keep things simple, the
	       * .o file must have this operand set so that it generates not
	       * the absolute address of .L66, but the absolute address of
	       * itself.  This allows the linker itself simply treat a GOTPC
	       * relocation as asking for a pcrel offset to the GOT to be
	       * added in, and the addend of the relocation is stored in the
	       * operand field for the instruction itself.
	       *
	       * Our job here is to fix the operand so that it would add
	       * the correct offset so that %ebx would point to itself.  The
	       * thing that is tricky is that .-.L66 will point to the
	       * beginning of the instruction, so we need to further modify
	       * the operand so that it will point to itself.  There are
	       * other cases where you have something like:
	       *
	       *	.long	$_GLOBAL_OFFSET_TABLE_+[.-.L66]
	       *
	       * and here no correction would be required.  Internally in
	       * the assembler we treat operands of this form as not being
	       * pcrel since the '.' is explicitly mentioned, and I wonder
	       * whether it would simplify matters to do it this way.  Who
	       * knows.  In earlier versions of the PIC patches, the
	       * pcrel_adjust field was used to store the correction, but
	       * since the expression is not pcrel, I felt it would be
	       * confusing to do it this way.  */

	      if ((reloc_type == BFD_RELOC_32
		   || reloc_type == BFD_RELOC_X86_64_32S
		   || reloc_type == BFD_RELOC_64)
		  && GOT_symbol
		  && GOT_symbol == i.op[n].imms->X_add_symbol
		  && (i.op[n].imms->X_op == O_symbol
		      || (i.op[n].imms->X_op == O_add
			  && ((symbol_get_value_expression
			       (i.op[n].imms->X_op_symbol)->X_op)
			      == O_subtract))))
		{
		  if (!object_64bit)
		    reloc_type = BFD_RELOC_386_GOTPC;
		  else if (size == 4)
		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
		  else if (size == 8)
		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
		  i.has_gotpc_tls_reloc = TRUE;
		  /* Bias the addend by the distance from the insn start to
		     the immediate field, per the scheme described above.  */
		  i.op[n].imms->X_add_number +=
		    encoding_length (insn_start_frag, insn_start_off, p);
		}
	      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
			   i.op[n].imms, 0, reloc_type);
	    }
	}
    }
}
9940 \f
/* x86_cons_fix_new is called via the expression parsing code when a
   reloc is needed.  We use this hook to get the correct .got reloc.  */

/* Signedness hint consumed by reloc () for data directives; -1 means
   "don't care".  signed_cons () temporarily sets it to 1.  */
static int cons_sign = -1;

void
x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
		  expressionS *exp, bfd_reloc_code_real_type r)
{
  /* Map the generic reloc to the size- and sign-specific x86 one.  */
  r = reloc (len, 0, cons_sign, r);

#ifdef TE_PE
  /* .secrel32 operands were tagged O_secrel by pe_directive_secrel;
     turn them into plain symbols with a section-relative reloc.  */
  if (exp->X_op == O_secrel)
    {
      exp->X_op = O_symbol;
      r = BFD_RELOC_32_SECREL;
    }
#endif

  fix_new_exp (frag, off, len, exp, 0, r);
}
9961
9962 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9963 purpose of the `.dc.a' internal pseudo-op. */
9964
9965 int
9966 x86_address_bytes (void)
9967 {
9968 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9969 return 4;
9970 return stdoutput->arch_info->bits_per_address / 8;
9971 }
9972
#if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
    || defined (LEX_AT)
# define lex_got(reloc, adjust, types) NULL
#else
/* Parse operands of the form
   <symbol>@GOTOFF+<nnn>
   and similar .plt or .got references.

   If we find one, set up the correct relocation in RELOC and copy the
   input string, minus the `@GOTOFF' into a malloc'd buffer for
   parsing by the calling routine.  Return this buffer, and if ADJUST
   is non-null set it to the length of the string we removed from the
   input line.  Otherwise return NULL.  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
	 int *adjust,
	 i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().  */
  static const struct {
    const char *str;
    int len;
    /* rel[0] is the 32-bit object reloc, rel[1] the 64-bit one;
       _dummy_first_bfd_reloc_code_real marks "not supported here".  */
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  } gotrel[] = {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
					BFD_RELOC_SIZE32 },
      OPERAND_TYPE_IMM32_64 },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_PLTOFF64 },
      OPERAND_TYPE_IMM64 },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
				       BFD_RELOC_X86_64_PLT32 },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
				       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
				       BFD_RELOC_X86_64_TLSGD },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_TLSLD },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
				       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
				       BFD_RELOC_X86_64_TPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
				       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
				       BFD_RELOC_X86_64_GOT32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32 },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
				       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
				       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
  };
  char *cp;
  unsigned int j;

#if defined (OBJ_MAYBE_ELF)
  if (!IS_ELF)
    return NULL;
#endif

  /* Locate the '@' that may introduce a relocation operator; give up
     at end of line or at a comma.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;
      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
	{
	  if (gotrel[j].rel[object_64bit] != 0)
	    {
	      int first, second;
	      char *tmpbuf, *past_reloc;

	      *rel = gotrel[j].rel[object_64bit];

	      if (types)
		{
		  if (flag_code != CODE_64BIT)
		    {
		      types->bitfield.imm32 = 1;
		      types->bitfield.disp32 = 1;
		    }
		  else
		    *types = gotrel[j].types64;
		}

	      /* Make sure the GOT symbol exists; the first gotrel[]
		 entry does not reference the GOT.  */
	      if (j != 0 && GOT_symbol == NULL)
		GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

	      /* The length of the first part of our input line.  */
	      first = cp - input_line_pointer;

	      /* The second part goes from after the reloc token until
		 (and including) an end_of_line char or comma.  */
	      past_reloc = cp + 1 + len;
	      cp = past_reloc;
	      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
		++cp;
	      second = cp + 1 - past_reloc;

	      /* Allocate and copy string.  The trailing NUL shouldn't
		 be necessary, but be safe.  */
	      tmpbuf = XNEWVEC (char, first + second + 2);
	      memcpy (tmpbuf, input_line_pointer, first);
	      if (second != 0 && *past_reloc != ' ')
		/* Replace the relocation token with ' ', so that
		   errors like foo@GOTOFF1 will be detected.  */
		tmpbuf[first++] = ' ';
	      else
		/* Increment length by 1 if the relocation token is
		   removed.  */
		len++;
	      if (adjust)
		*adjust = len;
	      memcpy (tmpbuf + first, past_reloc, second);
	      tmpbuf[first + second] = '\0';
	      return tmpbuf;
	    }

	  /* 1 << (5 + object_64bit) is 32 or 64.  */
	  as_bad (_("@%s reloc is not supported with %d-bit output format"),
		  gotrel[j].str, 1 << (5 + object_64bit));
	  return NULL;
	}
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
#endif
10137
#ifdef TE_PE
#ifdef lex_got
#undef lex_got
#endif
/* Parse operands of the form
   <symbol>@SECREL32+<nnn>

   If we find one, set up the correct relocation in RELOC and copy the
   input string, minus the `@SECREL32' into a malloc'd buffer for
   parsing by the calling routine.  Return this buffer, and if ADJUST
   is non-null set it to the length of the string we removed from the
   input line.  Otherwise return NULL.

   This function is copied from the ELF version above adjusted for PE targets.  */

static char *
lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
	 int *adjust ATTRIBUTE_UNUSED,
	 i386_operand_type *types)
{
  static const struct
  {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  }
  gotrel[] =
  {
    { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
				       BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
  };

  char *cp;
  unsigned j;

  /* Locate the '@' that may introduce a relocation operator; give up
     at end of line or at a comma.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;

      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
	{
	  if (gotrel[j].rel[object_64bit] != 0)
	    {
	      int first, second;
	      char *tmpbuf, *past_reloc;

	      *rel = gotrel[j].rel[object_64bit];
	      /* NOTE(review): unlike the ELF flavour above, ADJUST is set
		 before we know whether the token is replaced by ' ' below,
		 and there is no len++ compensation when the token is simply
		 removed -- confirm this divergence is intentional.  */
	      if (adjust)
		*adjust = len;

	      if (types)
		{
		  if (flag_code != CODE_64BIT)
		    {
		      types->bitfield.imm32 = 1;
		      types->bitfield.disp32 = 1;
		    }
		  else
		    *types = gotrel[j].types64;
		}

	      /* The length of the first part of our input line.  */
	      first = cp - input_line_pointer;

	      /* The second part goes from after the reloc token until
		 (and including) an end_of_line char or comma.  */
	      past_reloc = cp + 1 + len;
	      cp = past_reloc;
	      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
		++cp;
	      second = cp + 1 - past_reloc;

	      /* Allocate and copy string.  The trailing NUL shouldn't
		 be necessary, but be safe.  */
	      tmpbuf = XNEWVEC (char, first + second + 2);
	      memcpy (tmpbuf, input_line_pointer, first);
	      if (second != 0 && *past_reloc != ' ')
		/* Replace the relocation token with ' ', so that
		   errors like foo@SECLREL321 will be detected.  */
		tmpbuf[first++] = ' ';
	      memcpy (tmpbuf + first, past_reloc, second);
	      tmpbuf[first + second] = '\0';
	      return tmpbuf;
	    }

	  as_bad (_("@%s reloc is not supported with %d-bit output format"),
		  gotrel[j].str, 1 << (5 + object_64bit));
	  return NULL;
	}
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}

#endif /* TE_PE */
10240
/* Target hook for the cons () family of directives (.long, .quad, ...).
   Parse the expression at input_line_pointer into EXP.  For 4-byte
   items (and 8-byte ones in 64-bit objects) handle @GOTOFF-style
   suffixes, returning the relocation the suffix demands (NO_RELOC if
   none).  */

bfd_reloc_code_real_type
x86_cons (expressionS *exp, int size)
{
  bfd_reloc_code_real_type got_reloc = NO_RELOC;

  /* Temporarily negate intel_syntax while the generic expression code
     runs; presumably other code tests the sign to distinguish data
     directives from instruction operands -- restored below.  */
  intel_syntax = -intel_syntax;

  exp->X_md = 0;
  if (size == 4 || (object_64bit && size == 8))
    {
      /* Handle @GOTOFF and the like in an expression.  */
      char *save;
      char *gotfree_input_line;
      int adjust = 0;

      save = input_line_pointer;
      gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
      if (gotfree_input_line)
	input_line_pointer = gotfree_input_line;

      expression (exp);

      if (gotfree_input_line)
	{
	  /* expression () has merrily parsed up to the end of line,
	     or a comma - in the wrong buffer.  Transfer how far
	     input_line_pointer has moved to the right buffer.  */
	  input_line_pointer = (save
				+ (input_line_pointer - gotfree_input_line)
				+ adjust);
	  free (gotfree_input_line);
	  if (exp->X_op == O_constant
	      || exp->X_op == O_absent
	      || exp->X_op == O_illegal
	      || exp->X_op == O_register
	      || exp->X_op == O_big)
	    {
	      /* Temporarily NUL-terminate for the diagnostic.  */
	      char c = *input_line_pointer;
	      *input_line_pointer = 0;
	      as_bad (_("missing or invalid expression `%s'"), save);
	      *input_line_pointer = c;
	    }
	  else if ((got_reloc == BFD_RELOC_386_PLT32
		    || got_reloc == BFD_RELOC_X86_64_PLT32)
		   && exp->X_op != O_symbol)
	    {
	      char c = *input_line_pointer;
	      *input_line_pointer = 0;
	      as_bad (_("invalid PLT expression `%s'"), save);
	      *input_line_pointer = c;
	    }
	}
    }
  else
    expression (exp);

  intel_syntax = -intel_syntax;

  if (intel_syntax)
    i386_intel_simplify (exp);

  return got_reloc;
}
10304
/* Like cons (), but in 64-bit mode sets cons_sign so that reloc ()
   (via x86_cons_fix_new) picks a sign-extending relocation for the
   emitted value.  cons_sign is restored to -1 afterwards.  */
static void
signed_cons (int size)
{
  if (flag_code == CODE_64BIT)
    cons_sign = 1;
  cons (size);
  cons_sign = -1;
}
10313
#ifdef TE_PE
/* Handler for the PE-specific .secrel32 directive: emit one or more
   comma-separated 32-bit values.  Symbolic operands are tagged
   O_secrel so that x86_cons_fix_new () converts them to
   BFD_RELOC_32_SECREL.  */
static void
pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
{
  expressionS exp;

  do
    {
      expression (&exp);
      if (exp.X_op == O_symbol)
	exp.X_op = O_secrel;

      emit_expr (&exp, 4);
    }
  while (*input_line_pointer++ == ',');

  /* Undo the post-increment past the non-comma terminator.  */
  input_line_pointer--;
  demand_empty_rest_of_line ();
}
#endif
10334
/* Handle Vector operations.  */

/* Parse a sequence of `{...}' vector operation specifiers starting at
   OP_STRING (stopping at OP_END when non-NULL): broadcasts ({1to2},
   {1to4}, {1to8}, {1to16}), a write mask register ({%k1}; k0 is
   rejected) and the zeroing flag ({z}).  Results are recorded in the
   globals i.broadcast and i.mask.  Returns the updated scan position,
   or NULL after emitting a diagnostic.  */

static char *
check_VecOperations (char *op_string, char *op_end)
{
  const reg_entry *mask;
  const char *saved;
  char *end_op;

  while (*op_string
	 && (op_end == NULL || op_string < op_end))
    {
      saved = op_string;
      if (*op_string == '{')
	{
	  op_string++;

	  /* Check broadcasts.  */
	  if (strncmp (op_string, "1to", 3) == 0)
	    {
	      int bcst_type;

	      if (i.broadcast)
		goto duplicated_vec_op;

	      op_string += 3;
	      if (*op_string == '8')
		bcst_type = 8;
	      else if (*op_string == '4')
		bcst_type = 4;
	      else if (*op_string == '2')
		bcst_type = 2;
	      else if (*op_string == '1'
		       && *(op_string+1) == '6')
		{
		  bcst_type = 16;
		  op_string++;
		}
	      else
		{
		  as_bad (_("Unsupported broadcast: `%s'"), saved);
		  return NULL;
		}
	      op_string++;

	      broadcast_op.type = bcst_type;
	      broadcast_op.operand = this_operand;
	      broadcast_op.bytes = 0;
	      i.broadcast = &broadcast_op;
	    }
	  /* Check masking operation.  */
	  else if ((mask = parse_register (op_string, &end_op)) != NULL)
	    {
	      if (mask == &bad_reg)
		return NULL;

	      /* k0 can't be used for write mask.  */
	      if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
		{
		  as_bad (_("`%s%s' can't be used for write mask"),
			  register_prefix, mask->reg_name);
		  return NULL;
		}

	      if (!i.mask)
		{
		  mask_op.mask = mask;
		  mask_op.zeroing = 0;
		  mask_op.operand = this_operand;
		  i.mask = &mask_op;
		}
	      else
		{
		  if (i.mask->mask)
		    goto duplicated_vec_op;

		  i.mask->mask = mask;

		  /* Only "{z}" is allowed here.  No need to check
		     zeroing mask explicitly.  */
		  if (i.mask->operand != this_operand)
		    {
		      as_bad (_("invalid write mask `%s'"), saved);
		      return NULL;
		    }
		}

	      op_string = end_op;
	    }
	  /* Check zeroing-flag for masking operation.  */
	  else if (*op_string == 'z')
	    {
	      if (!i.mask)
		{
		  mask_op.mask = NULL;
		  mask_op.zeroing = 1;
		  mask_op.operand = this_operand;
		  i.mask = &mask_op;
		}
	      else
		{
		  if (i.mask->zeroing)
		    {
		    duplicated_vec_op:
		      as_bad (_("duplicated `%s'"), saved);
		      return NULL;
		    }

		  i.mask->zeroing = 1;

		  /* Only "{%k}" is allowed here.  No need to check mask
		     register explicitly.  */
		  if (i.mask->operand != this_operand)
		    {
		      as_bad (_("invalid zeroing-masking `%s'"),
			      saved);
		      return NULL;
		    }
		}

	      op_string++;
	    }
	  else
	    goto unknown_vec_op;

	  if (*op_string != '}')
	    {
	      as_bad (_("missing `}' in `%s'"), saved);
	      return NULL;
	    }
	  op_string++;

	  /* Strip whitespace since the addition of pseudo prefixes
	     changed how the scrubber treats '{'.  */
	  if (is_space_char (*op_string))
	    ++op_string;

	  continue;
	}
    unknown_vec_op:
      /* We don't know this one.  */
      as_bad (_("unknown vector operation: `%s'"), saved);
      return NULL;
    }

  /* {z} without a mask register is invalid.  */
  if (i.mask && i.mask->zeroing && !i.mask->mask)
    {
      as_bad (_("zeroing-masking only allowed with write mask"));
      return NULL;
    }

  return op_string;
}
10488
10489 static int
10490 i386_immediate (char *imm_start)
10491 {
10492 char *save_input_line_pointer;
10493 char *gotfree_input_line;
10494 segT exp_seg = 0;
10495 expressionS *exp;
10496 i386_operand_type types;
10497
10498 operand_type_set (&types, ~0);
10499
10500 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10501 {
10502 as_bad (_("at most %d immediate operands are allowed"),
10503 MAX_IMMEDIATE_OPERANDS);
10504 return 0;
10505 }
10506
10507 exp = &im_expressions[i.imm_operands++];
10508 i.op[this_operand].imms = exp;
10509
10510 if (is_space_char (*imm_start))
10511 ++imm_start;
10512
10513 save_input_line_pointer = input_line_pointer;
10514 input_line_pointer = imm_start;
10515
10516 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10517 if (gotfree_input_line)
10518 input_line_pointer = gotfree_input_line;
10519
10520 exp_seg = expression (exp);
10521
10522 SKIP_WHITESPACE ();
10523
10524 /* Handle vector operations. */
10525 if (*input_line_pointer == '{')
10526 {
10527 input_line_pointer = check_VecOperations (input_line_pointer,
10528 NULL);
10529 if (input_line_pointer == NULL)
10530 return 0;
10531 }
10532
10533 if (*input_line_pointer)
10534 as_bad (_("junk `%s' after expression"), input_line_pointer);
10535
10536 input_line_pointer = save_input_line_pointer;
10537 if (gotfree_input_line)
10538 {
10539 free (gotfree_input_line);
10540
10541 if (exp->X_op == O_constant || exp->X_op == O_register)
10542 exp->X_op = O_illegal;
10543 }
10544
10545 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10546 }
10547
/* Validate and classify the parsed immediate EXP (from section EXP_SEG),
   narrowing i.types[this_operand] accordingly.  TYPES carries the operand
   types permitted by any @-relocation suffix.  IMM_START is used only for
   diagnostics and may be NULL to suppress them.  Returns 1 on success,
   0 on error.  */
static int
i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
			 i386_operand_type types, const char *imm_start)
{
  if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
    {
      if (imm_start)
	as_bad (_("missing or invalid immediate expression `%s'"),
		imm_start);
      return 0;
    }
  else if (exp->X_op == O_constant)
    {
      /* Size it properly later.  */
      i.types[this_operand].bitfield.imm64 = 1;
      /* If not 64bit, sign extend val.  Only done when the value
	 already fits in 32 bits.  */
      if (flag_code != CODE_64BIT
	  && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
	exp->X_add_number
	  = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
    }
#if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
  else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
	   && exp_seg != absolute_section
	   && exp_seg != text_section
	   && exp_seg != data_section
	   && exp_seg != bss_section
	   && exp_seg != undefined_section
	   && !bfd_is_com_section (exp_seg))
    {
      as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
      return 0;
    }
#endif
  else if (!intel_syntax && exp_seg == reg_section)
    {
      if (imm_start)
	as_bad (_("illegal immediate register operand %s"), imm_start);
      return 0;
    }
  else
    {
      /* This is an address.  The size of the address will be
	 determined later, depending on destination register,
	 suffix, or the default for the section.  */
      i.types[this_operand].bitfield.imm8 = 1;
      i.types[this_operand].bitfield.imm16 = 1;
      i.types[this_operand].bitfield.imm32 = 1;
      i.types[this_operand].bitfield.imm32s = 1;
      i.types[this_operand].bitfield.imm64 = 1;
      /* Restrict to what the @-suffix relocation (if any) allows.  */
      i.types[this_operand] = operand_type_and (i.types[this_operand],
						types);
    }

  return 1;
}
10604
10605 static char *
10606 i386_scale (char *scale)
10607 {
10608 offsetT val;
10609 char *save = input_line_pointer;
10610
10611 input_line_pointer = scale;
10612 val = get_absolute_expression ();
10613
10614 switch (val)
10615 {
10616 case 1:
10617 i.log2_scale_factor = 0;
10618 break;
10619 case 2:
10620 i.log2_scale_factor = 1;
10621 break;
10622 case 4:
10623 i.log2_scale_factor = 2;
10624 break;
10625 case 8:
10626 i.log2_scale_factor = 3;
10627 break;
10628 default:
10629 {
10630 char sep = *input_line_pointer;
10631
10632 *input_line_pointer = '\0';
10633 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10634 scale);
10635 *input_line_pointer = sep;
10636 input_line_pointer = save;
10637 return NULL;
10638 }
10639 }
10640 if (i.log2_scale_factor != 0 && i.index_reg == 0)
10641 {
10642 as_warn (_("scale factor of %d without an index register"),
10643 1 << i.log2_scale_factor);
10644 i.log2_scale_factor = 0;
10645 }
10646 scale = input_line_pointer;
10647 input_line_pointer = save;
10648 return scale;
10649 }
10650
/* Parse the displacement text [DISP_START, DISP_END) of the current
   operand into i.op[this_operand].disps, after first setting the
   permissible disp8/16/32/32s/64 bits in i.types[this_operand] based
   on operand kind, address/data size prefixes and flag_code.
   Validation is deferred to i386_finalize_displacement ().  Returns
   0 on error, non-zero on success.  */
static int
i386_displacement (char *disp_start, char *disp_end)
{
  expressionS *exp;
  segT exp_seg = 0;
  char *save_input_line_pointer;
  char *gotfree_input_line;
  int override;
  i386_operand_type bigdisp, types = anydisp;
  int ret;

  if (i.disp_operands == MAX_MEMORY_OPERANDS)
    {
      as_bad (_("at most %d displacement operands are allowed"),
	      MAX_MEMORY_OPERANDS);
      return 0;
    }

  operand_type_set (&bigdisp, 0);
  /* Memory operands and non-near-branch operands: the displacement
     width follows the address size.  */
  if (i.jumpabsolute
      || i.types[this_operand].bitfield.baseindex
      || (current_templates->start->opcode_modifier.jump != JUMP
	  && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
    {
      i386_addressing_mode ();
      override = (i.prefix[ADDR_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
	{
	  if (!override)
	    {
	      bigdisp.bitfield.disp32s = 1;
	      bigdisp.bitfield.disp64 = 1;
	    }
	  else
	    bigdisp.bitfield.disp32 = 1;
	}
      else if ((flag_code == CODE_16BIT) ^ override)
	  bigdisp.bitfield.disp16 = 1;
      else
	  bigdisp.bitfield.disp32 = 1;
    }
  else
    {
      /* For PC-relative branches, the width of the displacement may be
	 dependent upon data size, but is never dependent upon address size.
	 Also make sure to not unintentionally match against a non-PC-relative
	 branch template.  */
      static templates aux_templates;
      const insn_template *t = current_templates->start;
      bfd_boolean has_intel64 = FALSE;

      aux_templates.start = t;
      while (++t < current_templates->end)
	{
	  if (t->opcode_modifier.jump
	      != current_templates->start->opcode_modifier.jump)
	    break;
	  if ((t->opcode_modifier.isa64 >= INTEL64))
	    has_intel64 = TRUE;
	}
      /* Narrow current_templates to the run sharing the same jump kind.  */
      if (t < current_templates->end)
	{
	  aux_templates.end = t;
	  current_templates = &aux_templates;
	}

      override = (i.prefix[DATA_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
	{
	  if ((override || i.suffix == WORD_MNEM_SUFFIX)
	      && (!intel64 || !has_intel64))
	    bigdisp.bitfield.disp16 = 1;
	  else
	    bigdisp.bitfield.disp32s = 1;
	}
      else
	{
	  if (!override)
	    override = (i.suffix == (flag_code != CODE_16BIT
				     ? WORD_MNEM_SUFFIX
				     : LONG_MNEM_SUFFIX));
	  bigdisp.bitfield.disp32 = 1;
	  if ((flag_code == CODE_16BIT) ^ override)
	    {
	      bigdisp.bitfield.disp32 = 0;
	      bigdisp.bitfield.disp16 = 1;
	    }
	}
    }
  i.types[this_operand] = operand_type_or (i.types[this_operand],
					   bigdisp);

  exp = &disp_expressions[i.disp_operands];
  i.op[this_operand].disps = exp;
  i.disp_operands++;
  save_input_line_pointer = input_line_pointer;
  input_line_pointer = disp_start;
  END_STRING_AND_SAVE (disp_end);

#ifndef GCC_ASM_O_HACK
#define GCC_ASM_O_HACK 0
#endif
#if GCC_ASM_O_HACK
  END_STRING_AND_SAVE (disp_end + 1);
  if (i.types[this_operand].bitfield.baseIndex
      && displacement_string_end[-1] == '+')
    {
      /* This hack is to avoid a warning when using the "o"
	 constraint within gcc asm statements.
	 For instance:

	 #define _set_tssldt_desc(n,addr,limit,type) \
	 __asm__ __volatile__ ( \
	 "movw %w2,%0\n\t" \
	 "movw %w1,2+%0\n\t" \
	 "rorl $16,%1\n\t" \
	 "movb %b1,4+%0\n\t" \
	 "movb %4,5+%0\n\t" \
	 "movb $0,6+%0\n\t" \
	 "movb %h1,7+%0\n\t" \
	 "rorl $16,%1" \
	 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))

	 This works great except that the output assembler ends
	 up looking a bit weird if it turns out that there is
	 no offset.  You end up producing code that looks like:

	 #APP
	 movw $235,(%eax)
	 movw %dx,2+(%eax)
	 rorl $16,%edx
	 movb %dl,4+(%eax)
	 movb $137,5+(%eax)
	 movb $0,6+(%eax)
	 movb %dh,7+(%eax)
	 rorl $16,%edx
	 #NO_APP

	 So here we provide the missing zero.  */

      *displacement_string_end = '0';
    }
#endif
  /* Strip a trailing @GOT etc. and record the reloc it implies.  */
  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
  if (gotfree_input_line)
    input_line_pointer = gotfree_input_line;

  exp_seg = expression (exp);

  SKIP_WHITESPACE ();
  if (*input_line_pointer)
    as_bad (_("junk `%s' after expression"), input_line_pointer);
#if GCC_ASM_O_HACK
  RESTORE_END_STRING (disp_end + 1);
#endif
  input_line_pointer = save_input_line_pointer;
  if (gotfree_input_line)
    {
      free (gotfree_input_line);

      if (exp->X_op == O_constant || exp->X_op == O_register)
	exp->X_op = O_illegal;
    }

  ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);

  RESTORE_END_STRING (disp_end);

  return ret;
}
10821
/* Validate and classify the parsed displacement EXP (from section
   EXP_SEG), rewriting GOT-relative references and narrowing
   i.types[this_operand].  TYPES carries the operand types permitted by
   any @-relocation suffix; DISP_START is used only for diagnostics.
   Returns 1 on success, 0 on error.  */
static int
i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
			    i386_operand_type types, const char *disp_start)
{
  i386_operand_type bigdisp;
  int ret = 1;

  /* We do this to make sure that the section symbol is in
     the symbol table.  We will ultimately change the relocation
     to be relative to the beginning of the section.  */
  if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
      || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
      || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
    {
      if (exp->X_op != O_symbol)
	goto inv_disp;

      if (S_IS_LOCAL (exp->X_add_symbol)
	  && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
	  && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
	section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
      /* Rewrite as <sym> - _GLOBAL_OFFSET_TABLE_ with a plain reloc.  */
      exp->X_op = O_subtract;
      exp->X_op_symbol = GOT_symbol;
      if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
	i.reloc[this_operand] = BFD_RELOC_32_PCREL;
      else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
	i.reloc[this_operand] = BFD_RELOC_64;
      else
	i.reloc[this_operand] = BFD_RELOC_32;
    }

  else if (exp->X_op == O_absent
	   || exp->X_op == O_illegal
	   || exp->X_op == O_big)
    {
    inv_disp:
      as_bad (_("missing or invalid displacement expression `%s'"),
	      disp_start);
      ret = 0;
    }

  else if (flag_code == CODE_64BIT
	   && !i.prefix[ADDR_PREFIX]
	   && exp->X_op == O_constant)
    {
      /* Since displacement is signed extended to 64bit, don't allow
	 disp32 and turn off disp32s if they are out of range.  */
      i.types[this_operand].bitfield.disp32 = 0;
      if (!fits_in_signed_long (exp->X_add_number))
	{
	  i.types[this_operand].bitfield.disp32s = 0;
	  /* Only an error when the operand actually addresses memory.  */
	  if (i.types[this_operand].bitfield.baseindex)
	    {
	      as_bad (_("0x%lx out range of signed 32bit displacement"),
		      (long) exp->X_add_number);
	      ret = 0;
	    }
	}
    }

#if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
  else if (exp->X_op != O_constant
	   && OUTPUT_FLAVOR == bfd_target_aout_flavour
	   && exp_seg != absolute_section
	   && exp_seg != text_section
	   && exp_seg != data_section
	   && exp_seg != bss_section
	   && exp_seg != undefined_section
	   && !bfd_is_com_section (exp_seg))
    {
      as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
      ret = 0;
    }
#endif

  if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
      /* Constants get taken care of by optimize_disp().  */
      && exp->X_op != O_constant)
    i.types[this_operand].bitfield.disp8 = 1;

  /* Check if this is a displacement only operand.  */
  bigdisp = i.types[this_operand];
  bigdisp.bitfield.disp8 = 0;
  bigdisp.bitfield.disp16 = 0;
  bigdisp.bitfield.disp32 = 0;
  bigdisp.bitfield.disp32s = 0;
  bigdisp.bitfield.disp64 = 0;
  if (operand_type_all_zero (&bigdisp))
    i.types[this_operand] = operand_type_and (i.types[this_operand],
					      types);

  return ret;
}
10915
/* Return the active addressing mode, taking address override and
   registers forming the address into consideration.  Update the
   address override prefix if necessary.  */

static enum flag_code
i386_addressing_mode (void)
{
  enum flag_code addr_mode;

  if (i.prefix[ADDR_PREFIX])
    /* An explicit address-size prefix toggles between the two native
       address sizes of the current mode.  */
    addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
  else if (flag_code == CODE_16BIT
	   && current_templates->start->cpu_flags.bitfield.cpumpx
	   /* Avoid replacing the "16-bit addressing not allowed" diagnostic
	      from md_assemble() by "is not a valid base/index expression"
	      when there is a base and/or index.  */
	   && !i.types[this_operand].bitfield.baseindex)
    {
      /* MPX insn memory operands with neither base nor index must be forced
	 to use 32-bit addressing in 16-bit mode.  */
      addr_mode = CODE_32BIT;
      i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
      ++i.prefixes;
      gas_assert (!i.types[this_operand].bitfield.disp16);
      gas_assert (!i.types[this_operand].bitfield.disp32);
    }
  else
    {
      addr_mode = flag_code;

#if INFER_ADDR_PREFIX
      if (i.mem_operands == 0)
	{
	  /* Infer address prefix from the first memory operand.  */
	  const reg_entry *addr_reg = i.base_reg;

	  if (addr_reg == NULL)
	    addr_reg = i.index_reg;

	  if (addr_reg)
	    {
	      if (addr_reg->reg_type.bitfield.dword)
		addr_mode = CODE_32BIT;
	      else if (flag_code != CODE_64BIT
		       && addr_reg->reg_type.bitfield.word)
		addr_mode = CODE_16BIT;

	      /* The address registers demand a non-default size: insert
		 the override prefix on the caller's behalf.  */
	      if (addr_mode != flag_code)
		{
		  i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
		  i.prefixes += 1;
		  /* Change the size of any displacement too.  At most one
		     of Disp16 or Disp32 is set.
		     FIXME.  There doesn't seem to be any real need for
		     separate Disp16 and Disp32 flags.  The same goes for
		     Imm16 and Imm32.  Removing them would probably clean
		     up the code quite a lot.  */
		  if (flag_code != CODE_64BIT
		      && (i.types[this_operand].bitfield.disp16
			  || i.types[this_operand].bitfield.disp32))
		    i.types[this_operand]
		      = operand_type_xor (i.types[this_operand], disp16_32);
		}
	    }
	}
#endif
    }

  return addr_mode;
}
10986
/* Make sure the memory operand we've been dealt is valid.
   Return 1 on success, 0 on a failure.  */

static int
i386_index_check (const char *operand_string)
{
  const char *kind = "base/index";
  enum flag_code addr_mode = i386_addressing_mode ();

  if (current_templates->start->opcode_modifier.isstring
      && !current_templates->start->cpu_flags.bitfield.cpupadlock
      && (current_templates->end[-1].opcode_modifier.isstring
	  || i.mem_operands))
    {
      /* Memory operands of string insns are special in that they only allow
	 a single register (rDI, rSI, or rBX) as their memory address.  */
      const reg_entry *expected_reg;
      /* Indexed by addr_mode (CODE_32BIT/CODE_16BIT/CODE_64BIT) and,
	 for di_si, by whether the operand is the ES-relative one.  */
      static const char *di_si[][2] =
	{
	  { "esi", "edi" },
	  { "si", "di" },
	  { "rsi", "rdi" }
	};
      static const char *bx[] = { "ebx", "bx", "rbx" };

      kind = "string address";

      if (current_templates->start->opcode_modifier.repprefixok)
	{
	  int es_op = current_templates->end[-1].opcode_modifier.isstring
		      - IS_STRING_ES_OP0;
	  int op = 0;

	  if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
	      || ((!i.mem_operands != !intel_syntax)
		  && current_templates->end[-1].operand_types[1]
		     .bitfield.baseindex))
	    op = 1;
	  expected_reg
	    = (const reg_entry *) str_hash_find (reg_hash,
						 di_si[addr_mode][op == es_op]);
	}
      else
	/* Non-REP string insn (e.g. XLAT) uses rBX.  */
	expected_reg
	  = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);

      if (i.base_reg != expected_reg
	  || i.index_reg
	  || operand_type_check (i.types[this_operand], disp))
	{
	  /* The second memory operand must have the same size as
	     the first one.  */
	  if (i.mem_operands
	      && i.base_reg
	      && !((addr_mode == CODE_64BIT
		    && i.base_reg->reg_type.bitfield.qword)
		   || (addr_mode == CODE_32BIT
		       ? i.base_reg->reg_type.bitfield.dword
		       : i.base_reg->reg_type.bitfield.word)))
	    goto bad_address;

	  /* Wrong register, but of the right size: warn and carry on,
	     since the register is ignored in the encoding anyway.  */
	  as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
		   operand_string,
		   intel_syntax ? '[' : '(',
		   register_prefix,
		   expected_reg->reg_name,
		   intel_syntax ? ']' : ')');
	  return 1;
	}
      else
	return 1;

    bad_address:
      as_bad (_("`%s' is not a valid %s expression"),
	      operand_string, kind);
      return 0;
    }
  else
    {
      if (addr_mode != CODE_16BIT)
	{
	  /* 32-bit/64-bit checks.  */
	  if (i.disp_encoding == disp_encoding_16bit)
	    {
	    bad_disp:
	      as_bad (_("invalid `%s' prefix"),
		      addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
	      return 0;
	    }

	  /* Base must be a GPR of the address size (never %eiz/%riz or
	     %rip with an index); index must be a GPR of the address size
	     usable as index, or a vector register (VSIB).  */
	  if ((i.base_reg
	       && ((addr_mode == CODE_64BIT
		    ? !i.base_reg->reg_type.bitfield.qword
		    : !i.base_reg->reg_type.bitfield.dword)
		   || (i.index_reg && i.base_reg->reg_num == RegIP)
		   || i.base_reg->reg_num == RegIZ))
	      || (i.index_reg
		  && !i.index_reg->reg_type.bitfield.xmmword
		  && !i.index_reg->reg_type.bitfield.ymmword
		  && !i.index_reg->reg_type.bitfield.zmmword
		  && ((addr_mode == CODE_64BIT
		       ? !i.index_reg->reg_type.bitfield.qword
		       : !i.index_reg->reg_type.bitfield.dword)
		      || !i.index_reg->reg_type.bitfield.baseindex)))
	    goto bad_address;

	  /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
	  if (current_templates->start->base_opcode == 0xf30f1b
	      || (current_templates->start->base_opcode & ~1) == 0x0f1a
	      || current_templates->start->opcode_modifier.sib == SIBMEM)
	    {
	      /* They cannot use RIP-relative addressing. */
	      if (i.base_reg && i.base_reg->reg_num == RegIP)
		{
		  as_bad (_("`%s' cannot be used here"), operand_string);
		  return 0;
		}

	      /* bndldx and bndstx ignore their scale factor. */
	      if ((current_templates->start->base_opcode & ~1) == 0x0f1a
		  && i.log2_scale_factor)
		as_warn (_("register scaling is being ignored here"));
	    }
	}
      else
	{
	  /* 16-bit checks.  Only the classic base (reg_num < 6) plus
	     index (reg_num >= 6) pairings with no scaling are
	     encodable in a 16-bit ModRM byte.  */
	  if (i.disp_encoding == disp_encoding_32bit)
	    goto bad_disp;

	  if ((i.base_reg
	       && (!i.base_reg->reg_type.bitfield.word
		   || !i.base_reg->reg_type.bitfield.baseindex))
	      || (i.index_reg
		  && (!i.index_reg->reg_type.bitfield.word
		      || !i.index_reg->reg_type.bitfield.baseindex
		      || !(i.base_reg
			   && i.base_reg->reg_num < 6
			   && i.index_reg->reg_num >= 6
			   && i.log2_scale_factor == 0))))
	    goto bad_address;
	}
    }
  return 1;
}
11132
11133 /* Handle vector immediates. */
11134
11135 static int
11136 RC_SAE_immediate (const char *imm_start)
11137 {
11138 unsigned int match_found, j;
11139 const char *pstr = imm_start;
11140 expressionS *exp;
11141
11142 if (*pstr != '{')
11143 return 0;
11144
11145 pstr++;
11146 match_found = 0;
11147 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11148 {
11149 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11150 {
11151 if (!i.rounding)
11152 {
11153 rc_op.type = RC_NamesTable[j].type;
11154 rc_op.operand = this_operand;
11155 i.rounding = &rc_op;
11156 }
11157 else
11158 {
11159 as_bad (_("duplicated `%s'"), imm_start);
11160 return 0;
11161 }
11162 pstr += RC_NamesTable[j].len;
11163 match_found = 1;
11164 break;
11165 }
11166 }
11167 if (!match_found)
11168 return 0;
11169
11170 if (*pstr++ != '}')
11171 {
11172 as_bad (_("Missing '}': '%s'"), imm_start);
11173 return 0;
11174 }
11175 /* RC/SAE immediate string should contain nothing more. */;
11176 if (*pstr != 0)
11177 {
11178 as_bad (_("Junk after '}': '%s'"), imm_start);
11179 return 0;
11180 }
11181
11182 exp = &im_expressions[i.imm_operands++];
11183 i.op[this_operand].imms = exp;
11184
11185 exp->X_op = O_constant;
11186 exp->X_add_number = 0;
11187 exp->X_add_symbol = (symbolS *) 0;
11188 exp->X_op_symbol = (symbolS *) 0;
11189
11190 i.types[this_operand].bitfield.imm8 = 1;
11191 return 1;
11192 }
11193
/* Only string instructions can have a second memory operand, so
   reduce current_templates to just those if it contains any.
   Returns 0 if re-checking the first memory operand against the
   narrowed template list fails, 1 otherwise.  */
static int
maybe_adjust_templates (void)
{
  const insn_template *t;

  gas_assert (i.mem_operands == 1);

  /* Find the first string-insn template, if any.  */
  for (t = current_templates->start; t < current_templates->end; ++t)
    if (t->opcode_modifier.isstring)
      break;

  if (t < current_templates->end)
    {
      static templates aux_templates;
      bfd_boolean recheck;

      /* Narrow to the contiguous run of string templates.  */
      aux_templates.start = t;
      for (; t < current_templates->end; ++t)
	if (!t->opcode_modifier.isstring)
	  break;
      aux_templates.end = t;

      /* Determine whether to re-check the first memory operand.  */
      recheck = (aux_templates.start != current_templates->start
		 || t != current_templates->end);

      current_templates = &aux_templates;

      if (recheck)
	{
	  /* Temporarily pretend the first memory operand is being
	     parsed so i386_index_check validates it afresh.  */
	  i.mem_operands = 0;
	  if (i.memop1_string != NULL
	      && i386_index_check (i.memop1_string) == 0)
	    return 0;
	  i.mem_operands = 1;
	}
    }

  return 1;
}
11236
/* Parse OPERAND_STRING (AT&T syntax) into the i386_insn structure I.
   An operand may be a register, an immediate ('$'-prefixed), an RC/SAE
   specifier, or a memory reference (optionally segment-overridden, with
   base/index/scale in "disp(base, index, scale)" form).  Returns zero
   on error.  */

static int
i386_att_operand (char *operand_string)
{
  const reg_entry *r;
  char *end_op;
  char *op_string = operand_string;

  if (is_space_char (*op_string))
    ++op_string;

  /* We check for an absolute prefix (differentiating,
     for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
  if (*op_string == ABSOLUTE_PREFIX)
    {
      ++op_string;
      if (is_space_char (*op_string))
	++op_string;
      i.jumpabsolute = TRUE;
    }

  /* Check if operand is a register.  */
  if ((r = parse_register (op_string, &end_op)) != NULL)
    {
      i386_operand_type temp;

      if (r == &bad_reg)
	return 0;

      /* Check for a segment override by searching for ':' after a
	 segment register.  */
      op_string = end_op;
      if (is_space_char (*op_string))
	++op_string;
      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
	{
	  /* Map the segment register number to its seg_entry.  */
	  switch (r->reg_num)
	    {
	    case 0:
	      i.seg[i.mem_operands] = &es;
	      break;
	    case 1:
	      i.seg[i.mem_operands] = &cs;
	      break;
	    case 2:
	      i.seg[i.mem_operands] = &ss;
	      break;
	    case 3:
	      i.seg[i.mem_operands] = &ds;
	      break;
	    case 4:
	      i.seg[i.mem_operands] = &fs;
	      break;
	    case 5:
	      i.seg[i.mem_operands] = &gs;
	      break;
	    }

	  /* Skip the ':' and whitespace.  */
	  ++op_string;
	  if (is_space_char (*op_string))
	    ++op_string;

	  if (!is_digit_char (*op_string)
	      && !is_identifier_char (*op_string)
	      && *op_string != '('
	      && *op_string != ABSOLUTE_PREFIX)
	    {
	      as_bad (_("bad memory operand `%s'"), op_string);
	      return 0;
	    }
	  /* Handle case of %es:*foo.  */
	  if (*op_string == ABSOLUTE_PREFIX)
	    {
	      ++op_string;
	      if (is_space_char (*op_string))
		++op_string;
	      i.jumpabsolute = TRUE;
	    }
	  goto do_memory_reference;
	}

      /* Handle vector operations.  */
      if (*op_string == '{')
	{
	  op_string = check_VecOperations (op_string, NULL);
	  if (op_string == NULL)
	    return 0;
	}

      if (*op_string)
	{
	  as_bad (_("junk `%s' after register"), op_string);
	  return 0;
	}
      /* Plain register operand.  */
      temp = r->reg_type;
      temp.bitfield.baseindex = 0;
      i.types[this_operand] = operand_type_or (i.types[this_operand],
					       temp);
      i.types[this_operand].bitfield.unspecified = 0;
      i.op[this_operand].regs = r;
      i.reg_operands++;
    }
  else if (*op_string == REGISTER_PREFIX)
    {
      as_bad (_("bad register name `%s'"), op_string);
      return 0;
    }
  else if (*op_string == IMMEDIATE_PREFIX)
    {
      ++op_string;
      if (i.jumpabsolute)
	{
	  as_bad (_("immediate operand illegal with absolute jump"));
	  return 0;
	}
      if (!i386_immediate (op_string))
	return 0;
    }
  else if (RC_SAE_immediate (operand_string))
    {
      /* If it is a RC or SAE immediate, do nothing.  */
      ;
    }
  else if (is_digit_char (*op_string)
	   || is_identifier_char (*op_string)
	   || *op_string == '"'
	   || *op_string == '(')
    {
      /* This is a memory reference of some sort.  */
      char *base_string;

      /* Start and end of displacement string expression (if found).  */
      char *displacement_string_start;
      char *displacement_string_end;
      char *vop_start;

    do_memory_reference:
      if (i.mem_operands == 1 && !maybe_adjust_templates ())
	return 0;
      /* Only string insns may take a second memory operand.  */
      if ((i.mem_operands == 1
	   && !current_templates->start->opcode_modifier.isstring)
	  || i.mem_operands == 2)
	{
	  as_bad (_("too many memory references for `%s'"),
		  current_templates->start->name);
	  return 0;
	}

      /* Check for base index form.  We detect the base index form by
	 looking for an ')' at the end of the operand, searching
	 for the '(' matching it, and finding a REGISTER_PREFIX or ','
	 after the '('.  */
      base_string = op_string + strlen (op_string);

      /* Handle vector operations.  */
      vop_start = strchr (op_string, '{');
      if (vop_start && vop_start < base_string)
	{
	  if (check_VecOperations (vop_start, base_string) == NULL)
	    return 0;
	  base_string = vop_start;
	}

      --base_string;
      if (is_space_char (*base_string))
	--base_string;

      /* If we only have a displacement, set-up for it to be parsed later.  */
      displacement_string_start = op_string;
      displacement_string_end = base_string + 1;

      if (*base_string == ')')
	{
	  char *temp_string;
	  unsigned int parens_balanced = 1;
	  /* We've already checked that the number of left & right ()'s are
	     equal, so this loop will not be infinite.  */
	  do
	    {
	      base_string--;
	      if (*base_string == ')')
		parens_balanced++;
	      if (*base_string == '(')
		parens_balanced--;
	    }
	  while (parens_balanced);

	  temp_string = base_string;

	  /* Skip past '(' and whitespace.  */
	  ++base_string;
	  if (is_space_char (*base_string))
	    ++base_string;

	  if (*base_string == ','
	      || ((i.base_reg = parse_register (base_string, &end_op))
		  != NULL))
	    {
	      /* The displacement (if any) ends at the '('.  */
	      displacement_string_end = temp_string;

	      i.types[this_operand].bitfield.baseindex = 1;

	      if (i.base_reg)
		{
		  if (i.base_reg == &bad_reg)
		    return 0;
		  base_string = end_op;
		  if (is_space_char (*base_string))
		    ++base_string;
		}

	      /* There may be an index reg or scale factor here.  */
	      if (*base_string == ',')
		{
		  ++base_string;
		  if (is_space_char (*base_string))
		    ++base_string;

		  if ((i.index_reg = parse_register (base_string, &end_op))
		      != NULL)
		    {
		      if (i.index_reg == &bad_reg)
			return 0;
		      base_string = end_op;
		      if (is_space_char (*base_string))
			++base_string;
		      if (*base_string == ',')
			{
			  ++base_string;
			  if (is_space_char (*base_string))
			    ++base_string;
			}
		      else if (*base_string != ')')
			{
			  as_bad (_("expecting `,' or `)' "
				    "after index register in `%s'"),
				  operand_string);
			  return 0;
			}
		    }
		  else if (*base_string == REGISTER_PREFIX)
		    {
		      end_op = strchr (base_string, ',');
		      if (end_op)
			*end_op = '\0';
		      as_bad (_("bad register name `%s'"), base_string);
		      return 0;
		    }

		  /* Check for scale factor.  */
		  if (*base_string != ')')
		    {
		      char *end_scale = i386_scale (base_string);

		      if (!end_scale)
			return 0;

		      base_string = end_scale;
		      if (is_space_char (*base_string))
			++base_string;
		      if (*base_string != ')')
			{
			  as_bad (_("expecting `)' "
				    "after scale factor in `%s'"),
				  operand_string);
			  return 0;
			}
		    }
		  else if (!i.index_reg)
		    {
		      as_bad (_("expecting index register or scale factor "
				"after `,'; got '%c'"),
			      *base_string);
		      return 0;
		    }
		}
	      else if (*base_string != ')')
		{
		  as_bad (_("expecting `,' or `)' "
			    "after base register in `%s'"),
			  operand_string);
		  return 0;
		}
	    }
	  else if (*base_string == REGISTER_PREFIX)
	    {
	      end_op = strchr (base_string, ',');
	      if (end_op)
		*end_op = '\0';
	      as_bad (_("bad register name `%s'"), base_string);
	      return 0;
	    }
	}

      /* If there's an expression beginning the operand, parse it,
	 assuming displacement_string_start and
	 displacement_string_end are meaningful.  */
      if (displacement_string_start != displacement_string_end)
	{
	  if (!i386_displacement (displacement_string_start,
				  displacement_string_end))
	    return 0;
	}

      /* Special case for (%dx) while doing input/output op.  */
      if (i.base_reg
	  && i.base_reg->reg_type.bitfield.instance == RegD
	  && i.base_reg->reg_type.bitfield.word
	  && i.index_reg == 0
	  && i.log2_scale_factor == 0
	  && i.seg[i.mem_operands] == 0
	  && !operand_type_check (i.types[this_operand], disp))
	{
	  i.types[this_operand] = i.base_reg->reg_type;
	  return 1;
	}

      if (i386_index_check (operand_string) == 0)
	return 0;
      i.flags[this_operand] |= Operand_Mem;
      /* Remember the first memory operand's text in case the template
	 list gets narrowed later (see maybe_adjust_templates).  */
      if (i.mem_operands == 0)
	i.memop1_string = xstrdup (operand_string);
      i.mem_operands++;
    }
  else
    {
      /* It's not a memory operand; argh!  */
      as_bad (_("invalid char %s beginning operand %d `%s'"),
	      output_invalid (*op_string),
	      this_operand + 1,
	      op_string);
      return 0;
    }
  return 1;			/* Normal return.  */
}
11575 \f
11576 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11577 that an rs_machine_dependent frag may reach. */
11578
11579 unsigned int
11580 i386_frag_max_var (fragS *frag)
11581 {
11582 /* The only relaxable frags are for jumps.
11583 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
11584 gas_assert (frag->fr_type == rs_machine_dependent);
11585 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11586 }
11587
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return non-zero if FR_SYMBOL (the target of a relaxed jump frag)
   cannot be preempted at link/run time, so the jump can resolve
   locally.  FR_VAR is the frag's relocation type, or NO_RELOC.  */
static int
elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
{
  /* STT_GNU_IFUNC symbol must go through PLT.  */
  if ((symbol_get_bfdsym (fr_symbol)->flags
       & BSF_GNU_INDIRECT_FUNCTION) != 0)
    return 0;

  if (!S_IS_EXTERNAL (fr_symbol))
    /* Symbol may be weak or local.  */
    return !S_IS_WEAK (fr_symbol);

  /* Global symbols with non-default visibility can't be preempted.  */
  if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
    return 1;

  if (fr_var != NO_RELOC)
    switch ((enum bfd_reloc_code_real) fr_var)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_X86_64_PLT32:
	/* Symbol with PLT relocation may be preempted.  */
	return 0;
      default:
	/* No other relocation type is expected here.  */
	abort ();
      }

  /* Global symbols with default visibility in a shared library may be
     preempted by another definition.  */
  return !shared;
}
#endif
11621
11622 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11623 Note also work for Skylake and Cascadelake.
11624 ---------------------------------------------------------------------
11625 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
11626 | ------ | ----------- | ------- | -------- |
11627 | Jo | N | N | Y |
11628 | Jno | N | N | Y |
11629 | Jc/Jb | Y | N | Y |
11630 | Jae/Jnb | Y | N | Y |
11631 | Je/Jz | Y | Y | Y |
11632 | Jne/Jnz | Y | Y | Y |
11633 | Jna/Jbe | Y | N | Y |
11634 | Ja/Jnbe | Y | N | Y |
11635 | Js | N | N | Y |
11636 | Jns | N | N | Y |
11637 | Jp/Jpe | N | N | Y |
11638 | Jnp/Jpo | N | N | Y |
11639 | Jl/Jnge | Y | Y | Y |
11640 | Jge/Jnl | Y | Y | Y |
11641 | Jle/Jng | Y | Y | Y |
11642 | Jg/Jnle | Y | Y | Y |
11643 --------------------------------------------------------------------- */
11644 static int
11645 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11646 {
11647 if (mf_cmp == mf_cmp_alu_cmp)
11648 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11649 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11650 if (mf_cmp == mf_cmp_incdec)
11651 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11652 || mf_jcc == mf_jcc_jle);
11653 if (mf_cmp == mf_cmp_test_and)
11654 return 1;
11655 return 0;
11656 }
11657
11658 /* Return the next non-empty frag. */
11659
11660 static fragS *
11661 i386_next_non_empty_frag (fragS *fragP)
11662 {
11663 /* There may be a frag with a ".fill 0" when there is no room in
11664 the current frag for frag_grow in output_insn. */
11665 for (fragP = fragP->fr_next;
11666 (fragP != NULL
11667 && fragP->fr_type == rs_fill
11668 && fragP->fr_fix == 0);
11669 fragP = fragP->fr_next)
11670 ;
11671 return fragP;
11672 }
11673
/* Return the next jcc frag after BRANCH_PADDING.  MAYBE_CMP_FRAGP is
   the frag holding the CMP-like instruction; PAD_FRAGP is the frag
   expected to be its BRANCH_PADDING.  Returns NULL if the sequence is
   not a fusible cmp+jcc pair.  */

static fragS *
i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
{
  fragS *branch_fragP;
  if (!pad_fragP)
    return NULL;

  if (pad_fragP->fr_type == rs_machine_dependent
      && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
	  == BRANCH_PADDING))
    {
      /* The conditional jump is the next non-empty frag after the
	 BRANCH_PADDING frag.  */
      branch_fragP = i386_next_non_empty_frag (pad_fragP);
      if (branch_fragP->fr_type != rs_machine_dependent)
	return NULL;
      if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
	  && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
				   pad_fragP->tc_frag_data.mf_type))
	return branch_fragP;
    }

  return NULL;
}
11698
/* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.
   Runs two passes over the frag chain starting at FRAGP: the first
   resolves padding frags to the branch they pad (converting or waning
   FUSED_JCC_PADDING as needed); the second accumulates BRANCH_PREFIX
   sizes.  Idempotent via the per-frag 'classified' flag.  */

static void
i386_classify_machine_dependent_frag (fragS *fragP)
{
  fragS *cmp_fragP;
  fragS *pad_fragP;
  fragS *branch_fragP;
  fragS *next_fragP;
  unsigned int max_prefix_length;

  /* Already processed on a previous call.  */
  if (fragP->tc_frag_data.classified)
    return;

  /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
     FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
  for (next_fragP = fragP;
       next_fragP != NULL;
       next_fragP = next_fragP->fr_next)
    {
      next_fragP->tc_frag_data.classified = 1;
      if (next_fragP->fr_type == rs_machine_dependent)
	switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
	  {
	  case BRANCH_PADDING:
	    /* The BRANCH_PADDING frag must be followed by a branch
	       frag.  */
	    branch_fragP = i386_next_non_empty_frag (next_fragP);
	    next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
	    break;
	  case FUSED_JCC_PADDING:
	    /* Check if this is a fused jcc:
	       FUSED_JCC_PADDING
	       CMP like instruction
	       BRANCH_PADDING
	       COND_JUMP
	       */
	    cmp_fragP = i386_next_non_empty_frag (next_fragP);
	    pad_fragP = i386_next_non_empty_frag (cmp_fragP);
	    branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
	    if (branch_fragP)
	      {
		/* The BRANCH_PADDING frag is merged with the
		   FUSED_JCC_PADDING frag.  */
		next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
		/* CMP like instruction size.  */
		next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
		frag_wane (pad_fragP);
		/* Skip to branch_fragP.  */
		next_fragP = branch_fragP;
	      }
	    else if (next_fragP->tc_frag_data.max_prefix_length)
	      {
		/* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
		   a fused jcc.  */
		next_fragP->fr_subtype
		  = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
		next_fragP->tc_frag_data.max_bytes
		  = next_fragP->tc_frag_data.max_prefix_length;
		/* This will be updated in the BRANCH_PREFIX scan.  */
		next_fragP->tc_frag_data.max_prefix_length = 0;
	      }
	    else
	      frag_wane (next_fragP);
	    break;
	  }
    }

  /* Stop if there is no BRANCH_PREFIX.  */
  if (!align_branch_prefix_size)
    return;

  /* Scan for BRANCH_PREFIX.  */
  for (; fragP != NULL; fragP = fragP->fr_next)
    {
      if (fragP->fr_type != rs_machine_dependent
	  || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
	      != BRANCH_PREFIX))
	continue;

      /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
	 COND_JUMP_PREFIX.  */
      max_prefix_length = 0;
      for (next_fragP = fragP;
	   next_fragP != NULL;
	   next_fragP = next_fragP->fr_next)
	{
	  if (next_fragP->fr_type == rs_fill)
	    /* Skip rs_fill frags.  */
	    continue;
	  else if (next_fragP->fr_type != rs_machine_dependent)
	    /* Stop for all other frags.  */
	    break;

	  /* rs_machine_dependent frags.  */
	  if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
	      == BRANCH_PREFIX)
	    {
	      /* Count BRANCH_PREFIX frags, capping the total at
		 MAX_FUSED_JCC_PADDING_SIZE and waning the excess.  */
	      if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
		{
		  max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
		  frag_wane (next_fragP);
		}
	      else
		max_prefix_length
		  += next_fragP->tc_frag_data.max_bytes;
	    }
	  else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
		    == BRANCH_PADDING)
		   || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
		       == FUSED_JCC_PADDING))
	    {
	      /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
	      fragP->tc_frag_data.u.padding_fragP = next_fragP;
	      break;
	    }
	  else
	    /* Stop for other rs_machine_dependent frags.  */
	    break;
	}

      fragP->tc_frag_data.max_prefix_length = max_prefix_length;

      /* Skip to the next frag.  */
      fragP = next_fragP;
    }
}
11827
/* Compute padding size for

	FUSED_JCC_PADDING
	CMP like instruction
	BRANCH_PADDING
	COND_JUMP/UNCOND_JUMP

   or

	BRANCH_PADDING
	COND_JUMP/UNCOND_JUMP

   FRAGP is the padding frag; ADDRESS, if non-zero, overrides
   FRAGP->fr_address as the frag's start address.  Returns the number
   of padding bytes needed so the branch does not cross (or end at) a
   2**align_branch_power boundary.  */

static int
i386_branch_padding_size (fragS *fragP, offsetT address)
{
  unsigned int offset, size, padding_size;
  fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;

  /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
  if (!address)
    address = fragP->fr_address;
  address += fragP->fr_fix;

  /* CMP like instruction size (zero for plain BRANCH_PADDING).  */
  size = fragP->tc_frag_data.cmp_size;

  /* The base size of the branch frag.  */
  size += branch_fragP->fr_fix;

  /* Add opcode and displacement bytes for the rs_machine_dependent
     branch frag.  */
  if (branch_fragP->fr_type == rs_machine_dependent)
    size += md_relax_table[branch_fragP->fr_subtype].rlx_length;

  /* Check if branch is within boundary and doesn't end at the last
     byte.  */
  offset = address & ((1U << align_branch_power) - 1);
  if ((offset + size) >= (1U << align_branch_power))
    /* Padding needed to avoid crossing boundary.  */
    padding_size = (1U << align_branch_power) - offset;
  else
    /* No padding needed.  */
    padding_size = 0;

  /* The return value may be saved in tc_frag_data.length which is
     unsigned byte.  */
  if (!fits_in_unsigned_byte (padding_size))
    abort ();

  return padding_size;
}
11880
11881 /* i386_generic_table_relax_frag()
11882
11883 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11884 grow/shrink padding to align branch frags. Hand others to
11885 relax_frag(). */
11886
11887 long
11888 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11889 {
11890 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11891 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11892 {
11893 long padding_size = i386_branch_padding_size (fragP, 0);
11894 long grow = padding_size - fragP->tc_frag_data.length;
11895
11896 /* When the BRANCH_PREFIX frag is used, the computed address
11897 must match the actual address and there should be no padding. */
11898 if (fragP->tc_frag_data.padding_address
11899 && (fragP->tc_frag_data.padding_address != fragP->fr_address
11900 || padding_size))
11901 abort ();
11902
11903 /* Update the padding size. */
11904 if (grow)
11905 fragP->tc_frag_data.length = padding_size;
11906
11907 return grow;
11908 }
11909 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11910 {
11911 fragS *padding_fragP, *next_fragP;
11912 long padding_size, left_size, last_size;
11913
11914 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11915 if (!padding_fragP)
11916 /* Use the padding set by the leading BRANCH_PREFIX frag. */
11917 return (fragP->tc_frag_data.length
11918 - fragP->tc_frag_data.last_length);
11919
11920 /* Compute the relative address of the padding frag in the very
11921 first time where the BRANCH_PREFIX frag sizes are zero. */
11922 if (!fragP->tc_frag_data.padding_address)
11923 fragP->tc_frag_data.padding_address
11924 = padding_fragP->fr_address - (fragP->fr_address - stretch);
11925
11926 /* First update the last length from the previous interation. */
11927 left_size = fragP->tc_frag_data.prefix_length;
11928 for (next_fragP = fragP;
11929 next_fragP != padding_fragP;
11930 next_fragP = next_fragP->fr_next)
11931 if (next_fragP->fr_type == rs_machine_dependent
11932 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11933 == BRANCH_PREFIX))
11934 {
11935 if (left_size)
11936 {
11937 int max = next_fragP->tc_frag_data.max_bytes;
11938 if (max)
11939 {
11940 int size;
11941 if (max > left_size)
11942 size = left_size;
11943 else
11944 size = max;
11945 left_size -= size;
11946 next_fragP->tc_frag_data.last_length = size;
11947 }
11948 }
11949 else
11950 next_fragP->tc_frag_data.last_length = 0;
11951 }
11952
11953 /* Check the padding size for the padding frag. */
11954 padding_size = i386_branch_padding_size
11955 (padding_fragP, (fragP->fr_address
11956 + fragP->tc_frag_data.padding_address));
11957
11958 last_size = fragP->tc_frag_data.prefix_length;
11959 /* Check if there is change from the last interation. */
11960 if (padding_size == last_size)
11961 {
11962 /* Update the expected address of the padding frag. */
11963 padding_fragP->tc_frag_data.padding_address
11964 = (fragP->fr_address + padding_size
11965 + fragP->tc_frag_data.padding_address);
11966 return 0;
11967 }
11968
11969 if (padding_size > fragP->tc_frag_data.max_prefix_length)
11970 {
11971 /* No padding if there is no sufficient room. Clear the
11972 expected address of the padding frag. */
11973 padding_fragP->tc_frag_data.padding_address = 0;
11974 padding_size = 0;
11975 }
11976 else
11977 /* Store the expected address of the padding frag. */
11978 padding_fragP->tc_frag_data.padding_address
11979 = (fragP->fr_address + padding_size
11980 + fragP->tc_frag_data.padding_address);
11981
11982 fragP->tc_frag_data.prefix_length = padding_size;
11983
11984 /* Update the length for the current interation. */
11985 left_size = padding_size;
11986 for (next_fragP = fragP;
11987 next_fragP != padding_fragP;
11988 next_fragP = next_fragP->fr_next)
11989 if (next_fragP->fr_type == rs_machine_dependent
11990 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11991 == BRANCH_PREFIX))
11992 {
11993 if (left_size)
11994 {
11995 int max = next_fragP->tc_frag_data.max_bytes;
11996 if (max)
11997 {
11998 int size;
11999 if (max > left_size)
12000 size = left_size;
12001 else
12002 size = max;
12003 left_size -= size;
12004 next_fragP->tc_frag_data.length = size;
12005 }
12006 }
12007 else
12008 next_fragP->tc_frag_data.length = 0;
12009 }
12010
12011 return (fragP->tc_frag_data.length
12012 - fragP->tc_frag_data.last_length);
12013 }
12014 return relax_frag (segment, fragP, stretch);
12015 }
12016
12017 /* md_estimate_size_before_relax()
12018
12019 Called just before relax() for rs_machine_dependent frags. The x86
12020 assembler uses these frags to handle variable size jump
12021 instructions.
12022
12023 Any symbol that is now undefined will not become defined.
12024 Return the correct fr_subtype in the frag.
12025 Return the initial "guess for variable size of frag" to caller.
12026 The guess is actually the growth beyond the fixed part. Whatever
12027 we do to grow the fixed or variable part contributes to our
12028 returned value. */
12029
int
md_estimate_size_before_relax (fragS *fragP, segT segment)
{
  /* Branch-alignment frags (padding / prefix) are sized by the
     classifier rather than by the generic relax machinery.  */
  if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
      || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
      || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
    {
      i386_classify_machine_dependent_frag (fragP);
      return fragP->tc_frag_data.length;
    }

  /* We've already got fragP->fr_subtype right; all we have to do is
     check for un-relaxable symbols.  On an ELF system, we can't relax
     an externally visible symbol, because it may be overridden by a
     shared library.  */
  if (S_GET_SEGMENT (fragP->fr_symbol) != segment
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      || (IS_ELF
	  && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
						fragP->fr_var))
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      || (OUTPUT_FLAVOR == bfd_target_coff_flavour
	  && S_IS_WEAK (fragP->fr_symbol))
#endif
      )
    {
      /* Symbol is undefined in this segment, or we need to keep a
	 reloc so that weak symbols can be overridden.  */
      int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
      enum bfd_reloc_code_real reloc_type;
      unsigned char *opcode;
      int old_fr_fix;

      /* fr_var carries an explicit reloc (e.g. from a @plt operand);
	 otherwise pick a plain PC-relative reloc matching SIZE.  */
      if (fragP->fr_var != NO_RELOC)
	reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
      else if (size == 2)
	reloc_type = BFD_RELOC_16_PCREL;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      else if (need_plt32_p (fragP->fr_symbol))
	reloc_type = BFD_RELOC_X86_64_PLT32;
#endif
      else
	reloc_type = BFD_RELOC_32_PCREL;

      old_fr_fix = fragP->fr_fix;
      opcode = (unsigned char *) fragP->fr_opcode;

      /* Rewrite the short-form branch opcode in place to its long form
	 and emit a fixup for the (now fixed-size) displacement.  */
      switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
	{
	case UNCOND_JUMP:
	  /* Make jmp (0xeb) a (d)word displacement jump.  */
	  opcode[0] = 0xe9;
	  fragP->fr_fix += size;
	  fix_new (fragP, old_fr_fix, size,
		   fragP->fr_symbol,
		   fragP->fr_offset, 1,
		   reloc_type);
	  break;

	case COND_JUMP86:
	  if (size == 2
	      && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
	    {
	      /* Negate the condition, and branch past an
		 unconditional jump.  */
	      opcode[0] ^= 1;
	      opcode[1] = 3;
	      /* Insert an unconditional jump.  */
	      opcode[2] = 0xe9;
	      /* We added two extra opcode bytes, and have a two byte
		 offset.  */
	      fragP->fr_fix += 2 + 2;
	      fix_new (fragP, old_fr_fix + 2, 2,
		       fragP->fr_symbol,
		       fragP->fr_offset, 1,
		       reloc_type);
	      break;
	    }
	  /* Fall through.  */

	case COND_JUMP:
	  if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
	    {
	      /* Promotion disabled: keep the 1-byte form and let the
		 fixup overflow-check catch out-of-range targets.  */
	      fixS *fixP;

	      fragP->fr_fix += 1;
	      fixP = fix_new (fragP, old_fr_fix, 1,
			      fragP->fr_symbol,
			      fragP->fr_offset, 1,
			      BFD_RELOC_8_PCREL);
	      fixP->fx_signed = 1;
	      break;
	    }

	  /* This changes the byte-displacement jump 0x7N
	     to the (d)word-displacement jump 0x0f,0x8N.  */
	  opcode[1] = opcode[0] + 0x10;
	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
	  /* We've added an opcode byte.  */
	  fragP->fr_fix += 1 + size;
	  fix_new (fragP, old_fr_fix + 1, size,
		   fragP->fr_symbol,
		   fragP->fr_offset, 1,
		   reloc_type);
	  break;

	default:
	  BAD_CASE (fragP->fr_subtype);
	  break;
	}
      /* The frag is now fully fixed; it will not relax further.  */
      frag_wane (fragP);
      return fragP->fr_fix - old_fr_fix;
    }

  /* Guess size depending on current relax state.  Initially the relax
     state will correspond to a short jump and we return 1, because
     the variable part of the frag (the branch offset) is one byte
     long.  However, we can relax a section more than once and in that
     case we must either set fr_subtype back to the unrelaxed state,
     or return the value for the appropriate branch.  */
  return md_relax_table[fragP->fr_subtype].rlx_length;
}
12153
12154 /* Called after relax() is finished.
12155
12156 In: Address of frag.
12157 fr_type == rs_machine_dependent.
12158 fr_subtype is what the address relaxed to.
12159
12160 Out: Any fixSs and constants are set up.
12161 Caller will turn frag into a ".space 0". */
12162
12163 void
12164 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12165 fragS *fragP)
12166 {
12167 unsigned char *opcode;
12168 unsigned char *where_to_put_displacement = NULL;
12169 offsetT target_address;
12170 offsetT opcode_address;
12171 unsigned int extension = 0;
12172 offsetT displacement_from_opcode_start;
12173
12174 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12175 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12176 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12177 {
12178 /* Generate nop padding. */
12179 unsigned int size = fragP->tc_frag_data.length;
12180 if (size)
12181 {
12182 if (size > fragP->tc_frag_data.max_bytes)
12183 abort ();
12184
12185 if (flag_debug)
12186 {
12187 const char *msg;
12188 const char *branch = "branch";
12189 const char *prefix = "";
12190 fragS *padding_fragP;
12191 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12192 == BRANCH_PREFIX)
12193 {
12194 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12195 switch (fragP->tc_frag_data.default_prefix)
12196 {
12197 default:
12198 abort ();
12199 break;
12200 case CS_PREFIX_OPCODE:
12201 prefix = " cs";
12202 break;
12203 case DS_PREFIX_OPCODE:
12204 prefix = " ds";
12205 break;
12206 case ES_PREFIX_OPCODE:
12207 prefix = " es";
12208 break;
12209 case FS_PREFIX_OPCODE:
12210 prefix = " fs";
12211 break;
12212 case GS_PREFIX_OPCODE:
12213 prefix = " gs";
12214 break;
12215 case SS_PREFIX_OPCODE:
12216 prefix = " ss";
12217 break;
12218 }
12219 if (padding_fragP)
12220 msg = _("%s:%u: add %d%s at 0x%llx to align "
12221 "%s within %d-byte boundary\n");
12222 else
12223 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12224 "align %s within %d-byte boundary\n");
12225 }
12226 else
12227 {
12228 padding_fragP = fragP;
12229 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12230 "%s within %d-byte boundary\n");
12231 }
12232
12233 if (padding_fragP)
12234 switch (padding_fragP->tc_frag_data.branch_type)
12235 {
12236 case align_branch_jcc:
12237 branch = "jcc";
12238 break;
12239 case align_branch_fused:
12240 branch = "fused jcc";
12241 break;
12242 case align_branch_jmp:
12243 branch = "jmp";
12244 break;
12245 case align_branch_call:
12246 branch = "call";
12247 break;
12248 case align_branch_indirect:
12249 branch = "indiret branch";
12250 break;
12251 case align_branch_ret:
12252 branch = "ret";
12253 break;
12254 default:
12255 break;
12256 }
12257
12258 fprintf (stdout, msg,
12259 fragP->fr_file, fragP->fr_line, size, prefix,
12260 (long long) fragP->fr_address, branch,
12261 1 << align_branch_power);
12262 }
12263 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12264 memset (fragP->fr_opcode,
12265 fragP->tc_frag_data.default_prefix, size);
12266 else
12267 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12268 size, 0);
12269 fragP->fr_fix += size;
12270 }
12271 return;
12272 }
12273
12274 opcode = (unsigned char *) fragP->fr_opcode;
12275
12276 /* Address we want to reach in file space. */
12277 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12278
12279 /* Address opcode resides at in file space. */
12280 opcode_address = fragP->fr_address + fragP->fr_fix;
12281
12282 /* Displacement from opcode start to fill into instruction. */
12283 displacement_from_opcode_start = target_address - opcode_address;
12284
12285 if ((fragP->fr_subtype & BIG) == 0)
12286 {
12287 /* Don't have to change opcode. */
12288 extension = 1; /* 1 opcode + 1 displacement */
12289 where_to_put_displacement = &opcode[1];
12290 }
12291 else
12292 {
12293 if (no_cond_jump_promotion
12294 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12295 as_warn_where (fragP->fr_file, fragP->fr_line,
12296 _("long jump required"));
12297
12298 switch (fragP->fr_subtype)
12299 {
12300 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12301 extension = 4; /* 1 opcode + 4 displacement */
12302 opcode[0] = 0xe9;
12303 where_to_put_displacement = &opcode[1];
12304 break;
12305
12306 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12307 extension = 2; /* 1 opcode + 2 displacement */
12308 opcode[0] = 0xe9;
12309 where_to_put_displacement = &opcode[1];
12310 break;
12311
12312 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12313 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12314 extension = 5; /* 2 opcode + 4 displacement */
12315 opcode[1] = opcode[0] + 0x10;
12316 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12317 where_to_put_displacement = &opcode[2];
12318 break;
12319
12320 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12321 extension = 3; /* 2 opcode + 2 displacement */
12322 opcode[1] = opcode[0] + 0x10;
12323 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12324 where_to_put_displacement = &opcode[2];
12325 break;
12326
12327 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12328 extension = 4;
12329 opcode[0] ^= 1;
12330 opcode[1] = 3;
12331 opcode[2] = 0xe9;
12332 where_to_put_displacement = &opcode[3];
12333 break;
12334
12335 default:
12336 BAD_CASE (fragP->fr_subtype);
12337 break;
12338 }
12339 }
12340
12341 /* If size if less then four we are sure that the operand fits,
12342 but if it's 4, then it could be that the displacement is larger
12343 then -/+ 2GB. */
12344 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12345 && object_64bit
12346 && ((addressT) (displacement_from_opcode_start - extension
12347 + ((addressT) 1 << 31))
12348 > (((addressT) 2 << 31) - 1)))
12349 {
12350 as_bad_where (fragP->fr_file, fragP->fr_line,
12351 _("jump target out of range"));
12352 /* Make us emit 0. */
12353 displacement_from_opcode_start = extension;
12354 }
12355 /* Now put displacement after opcode. */
12356 md_number_to_chars ((char *) where_to_put_displacement,
12357 (valueT) (displacement_from_opcode_start - extension),
12358 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12359 fragP->fr_fix += extension;
12360 }
12361 \f
12362 /* Apply a fixup (fixP) to segment data, once it has been determined
12363 by our caller that we have all the info we need to fix it up.
12364
12365 Parameter valP is the pointer to the value of the bits.
12366
12367 On the 386, immediates, displacements, and data pointers are all in
12368 the same (little-endian) format, so we don't need to care about which
12369 we are handling. */
12370
void
md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
{
  /* P points at the bytes in the frag that the fixup patches.  */
  char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
  valueT value = *valP;

#if !defined (TE_Mach)
  /* Canonicalize absolute reloc types to their PC-relative variants
     when the fixup itself is PC-relative.  */
  if (fixP->fx_pcrel)
    {
      switch (fixP->fx_r_type)
	{
	default:
	  break;

	case BFD_RELOC_64:
	  fixP->fx_r_type = BFD_RELOC_64_PCREL;
	  break;
	case BFD_RELOC_32:
	case BFD_RELOC_X86_64_32S:
	  fixP->fx_r_type = BFD_RELOC_32_PCREL;
	  break;
	case BFD_RELOC_16:
	  fixP->fx_r_type = BFD_RELOC_16_PCREL;
	  break;
	case BFD_RELOC_8:
	  fixP->fx_r_type = BFD_RELOC_8_PCREL;
	  break;
	}
    }

  if (fixP->fx_addsy != NULL
      && (fixP->fx_r_type == BFD_RELOC_32_PCREL
	  || fixP->fx_r_type == BFD_RELOC_64_PCREL
	  || fixP->fx_r_type == BFD_RELOC_16_PCREL
	  || fixP->fx_r_type == BFD_RELOC_8_PCREL)
      && !use_rela_relocations)
    {
      /* This is a hack.  There should be a better way to handle this.
	 This covers for the fact that bfd_install_relocation will
	 subtract the current location (for partial_inplace, PC relative
	 relocations); see more below.  */
#ifndef OBJ_AOUT
      if (IS_ELF
#ifdef TE_PE
	  || OUTPUT_FLAVOR == bfd_target_coff_flavour
#endif
	  )
	value += fixP->fx_where + fixP->fx_frag->fr_address;
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      if (IS_ELF)
	{
	  segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);

	  if ((sym_seg == seg
	       || (symbol_section_p (fixP->fx_addsy)
		   && sym_seg != absolute_section))
	      && !generic_force_reloc (fixP))
	    {
	      /* Yes, we add the values in twice.  This is because
		 bfd_install_relocation subtracts them out again.  I think
		 bfd_install_relocation is broken, but I don't dare change
		 it.  FIXME.  */
	      value += fixP->fx_where + fixP->fx_frag->fr_address;
	    }
	}
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      /* For some reason, the PE format does not store a
	 section address offset for a PC relative symbol.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != seg
	  || S_IS_WEAK (fixP->fx_addsy))
	value += md_pcrel_from (fixP);
#endif
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  if (fixP->fx_addsy != NULL
      && S_IS_WEAK (fixP->fx_addsy)
      /* PR 16858: Do not modify weak function references.  */
      && ! fixP->fx_pcrel)
    {
#if !defined (TE_PEP)
      /* For x86 PE weak function symbols are neither PC-relative
	 nor do they set S_IS_FUNCTION.  So the only reliable way
	 to detect them is to check the flags of their containing
	 section.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
	  && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
	;
      else
#endif
	value -= S_GET_VALUE (fixP->fx_addsy);
    }
#endif

  /* Fix a few things - the dynamic linker expects certain values here,
     and we must not disappoint it.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF && fixP->fx_addsy)
    switch (fixP->fx_r_type)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_X86_64_PLT32:
	/* Make the jump instruction point to the address of the operand.
	   At runtime we merely add the offset to the actual PLT entry.
	   NB: Subtract the offset size only for jump instructions.  */
	if (fixP->fx_pcrel)
	  value = -4;
	break;

      case BFD_RELOC_386_TLS_GD:
      case BFD_RELOC_386_TLS_LDM:
      case BFD_RELOC_386_TLS_IE_32:
      case BFD_RELOC_386_TLS_IE:
      case BFD_RELOC_386_TLS_GOTIE:
      case BFD_RELOC_386_TLS_GOTDESC:
      case BFD_RELOC_X86_64_TLSGD:
      case BFD_RELOC_X86_64_TLSLD:
      case BFD_RELOC_X86_64_GOTTPOFF:
      case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
	value = 0; /* Fully resolved at runtime.  No addend.  */
	/* Fallthrough */
      case BFD_RELOC_386_TLS_LE:
      case BFD_RELOC_386_TLS_LDO_32:
      case BFD_RELOC_386_TLS_LE_32:
      case BFD_RELOC_X86_64_DTPOFF32:
      case BFD_RELOC_X86_64_DTPOFF64:
      case BFD_RELOC_X86_64_TPOFF32:
      case BFD_RELOC_X86_64_TPOFF64:
	S_SET_THREAD_LOCAL (fixP->fx_addsy);
	break;

      case BFD_RELOC_386_TLS_DESC_CALL:
      case BFD_RELOC_X86_64_TLSDESC_CALL:
	value = 0; /* Fully resolved at runtime.  No addend.  */
	S_SET_THREAD_LOCAL (fixP->fx_addsy);
	fixP->fx_done = 0;
	return;

      case BFD_RELOC_VTABLE_INHERIT:
      case BFD_RELOC_VTABLE_ENTRY:
	fixP->fx_done = 0;
	return;

      default:
	break;
      }
#endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
  *valP = value;
#endif /* !defined (TE_Mach)  */

  /* Are we finished with this relocation now?  */
  if (fixP->fx_addsy == NULL)
    fixP->fx_done = 1;
#if defined (OBJ_COFF) && defined (TE_PE)
  else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
    {
      fixP->fx_done = 0;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Clear out the frag for now.  */
      value = 0;
    }
#endif
  else if (use_rela_relocations)
    {
      /* RELA targets keep the addend in the reloc, not in the section
	 contents; stash it for tc_gen_reloc and write zeroes here.  */
      fixP->fx_no_overflow = 1;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      value = 0;
    }

  md_number_to_chars (p, value, fixP->fx_size);
}
12545 \f
12546 const char *
12547 md_atof (int type, char *litP, int *sizeP)
12548 {
12549 /* This outputs the LITTLENUMs in REVERSE order;
12550 in accord with the bigendian 386. */
12551 return ieee_md_atof (type, litP, sizeP, FALSE);
12552 }
12553 \f
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Render character C for use in a diagnostic: quoted if printable,
   otherwise as a hex value.  Returns a static buffer that is
   overwritten by the next call.  */

static char *
output_invalid (int c)
{
  if (!ISPRINT (c))
    snprintf (output_invalid_buf, sizeof (output_invalid_buf),
	      "(0x%x)", (unsigned char) c);
  else
    snprintf (output_invalid_buf, sizeof (output_invalid_buf),
	      "'%c'", c);
  return output_invalid_buf;
}
12567
12568 /* Verify that @r can be used in the current context. */
12569
/* Return TRUE if register R is usable with the currently selected
   CPU architecture, code size (flag_code) and syntax settings.
   NB: as a side effect, seeing an upper-16 (VREX) vector register
   forces EVEX encoding via i.vec_encoding.  */
static bfd_boolean check_register (const reg_entry *r)
{
  if (allow_pseudo_reg)
    return TRUE;

  if (operand_type_all_zero (&r->reg_type))
    return FALSE;

  /* 32-bit registers, %fs/%gs, and control/debug registers need
     at least an i386.  */
  if ((r->reg_type.bitfield.dword
       || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
       || r->reg_type.bitfield.class == RegCR
       || r->reg_type.bitfield.class == RegDR)
      && !cpu_arch_flags.bitfield.cpui386)
    return FALSE;

  /* Test registers exist only on i386/i486: not in 64-bit mode, not
     before the i386, and not on i586/i686 or later ISAs.  */
  if (r->reg_type.bitfield.class == RegTR
      && (flag_code == CODE_64BIT
	  || !cpu_arch_flags.bitfield.cpui386
	  || cpu_arch_isa_flags.bitfield.cpui586
	  || cpu_arch_isa_flags.bitfield.cpui686))
    return FALSE;

  if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
    return FALSE;

  /* Vector registers are gated by the feature level: ZMM and mask
     registers need AVX512F, YMM needs AVX, XMM needs SSE.  */
  if (!cpu_arch_flags.bitfield.cpuavx512f)
    {
      if (r->reg_type.bitfield.zmmword
	  || r->reg_type.bitfield.class == RegMask)
	return FALSE;

      if (!cpu_arch_flags.bitfield.cpuavx)
	{
	  if (r->reg_type.bitfield.ymmword)
	    return FALSE;

	  if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
	    return FALSE;
	}
    }

  /* AMX tile registers need AMX-TILE and 64-bit mode.  */
  if (r->reg_type.bitfield.tmmword
      && (!cpu_arch_flags.bitfield.cpuamx_tile
          || flag_code != CODE_64BIT))
    return FALSE;

  if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
    return FALSE;

  /* Don't allow fake index register unless allow_index_reg isn't 0.  */
  if (!allow_index_reg && r->reg_num == RegIZ)
    return FALSE;

  /* Upper 16 vector registers are only available with VREX in 64bit
     mode, and require EVEX encoding.  */
  if (r->reg_flags & RegVRex)
    {
      if (!cpu_arch_flags.bitfield.cpuavx512f
	  || flag_code != CODE_64BIT)
	return FALSE;

      if (i.vec_encoding == vex_encoding_default)
	i.vec_encoding = vex_encoding_evex;
      else if (i.vec_encoding != vex_encoding_evex)
	i.vec_encoding = vex_encoding_error;
    }

  /* REX-only and 64-bit registers need 64-bit mode, except %cr8 etc.
     which are reachable via the LM (long-mode capable) CPU flag.  */
  if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
      && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
      && flag_code != CODE_64BIT)
    return FALSE;

  /* The pseudo segment register "flat" is Intel-syntax only.  */
  if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
      && !intel_syntax)
    return FALSE;

  return TRUE;
}
12648
12649 /* REG_STRING starts *before* REGISTER_PREFIX. */
12650
/* Parse a register name starting at REG_STRING (which may begin with
   REGISTER_PREFIX).  On success return the register table entry and
   set *END_OP to just past the consumed text; return NULL if the text
   is not a valid/usable register.  Handles the "%st(i)" floating point
   forms, allowing spaces inside the parentheses.  */
static const reg_entry *
parse_real_register (char *reg_string, char **end_op)
{
  char *s = reg_string;
  char *p;
  char reg_name_given[MAX_REG_NAME_SIZE + 1];
  const reg_entry *r;

  /* Skip possible REGISTER_PREFIX and possible whitespace.  */
  if (*s == REGISTER_PREFIX)
    ++s;

  if (is_space_char (*s))
    ++s;

  /* Copy characters that can appear in a register name, mapping
     through register_chars[]; stop at the first disallowed one.  */
  p = reg_name_given;
  while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
    {
      if (p >= reg_name_given + MAX_REG_NAME_SIZE)
	return (const reg_entry *) NULL;
      s++;
    }

  /* For naked regs, make sure that we are not dealing with an identifier.
     This prevents confusing an identifier like `eax_var' with register
     `eax'.  */
  if (allow_naked_reg && identifier_chars[(unsigned char) *s])
    return (const reg_entry *) NULL;

  *end_op = s;

  r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);

  /* Handle floating point regs, allowing spaces in the (i) part.  */
  if (r == i386_regtab /* %st is first entry of table  */)
    {
      if (!cpu_arch_flags.bitfield.cpu8087
	  && !cpu_arch_flags.bitfield.cpu287
	  && !cpu_arch_flags.bitfield.cpu387
	  && !allow_pseudo_reg)
	return (const reg_entry *) NULL;

      if (is_space_char (*s))
	++s;
      if (*s == '(')
	{
	  ++s;
	  if (is_space_char (*s))
	    ++s;
	  if (*s >= '0' && *s <= '7')
	    {
	      int fpr = *s - '0';
	      ++s;
	      if (is_space_char (*s))
		++s;
	      if (*s == ')')
		{
		  *end_op = s + 1;
		  /* st(0)..st(7) are consecutive table entries, so
		     index from st(0) by the parsed digit.  */
		  r = (const reg_entry *) str_hash_find (reg_hash, "st(0)");
		  know (r);
		  return r + fpr;
		}
	    }
	  /* We have "%st(" then garbage.  */
	  return (const reg_entry *) NULL;
	}
    }

  /* Reject registers not usable in the current context.  */
  return r && check_register (r) ? r : NULL;
}
12721
12722 /* REG_STRING starts *before* REGISTER_PREFIX. */
12723
/* Parse a register at REG_STRING (which starts *before* any
   REGISTER_PREFIX).  First try a literal register name; failing that,
   look the text up as a symbol equated to a register (".equ foo, %eax").
   Returns the register entry, &bad_reg for a register not usable here
   (an error has been emitted), or NULL if not a register at all.  */
static const reg_entry *
parse_register (char *reg_string, char **end_op)
{
  const reg_entry *r;

  if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
    r = parse_real_register (reg_string, end_op);
  else
    r = NULL;
  if (!r)
    {
      /* Temporarily point the input scanner at REG_STRING so
	 get_symbol_name () can terminate and look up the name.  */
      char *save = input_line_pointer;
      char c;
      symbolS *symbolP;

      input_line_pointer = reg_string;
      c = get_symbol_name (&reg_string);
      symbolP = symbol_find (reg_string);
      if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
	{
	  const expressionS *e = symbol_get_value_expression (symbolP);

	  know (e->X_op == O_register);
	  know (e->X_add_number >= 0
		&& (valueT) e->X_add_number < i386_regtab_size);
	  r = i386_regtab + e->X_add_number;
	  if (!check_register (r))
	    {
	      as_bad (_("register '%s%s' cannot be used here"),
		      register_prefix, r->reg_name);
	      r = &bad_reg;
	    }
	  *end_op = input_line_pointer;
	}
      /* Restore the scanner state modified above.  */
      *input_line_pointer = c;
      input_line_pointer = save;
    }
  return r;
}
12763
12764 int
12765 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12766 {
12767 const reg_entry *r;
12768 char *end = input_line_pointer;
12769
12770 *end = *nextcharP;
12771 r = parse_register (name, &input_line_pointer);
12772 if (r && end <= input_line_pointer)
12773 {
12774 *nextcharP = *input_line_pointer;
12775 *input_line_pointer = 0;
12776 if (r != &bad_reg)
12777 {
12778 e->X_op = O_register;
12779 e->X_add_number = r - i386_regtab;
12780 }
12781 else
12782 e->X_op = O_illegal;
12783 return 1;
12784 }
12785 input_line_pointer = end;
12786 *end = 0;
12787 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12788 }
12789
12790 void
12791 md_operand (expressionS *e)
12792 {
12793 char *end;
12794 const reg_entry *r;
12795
12796 switch (*input_line_pointer)
12797 {
12798 case REGISTER_PREFIX:
12799 r = parse_real_register (input_line_pointer, &end);
12800 if (r)
12801 {
12802 e->X_op = O_register;
12803 e->X_add_number = r - i386_regtab;
12804 input_line_pointer = end;
12805 }
12806 break;
12807
12808 case '[':
12809 gas_assert (intel_syntax);
12810 end = input_line_pointer++;
12811 expression (e);
12812 if (*input_line_pointer == ']')
12813 {
12814 ++input_line_pointer;
12815 e->X_op_symbol = make_expr_symbol (e);
12816 e->X_add_symbol = NULL;
12817 e->X_add_number = 0;
12818 e->X_op = O_index;
12819 }
12820 else
12821 {
12822 e->X_op = O_absent;
12823 input_line_pointer = end;
12824 }
12825 break;
12826 }
12827 }
12828
12829 \f
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Short command-line options; ELF additionally accepts the SVR4-style
   -k, -V, -Q: and -s switches handled in md_parse_option.  */
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
12835
/* Identifiers for the long options below, allocated consecutively
   from OPTION_MD_BASE.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
12870
/* Long command-line options recognized by this target, dispatched in
   md_parse_option via the OPTION_* identifiers above.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
size_t md_longopts_size = sizeof (md_longopts);
12918
/* Handle a machine-dependent command line option.

   C is the short-option character or an OPTION_* value from md_longopts;
   ARG is its argument, or NULL for options that take none.

   Returns 1 when the option was recognized and consumed, 0 when it is
   not an i386 option (or, for -Q, has a malformed argument).  A
   recognized option with an invalid value terminates assembly via
   as_fatal.  */

int
md_parse_option (int c, const char *arg)
{
  unsigned int j;
  char *arch, *next, *saved, *type;

  switch (c)
    {
    case 'n':
      optimize_align_code = 0;
      break;

    case 'q':
      quiet_warnings = 1;
      break;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
	 should be emitted or not.  FIXME: Not implemented.  */
    case 'Q':
      if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
	return 0;
      break;

      /* -V: SVR4 argument to print version ID.  */
    case 'V':
      print_version_id ();
      break;

      /* -k: Ignore for FreeBSD compatibility.  */
    case 'k':
      break;

    case 's':
      /* -s: On i386 Solaris, this tells the native assembler to use
	 .stab instead of .stab.excl.  We always use .stab anyhow.  */
      break;

    case OPTION_MSHARED:
      shared = 1;
      break;

    case OPTION_X86_USED_NOTE:
      if (strcasecmp (arg, "yes") == 0)
	x86_used_note = 1;
      else if (strcasecmp (arg, "no") == 0)
	x86_used_note = 0;
      else
	as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
      break;


#endif
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
    case OPTION_64:
      {
	const char **list, **l;

	/* Only accept --64 when a 64-bit target was compiled into this
	   BFD; scan the target list for one of the known names.  */
	list = bfd_target_list ();
	for (l = list; *l != NULL; l++)
	  if (CONST_STRNEQ (*l, "elf64-x86-64")
	      || strcmp (*l, "coff-x86-64") == 0
	      || strcmp (*l, "pe-x86-64") == 0
	      || strcmp (*l, "pei-x86-64") == 0
	      || strcmp (*l, "mach-o-x86-64") == 0)
	    {
	      default_arch = "x86_64";
	      break;
	    }
	if (*l == NULL)
	  as_fatal (_("no compiled in support for x86_64"));
	free (list);
      }
      break;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case OPTION_X32:
      if (IS_ELF)
	{
	  const char **list, **l;

	  list = bfd_target_list ();
	  for (l = list; *l != NULL; l++)
	    if (CONST_STRNEQ (*l, "elf32-x86-64"))
	      {
		default_arch = "x86_64:32";
		break;
	      }
	  if (*l == NULL)
	    as_fatal (_("no compiled in support for 32bit x86_64"));
	  free (list);
	}
      else
	as_fatal (_("32bit x86_64 is only supported for ELF"));
      break;
#endif

    case OPTION_32:
      default_arch = "i386";
      break;

    case OPTION_DIVIDE:
#ifdef SVR4_COMMENT_CHARS
      {
	/* Rebuild the comment-character set without '/' so that the
	   division operator works in expressions.  */
	char *n, *t;
	const char *s;

	n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
	t = n;
	for (s = i386_comment_chars; *s != '\0'; s++)
	  if (*s != '/')
	    *t++ = *s;
	*t = '\0';
	i386_comment_chars = n;
      }
#endif
      break;

    case OPTION_MARCH:
      /* ARG is CPU[+EXT...][+noEXT...]; split on '+' and process each
	 component in order.  Work on a writable copy since the '+'
	 separators are overwritten with NULs.  */
      saved = xstrdup (arg);
      arch = saved;
      /* Allow -march=+nosse.  */
      if (*arch == '+')
	arch++;
      do
	{
	  if (*arch == '.')
	    as_fatal (_("invalid -march= option: `%s'"), arg);
	  next = strchr (arch, '+');
	  if (next)
	    *next++ = '\0';
	  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	    {
	      if (strcmp (arch, cpu_arch [j].name) == 0)
		{
		  /* Processor.  */
		  if (! cpu_arch[j].flags.bitfield.cpui386)
		    continue;

		  cpu_arch_name = cpu_arch[j].name;
		  cpu_sub_arch_name = NULL;
		  cpu_arch_flags = cpu_arch[j].flags;
		  cpu_arch_isa = cpu_arch[j].type;
		  cpu_arch_isa_flags = cpu_arch[j].flags;
		  if (!cpu_arch_tune_set)
		    {
		      cpu_arch_tune = cpu_arch_isa;
		      cpu_arch_tune_flags = cpu_arch_isa_flags;
		    }
		  break;
		}
	      else if (*cpu_arch [j].name == '.'
		       && strcmp (arch, cpu_arch [j].name + 1) == 0)
		{
		  /* ISA extension.  Table entries for extensions carry a
		     leading '.' in their name.  */
		  i386_cpu_flags flags;

		  flags = cpu_flags_or (cpu_arch_flags,
					cpu_arch[j].flags);

		  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		    {
		      if (cpu_sub_arch_name)
			{
			  char *name = cpu_sub_arch_name;
			  cpu_sub_arch_name = concat (name,
						      cpu_arch[j].name,
						      (const char *) NULL);
			  free (name);
			}
		      else
			cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
		      cpu_arch_flags = flags;
		      cpu_arch_isa_flags = flags;
		    }
		  else
		    cpu_arch_isa_flags
		      = cpu_flags_or (cpu_arch_isa_flags,
				      cpu_arch[j].flags);
		  break;
		}
	    }

	  if (j >= ARRAY_SIZE (cpu_arch))
	    {
	      /* Disable an ISA extension.  */
	      for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
		if (strcmp (arch, cpu_noarch [j].name) == 0)
		  {
		    i386_cpu_flags flags;

		    flags = cpu_flags_and_not (cpu_arch_flags,
					       cpu_noarch[j].flags);
		    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		      {
			if (cpu_sub_arch_name)
			  {
			    char *name = cpu_sub_arch_name;
			    /* NOTE(review): unlike the enable path above,
			       this replaces rather than appends to the
			       accumulated sub-arch name (NAME is freed but
			       not concatenated) — confirm against upstream
			       whether this is intentional.  */
			    cpu_sub_arch_name = concat (arch,
							(const char *) NULL);
			    free (name);
			  }
			else
			  cpu_sub_arch_name = xstrdup (arch);
			cpu_arch_flags = flags;
			cpu_arch_isa_flags = flags;
		      }
		    break;
		  }

	      if (j >= ARRAY_SIZE (cpu_noarch))
		j = ARRAY_SIZE (cpu_arch);
	    }

	  if (j >= ARRAY_SIZE (cpu_arch))
	    as_fatal (_("invalid -march= option: `%s'"), arg);

	  arch = next;
	}
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MTUNE:
      if (*arg == '.')
	as_fatal (_("invalid -mtune= option: `%s'"), arg);
      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	{
	  if (strcmp (arg, cpu_arch [j].name) == 0)
	    {
	      cpu_arch_tune_set = 1;
	      cpu_arch_tune = cpu_arch [j].type;
	      cpu_arch_tune_flags = cpu_arch[j].flags;
	      break;
	    }
	}
      if (j >= ARRAY_SIZE (cpu_arch))
	as_fatal (_("invalid -mtune= option: `%s'"), arg);
      break;

    case OPTION_MMNEMONIC:
      if (strcasecmp (arg, "att") == 0)
	intel_mnemonic = 0;
      else if (strcasecmp (arg, "intel") == 0)
	intel_mnemonic = 1;
      else
	as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
      break;

    case OPTION_MSYNTAX:
      if (strcasecmp (arg, "att") == 0)
	intel_syntax = 0;
      else if (strcasecmp (arg, "intel") == 0)
	intel_syntax = 1;
      else
	as_fatal (_("invalid -msyntax= option: `%s'"), arg);
      break;

    case OPTION_MINDEX_REG:
      allow_index_reg = 1;
      break;

    case OPTION_MNAKED_REG:
      allow_naked_reg = 1;
      break;

    case OPTION_MSSE2AVX:
      sse2avx = 1;
      break;

    case OPTION_MSSE_CHECK:
      if (strcasecmp (arg, "error") == 0)
	sse_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
	sse_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
	sse_check = check_none;
      else
	as_fatal (_("invalid -msse-check= option: `%s'"), arg);
      break;

    case OPTION_MOPERAND_CHECK:
      if (strcasecmp (arg, "error") == 0)
	operand_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
	operand_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
	operand_check = check_none;
      else
	as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
      break;

    case OPTION_MAVXSCALAR:
      if (strcasecmp (arg, "128") == 0)
	avxscalar = vex128;
      else if (strcasecmp (arg, "256") == 0)
	avxscalar = vex256;
      else
	as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
      break;

    case OPTION_MVEXWIG:
      if (strcmp (arg, "0") == 0)
	vexwig = vexw0;
      else if (strcmp (arg, "1") == 0)
	vexwig = vexw1;
      else
	as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
      break;

    case OPTION_MADD_BND_PREFIX:
      add_bnd_prefix = 1;
      break;

    case OPTION_MEVEXLIG:
      if (strcmp (arg, "128") == 0)
	evexlig = evexl128;
      else if (strcmp (arg, "256") == 0)
	evexlig = evexl256;
      else if (strcmp (arg, "512") == 0)
	evexlig = evexl512;
      else
	as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXRCIG:
      if (strcmp (arg, "rne") == 0)
	evexrcig = rne;
      else if (strcmp (arg, "rd") == 0)
	evexrcig = rd;
      else if (strcmp (arg, "ru") == 0)
	evexrcig = ru;
      else if (strcmp (arg, "rz") == 0)
	evexrcig = rz;
      else
	as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXWIG:
      if (strcmp (arg, "0") == 0)
	evexwig = evexw0;
      else if (strcmp (arg, "1") == 0)
	evexwig = evexw1;
      else
	as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
      break;

# if defined (TE_PE) || defined (TE_PEP)
    case OPTION_MBIG_OBJ:
      use_big_obj = 1;
      break;
#endif

    case OPTION_MOMIT_LOCK_PREFIX:
      if (strcasecmp (arg, "yes") == 0)
	omit_lock_prefix = 1;
      else if (strcasecmp (arg, "no") == 0)
	omit_lock_prefix = 0;
      else
	as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
      break;

    case OPTION_MFENCE_AS_LOCK_ADD:
      if (strcasecmp (arg, "yes") == 0)
	avoid_fence = 1;
      else if (strcasecmp (arg, "no") == 0)
	avoid_fence = 0;
      else
	as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_AFTER_LOAD:
      if (strcasecmp (arg, "yes") == 0)
	lfence_after_load = 1;
      else if (strcasecmp (arg, "no") == 0)
	lfence_after_load = 0;
      else
	as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
      if (strcasecmp (arg, "all") == 0)
	{
	  /* "all" only covers register-based branches; an lfence before
	     a ret is needed as well, so enable it unless overridden.  */
	  lfence_before_indirect_branch = lfence_branch_all;
	  if (lfence_before_ret == lfence_before_ret_none)
	    lfence_before_ret = lfence_before_ret_shl;
	}
      else if (strcasecmp (arg, "memory") == 0)
	lfence_before_indirect_branch = lfence_branch_memory;
      else if (strcasecmp (arg, "register") == 0)
	lfence_before_indirect_branch = lfence_branch_register;
      else if (strcasecmp (arg, "none") == 0)
	lfence_before_indirect_branch = lfence_branch_none;
      else
	as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
		  arg);
      break;

    case OPTION_MLFENCE_BEFORE_RET:
      if (strcasecmp (arg, "or") == 0)
	lfence_before_ret = lfence_before_ret_or;
      else if (strcasecmp (arg, "not") == 0)
	lfence_before_ret = lfence_before_ret_not;
      else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
	lfence_before_ret = lfence_before_ret_shl;
      else if (strcasecmp (arg, "none") == 0)
	lfence_before_ret = lfence_before_ret_none;
      else
	as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
		  arg);
      break;

    case OPTION_MRELAX_RELOCATIONS:
      if (strcasecmp (arg, "yes") == 0)
	generate_relax_relocations = 1;
      else if (strcasecmp (arg, "no") == 0)
	generate_relax_relocations = 0;
      else
	as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
      break;

    case OPTION_MALIGN_BRANCH_BOUNDARY:
      {
	char *end;
	/* NOTE(review): result of strtoul stored in a signed long —
	   values above LONG_MAX would be rejected/accepted oddly, though
	   such boundaries are nonsensical anyway.  */
	long int align = strtoul (arg, &end, 0);
	if (*end == '\0')
	  {
	    if (align == 0)
	      {
		align_branch_power = 0;
		break;
	      }
	    else if (align >= 16)
	      {
		/* Accept only powers of two >= 16; reduce to the
		   exponent for align_branch_power.  */
		int align_power;
		for (align_power = 0;
		     (align & 1) == 0;
		     align >>= 1, align_power++)
		  continue;
		/* Limit alignment power to 31.  */
		if (align == 1 && align_power < 32)
		  {
		    align_branch_power = align_power;
		    break;
		  }
	      }
	  }
	as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
      }
      break;

    case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
      {
	char *end;
	int align = strtoul (arg, &end, 0);
	/* Some processors only support 5 prefixes.  */
	if (*end == '\0' && align >= 0 && align < 6)
	  {
	    align_branch_prefix_size = align;
	    break;
	  }
	as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
		  arg);
      }
      break;

    case OPTION_MALIGN_BRANCH:
      /* '+'-separated list of branch types; each adds a bit to the
	 align_branch mask.  */
      align_branch = 0;
      saved = xstrdup (arg);
      type = saved;
      do
	{
	  next = strchr (type, '+');
	  if (next)
	    *next++ = '\0';
	  if (strcasecmp (type, "jcc") == 0)
	    align_branch |= align_branch_jcc_bit;
	  else if (strcasecmp (type, "fused") == 0)
	    align_branch |= align_branch_fused_bit;
	  else if (strcasecmp (type, "jmp") == 0)
	    align_branch |= align_branch_jmp_bit;
	  else if (strcasecmp (type, "call") == 0)
	    align_branch |= align_branch_call_bit;
	  else if (strcasecmp (type, "ret") == 0)
	    align_branch |= align_branch_ret_bit;
	  else if (strcasecmp (type, "indirect") == 0)
	    align_branch |= align_branch_indirect_bit;
	  else
	    as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
	  type = next;
	}
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
      /* Shorthand for -malign-branch-boundary=32
	 -malign-branch-prefix-size=5 -malign-branch=jcc+fused+jmp.  */
      align_branch_power = 5;
      align_branch_prefix_size = 5;
      align_branch = (align_branch_jcc_bit
		      | align_branch_fused_bit
		      | align_branch_jmp_bit);
      break;

    case OPTION_MAMD64:
      isa64 = amd64;
      break;

    case OPTION_MINTEL64:
      isa64 = intel64;
      break;

    case 'O':
      if (arg == NULL)
	{
	  optimize = 1;
	  /* Turn off -Os.  */
	  optimize_for_space = 0;
	}
      else if (*arg == 's')
	{
	  optimize_for_space = 1;
	  /* Turn on all encoding optimizations.  */
	  optimize = INT_MAX;
	}
      else
	{
	  optimize = atoi (arg);
	  /* Turn off -Os.  */
	  optimize_for_space = 0;
	}
      break;

    default:
      return 0;
    }
  return 1;
}
13458
13459 #define MESSAGE_TEMPLATE \
13460 " "
13461
13462 static char *
13463 output_message (FILE *stream, char *p, char *message, char *start,
13464 int *left_p, const char *name, int len)
13465 {
13466 int size = sizeof (MESSAGE_TEMPLATE);
13467 int left = *left_p;
13468
13469 /* Reserve 2 spaces for ", " or ",\0" */
13470 left -= len + 2;
13471
13472 /* Check if there is any room. */
13473 if (left >= 0)
13474 {
13475 if (p != start)
13476 {
13477 *p++ = ',';
13478 *p++ = ' ';
13479 }
13480 p = mempcpy (p, name, len);
13481 }
13482 else
13483 {
13484 /* Output the current message now and start a new one. */
13485 *p++ = ',';
13486 *p = '\0';
13487 fprintf (stream, "%s\n", message);
13488 p = start;
13489 left = size - (start - message) - len - 2;
13490
13491 gas_assert (left >= 0);
13492
13493 p = mempcpy (p, name, len);
13494 }
13495
13496 *left_p = left;
13497 return p;
13498 }
13499
13500 static void
13501 show_arch (FILE *stream, int ext, int check)
13502 {
13503 static char message[] = MESSAGE_TEMPLATE;
13504 char *start = message + 27;
13505 char *p;
13506 int size = sizeof (MESSAGE_TEMPLATE);
13507 int left;
13508 const char *name;
13509 int len;
13510 unsigned int j;
13511
13512 p = start;
13513 left = size - (start - message);
13514 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13515 {
13516 /* Should it be skipped? */
13517 if (cpu_arch [j].skip)
13518 continue;
13519
13520 name = cpu_arch [j].name;
13521 len = cpu_arch [j].len;
13522 if (*name == '.')
13523 {
13524 /* It is an extension. Skip if we aren't asked to show it. */
13525 if (ext)
13526 {
13527 name++;
13528 len--;
13529 }
13530 else
13531 continue;
13532 }
13533 else if (ext)
13534 {
13535 /* It is an processor. Skip if we show only extension. */
13536 continue;
13537 }
13538 else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13539 {
13540 /* It is an impossible processor - skip. */
13541 continue;
13542 }
13543
13544 p = output_message (stream, p, message, start, &left, name, len);
13545 }
13546
13547 /* Display disabled extensions. */
13548 if (ext)
13549 for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13550 {
13551 name = cpu_noarch [j].name;
13552 len = cpu_noarch [j].len;
13553 p = output_message (stream, p, message, start, &left, name,
13554 len);
13555 }
13556
13557 *p = '\0';
13558 fprintf (stream, "%s\n", message);
13559 }
13560
/* Print the i386-specific command line options to STREAM; called from
   the generic --help handling in gas/as.c.  Option availability mirrors
   the #if conditions used when the options are parsed.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      Do not optimize code alignment\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
		      || defined (TE_PE) || defined (TE_PEP))
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of:\n"));
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                          for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                          for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                          for SAE-only ignored instructions\n"));
  /* The default shown depends on the configured compatibility mode.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
13703
13704 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13705 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13706 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13707
/* Pick the BFD target format name to use, based on default_arch (set by
   --32/--64/--x32 or configuration) and the configured output flavour.
   As side effects this fixes the code flag (16/32/64-bit mode), the ELF
   ABI variant, relocation style (REL vs RELA), and the name used for
   the TLS helper function.  Terminates via as_fatal on impossible
   arch/format combinations.  */

const char *
i386_target_format (void)
{
  if (!strncmp (default_arch, "x86_64", 6))
    {
      update_code_flag (CODE_64BIT, 1);
      /* "x86_64" selects the LP64 ABI, "x86_64:32" (from --x32) the
	 ILP32 variant.  */
      if (default_arch[6] == '\0')
	x86_elf_abi = X86_64_ABI;
      else
	x86_elf_abi = X86_64_X32_ABI;
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
	{
	  /* No -march= given: default the whole arch state to IAMCU.  */
	  static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
	  cpu_arch_name = "iamcu";
	  cpu_sub_arch_name = NULL;
	  cpu_arch_flags = iamcu_flags;
	  cpu_arch_isa = PROCESSOR_IAMCU;
	  cpu_arch_isa_flags = iamcu_flags;
	  if (!cpu_arch_tune_set)
	    {
	      cpu_arch_tune = cpu_arch_isa;
	      cpu_arch_tune_flags = cpu_arch_isa_flags;
	    }
	}
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
	as_fatal (_("Intel MCU doesn't support `%s' architecture"),
		  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

  /* Fall back to the generic 32/64-bit entry of cpu_arch when no ISA or
     tuning flags were established above.  */
  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;

  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      if (flag_code == CODE_64BIT)
	return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
      else
	return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
	const char *format;

	switch (x86_elf_abi)
	  {
	  default:
	    format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
	    tls_get_addr = "___tls_get_addr";
#endif
	    break;
	  case X86_64_ABI:
	    use_rela_relocations = 1;
	    object_64bit = 1;
#ifndef TE_SOLARIS
	    tls_get_addr = "__tls_get_addr";
#endif
	    format = ELF_TARGET_FORMAT64;
	    break;
	  case X86_64_X32_ABI:
	    use_rela_relocations = 1;
	    object_64bit = 1;
#ifndef TE_SOLARIS
	    tls_get_addr = "__tls_get_addr";
#endif
	    /* x32 objects are 64-bit but may not carry 64-bit
	       relocations.  */
	    disallow_64bit_reloc = 1;
	    format = ELF_TARGET_FORMAT32;
	    break;
	  }
	/* L1OM/K1OM/IAMCU use dedicated ELF formats and are restricted
	   to one ABI each.  */
	if (cpu_arch_isa == PROCESSOR_L1OM)
	  {
	    if (x86_elf_abi != X86_64_ABI)
	      as_fatal (_("Intel L1OM is 64bit only"));
	    return ELF_TARGET_L1OM_FORMAT;
	  }
	else if (cpu_arch_isa == PROCESSOR_K1OM)
	  {
	    if (x86_elf_abi != X86_64_ABI)
	      as_fatal (_("Intel K1OM is 64bit only"));
	    return ELF_TARGET_K1OM_FORMAT;
	  }
	else if (cpu_arch_isa == PROCESSOR_IAMCU)
	  {
	    if (x86_elf_abi != I386_ABI)
	      as_fatal (_("Intel MCU is 32bit only"));
	    return ELF_TARGET_IAMCU_FORMAT;
	  }
	else
	  return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
	{
	  use_rela_relocations = 1;
	  object_64bit = 1;
	  return "mach-o-x86-64";
	}
      else
	return "mach-o-i386";
#endif
    default:
      abort ();
      return NULL;
    }
}
13842
13843 #endif /* OBJ_MAYBE_ more than one */
13844 \f
13845 symbolS *
13846 md_undefined_symbol (char *name)
13847 {
13848 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
13849 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
13850 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
13851 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
13852 {
13853 if (!GOT_symbol)
13854 {
13855 if (symbol_find (name))
13856 as_bad (_("GOT already in symbol table"));
13857 GOT_symbol = symbol_new (name, undefined_section,
13858 &zero_address_frag, 0);
13859 };
13860 return GOT_symbol;
13861 }
13862 return 0;
13863 }
13864
13865 /* Round up a section size to the appropriate boundary. */
13866
13867 valueT
13868 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
13869 {
13870 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13871 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
13872 {
13873 /* For a.out, force the section size to be aligned. If we don't do
13874 this, BFD will align it for us, but it will not write out the
13875 final bytes of the section. This may be a bug in BFD, but it is
13876 easier to fix it here since that is how the other a.out targets
13877 work. */
13878 int align;
13879
13880 align = bfd_section_alignment (segment);
13881 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
13882 }
13883 #endif
13884
13885 return size;
13886 }
13887
13888 /* On the i386, PC-relative offsets are relative to the start of the
13889 next instruction. That is, the address of the offset, plus its
13890 size, since the offset is always the last part of the insn. */
13891
13892 long
13893 md_pcrel_from (fixS *fixP)
13894 {
13895 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13896 }
13897
13898 #ifndef I386COFF
13899
13900 static void
13901 s_bss (int ignore ATTRIBUTE_UNUSED)
13902 {
13903 int temp;
13904
13905 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13906 if (IS_ELF)
13907 obj_elf_section_change_hook ();
13908 #endif
13909 temp = get_absolute_expression ();
13910 subseg_set (bss_section, (subsegT) temp);
13911 demand_empty_rest_of_line ();
13912 }
13913
13914 #endif
13915
13916 /* Remember constant directive. */
13917
13918 void
13919 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13920 {
13921 if (last_insn.kind != last_insn_directive
13922 && (bfd_section_flags (now_seg) & SEC_CODE))
13923 {
13924 last_insn.seg = now_seg;
13925 last_insn.kind = last_insn_directive;
13926 last_insn.name = "constant directive";
13927 last_insn.file = as_where (&last_insn.line);
13928 if (lfence_before_ret != lfence_before_ret_none)
13929 {
13930 if (lfence_before_indirect_branch != lfence_branch_none)
13931 as_warn (_("constant directive skips -mlfence-before-ret "
13932 "and -mlfence-before-indirect-branch"));
13933 else
13934 as_warn (_("constant directive skips -mlfence-before-ret"));
13935 }
13936 else if (lfence_before_indirect_branch != lfence_branch_none)
13937 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
13938 }
13939 }
13940
/* Final per-fixup validation/adjustment before relocations are
   generated.  Rewrites sym-GOT differences into the appropriate
   GOT-relative relocation types, demotes PLT32 relocs that ended up
   against section symbols, and upgrades 386 GOT32 to GOT32X where the
   relaxation marker (fx_tcbit2) was set during encoding.  */

void
i386_validate_fix (fixS *fixp)
{
  if (fixp->fx_subsy)
    {
      if (fixp->fx_subsy == GOT_symbol)
	{
	  /* An expression of the form "sym - _GLOBAL_OFFSET_TABLE_".  */
	  if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
	    {
	      if (!object_64bit)
		abort ();
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
	      /* fx_tcbit2 marks a load eligible for GOTPCREL relaxation;
		 fx_tcbit distinguishes the REX-prefixed form.  */
	      if (fixp->fx_tcbit2)
		fixp->fx_r_type = (fixp->fx_tcbit
				   ? BFD_RELOC_X86_64_REX_GOTPCRELX
				   : BFD_RELOC_X86_64_GOTPCRELX);
	      else
#endif
		fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
	    }
	  else
	    {
	      if (!object_64bit)
		fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
	      else
		fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
	    }
	  /* The GOT symbol is now folded into the relocation type.  */
	  fixp->fx_subsy = 0;
	}
    }
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  else
    {
      /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
	 to section.  Since PLT32 relocation must be against symbols,
	 turn such PLT32 relocation into PC32 relocation.  */
      if (fixp->fx_addsy
	  && (fixp->fx_r_type == BFD_RELOC_386_PLT32
	      || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
	  && symbol_section_p (fixp->fx_addsy))
	fixp->fx_r_type = BFD_RELOC_32_PCREL;
      if (!object_64bit)
	{
	  if (fixp->fx_r_type == BFD_RELOC_386_GOT32
	      && fixp->fx_tcbit2)
	    fixp->fx_r_type = BFD_RELOC_386_GOT32X;
	}
    }
#endif
}
13991
13992 arelent *
13993 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
13994 {
13995 arelent *rel;
13996 bfd_reloc_code_real_type code;
13997
13998 switch (fixp->fx_r_type)
13999 {
14000 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14001 case BFD_RELOC_SIZE32:
14002 case BFD_RELOC_SIZE64:
14003 if (S_IS_DEFINED (fixp->fx_addsy)
14004 && !S_IS_EXTERNAL (fixp->fx_addsy))
14005 {
14006 /* Resolve size relocation against local symbol to size of
14007 the symbol plus addend. */
14008 valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
14009 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14010 && !fits_in_unsigned_long (value))
14011 as_bad_where (fixp->fx_file, fixp->fx_line,
14012 _("symbol size computation overflow"));
14013 fixp->fx_addsy = NULL;
14014 fixp->fx_subsy = NULL;
14015 md_apply_fix (fixp, (valueT *) &value, NULL);
14016 return NULL;
14017 }
14018 #endif
14019 /* Fall through. */
14020
14021 case BFD_RELOC_X86_64_PLT32:
14022 case BFD_RELOC_X86_64_GOT32:
14023 case BFD_RELOC_X86_64_GOTPCREL:
14024 case BFD_RELOC_X86_64_GOTPCRELX:
14025 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14026 case BFD_RELOC_386_PLT32:
14027 case BFD_RELOC_386_GOT32:
14028 case BFD_RELOC_386_GOT32X:
14029 case BFD_RELOC_386_GOTOFF:
14030 case BFD_RELOC_386_GOTPC:
14031 case BFD_RELOC_386_TLS_GD:
14032 case BFD_RELOC_386_TLS_LDM:
14033 case BFD_RELOC_386_TLS_LDO_32:
14034 case BFD_RELOC_386_TLS_IE_32:
14035 case BFD_RELOC_386_TLS_IE:
14036 case BFD_RELOC_386_TLS_GOTIE:
14037 case BFD_RELOC_386_TLS_LE_32:
14038 case BFD_RELOC_386_TLS_LE:
14039 case BFD_RELOC_386_TLS_GOTDESC:
14040 case BFD_RELOC_386_TLS_DESC_CALL:
14041 case BFD_RELOC_X86_64_TLSGD:
14042 case BFD_RELOC_X86_64_TLSLD:
14043 case BFD_RELOC_X86_64_DTPOFF32:
14044 case BFD_RELOC_X86_64_DTPOFF64:
14045 case BFD_RELOC_X86_64_GOTTPOFF:
14046 case BFD_RELOC_X86_64_TPOFF32:
14047 case BFD_RELOC_X86_64_TPOFF64:
14048 case BFD_RELOC_X86_64_GOTOFF64:
14049 case BFD_RELOC_X86_64_GOTPC32:
14050 case BFD_RELOC_X86_64_GOT64:
14051 case BFD_RELOC_X86_64_GOTPCREL64:
14052 case BFD_RELOC_X86_64_GOTPC64:
14053 case BFD_RELOC_X86_64_GOTPLT64:
14054 case BFD_RELOC_X86_64_PLTOFF64:
14055 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14056 case BFD_RELOC_X86_64_TLSDESC_CALL:
14057 case BFD_RELOC_RVA:
14058 case BFD_RELOC_VTABLE_ENTRY:
14059 case BFD_RELOC_VTABLE_INHERIT:
14060 #ifdef TE_PE
14061 case BFD_RELOC_32_SECREL:
14062 #endif
14063 code = fixp->fx_r_type;
14064 break;
14065 case BFD_RELOC_X86_64_32S:
14066 if (!fixp->fx_pcrel)
14067 {
14068 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
14069 code = fixp->fx_r_type;
14070 break;
14071 }
14072 /* Fall through. */
14073 default:
14074 if (fixp->fx_pcrel)
14075 {
14076 switch (fixp->fx_size)
14077 {
14078 default:
14079 as_bad_where (fixp->fx_file, fixp->fx_line,
14080 _("can not do %d byte pc-relative relocation"),
14081 fixp->fx_size);
14082 code = BFD_RELOC_32_PCREL;
14083 break;
14084 case 1: code = BFD_RELOC_8_PCREL; break;
14085 case 2: code = BFD_RELOC_16_PCREL; break;
14086 case 4: code = BFD_RELOC_32_PCREL; break;
14087 #ifdef BFD64
14088 case 8: code = BFD_RELOC_64_PCREL; break;
14089 #endif
14090 }
14091 }
14092 else
14093 {
14094 switch (fixp->fx_size)
14095 {
14096 default:
14097 as_bad_where (fixp->fx_file, fixp->fx_line,
14098 _("can not do %d byte relocation"),
14099 fixp->fx_size);
14100 code = BFD_RELOC_32;
14101 break;
14102 case 1: code = BFD_RELOC_8; break;
14103 case 2: code = BFD_RELOC_16; break;
14104 case 4: code = BFD_RELOC_32; break;
14105 #ifdef BFD64
14106 case 8: code = BFD_RELOC_64; break;
14107 #endif
14108 }
14109 }
14110 break;
14111 }
14112
14113 if ((code == BFD_RELOC_32
14114 || code == BFD_RELOC_32_PCREL
14115 || code == BFD_RELOC_X86_64_32S)
14116 && GOT_symbol
14117 && fixp->fx_addsy == GOT_symbol)
14118 {
14119 if (!object_64bit)
14120 code = BFD_RELOC_386_GOTPC;
14121 else
14122 code = BFD_RELOC_X86_64_GOTPC32;
14123 }
14124 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14125 && GOT_symbol
14126 && fixp->fx_addsy == GOT_symbol)
14127 {
14128 code = BFD_RELOC_X86_64_GOTPC64;
14129 }
14130
14131 rel = XNEW (arelent);
14132 rel->sym_ptr_ptr = XNEW (asymbol *);
14133 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14134
14135 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14136
14137 if (!use_rela_relocations)
14138 {
14139 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14140 vtable entry to be used in the relocation's section offset. */
14141 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14142 rel->address = fixp->fx_offset;
14143 #if defined (OBJ_COFF) && defined (TE_PE)
14144 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14145 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14146 else
14147 #endif
14148 rel->addend = 0;
14149 }
14150 /* Use the rela in 64bit mode. */
14151 else
14152 {
14153 if (disallow_64bit_reloc)
14154 switch (code)
14155 {
14156 case BFD_RELOC_X86_64_DTPOFF64:
14157 case BFD_RELOC_X86_64_TPOFF64:
14158 case BFD_RELOC_64_PCREL:
14159 case BFD_RELOC_X86_64_GOTOFF64:
14160 case BFD_RELOC_X86_64_GOT64:
14161 case BFD_RELOC_X86_64_GOTPCREL64:
14162 case BFD_RELOC_X86_64_GOTPC64:
14163 case BFD_RELOC_X86_64_GOTPLT64:
14164 case BFD_RELOC_X86_64_PLTOFF64:
14165 as_bad_where (fixp->fx_file, fixp->fx_line,
14166 _("cannot represent relocation type %s in x32 mode"),
14167 bfd_get_reloc_code_name (code));
14168 break;
14169 default:
14170 break;
14171 }
14172
14173 if (!fixp->fx_pcrel)
14174 rel->addend = fixp->fx_offset;
14175 else
14176 switch (code)
14177 {
14178 case BFD_RELOC_X86_64_PLT32:
14179 case BFD_RELOC_X86_64_GOT32:
14180 case BFD_RELOC_X86_64_GOTPCREL:
14181 case BFD_RELOC_X86_64_GOTPCRELX:
14182 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14183 case BFD_RELOC_X86_64_TLSGD:
14184 case BFD_RELOC_X86_64_TLSLD:
14185 case BFD_RELOC_X86_64_GOTTPOFF:
14186 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14187 case BFD_RELOC_X86_64_TLSDESC_CALL:
14188 rel->addend = fixp->fx_offset - fixp->fx_size;
14189 break;
14190 default:
14191 rel->addend = (section->vma
14192 - fixp->fx_size
14193 + fixp->fx_addnumber
14194 + md_pcrel_from (fixp));
14195 break;
14196 }
14197 }
14198
14199 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14200 if (rel->howto == NULL)
14201 {
14202 as_bad_where (fixp->fx_file, fixp->fx_line,
14203 _("cannot represent relocation type %s"),
14204 bfd_get_reloc_code_name (code));
14205 /* Set howto to a garbage value so that we can keep going. */
14206 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14207 gas_assert (rel->howto != NULL);
14208 }
14209
14210 return rel;
14211 }
14212
14213 #include "tc-i386-intel.c"
14214
14215 void
14216 tc_x86_parse_to_dw2regnum (expressionS *exp)
14217 {
14218 int saved_naked_reg;
14219 char saved_register_dot;
14220
14221 saved_naked_reg = allow_naked_reg;
14222 allow_naked_reg = 1;
14223 saved_register_dot = register_chars['.'];
14224 register_chars['.'] = '.';
14225 allow_pseudo_reg = 1;
14226 expression_and_evaluate (exp);
14227 allow_pseudo_reg = 0;
14228 register_chars['.'] = saved_register_dot;
14229 allow_naked_reg = saved_naked_reg;
14230
14231 if (exp->X_op == O_register && exp->X_add_number >= 0)
14232 {
14233 if ((addressT) exp->X_add_number < i386_regtab_size)
14234 {
14235 exp->X_op = O_constant;
14236 exp->X_add_number = i386_regtab[exp->X_add_number]
14237 .dw2_regnum[flag_code >> 1];
14238 }
14239 else
14240 exp->X_op = O_illegal;
14241 }
14242 }
14243
14244 void
14245 tc_x86_frame_initial_instructions (void)
14246 {
14247 static unsigned int sp_regno[2];
14248
14249 if (!sp_regno[flag_code >> 1])
14250 {
14251 char *saved_input = input_line_pointer;
14252 char sp[][4] = {"esp", "rsp"};
14253 expressionS exp;
14254
14255 input_line_pointer = sp[flag_code >> 1];
14256 tc_x86_parse_to_dw2regnum (&exp);
14257 gas_assert (exp.X_op == O_constant);
14258 sp_regno[flag_code >> 1] = exp.X_add_number;
14259 input_line_pointer = saved_input;
14260 }
14261
14262 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14263 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14264 }
14265
14266 int
14267 x86_dwarf2_addr_size (void)
14268 {
14269 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14270 if (x86_elf_abi == X86_64_X32_ABI)
14271 return 4;
14272 #endif
14273 return bfd_arch_bits_per_address (stdoutput) / 8;
14274 }
14275
14276 int
14277 i386_elf_section_type (const char *str, size_t len)
14278 {
14279 if (flag_code == CODE_64BIT
14280 && len == sizeof ("unwind") - 1
14281 && strncmp (str, "unwind", 6) == 0)
14282 return SHT_X86_64_UNWIND;
14283
14284 return -1;
14285 }
14286
#ifdef TE_SOLARIS
/* On Solaris, mark .eh_frame sections as SHT_X86_64_UNWIND in 64-bit
   mode, as required by the x86-64 psABI.  */
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code != CODE_64BIT)
    return;

  elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif
14295
#ifdef TE_PE
/* Emit a SIZE-byte section-relative (secrel) reference to SYMBOL,
   used for DWARF2 offsets in PE object files.  */
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS secrel_exp;

  secrel_exp.X_add_number = 0;
  secrel_exp.X_add_symbol = symbol;
  secrel_exp.X_op = O_secrel;

  emit_expr (&secrel_exp, size);
}
#endif
14308
14309 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14310 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
14311
14312 bfd_vma
14313 x86_64_section_letter (int letter, const char **ptr_msg)
14314 {
14315 if (flag_code == CODE_64BIT)
14316 {
14317 if (letter == 'l')
14318 return SHF_X86_64_LARGE;
14319
14320 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14321 }
14322 else
14323 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14324 return -1;
14325 }
14326
14327 bfd_vma
14328 x86_64_section_word (char *str, size_t len)
14329 {
14330 if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
14331 return SHF_X86_64_LARGE;
14332
14333 return -1;
14334 }
14335
14336 static void
14337 handle_large_common (int small ATTRIBUTE_UNUSED)
14338 {
14339 if (flag_code != CODE_64BIT)
14340 {
14341 s_comm_internal (0, elf_common_parse);
14342 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14343 }
14344 else
14345 {
14346 static segT lbss_section;
14347 asection *saved_com_section_ptr = elf_com_section_ptr;
14348 asection *saved_bss_section = bss_section;
14349
14350 if (lbss_section == NULL)
14351 {
14352 flagword applicable;
14353 segT seg = now_seg;
14354 subsegT subseg = now_subseg;
14355
14356 /* The .lbss section is for local .largecomm symbols. */
14357 lbss_section = subseg_new (".lbss", 0);
14358 applicable = bfd_applicable_section_flags (stdoutput);
14359 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14360 seg_info (lbss_section)->bss = 1;
14361
14362 subseg_set (seg, subseg);
14363 }
14364
14365 elf_com_section_ptr = &_bfd_elf_large_com_section;
14366 bss_section = lbss_section;
14367
14368 s_comm_internal (0, elf_common_parse);
14369
14370 elf_com_section_ptr = saved_com_section_ptr;
14371 bss_section = saved_bss_section;
14372 }
14373 }
14374 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */