X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gas%2Fconfig%2Ftc-arm.c;h=50a658b8c158bfad6965af857ff1f430d1ee1a74;hb=6610dc6daa661b7cd042bc6313a29859b87263d9;hp=80230e45d82c183d4a8971eea69509ebf5f714e1;hpb=6735952f7c0f5f1f69a94c2d92c26e452a196da6;p=deliverable%2Fbinutils-gdb.git diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 80230e45d8..50a658b8c1 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -155,10 +155,10 @@ static const arm_feature_set *object_arch = NULL; /* Constants for known architecture features. */ static const arm_feature_set fpu_default = FPU_DEFAULT; -static const arm_feature_set fpu_arch_vfp_v1 = FPU_ARCH_VFP_V1; +static const arm_feature_set fpu_arch_vfp_v1 ATTRIBUTE_UNUSED = FPU_ARCH_VFP_V1; static const arm_feature_set fpu_arch_vfp_v2 = FPU_ARCH_VFP_V2; -static const arm_feature_set fpu_arch_vfp_v3 = FPU_ARCH_VFP_V3; -static const arm_feature_set fpu_arch_neon_v1 = FPU_ARCH_NEON_V1; +static const arm_feature_set fpu_arch_vfp_v3 ATTRIBUTE_UNUSED = FPU_ARCH_VFP_V3; +static const arm_feature_set fpu_arch_neon_v1 ATTRIBUTE_UNUSED = FPU_ARCH_NEON_V1; static const arm_feature_set fpu_arch_fpa = FPU_ARCH_FPA; static const arm_feature_set fpu_any_hard = FPU_ANY_HARD; static const arm_feature_set fpu_arch_maverick = FPU_ARCH_MAVERICK; @@ -201,7 +201,8 @@ static const arm_feature_set arm_ext_v7r = ARM_FEATURE_CORE_LOW (ARM_EXT_V7R); static const arm_feature_set arm_ext_v7m = ARM_FEATURE_CORE_LOW (ARM_EXT_V7M); static const arm_feature_set arm_ext_v8 = ARM_FEATURE_CORE_LOW (ARM_EXT_V8); static const arm_feature_set arm_ext_m = - ARM_FEATURE_CORE (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M, ARM_EXT2_V8M); + ARM_FEATURE_CORE (ARM_EXT_V6M | ARM_EXT_OS | ARM_EXT_V7M, + ARM_EXT2_V8M | ARM_EXT2_V8M_MAIN); static const arm_feature_set arm_ext_mp = ARM_FEATURE_CORE_LOW (ARM_EXT_MP); static const arm_feature_set arm_ext_sec = ARM_FEATURE_CORE_LOW (ARM_EXT_SEC); static const arm_feature_set arm_ext_os = ARM_FEATURE_CORE_LOW (ARM_EXT_OS); @@ -209,11 +210,19 @@ static const arm_feature_set arm_ext_adiv = ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV); static const arm_feature_set arm_ext_virt = ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT); static const arm_feature_set arm_ext_pan = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN); static const arm_feature_set arm_ext_v8m = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M); +static const arm_feature_set arm_ext_v8m_main = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M_MAIN); +/* Instructions in ARMv8-M only found in M profile architectures. */ +static const arm_feature_set arm_ext_v8m_m_only = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8M | ARM_EXT2_V8M_MAIN); static const arm_feature_set arm_ext_v6t2_v8m = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V6T2_V8M); /* Instructions shared between ARMv8-A and ARMv8-M. */ static const arm_feature_set arm_ext_atomics = ARM_FEATURE_CORE_HIGH (ARM_EXT2_ATOMICS); +/* DSP instructions Tag_DSP_extension refers to. */ +static const arm_feature_set arm_ext_dsp = + ARM_FEATURE_CORE_LOW (ARM_EXT_V5E | ARM_EXT_V5ExP | ARM_EXT_V6_DSP); static const arm_feature_set arm_ext_v8_2 = ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_2A); /* FP16 instructions. 
*/ @@ -221,7 +230,7 @@ static const arm_feature_set arm_ext_fp16 = ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST); static const arm_feature_set arm_arch_any = ARM_ANY; -static const arm_feature_set arm_arch_full = ARM_FEATURE (-1, -1, -1); +static const arm_feature_set arm_arch_full ATTRIBUTE_UNUSED = ARM_FEATURE (-1, -1, -1); static const arm_feature_set arm_arch_t2 = ARM_ARCH_THUMB2; static const arm_feature_set arm_arch_none = ARM_ARCH_NONE; static const arm_feature_set arm_arch_v6m_only = ARM_ARCH_V6M_ONLY; @@ -271,7 +280,7 @@ static const arm_feature_set fpu_crypto_ext_armv8 = static const arm_feature_set crc_ext_armv8 = ARM_FEATURE_COPROC (CRC_EXT_ARMV8); static const arm_feature_set fpu_neon_ext_v8_1 = - ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8 | FPU_NEON_EXT_RDMA); + ARM_FEATURE_COPROC (FPU_NEON_EXT_RDMA); static int mfloat_abi_opt = -1; /* Record user cpu selection for object attributes. */ @@ -516,7 +525,7 @@ struct asm_barrier_opt struct reloc_entry { - char * name; + const char * name; bfd_reloc_code_real_type reloc; }; @@ -784,8 +793,10 @@ struct asm_opcode _("cannot use register index with PC-relative addressing") #define BAD_PC_WRITEBACK \ _("cannot use writeback with PC-relative addressing") -#define BAD_RANGE _("branch out of range") +#define BAD_RANGE _("branch out of range") +#define BAD_FP16 _("selected processor does not support fp16 instruction") #define UNPRED_REG(R) _("using " R " results in unpredictable behaviour") +#define THUMB1_RELOC_ONLY _("relocation valid in thumb1 code only") static struct hash_control * arm_ops_hsh; static struct hash_control * arm_cond_hsh; @@ -1075,7 +1086,7 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode) ??? The format of 12 byte floats is uncertain according to gcc's arm.h. */ -char * +const char * md_atof (int type, char * litP, int * sizeP) { int prec; @@ -1986,6 +1997,10 @@ parse_neon_el_struct_list (char **str, unsigned *pbase, const char *const incr_error = _("register stride must be 1 or 2"); const char *const type_error = _("mismatched element/structure types in list"); struct neon_typed_alias firsttype; + firsttype.defined = 0; + firsttype.eltype.type = NT_invtype; + firsttype.eltype.size = -1; + firsttype.index = -1; if (skip_past_char (&ptr, '{') == SUCCESS) leading_brace = 1; @@ -2178,7 +2193,7 @@ insert_reg_alias (char *str, unsigned number, int type) } name = xstrdup (str); - new_reg = (struct reg_entry *) xmalloc (sizeof (struct reg_entry)); + new_reg = XNEW (struct reg_entry); new_reg->name = name; new_reg->number = number; @@ -2206,8 +2221,7 @@ insert_neon_reg_alias (char *str, int number, int type, if (atype) { - reg->neon = (struct neon_typed_alias *) - xmalloc (sizeof (struct neon_typed_alias)); + reg->neon = XNEW (struct neon_typed_alias); *reg->neon = *atype; } } @@ -2253,9 +2267,7 @@ create_register_alias (char * newname, char *p) nlen = strlen (newname); #endif - nbuf = (char *) alloca (nlen + 1); - memcpy (nbuf, newname, nlen); - nbuf[nlen] = '\0'; + nbuf = xmemdup0 (newname, nlen); /* Create aliases under the new name as stated; an all-lowercase version of the new name; and an all-uppercase version of the new @@ -2277,7 +2289,10 @@ create_register_alias (char * newname, char *p) the artificial FOO alias because it has already been created by the first .req. 
*/ if (insert_reg_alias (nbuf, old->number, old->type) == NULL) - return TRUE; + { + free (nbuf); + return TRUE; + } } for (p = nbuf; *p; p++) @@ -2287,6 +2302,7 @@ create_register_alias (char * newname, char *p) insert_reg_alias (nbuf, old->number, old->type); } + free (nbuf); return TRUE; } @@ -2414,9 +2430,7 @@ create_neon_reg_alias (char *newname, char *p) namelen = strlen (newname); #endif - namebuf = (char *) alloca (namelen + 1); - strncpy (namebuf, newname, namelen); - namebuf[namelen] = '\0'; + namebuf = xmemdup0 (newname, namelen); insert_neon_reg_alias (namebuf, basereg->number, basetype, typeinfo.defined != 0 ? &typeinfo : NULL); @@ -2437,6 +2451,7 @@ create_neon_reg_alias (char *newname, char *p) insert_neon_reg_alias (namebuf, basereg->number, basetype, typeinfo.defined != 0 ? &typeinfo : NULL); + free (namebuf); return TRUE; } @@ -2752,8 +2767,9 @@ find_real_start (symbolS * symbolP) if (S_IS_LOCAL (symbolP) || name[0] == '.') return symbolP; - real_start = ACONCAT ((STUB_NAME, name, NULL)); + real_start = concat (STUB_NAME, name, NULL); new_target = symbol_find (real_start); + free (real_start); if (new_target == NULL) { @@ -3130,7 +3146,7 @@ find_or_make_literal_pool (void) if (pool == NULL) { /* Create a new pool. */ - pool = (literal_pool *) xmalloc (sizeof (* pool)); + pool = XNEW (literal_pool); if (! pool) return NULL; @@ -3268,6 +3284,7 @@ add_to_lit_pool (unsigned int nbytes) } pool->literals[entry] = inst.reloc.exp; + pool->literals[entry].X_op = O_constant; pool->literals[entry].X_add_number = 0; pool->literals[entry++].X_md = (PADDING_SLOT << 8) | 4; pool->next_free_entry += 1; @@ -3527,7 +3544,8 @@ s_arm_elf_cons (int nbytes) XXX Surely there is a cleaner way to do this. */ char *p = input_line_pointer; int offset; - char *save_buf = (char *) alloca (input_line_pointer - base); + char *save_buf = XNEWVEC (char, input_line_pointer - base); + memcpy (save_buf, base, input_line_pointer - base); memmove (base + (input_line_pointer - before_reloc), base, before_reloc - base); @@ -3541,6 +3559,7 @@ s_arm_elf_cons (int nbytes) memset (p, 0, nbytes); fix_new_exp (frag_now, p - frag_now->fr_literal + offset, size, &exp, 0, (enum bfd_reloc_code_real) reloc); + free (save_buf); } } } @@ -6084,6 +6103,16 @@ parse_cond (char **str) return c->value; } +/* Record a use of the given feature. */ +static void +record_feature_use (const arm_feature_set *feature) +{ + if (thumb_mode) + ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature); + else + ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature); +} + /* If the given feature available in the selected CPU, mark it as used. Returns TRUE iff feature is available. */ static bfd_boolean @@ -6095,10 +6124,7 @@ mark_feature_used (const arm_feature_set *feature) /* Add the appropriate architecture feature for the barrier option used. */ - if (thumb_mode) - ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used, *feature); - else - ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used, *feature); + record_feature_use (feature); return TRUE; } @@ -7263,6 +7289,26 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) #define rotate_left(v, n) (v << (n & 31) | v >> ((32 - n) & 31)) +/* If the current inst is scalar ARMv8.2 fp16 instruction, do special encoding. + + The only binary encoding difference is the Coprocessor number. Coprocessor + 9 is used for half-precision calculations or conversions. 
The format of the + instruction is the same as the equivalent Coprocessor 10 instuction that + exists for Single-Precision operation. */ + +static void +do_scalar_fp16_v82_encode (void) +{ + if (inst.cond != COND_ALWAYS) + as_warn (_("ARMv8.2 scalar fp16 instruction cannot be conditional," + " the behaviour is UNPREDICTABLE")); + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16), + _(BAD_FP16)); + + inst.instruction = (inst.instruction & 0xfffff0ff) | 0x900; + mark_feature_used (&arm_ext_fp16); +} + /* If VAL can be encoded in the immediate field of an ARM instruction, return the encoded form. Otherwise, return FAIL. */ @@ -8137,6 +8183,12 @@ do_rd (void) inst.instruction |= inst.operands[0].reg << 12; } +static void +do_rn (void) +{ + inst.instruction |= inst.operands[0].reg << 16; +} + static void do_rd_rm (void) { @@ -8287,6 +8339,9 @@ do_adrl (void) static void do_arit (void) { + constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC + && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC , + THUMB1_RELOC_ONLY); if (!inst.operands[1].present) inst.operands[1].reg = inst.operands[0].reg; inst.instruction |= inst.operands[0].reg << 12; @@ -8944,6 +8999,9 @@ do_mlas (void) static void do_mov (void) { + constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC + && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC , + THUMB1_RELOC_ONLY); inst.instruction |= inst.operands[0].reg << 12; encode_arm_shifter_operand (1); } @@ -10444,9 +10502,12 @@ do_t_add_sub (void) inst.instruction |= (Rd << 4) | Rs; if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC) - inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD; - if (inst.size_req != 2) - inst.relax = opcode; + { + if (inst.size_req == 2) + inst.reloc.type = BFD_RELOC_ARM_THUMB_ADD; + else + inst.relax = opcode; + } } else constraint (inst.size_req == 2, BAD_HIREG); @@ -10454,6 +10515,9 @@ do_t_add_sub (void) if (inst.size_req == 4 || (inst.size_req != 2 && !opcode)) { + constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC + && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC , + THUMB1_RELOC_ONLY); if (Rd == REG_PC) { constraint (add, BAD_PC); @@ -10922,7 +10986,7 @@ do_t_branch (void) { int opcode; int cond; - int reloc; + bfd_reloc_code_real_type reloc; cond = inst.cond; set_it_insn_type (IF_INSIDE_IT_LAST_INSN); @@ -11798,17 +11862,21 @@ do_t_mov_cmp (void) { inst.instruction = THUMB_OP16 (opcode); inst.instruction |= Rn << 8; - if (inst.size_req == 2) + if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC + || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC) { - if (inst.reloc.type < BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC - || inst.reloc.type > BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC) + if (inst.size_req == 2) inst.reloc.type = BFD_RELOC_ARM_THUMB_IMM; + else + inst.relax = opcode; } - else - inst.relax = opcode; } else { + constraint (inst.reloc.type >= BFD_RELOC_ARM_THUMB_ALU_ABS_G0_NC + && inst.reloc.type <= BFD_RELOC_ARM_THUMB_ALU_ABS_G3_NC , + THUMB1_RELOC_ONLY); + inst.instruction = THUMB_OP32 (inst.instruction); inst.instruction = (inst.instruction & 0xe1ffffff) | 0x10000000; inst.instruction |= Rn << r0off; @@ -12463,7 +12531,7 @@ do_t_push_pop (void) if (inst.size_req != 4 && (mask & ~0xff) == 0) inst.instruction = THUMB_OP16 (inst.instruction) | mask; else if (inst.size_req != 4 - && (mask & ~0xff) == (1 << (inst.instruction == T_MNEM_push + && (mask & ~0xff) == (1U << (inst.instruction == T_MNEM_push ? 
REG_LR : REG_PC))) { inst.instruction = THUMB_OP16 (inst.instruction); @@ -13252,7 +13320,19 @@ NEON_ENC_TAB X(2, (S, R), SINGLE), \ X(2, (R, S), SINGLE), \ X(2, (F, R), SINGLE), \ - X(2, (R, F), SINGLE) + X(2, (R, F), SINGLE), \ +/* Half float shape supported so far. */\ + X (2, (H, D), MIXED), \ + X (2, (D, H), MIXED), \ + X (2, (H, F), MIXED), \ + X (2, (F, H), MIXED), \ + X (2, (H, H), HALF), \ + X (2, (H, R), HALF), \ + X (2, (R, H), HALF), \ + X (2, (H, I), HALF), \ + X (3, (H, H, H), HALF), \ + X (3, (H, F, I), MIXED), \ + X (3, (F, H, I), MIXED) #define S2(A,B) NS_##A##B #define S3(A,B,C) NS_##A##B##C @@ -13273,6 +13353,7 @@ enum neon_shape enum neon_shape_class { + SC_HALF, SC_SINGLE, SC_DOUBLE, SC_QUAD, @@ -13290,6 +13371,7 @@ static enum neon_shape_class neon_shape_class[] = enum neon_shape_el { + SE_H, SE_F, SE_D, SE_Q, @@ -13302,6 +13384,7 @@ enum neon_shape_el /* Register widths of above. */ static unsigned neon_shape_el_size[] = { + 16, 32, 64, 128, @@ -13383,9 +13466,12 @@ enum neon_type_mask #define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64) #define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32) #define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64) -#define N_SUF_32 (N_SU_32 | N_F32) +#define N_S_32 (N_S8 | N_S16 | N_S32) +#define N_F_16_32 (N_F16 | N_F32) +#define N_SUF_32 (N_SU_32 | N_F_16_32) #define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64) -#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32) +#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F16 | N_F32) +#define N_F_ALL (N_F16 | N_F32 | N_F64) /* Pass this as the first type argument to neon_check_type to ignore types altogether. */ @@ -13427,11 +13513,56 @@ neon_select_shape (enum neon_shape shape, ...) switch (neon_shape_tab[shape].el[j]) { + /* If a .f16, .16, .u16, .s16 type specifier is given over + a VFP single precision register operand, it's essentially + means only half of the register is used. + + If the type specifier is given after the mnemonics, the + information is stored in inst.vectype. If the type specifier + is given after register operand, the information is stored + in inst.operands[].vectype. + + When there is only one type specifier, and all the register + operands are the same type of hardware register, the type + specifier applies to all register operands. + + If no type specifier is given, the shape is inferred from + operand information. 
+ + for example: + vadd.f16 s0, s1, s2: NS_HHH + vabs.f16 s0, s1: NS_HH + vmov.f16 s0, r1: NS_HR + vmov.f16 r0, s1: NS_RH + vcvt.f16 r0, s1: NS_RH + vcvt.f16.s32 s2, s2, #29: NS_HFI + vcvt.f16.s32 s2, s2: NS_HF + */ + case SE_H: + if (!(inst.operands[j].isreg + && inst.operands[j].isvec + && inst.operands[j].issingle + && !inst.operands[j].isquad + && ((inst.vectype.elems == 1 + && inst.vectype.el[0].size == 16) + || (inst.vectype.elems > 1 + && inst.vectype.el[j].size == 16) + || (inst.vectype.elems == 0 + && inst.operands[j].vectype.type != NT_invtype + && inst.operands[j].vectype.size == 16)))) + matches = 0; + break; + case SE_F: if (!(inst.operands[j].isreg && inst.operands[j].isvec && inst.operands[j].issingle - && !inst.operands[j].isquad)) + && !inst.operands[j].isquad + && ((inst.vectype.elems == 1 && inst.vectype.el[0].size == 32) + || (inst.vectype.elems > 1 && inst.vectype.el[j].size == 32) + || (inst.vectype.elems == 0 + && (inst.operands[j].vectype.size == 32 + || inst.operands[j].vectype.type == NT_invtype))))) matches = 0; break; @@ -13647,7 +13778,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size, *type = NT_untyped; else if ((mask & (N_P8 | N_P16 | N_P64)) != 0) *type = NT_poly; - else if ((mask & (N_F16 | N_F32 | N_F64)) != 0) + else if ((mask & (N_F_ALL)) != 0) *type = NT_float; else return FAIL; @@ -13809,6 +13940,15 @@ neon_check_type (unsigned els, enum neon_shape ns, ...) k_type = g_type; k_size = g_size; key_allowed = thisarg & ~N_KEY; + + /* Check architecture constraint on FP16 extension. */ + if (k_size == 16 + && k_type == NT_float + && ! ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)) + { + inst.error = _(BAD_FP16); + return badtype; + } } } else @@ -13835,6 +13975,18 @@ neon_check_type (unsigned els, enum neon_shape ns, ...) else match = g_size; + /* FP16 will use a single precision register. */ + if (regwidth == 32 && match == 16) + { + if (ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16)) + match = regwidth; + else + { + inst.error = _(BAD_FP16); + return badtype; + } + } + if (regwidth != match) { first_error (_("operand size must match register width")); @@ -13926,12 +14078,16 @@ do_vfp_nsyn_add_sub (enum neon_shape rs) { int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd; - if (rs == NS_FFF) + if (rs == NS_FFF || rs == NS_HHH) { if (is_add) do_vfp_nsyn_opcode ("fadds"); else do_vfp_nsyn_opcode ("fsubs"); + + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); } else { @@ -13954,15 +14110,14 @@ try_vfp_nsyn (int args, void (*pfn) (enum neon_shape)) switch (args) { case 2: - rs = neon_select_shape (NS_FF, NS_DD, NS_NULL); - et = neon_check_type (2, rs, - N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP); + rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL); + et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP); break; case 3: - rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL); - et = neon_check_type (3, rs, - N_EQK | N_VFP, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP); + rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL); + et = neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP, + N_F_ALL | N_KEY | N_VFP); break; default: @@ -13984,12 +14139,16 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs) { int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla; - if (rs == NS_FFF) + if (rs == NS_FFF || rs == NS_HHH) { if (is_mla) do_vfp_nsyn_opcode ("fmacs"); else do_vfp_nsyn_opcode ("fnmacs"); + + /* ARMv8.2 fp16 instruction. 
*/ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); } else { @@ -14005,12 +14164,16 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs) { int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma; - if (rs == NS_FFF) + if (rs == NS_FFF || rs == NS_HHH) { if (is_fma) do_vfp_nsyn_opcode ("ffmas"); else do_vfp_nsyn_opcode ("ffnmas"); + + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); } else { @@ -14024,8 +14187,14 @@ do_vfp_nsyn_fma_fms (enum neon_shape rs) static void do_vfp_nsyn_mul (enum neon_shape rs) { - if (rs == NS_FFF) - do_vfp_nsyn_opcode ("fmuls"); + if (rs == NS_FFF || rs == NS_HHH) + { + do_vfp_nsyn_opcode ("fmuls"); + + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); + } else do_vfp_nsyn_opcode ("fmuld"); } @@ -14034,14 +14203,18 @@ static void do_vfp_nsyn_abs_neg (enum neon_shape rs) { int is_neg = (inst.instruction & 0x80) != 0; - neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_VFP | N_KEY); + neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_VFP | N_KEY); - if (rs == NS_FF) + if (rs == NS_FF || rs == NS_HH) { if (is_neg) do_vfp_nsyn_opcode ("fnegs"); else do_vfp_nsyn_opcode ("fabss"); + + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HH) + do_scalar_fp16_v82_encode (); } else { @@ -14078,11 +14251,17 @@ do_vfp_nsyn_ldm_stm (int is_dbmode) static void do_vfp_nsyn_sqrt (void) { - enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL); - neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP); + enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL); + neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP); + + if (rs == NS_FF || rs == NS_HH) + { + do_vfp_nsyn_opcode ("fsqrts"); - if (rs == NS_FF) - do_vfp_nsyn_opcode ("fsqrts"); + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HH) + do_scalar_fp16_v82_encode (); + } else do_vfp_nsyn_opcode ("fsqrtd"); } @@ -14090,12 +14269,18 @@ do_vfp_nsyn_sqrt (void) static void do_vfp_nsyn_div (void) { - enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL); neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP, - N_F32 | N_F64 | N_KEY | N_VFP); + N_F_ALL | N_KEY | N_VFP); + + if (rs == NS_FFF || rs == NS_HHH) + { + do_vfp_nsyn_opcode ("fdivs"); - if (rs == NS_FFF) - do_vfp_nsyn_opcode ("fdivs"); + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); + } else do_vfp_nsyn_opcode ("fdivd"); } @@ -14103,14 +14288,18 @@ do_vfp_nsyn_div (void) static void do_vfp_nsyn_nmul (void) { - enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_HHH, NS_FFF, NS_DDD, NS_NULL); neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP, - N_F32 | N_F64 | N_KEY | N_VFP); + N_F_ALL | N_KEY | N_VFP); - if (rs == NS_FFF) + if (rs == NS_FFF || rs == NS_HHH) { NEON_ENCODE (SINGLE, inst); do_vfp_sp_dyadic (); + + /* ARMv8.2 fp16 instruction. 
*/ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); } else { @@ -14118,17 +14307,19 @@ do_vfp_nsyn_nmul (void) do_vfp_dp_rd_rn_rm (); } do_vfp_cond_or_thumb (); + } static void do_vfp_nsyn_cmp (void) { + enum neon_shape rs; if (inst.operands[1].isreg) { - enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL); - neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP); + rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_NULL); + neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY | N_VFP); - if (rs == NS_FF) + if (rs == NS_FF || rs == NS_HH) { NEON_ENCODE (SINGLE, inst); do_vfp_sp_monadic (); @@ -14141,8 +14332,8 @@ do_vfp_nsyn_cmp (void) } else { - enum neon_shape rs = neon_select_shape (NS_FI, NS_DI, NS_NULL); - neon_check_type (2, rs, N_F32 | N_F64 | N_KEY | N_VFP, N_EQK); + rs = neon_select_shape (NS_HI, NS_FI, NS_DI, NS_NULL); + neon_check_type (2, rs, N_F_ALL | N_KEY | N_VFP, N_EQK); switch (inst.instruction & 0x0fffffff) { @@ -14156,7 +14347,7 @@ do_vfp_nsyn_cmp (void) abort (); } - if (rs == NS_FI) + if (rs == NS_FI || rs == NS_HI) { NEON_ENCODE (SINGLE, inst); do_vfp_sp_compare_z (); @@ -14168,6 +14359,10 @@ do_vfp_nsyn_cmp (void) } } do_vfp_cond_or_thumb (); + + /* ARMv8.2 fp16 instruction. */ + if (rs == NS_HI || rs == NS_HH) + do_scalar_fp16_v82_encode (); } static void @@ -14565,7 +14760,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types, if (et.type == NT_float) { NEON_ENCODE (FLOAT, inst); - neon_three_same (neon_quad (rs), 0, -1); + neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1); } else { @@ -14680,13 +14875,15 @@ do_neon_addsub_if_i (void) static void neon_exchange_operands (void) { - void *scratch = alloca (sizeof (inst.operands[0])); if (inst.operands[1].present) { + void *scratch = xmalloc (sizeof (inst.operands[0])); + /* Swap operands[1] and operands[2]. */ memcpy (scratch, &inst.operands[1], sizeof (inst.operands[0])); inst.operands[1] = inst.operands[2]; memcpy (&inst.operands[2], scratch, sizeof (inst.operands[0])); + free (scratch); } else { @@ -14726,13 +14923,13 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert) static void do_neon_cmp (void) { - neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE); + neon_compare (N_SUF_32, N_S_32 | N_F_16_32, FALSE); } static void do_neon_cmp_inv (void) { - neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE); + neon_compare (N_SUF_32, N_S_32 | N_F_16_32, TRUE); } static void @@ -14811,7 +15008,7 @@ do_neon_mac_maybe_scalar (void) { enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); struct neon_type_el et = neon_check_type (3, rs, - N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY); + N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY); NEON_ENCODE (SCALAR, inst); neon_mul_mac (et, neon_quad (rs)); } @@ -14860,7 +15057,7 @@ do_neon_mul (void) if (inst.operands[2].isscalar) do_neon_mac_maybe_scalar (); else - neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0); + neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0); } static void @@ -14885,13 +15082,46 @@ do_neon_qdmulh (void) } } +static void +do_neon_qrdmlah (void) +{ + /* Check we're on the correct architecture. 
*/ + if (!mark_feature_used (&fpu_neon_ext_armv8)) + inst.error = + _("instruction form not available on this architecture."); + else if (!mark_feature_used (&fpu_neon_ext_v8_1)) + { + as_warn (_("this instruction implies use of ARMv8.1 AdvSIMD.")); + record_feature_use (&fpu_neon_ext_v8_1); + } + + if (inst.operands[2].isscalar) + { + enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + NEON_ENCODE (SCALAR, inst); + neon_mul_mac (et, neon_quad (rs)); + } + else + { + enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + struct neon_type_el et = neon_check_type (3, rs, + N_EQK, N_EQK, N_S16 | N_S32 | N_KEY); + NEON_ENCODE (INTEGER, inst); + /* The U bit (rounding) comes from bit mask. */ + neon_three_same (neon_quad (rs), 0, et.size); + } +} + static void do_neon_fcmp_absolute (void) { enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); - neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); /* Size field comes from bit mask. */ - neon_three_same (neon_quad (rs), 1, -1); + neon_three_same (neon_quad (rs), 1, et.size == 16 ? (int) et.size : -1); } static void @@ -14905,8 +15135,9 @@ static void do_neon_step (void) { enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); - neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY); - neon_three_same (neon_quad (rs), 0, -1); + struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, + N_F_16_32 | N_KEY); + neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1); } static void @@ -14922,7 +15153,7 @@ do_neon_abs_neg (void) return; rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); - et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY); + et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY); inst.instruction |= LOW4 (inst.operands[0].reg) << 12; inst.instruction |= HI1 (inst.operands[0].reg) << 22; @@ -15131,8 +15362,19 @@ do_neon_shll (void) CVT_VAR (f32_s32, N_F32, N_S32, whole_reg, "fsltos", "fsitos", NULL) \ CVT_VAR (f32_u32, N_F32, N_U32, whole_reg, "fultos", "fuitos", NULL) \ /* Half-precision conversions. */ \ + CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL) \ + CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL) \ CVT_VAR (f32_f16, N_F32, N_F16, whole_reg, NULL, NULL, NULL) \ CVT_VAR (f16_f32, N_F16, N_F32, whole_reg, NULL, NULL, NULL) \ + /* New VCVT instructions introduced by ARMv8.2 fp16 extension. \ + Compared with single/double precision variants, only the co-processor \ + field is different, so the encoding flow is reused here. */ \ + CVT_VAR (f16_s32, N_F16 | N_KEY, N_S32, N_VFP, "fsltos", "fsitos", NULL) \ + CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL) \ + CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\ + CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\ /* VFP instructions. 
*/ \ CVT_VAR (f32_f64, N_F32, N_F64, N_VFP, NULL, "fcvtsd", NULL) \ CVT_VAR (f64_f32, N_F64, N_F32, N_VFP, NULL, "fcvtds", NULL) \ @@ -15207,7 +15449,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour) { const char *opname = 0; - if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI) + if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI + || rs == NS_FHI || rs == NS_HFI) { /* Conversions with immediate bitshift. */ const char *enc[] = @@ -15244,12 +15487,19 @@ do_vfp_nsyn_cvt (enum neon_shape rs, enum neon_cvt_flavour flavour) if (opname) do_vfp_nsyn_opcode (opname); + + /* ARMv8.2 fp16 VCVT instruction. */ + if (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16 + || flavour == neon_cvt_flavour_f16_u32 + || flavour == neon_cvt_flavour_f16_s32) + do_scalar_fp16_v82_encode (); } static void do_vfp_nsyn_cvtz (void) { - enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_FH, NS_FF, NS_FD, NS_NULL); enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); const char *enc[] = { @@ -15277,6 +15527,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), _(BAD_FPU)); + if (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16) + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16), + _(BAD_FP16)); + set_it_insn_type (OUTSIDE_IT_INSN); switch (flavour) @@ -15289,6 +15544,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, sz = 0; op = 1; break; + case neon_cvt_flavour_s32_f16: + sz = 0; + op = 1; + break; case neon_cvt_flavour_u32_f64: sz = 1; op = 0; @@ -15297,6 +15556,10 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, sz = 0; op = 0; break; + case neon_cvt_flavour_u32_f16: + sz = 0; + op = 0; + break; default: first_error (_("invalid instruction shape")); return; @@ -15315,6 +15578,11 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour, encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); encode_arm_vfp_reg (inst.operands[1].reg, sz == 1 ? VFP_REG_Dm : VFP_REG_Sm); inst.instruction |= sz << 8; + + /* ARMv8.2 fp16 VCVT instruction. */ + if (flavour == neon_cvt_flavour_s32_f16 + ||flavour == neon_cvt_flavour_u32_f16) + do_scalar_fp16_v82_encode (); inst.instruction |= op << 7; inst.instruction |= rm << 16; inst.instruction |= 0xf0000000; @@ -15325,13 +15593,20 @@ static void do_neon_cvt_1 (enum neon_cvt_mode mode) { enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ, - NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL); + NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, + NS_FH, NS_HF, NS_FHI, NS_HFI, + NS_NULL); enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs); + if (flavour == neon_cvt_flavour_invalid) + return; + /* PR11109: Handle round-to-zero for VCVT conversions. */ if (mode == neon_cvt_mode_z && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2) - && (flavour == neon_cvt_flavour_s32_f32 + && (flavour == neon_cvt_flavour_s16_f16 + || flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s32_f32 || flavour == neon_cvt_flavour_u32_f32 || flavour == neon_cvt_flavour_s32_f64 || flavour == neon_cvt_flavour_u32_f64) @@ -15341,6 +15616,18 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) return; } + /* ARMv8.2 fp16 VCVT conversions. 
*/ + if (mode == neon_cvt_mode_z + && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16) + && (flavour == neon_cvt_flavour_s32_f16 + || flavour == neon_cvt_flavour_u32_f16) + && (rs == NS_FH)) + { + do_vfp_nsyn_cvtz (); + do_scalar_fp16_v82_encode (); + return; + } + /* VFP rather than Neon conversions. */ if (flavour >= neon_cvt_flavour_first_fp) { @@ -15358,7 +15645,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) case NS_QQI: { unsigned immbits; - unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 }; + unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000, + 0x0000100, 0x1000100, 0x0, 0x1000000}; if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) return; @@ -15367,7 +15655,6 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) integer conversion. */ if (inst.operands[2].present && inst.operands[2].imm == 0) goto int_encode; - immbits = 32 - inst.operands[2].imm; NEON_ENCODE (IMMED, inst); if (flavour != neon_cvt_flavour_invalid) inst.instruction |= enctab[flavour]; @@ -15377,7 +15664,19 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; inst.instruction |= 1 << 21; - inst.instruction |= immbits << 16; + if (flavour < neon_cvt_flavour_s16_f16) + { + inst.instruction |= 1 << 21; + immbits = 32 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + } + else + { + inst.instruction |= 3 << 20; + immbits = 16 - inst.operands[2].imm; + inst.instruction |= immbits << 16; + inst.instruction &= ~(1 << 9); + } neon_dp_fixup (&inst); } @@ -15398,8 +15697,14 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= (flavour == neon_cvt_flavour_u32_f32) << 7; + inst.instruction |= (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_u32_f32) << 7; inst.instruction |= mode << 8; + if (flavour == neon_cvt_flavour_u16_f16 + || flavour == neon_cvt_flavour_s16_f16) + /* Mask off the original size bits and reencode them. */ + inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18)); + if (thumb_mode) inst.instruction |= 0xfc000000; else @@ -15409,7 +15714,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) { int_encode: { - unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 }; + unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080, + 0x100, 0x180, 0x0, 0x080}; NEON_ENCODE (INTEGER, inst); @@ -15424,7 +15730,12 @@ do_neon_cvt_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; - inst.instruction |= 2 << 18; + if (flavour >= neon_cvt_flavour_s16_f16 + && flavour <= neon_cvt_flavour_f16_u16) + /* Half precision. 
*/ + inst.instruction |= 1 << 18; + else + inst.instruction |= 2 << 18; neon_dp_fixup (&inst); } @@ -15525,7 +15836,8 @@ do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double) static void do_neon_cvttb_1 (bfd_boolean t) { - enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_DF, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_HF, NS_HD, NS_FH, NS_FF, NS_FD, + NS_DF, NS_DH, NS_NULL); if (rs == NS_NULL) return; @@ -15905,8 +16217,9 @@ static void do_neon_mov (void) { enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD, - NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, NS_RF, NS_FR, - NS_NULL); + NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, + NS_RS, NS_FF, NS_FI, NS_RF, NS_FR, + NS_HR, NS_RH, NS_HI, NS_NULL); struct neon_type_el et; const char *ldconst = 0; @@ -16084,6 +16397,7 @@ do_neon_mov (void) do_vfp_nsyn_opcode ("fcpys"); break; + case NS_HI: case NS_FI: /* case 10 (fconsts). */ ldconst = "fconsts"; encode_fconstd: @@ -16091,17 +16405,29 @@ do_neon_mov (void) { inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm); do_vfp_nsyn_opcode (ldconst); + + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_HI) + do_scalar_fp16_v82_encode (); } else first_error (_("immediate out of range")); break; + case NS_RH: case NS_RF: /* case 12 (fmrs). */ do_vfp_nsyn_opcode ("fmrs"); + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_RH) + do_scalar_fp16_v82_encode (); break; + case NS_HR: case NS_FR: /* case 13 (fmsr). */ do_vfp_nsyn_opcode ("fmsr"); + /* ARMv8.2 fp16 vmov.f16 instruction. */ + if (rs == NS_HR) + do_scalar_fp16_v82_encode (); break; /* The encoders for the fmrrs and fmsrr instructions expect three operands @@ -16157,6 +16483,21 @@ do_neon_rshift_round_imm (void) et.size - imm); } +static void +do_neon_movhf (void) +{ + enum neon_shape rs = neon_select_shape (NS_HH, NS_NULL); + constraint (rs != NS_HH, _("invalid suffix")); + + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), + _(BAD_FPU)); + + do_vfp_sp_monadic (); + + inst.is_neon = 1; + inst.instruction |= 0xf0000000; +} + static void do_neon_movl (void) { @@ -16217,7 +16558,7 @@ do_neon_recip_est (void) { enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL); struct neon_type_el et = neon_check_type (2, rs, - N_EQK | N_FLT, N_F32 | N_U32 | N_KEY); + N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY); inst.instruction |= (et.type == NT_float) << 8; neon_two_same (neon_quad (rs), 1, et.size); } @@ -16333,6 +16674,10 @@ do_neon_ldr_str (void) do_vfp_nsyn_opcode ("flds"); else do_vfp_nsyn_opcode ("fsts"); + + /* ARMv8.2 vldr.16/vstr.16 instruction. */ + if (inst.vectype.el[0].size == 16) + do_scalar_fp16_v82_encode (); } else { @@ -16418,18 +16763,18 @@ do_neon_ld_st_interleave (void) values, terminated with -1. */ static int -neon_alignment_bit (int size, int align, int *do_align, ...) +neon_alignment_bit (int size, int align, int *do_alignment, ...) { va_list ap; int result = FAIL, thissize, thisalign; if (!inst.operands[1].immisalign) { - *do_align = 0; + *do_alignment = 0; return SUCCESS; } - va_start (ap, do_align); + va_start (ap, do_alignment); do { @@ -16446,7 +16791,7 @@ neon_alignment_bit (int size, int align, int *do_align, ...) 
va_end (ap); if (result == SUCCESS) - *do_align = 1; + *do_alignment = 1; else first_error (_("unsupported alignment for instruction")); @@ -16457,7 +16802,7 @@ static void do_neon_ld_st_lane (void) { struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); - int align_good, do_align = 0; + int align_good, do_alignment = 0; int logsize = neon_logbits (et.size); int align = inst.operands[1].imm >> 8; int n = (inst.instruction >> 8) & 3; @@ -16477,11 +16822,11 @@ do_neon_ld_st_lane (void) switch (n) { case 0: /* VLD1 / VST1. */ - align_good = neon_alignment_bit (et.size, align, &do_align, 16, 16, + align_good = neon_alignment_bit (et.size, align, &do_alignment, 16, 16, 32, 32, -1); if (align_good == FAIL) return; - if (do_align) + if (do_alignment) { unsigned alignbits = 0; switch (et.size) @@ -16495,11 +16840,11 @@ do_neon_ld_st_lane (void) break; case 1: /* VLD2 / VST2. */ - align_good = neon_alignment_bit (et.size, align, &do_align, 8, 16, 16, 32, - 32, 64, -1); + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 16, + 16, 32, 32, 64, -1); if (align_good == FAIL) return; - if (do_align) + if (do_alignment) inst.instruction |= 1 << 4; break; @@ -16509,11 +16854,11 @@ do_neon_ld_st_lane (void) break; case 3: /* VLD4 / VST4. */ - align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32, + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, 16, 64, 32, 64, 32, 128, -1); if (align_good == FAIL) return; - if (do_align) + if (do_alignment) { unsigned alignbits = 0; switch (et.size) @@ -16544,7 +16889,7 @@ static void do_neon_ld_dup (void) { struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32); - int align_good, do_align = 0; + int align_good, do_alignment = 0; if (et.type == NT_invtype) return; @@ -16554,7 +16899,7 @@ do_neon_ld_dup (void) case 0: /* VLD1. */ gas_assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2); align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, - &do_align, 16, 16, 32, 32, -1); + &do_alignment, 16, 16, 32, 32, -1); if (align_good == FAIL) return; switch (NEON_REGLIST_LENGTH (inst.operands[0].imm)) @@ -16568,7 +16913,8 @@ do_neon_ld_dup (void) case 1: /* VLD2. */ align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8, - &do_align, 8, 16, 16, 32, 32, 64, -1); + &do_alignment, 8, 16, 16, 32, 32, 64, + -1); if (align_good == FAIL) return; constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2, @@ -16591,7 +16937,7 @@ do_neon_ld_dup (void) case 3: /* VLD4. */ { int align = inst.operands[1].imm >> 8; - align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32, + align_good = neon_alignment_bit (et.size, align, &do_alignment, 8, 32, 16, 64, 32, 64, 32, 128, -1); if (align_good == FAIL) return; @@ -16609,7 +16955,7 @@ do_neon_ld_dup (void) default: ; } - inst.instruction |= do_align << 4; + inst.instruction |= do_alignment << 4; } /* Disambiguate VLD and VST instructions, and fill in common bits (those @@ -16690,8 +17036,14 @@ do_vfp_nsyn_fpv8 (enum neon_shape rs) NEON_ENCODE (FPV8, inst); - if (rs == NS_FFF) - do_vfp_sp_dyadic (); + if (rs == NS_FFF || rs == NS_HHH) + { + do_vfp_sp_dyadic (); + + /* ARMv8.2 fp16 instruction. 
*/ + if (rs == NS_HHH) + do_scalar_fp16_v82_encode (); + } else do_vfp_dp_rd_rn_rm (); @@ -16721,13 +17073,13 @@ do_vmaxnm (void) if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL) return; - neon_dyadic_misc (NT_untyped, N_F32, 0); + neon_dyadic_misc (NT_untyped, N_F_16_32, 0); } static void do_vrint_1 (enum neon_cvt_mode mode) { - enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_QQ, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_HH, NS_FF, NS_DD, NS_QQ, NS_NULL); struct neon_type_el et; if (rs == NS_NULL) @@ -16739,7 +17091,8 @@ do_vrint_1 (enum neon_cvt_mode mode) constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8), _(BAD_FPU)); - et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP); + et = neon_check_type (2, rs, N_EQK | N_VFP, N_F_ALL | N_KEY + | N_VFP); if (et.type != NT_invtype) { /* VFP encodings. */ @@ -16748,7 +17101,7 @@ do_vrint_1 (enum neon_cvt_mode mode) set_it_insn_type (OUTSIDE_IT_INSN); NEON_ENCODE (FPV8, inst); - if (rs == NS_FF) + if (rs == NS_FF || rs == NS_HH) do_vfp_sp_monadic (); else do_vfp_dp_rd_rm (); @@ -16767,12 +17120,16 @@ do_vrint_1 (enum neon_cvt_mode mode) inst.instruction |= (rs == NS_DD) << 8; do_vfp_cond_or_thumb (); + + /* ARMv8.2 fp16 vrint instruction. */ + if (rs == NS_HH) + do_scalar_fp16_v82_encode (); } else { /* Neon encodings (or something broken...). */ inst.error = NULL; - et = neon_check_type (2, rs, N_EQK, N_F32 | N_KEY); + et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY); if (et.type == NT_invtype) return; @@ -16788,6 +17145,10 @@ do_vrint_1 (enum neon_cvt_mode mode) inst.instruction |= LOW4 (inst.operands[1].reg); inst.instruction |= HI1 (inst.operands[1].reg) << 5; inst.instruction |= neon_quad (rs) << 6; + /* Mask off the original size bits and reencode them. */ + inst.instruction = ((inst.instruction & 0xfff3ffff) + | neon_logbits (et.size) << 18); + switch (mode) { case neon_cvt_mode_z: inst.instruction |= 3 << 7; break; @@ -17840,8 +18201,8 @@ known_t32_only_insn (const struct asm_opcode *opcode) || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_barrier)) return TRUE; - /* Wide-only instruction added to ARMv8-M. */ - if (ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_v8m) + /* Wide-only instruction added to ARMv8-M Baseline. */ + if (ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_v8m_m_only) || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_atomics) || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_v6t2_v8m) || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_div)) @@ -18417,14 +18778,16 @@ static const struct asm_psr v7m_psrs[] = {"ipsr", 5 }, {"IPSR", 5 }, {"epsr", 6 }, {"EPSR", 6 }, {"iepsr", 7 }, {"IEPSR", 7 }, - {"msp", 8 }, {"MSP", 8 }, - {"psp", 9 }, {"PSP", 9 }, + {"msp", 8 }, {"MSP", 8 }, {"msp_s", 8 }, {"MSP_S", 8 }, + {"psp", 9 }, {"PSP", 9 }, {"psp_s", 9 }, {"PSP_S", 9 }, {"primask", 16}, {"PRIMASK", 16}, {"basepri", 17}, {"BASEPRI", 17}, {"basepri_max", 18}, {"BASEPRI_MAX", 18}, {"basepri_max", 18}, {"BASEPRI_MASK", 18}, /* Typo, preserved for backwards compatibility. */ {"faultmask", 19}, {"FAULTMASK", 19}, - {"control", 20}, {"CONTROL", 20} + {"control", 20}, {"CONTROL", 20}, + {"msp_ns", 0x88}, {"MSP_NS", 0x88}, + {"psp_ns", 0x89}, {"PSP_NS", 0x89} }; /* Table of all shift-in-operand names. 
*/ @@ -19971,6 +20334,15 @@ static const struct asm_opcode insns[] = NCE(vmov, 0, 1, (VMOV), neon_mov), NCE(vmovq, 0, 1, (VMOV), neon_mov), +#undef ARM_VARIANT +#define ARM_VARIANT & arm_ext_fp16 +#undef THUMB_VARIANT +#define THUMB_VARIANT & arm_ext_fp16 + /* New instructions added from v8.2, allowing the extraction and insertion of + the upper 16 bits of a 32-bit vector register. */ + NCE (vmovx, eb00a40, 2, (RVS, RVS), neon_movhf), + NCE (vins, eb00ac0, 2, (RVS, RVS), neon_movhf), + #undef THUMB_VARIANT #define THUMB_VARIANT & fpu_neon_ext_v1 #undef ARM_VARIANT @@ -20020,7 +20392,7 @@ static const struct asm_opcode insns[] = NUF(vbitq, 1200110, 3, (RNQ, RNQ, RNQ), neon_bitfield), NUF(vbif, 1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield), NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield), - /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32. */ + /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32. */ nUF(vabd, _vabd, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), nUF(vabdq, _vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su), nUF(vmax, _vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su), @@ -20073,10 +20445,10 @@ static const struct asm_opcode insns[] = NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step), NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step), /* ARM v8.1 extension. */ - nUF(vqrdmlah, _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), - nUF(vqrdmlahq, _vqrdmlah, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), - nUF(vqrdmlsh, _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh), - nUF(vqrdmlshq, _vqrdmlsh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh), + nUF (vqrdmlah, _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah), + nUF (vqrdmlahq, _vqrdmlah, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah), + nUF (vqrdmlsh, _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qrdmlah), + nUF (vqrdmlshq, _vqrdmlsh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qrdmlah), /* Two address, int/float. Types S8 S16 S32 F32. */ NUF(vabsq, 1b10300, 2, (RNQ, RNQ), neon_abs_neg), @@ -20183,7 +20555,7 @@ static const struct asm_opcode insns[] = NUF(vpadalq, 1b00600, 2, (RNQ, RNQ), neon_pair_long), NUF(vpaddl, 1b00200, 2, (RNDQ, RNDQ), neon_pair_long), NUF(vpaddlq, 1b00200, 2, (RNQ, RNQ), neon_pair_long), - /* Reciprocal estimates. Types U32 F32. */ + /* Reciprocal estimates. Types U32 F16 F32. */ NUF(vrecpe, 1b30400, 2, (RNDQ, RNDQ), neon_recip_est), NUF(vrecpeq, 1b30400, 2, (RNQ, RNQ), neon_recip_est), NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est), @@ -20590,12 +20962,25 @@ static const struct asm_opcode insns[] = cCE("cfmadda32", e200600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad), cCE("cfmsuba32", e300600, 4, (RMAX, RMAX, RMFX, RMFX), mav_quad), + /* ARMv8-M instructions. */ #undef ARM_VARIANT #define ARM_VARIANT NULL #undef THUMB_VARIANT #define THUMB_VARIANT & arm_ext_v8m + TUE("sg", 0, e97fe97f, 0, (), 0, noargs), + TUE("blxns", 0, 4784, 1, (RRnpc), 0, t_blx), + TUE("bxns", 0, 4704, 1, (RRnpc), 0, t_bx), TUE("tt", 0, e840f000, 2, (RRnpc, RRnpc), 0, tt), TUE("ttt", 0, e840f040, 2, (RRnpc, RRnpc), 0, tt), + TUE("tta", 0, e840f080, 2, (RRnpc, RRnpc), 0, tt), + TUE("ttat", 0, e840f0c0, 2, (RRnpc, RRnpc), 0, tt), + + /* FP for ARMv8-M Mainline. Enabled for ARMv8-M Mainline because the + instructions behave as nop if no VFP is present. 
*/ +#undef THUMB_VARIANT +#define THUMB_VARIANT & arm_ext_v8m_main + TUEc("vlldm", 0, ec300a00, 1, (RRnpc), rn), + TUEc("vlstm", 0, ec200a00, 1, (RRnpc), rn), }; #undef ARM_VARIANT #undef THUMB_VARIANT @@ -21163,7 +21548,7 @@ md_section_align (segT segment ATTRIBUTE_UNUSED, void arm_handle_align (fragS * fragP) { - static char const arm_noop[2][2][4] = + static unsigned char const arm_noop[2][2][4] = { { /* ARMv1 */ {0x00, 0x00, 0xa0, 0xe1}, /* LE */ @@ -21174,7 +21559,7 @@ arm_handle_align (fragS * fragP) {0xe3, 0x20, 0xf0, 0x00}, /* BE */ }, }; - static char const thumb_noop[2][2][2] = + static unsigned char const thumb_noop[2][2][2] = { { /* Thumb-1 */ {0xc0, 0x46}, /* LE */ @@ -21185,7 +21570,7 @@ arm_handle_align (fragS * fragP) {0xbf, 0x00} /* BE */ } }; - static char const wide_thumb_noop[2][4] = + static unsigned char const wide_thumb_noop[2][4] = { /* Wide Thumb-2 */ {0xaf, 0xf3, 0x00, 0x80}, /* LE */ {0xf3, 0xaf, 0x80, 0x00}, /* BE */ @@ -21193,8 +21578,8 @@ arm_handle_align (fragS * fragP) unsigned bytes, fix, noop_size; char * p; - const char * noop; - const char *narrow_noop = NULL; + const unsigned char * noop; + const unsigned char *narrow_noop = NULL; #ifdef OBJ_ELF enum mstate state; #endif @@ -21405,10 +21790,10 @@ add_unwind_opcode (valueT op, int length) { unwind.opcode_alloc += ARM_OPCODE_CHUNK_SIZE; if (unwind.opcodes) - unwind.opcodes = (unsigned char *) xrealloc (unwind.opcodes, - unwind.opcode_alloc); + unwind.opcodes = XRESIZEVEC (unsigned char, unwind.opcodes, + unwind.opcode_alloc); else - unwind.opcodes = (unsigned char *) xmalloc (unwind.opcode_alloc); + unwind.opcodes = XNEWVEC (unsigned char, unwind.opcode_alloc); } while (length > 0) { @@ -21512,10 +21897,7 @@ start_unwind_section (const segT text_seg, int idx) const char * prefix; const char * prefix_once; const char * group_name; - size_t prefix_len; - size_t text_len; char * sec_name; - size_t sec_name_len; int type; int flags; int linkonce; @@ -21544,13 +21926,7 @@ start_unwind_section (const segT text_seg, int idx) text_name += strlen (".gnu.linkonce.t."); } - prefix_len = strlen (prefix); - text_len = strlen (text_name); - sec_name_len = prefix_len + text_len; - sec_name = (char *) xmalloc (sec_name_len + 1); - memcpy (sec_name, prefix, prefix_len); - memcpy (sec_name + prefix_len, text_name, text_len); - sec_name[prefix_len + text_len] = '\0'; + sec_name = concat (prefix, text_name, (char *) NULL); flags = SHF_ALLOC; linkonce = 0; @@ -23091,7 +23467,20 @@ md_apply_fix (fixS * fixP, case BFD_RELOC_ARM_CP_OFF_IMM: case BFD_RELOC_ARM_T32_CP_OFF_IMM: - if (value < -1023 || value > 1023 || (value & 3)) + if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM) + newval = md_chars_to_number (buf, INSN_SIZE); + else + newval = get_thumb32_insn (buf); + if ((newval & 0x0f200f00) == 0x0d000900) + { + /* This is a fp16 vstr/vldr. The immediate offset in the mnemonic + has permitted values that are multiples of 2, in the range 0 + to 510. */ + if (value < -510 || value > 510 || (value & 1)) + as_bad_where (fixP->fx_file, fixP->fx_line, + _("co-processor offset out of range")); + } + else if (value < -1023 || value > 1023 || (value & 3)) as_bad_where (fixP->fx_file, fixP->fx_line, _("co-processor offset out of range")); cp_off_common: @@ -23108,6 +23497,17 @@ md_apply_fix (fixS * fixP, else { newval &= 0xff7fff00; + if ((newval & 0x0f200f00) == 0x0d000900) + { + /* This is a fp16 vstr/vldr. + + It requires the immediate offset in the instruction is shifted + left by 1 to be a half-word offset. 
+ + Here, left shift by 1 first, and later right shift by 2 + should get the right offset. */ + value <<= 1; + } newval |= (value >> 2) | (sign ? INDEX_UP : 0); } if (fixP->fx_r_type == BFD_RELOC_ARM_CP_OFF_IMM @@ -23633,9 +24033,9 @@ tc_gen_reloc (asection *section, fixS *fixp) arelent * reloc; bfd_reloc_code_real_type code; - reloc = (arelent *) xmalloc (sizeof (arelent)); + reloc = XNEW (arelent); - reloc->sym_ptr_ptr = (asymbol **) xmalloc (sizeof (asymbol *)); + reloc->sym_ptr_ptr = XNEW (asymbol *); *reloc->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy); reloc->address = fixp->fx_frag->fr_address + fixp->fx_where; @@ -23833,7 +24233,7 @@ tc_gen_reloc (asection *section, fixS *fixp) default: { - char * type; + const char * type; switch (fixp->fx_r_type) { @@ -24282,8 +24682,8 @@ arm_adjust_symtab (void) /* If it's a .thumb_func, declare it as so, otherwise tag label as .code 16. */ if (THUMB_IS_FUNC (sym)) - elf_sym->internal_elf_sym.st_target_internal - = ST_BRANCH_TO_THUMB; + ARM_SET_SYM_BRANCH_TYPE (elf_sym->internal_elf_sym.st_target_internal, + ST_BRANCH_TO_THUMB); else if (EF_ARM_EABI_VERSION (meabi_flags) < EF_ARM_EABI_VER4) elf_sym->internal_elf_sym.st_info = ELF_ST_INFO (bind, STT_ARM_16BIT); @@ -24644,11 +25044,11 @@ size_t md_longopts_size = sizeof (md_longopts); struct arm_option_table { - char *option; /* Option name to match. */ - char *help; /* Help information. */ + const char *option; /* Option name to match. */ + const char *help; /* Help information. */ int *var; /* Variable to change. */ int value; /* What to change it to. */ - char *deprecated; /* If non-null, print this message. */ + const char *deprecated; /* If non-null, print this message. */ }; struct arm_option_table arm_opts[] = @@ -24681,10 +25081,10 @@ struct arm_option_table arm_opts[] = struct arm_legacy_option_table { - char *option; /* Option name to match. */ + const char *option; /* Option name to match. */ const arm_feature_set **var; /* Variable to change. */ const arm_feature_set value; /* What to change it to. */ - char *deprecated; /* If non-null, print this message. */ + const char *deprecated; /* If non-null, print this message. */ }; const struct arm_legacy_option_table arm_legacy_opts[] = @@ -24802,7 +25202,7 @@ const struct arm_legacy_option_table arm_legacy_opts[] = struct arm_cpu_option_table { - char *name; + const char *name; size_t name_len; const arm_feature_set value; /* For some CPUs we assume an FPU unless the user explicitly sets @@ -24940,6 +25340,9 @@ static const struct arm_cpu_option_table arm_cpus[] = ARM_CPU_OPT ("cortex-r7", ARM_ARCH_V7R_IDIV, FPU_ARCH_VFP_V3D16, "Cortex-R7"), + ARM_CPU_OPT ("cortex-r8", ARM_ARCH_V7R_IDIV, + FPU_ARCH_VFP_V3D16, + "Cortex-R8"), ARM_CPU_OPT ("cortex-m7", ARM_ARCH_V7EM, FPU_NONE, "Cortex-M7"), ARM_CPU_OPT ("cortex-m4", ARM_ARCH_V7EM, FPU_NONE, "Cortex-M4"), ARM_CPU_OPT ("cortex-m3", ARM_ARCH_V7M, FPU_NONE, "Cortex-M3"), @@ -24983,7 +25386,7 @@ static const struct arm_cpu_option_table arm_cpus[] = struct arm_arch_option_table { - char *name; + const char *name; size_t name_len; const arm_feature_set value; const arm_feature_set default_fpu; @@ -25054,16 +25457,20 @@ static const struct arm_arch_option_table arm_archs[] = /* ISA extensions in the co-processor and main instruction set space. 
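The md_apply_fix hunk above encodes the fp16 VLDR/VSTR offset by doubling the byte offset before the shared right-shift by two, so the stored imm8 is halfword-scaled (multiples of 2, up to 510) instead of word-scaled (multiples of 4, up to 1020). A minimal standalone sketch of that arithmetic, with an invented helper name and only non-negative offsets (the real code folds the sign into the U bit):

#include <assert.h>
#include <stdio.h>

/* Illustrative only: imm8 = offset / 2 for fp16 VLDR/VSTR, offset / 4
   otherwise.  Doubling first lets both cases share the common ">> 2".  */
static unsigned
cp_off_imm8 (unsigned byte_offset, int is_fp16)
{
  if (is_fp16)
    byte_offset <<= 1;
  return byte_offset >> 2;
}

int
main (void)
{
  assert (cp_off_imm8 (510, 1) == 255);   /* largest fp16 offset */
  assert (cp_off_imm8 (1020, 0) == 255);  /* largest word-scaled offset */
  printf ("fp16 #510 -> imm8 %u, word #1020 -> imm8 %u\n",
          cp_off_imm8 (510, 1), cp_off_imm8 (1020, 0));
  return 0;
}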
*/ struct arm_option_extension_value_table { - char *name; + const char *name; size_t name_len; const arm_feature_set merge_value; const arm_feature_set clear_value; - const arm_feature_set allowed_archs; + /* List of architectures for which an extension is available. ARM_ARCH_NONE + indicates that an extension is available for all architectures while + ARM_ANY marks an empty entry. */ + const arm_feature_set allowed_archs[2]; }; /* The following table must be in alphabetical order with a NULL last entry. */ -#define ARM_EXT_OPT(N, M, C, AA) { N, sizeof (N) - 1, M, C, AA } +#define ARM_EXT_OPT(N, M, C, AA) { N, sizeof (N) - 1, M, C, { AA, ARM_ANY } } +#define ARM_EXT_OPT2(N, M, C, AA1, AA2) { N, sizeof (N) - 1, M, C, {AA1, AA2} } static const struct arm_option_extension_value_table arm_extensions[] = { ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8), @@ -25071,52 +25478,58 @@ static const struct arm_option_extension_value_table arm_extensions[] = ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8), ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), + ARM_EXT_OPT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP), + ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP), + ARM_FEATURE_CORE (ARM_EXT_V7M, ARM_EXT2_V8M)), ARM_EXT_OPT ("fp", FPU_ARCH_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_VFP_ARMV8), ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), ARM_EXT_OPT ("fp16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST), ARM_ARCH_V8_2A), - ARM_EXT_OPT ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV), + ARM_EXT_OPT2 ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV), ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV), - ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)), + ARM_FEATURE_CORE_LOW (ARM_EXT_V7A), + ARM_FEATURE_CORE_LOW (ARM_EXT_V7R)), ARM_EXT_OPT ("iwmmxt",ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), - ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), ARM_ANY), + ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), ARM_ARCH_NONE), ARM_EXT_OPT ("iwmmxt2", ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), - ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), ARM_ANY), + ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), ARM_ARCH_NONE), ARM_EXT_OPT ("maverick", ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), - ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), ARM_ANY), - ARM_EXT_OPT ("mp", ARM_FEATURE_CORE_LOW (ARM_EXT_MP), + ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), ARM_ARCH_NONE), + ARM_EXT_OPT2 ("mp", ARM_FEATURE_CORE_LOW (ARM_EXT_MP), ARM_FEATURE_CORE_LOW (ARM_EXT_MP), - ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)), - ARM_EXT_OPT ("simd", FPU_ARCH_NEON_VFP_ARMV8, - ARM_FEATURE_COPROC (FPU_NEON_ARMV8), - ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), + ARM_FEATURE_CORE_LOW (ARM_EXT_V7A), + ARM_FEATURE_CORE_LOW (ARM_EXT_V7R)), ARM_EXT_OPT ("os", ARM_FEATURE_CORE_LOW (ARM_EXT_OS), ARM_FEATURE_CORE_LOW (ARM_EXT_OS), ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)), ARM_EXT_OPT ("pan", ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN), ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0), ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), - ARM_EXT_OPT ("sec", ARM_FEATURE_CORE_LOW (ARM_EXT_SEC), + ARM_EXT_OPT ("rdma", FPU_ARCH_NEON_VFP_ARMV8_1, + ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA), + ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), + ARM_EXT_OPT2 ("sec", ARM_FEATURE_CORE_LOW (ARM_EXT_SEC), ARM_FEATURE_CORE_LOW (ARM_EXT_SEC), - ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)), + ARM_FEATURE_CORE_LOW (ARM_EXT_V6K), + ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)), + ARM_EXT_OPT ("simd", FPU_ARCH_NEON_VFP_ARMV8, + ARM_FEATURE_COPROC 
@@ -25071,52 +25478,58 @@ static const struct arm_option_extension_value_table arm_extensions[] =
   ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
			ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8),
			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT ("dsp",	ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
+			ARM_FEATURE_CORE (ARM_EXT_V7M, ARM_EXT2_V8M)),
   ARM_EXT_OPT ("fp",	FPU_ARCH_VFP_ARMV8, ARM_FEATURE_COPROC (FPU_VFP_ARMV8),
			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
   ARM_EXT_OPT ("fp16",	ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
			ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
			ARM_ARCH_V8_2A),
-  ARM_EXT_OPT ("idiv",	ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
+  ARM_EXT_OPT2 ("idiv",	ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
			ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
-			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V7R)),
   ARM_EXT_OPT ("iwmmxt",ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT),
-			ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), ARM_ANY),
+			ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT), ARM_ARCH_NONE),
   ARM_EXT_OPT ("iwmmxt2", ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2),
-			ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), ARM_ANY),
+			ARM_FEATURE_COPROC (ARM_CEXT_IWMMXT2), ARM_ARCH_NONE),
   ARM_EXT_OPT ("maverick", ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK),
-			ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), ARM_ANY),
-  ARM_EXT_OPT ("mp",	ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
+			ARM_FEATURE_COPROC (ARM_CEXT_MAVERICK), ARM_ARCH_NONE),
+  ARM_EXT_OPT2 ("mp",	ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
			ARM_FEATURE_CORE_LOW (ARM_EXT_MP),
-			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A | ARM_EXT_V7R)),
-  ARM_EXT_OPT ("simd",  FPU_ARCH_NEON_VFP_ARMV8,
-			ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
-			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V7R)),
   ARM_EXT_OPT ("os",	ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
			ARM_FEATURE_CORE_LOW (ARM_EXT_OS),
			ARM_FEATURE_CORE_LOW (ARM_EXT_V6M)),
   ARM_EXT_OPT ("pan",	ARM_FEATURE_CORE_HIGH (ARM_EXT2_PAN),
			ARM_FEATURE (ARM_EXT_V8, ARM_EXT2_PAN, 0),
			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
-  ARM_EXT_OPT ("sec",	ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
+  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8_1,
+			ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
+  ARM_EXT_OPT2 ("sec",	ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
			ARM_FEATURE_CORE_LOW (ARM_EXT_SEC),
-			ARM_FEATURE_CORE_LOW (ARM_EXT_V6K | ARM_EXT_V7A)),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V6K),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)),
+  ARM_EXT_OPT ("simd",  FPU_ARCH_NEON_VFP_ARMV8,
+			ARM_FEATURE_COPROC (FPU_NEON_ARMV8),
+			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
   ARM_EXT_OPT ("virt",	ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT | ARM_EXT_ADIV
				     | ARM_EXT_DIV),
			ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
			ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)),
-  ARM_EXT_OPT ("rdma",  FPU_ARCH_NEON_VFP_ARMV8,
-			ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
-			ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
   ARM_EXT_OPT ("xscale",ARM_FEATURE_COPROC (ARM_CEXT_XSCALE),
-			ARM_FEATURE_COPROC (ARM_CEXT_XSCALE), ARM_ANY),
-  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE }
+			ARM_FEATURE_COPROC (ARM_CEXT_XSCALE), ARM_ARCH_NONE),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, { ARM_ARCH_NONE, ARM_ARCH_NONE } }
 };
 #undef ARM_EXT_OPT
 
 /* ISA floating-point and Advanced SIMD extensions.  */
 struct arm_option_fpu_value_table
 {
-  char *name;
+  const char *name;
   const arm_feature_set value;
 };
 
@@ -25172,7 +25585,7 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
 
 struct arm_option_value_table
 {
-  char *name;
+  const char *name;
   long value;
 };
 
@@ -25197,17 +25610,16 @@ static const struct arm_option_value_table arm_eabis[] =
 
 struct arm_long_option_table
 {
-  char * option;			/* Substring to match.  */
-  char * help;				/* Help information.  */
-  int (* func) (char * subopt);		/* Function to decode sub-option.  */
-  char * deprecated;			/* If non-null, print this message.  */
+  const char * option;			/* Substring to match.  */
+  const char * help;			/* Help information.  */
+  int (* func) (const char * subopt);	/* Function to decode sub-option.  */
+  const char * deprecated;		/* If non-null, print this message.  */
 };
 
 static bfd_boolean
-arm_parse_extension (char *str, const arm_feature_set **opt_p)
+arm_parse_extension (const char *str, const arm_feature_set **opt_p)
 {
-  arm_feature_set *ext_set = (arm_feature_set *)
-      xmalloc (sizeof (arm_feature_set));
+  arm_feature_set *ext_set = XNEW (arm_feature_set);
 
   /* We insist on extensions being specified in alphabetical order, and with
     extensions being added before being removed.  We achieve this by having
@@ -25216,6 +25628,7 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
     or removing it (0) and only allowing it to change in the order
     -1 -> 1 -> 0.  */
   const struct arm_option_extension_value_table * opt = NULL;
+  const arm_feature_set arm_any = ARM_ANY;
   int adding_value = -1;
 
   /* Copy the feature set, so that we can modify it.  */
@@ -25224,7 +25637,7 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
 
   while (str != NULL && *str != 0)
     {
-      char *ext;
+      const char *ext;
       size_t len;
 
       if (*str != '+')
@@ -25280,8 +25693,18 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
       for (; opt->name != NULL; opt++)
	if (opt->name_len == len && strncmp (opt->name, str, len) == 0)
	  {
+	    int i, nb_allowed_archs =
+	      sizeof (opt->allowed_archs) / sizeof (opt->allowed_archs[0]);
	    /* Check we can apply the extension to this architecture.  */
-	    if (!ARM_CPU_HAS_FEATURE (*ext_set, opt->allowed_archs))
+	    for (i = 0; i < nb_allowed_archs; i++)
+	      {
+		/* Empty entry.  */
+		if (ARM_FEATURE_EQUAL (opt->allowed_archs[i], arm_any))
+		  continue;
+		if (ARM_FSET_CPU_SUBSET (opt->allowed_archs[i], *ext_set))
+		  break;
+	      }
+	    if (i == nb_allowed_archs)
	      {
		as_bad (_("extension does not apply to the base architecture"));
		return FALSE;
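The new allowed_archs check above reduces to "at least one non-empty slot must be satisfied by the current feature set". A self-contained model of that loop, using plain bitmasks instead of arm_feature_set, ARM_FEATURE_EQUAL and ARM_FSET_CPU_SUBSET; every name and constant below is illustrative only, not the real gas implementation:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MODEL_ANY  ~0u		/* stands in for ARM_ANY, the empty-entry marker */
#define MODEL_NONE 0u		/* stands in for ARM_ARCH_NONE, "no restriction" */

struct model_extension
{
  const char *name;
  unsigned allowed_archs[2];	/* same two-slot layout as the patch */
};

/* Return true if EXT may be applied on top of the feature bits in CPU.  */
static bool
extension_applies_p (const struct model_extension *ext, unsigned cpu)
{
  size_t i, nb = sizeof (ext->allowed_archs) / sizeof (ext->allowed_archs[0]);

  for (i = 0; i < nb; i++)
    {
      if (ext->allowed_archs[i] == MODEL_ANY)	/* empty entry: skip it */
	continue;
      /* Subset test: every required bit must be present in the CPU.  */
      if ((ext->allowed_archs[i] & cpu) == ext->allowed_archs[i])
	return true;
    }
  return false;		/* no slot matched: reject, as the as_bad path does */
}

int
main (void)
{
  /* Two alternative base architectures, as for "idiv" (v7-A or v7-R).  */
  struct model_extension idiv = { "idiv", { 0x1, 0x2 } };

  printf ("%d %d\n",
	  extension_applies_p (&idiv, 0x2),	/* matches the second slot */
	  extension_applies_p (&idiv, 0x4));	/* matches neither slot */
  return 0;
}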
@@ -25327,10 +25750,10 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
 }
 
 static bfd_boolean
-arm_parse_cpu (char *str)
+arm_parse_cpu (const char *str)
 {
   const struct arm_cpu_option_table *opt;
-  char *ext = strchr (str, '+');
+  const char *ext = strchr (str, '+');
   size_t len;
 
   if (ext != NULL)
@@ -25377,10 +25800,10 @@ arm_parse_cpu (char *str)
 }
 
 static bfd_boolean
-arm_parse_arch (char *str)
+arm_parse_arch (const char *str)
 {
   const struct arm_arch_option_table *opt;
-  char *ext = strchr (str, '+');
+  const char *ext = strchr (str, '+');
   size_t len;
 
   if (ext != NULL)
@@ -25412,7 +25835,7 @@ arm_parse_arch (char *str)
 }
 
 static bfd_boolean
-arm_parse_fpu (char * str)
+arm_parse_fpu (const char * str)
 {
   const struct arm_option_fpu_value_table * opt;
 
@@ -25428,7 +25851,7 @@ arm_parse_fpu (char * str)
 }
 
 static bfd_boolean
-arm_parse_float_abi (char * str)
+arm_parse_float_abi (const char * str)
 {
   const struct arm_option_value_table * opt;
 
@@ -25445,7 +25868,7 @@ arm_parse_float_abi (char * str)
 
 #ifdef OBJ_ELF
 static bfd_boolean
-arm_parse_eabi (char * str)
+arm_parse_eabi (const char * str)
 {
   const struct arm_option_value_table *opt;
 
@@ -25461,7 +25884,7 @@ arm_parse_eabi (char * str)
 #endif
 
 static bfd_boolean
-arm_parse_it_mode (char * str)
+arm_parse_it_mode (const char * str)
 {
   bfd_boolean ret = TRUE;
 
@@ -25484,7 +25907,7 @@ arm_parse_it_mode (char * str)
 }
 
 static bfd_boolean
-arm_ccs_mode (char * unused ATTRIBUTE_UNUSED)
+arm_ccs_mode (const char * unused ATTRIBUTE_UNUSED)
 {
   codecomposer_syntax = TRUE;
   arm_comment_chars[0] = ';';
@@ -25514,7 +25937,7 @@ struct arm_long_option_table arm_long_opts[] =
 };
 
 int
-md_parse_option (int c, char * arg)
+md_parse_option (int c, const char * arg)
 {
   struct arm_option_table *opt;
   const struct arm_legacy_option_table *fopt;
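For context, the arm_parse_cpu/arm_parse_arch hunks above keep the existing convention that -mcpu= and -march= values look like name+ext1+ext2: the name is matched up to the first '+', and the remainder is handed to arm_parse_extension. A simplified, self-contained sketch of that split; the function name is hypothetical and not part of gas:

#include <stdio.h>
#include <string.h>

static void
split_cpu_option (const char *str)
{
  const char *ext = strchr (str, '+');	/* start of the extension list, if any */
  size_t len = ext ? (size_t) (ext - str) : strlen (str);

  printf ("cpu/arch name: %.*s\n", (int) len, str);
  if (ext)
    printf ("extension string passed on: %s\n", ext);	/* e.g. "+mp+sec" */
}

int
main (void)
{
  split_cpu_option ("cortex-r8");
  split_cpu_option ("cortex-a9+mp+sec");
  return 0;
}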
@@ -25695,6 +26118,7 @@ aeabi_set_public_attributes (void)
   char profile;
   int virt_sec = 0;
   int fp16_optional = 0;
+  arm_feature_set arm_arch = ARM_ARCH_NONE;
   arm_feature_set flags;
   arm_feature_set tmp;
   arm_feature_set arm_arch_v8m_base = ARM_ARCH_V8M_BASE;
@@ -25734,6 +26158,7 @@ aeabi_set_public_attributes (void)
       if (ARM_CPU_HAS_FEATURE (tmp, p->flags))
	{
	  arch = p->val;
+	  arm_arch = p->flags;
	  ARM_CLEAR_FEATURE (tmp, tmp, p->flags);
	}
     }
@@ -25750,18 +26175,27 @@ aeabi_set_public_attributes (void)
       && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
       && ARM_CPU_HAS_FEATURE (flags, arm_ext_v7m)
       && ARM_CPU_HAS_FEATURE (flags, arm_ext_v6_dsp))
-    arch = TAG_CPU_ARCH_V7E_M;
+    {
+      arch = TAG_CPU_ARCH_V7E_M;
+      arm_arch = (arm_feature_set) ARM_ARCH_V7EM;
+    }
 
   ARM_CLEAR_FEATURE (tmp, flags, arm_arch_v8m_base);
   if (arch == TAG_CPU_ARCH_V8M_BASE && ARM_CPU_HAS_FEATURE (tmp, arm_arch_any))
-    arch = TAG_CPU_ARCH_V8M_MAIN;
+    {
+      arch = TAG_CPU_ARCH_V8M_MAIN;
+      arm_arch = (arm_feature_set) ARM_ARCH_V8M_MAIN;
+    }
 
   /* In cpu_arch_ver ARMv8-A is before ARMv8-M for atomics to be detected as
      coming from ARMv8-A.  However, since ARMv8-A has more instructions than
     ARMv8-M, -march=all must be detected as ARMv8-A.  */
   if (arch == TAG_CPU_ARCH_V8M_MAIN
       && ARM_FEATURE_CORE_EQUAL (selected_cpu, arm_arch_any))
-    arch = TAG_CPU_ARCH_V8;
+    {
+      arch = TAG_CPU_ARCH_V8;
+      arm_arch = (arm_feature_set) ARM_ARCH_V8A;
+    }
 
   /* Tag_CPU_name.  */
   if (selected_cpu_name[0])
@@ -25787,7 +26221,7 @@ aeabi_set_public_attributes (void)
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a)
       || ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
       || (ARM_CPU_HAS_FEATURE (flags, arm_ext_atomics)
-	  && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m)))
+	  && !ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m_m_only)))
     profile = 'A';
   else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7r))
     profile = 'R';
@@ -25799,6 +26233,17 @@ aeabi_set_public_attributes (void)
   if (profile != '\0')
     aeabi_set_attribute_int (Tag_CPU_arch_profile, profile);
 
+  /* Tag_DSP_extension.  */
+  if (ARM_CPU_HAS_FEATURE (flags, arm_ext_dsp))
+    {
+      arm_feature_set ext;
+
+      /* DSP instructions not in architecture.  */
+      ARM_CLEAR_FEATURE (ext, flags, arm_arch);
+      if (ARM_CPU_HAS_FEATURE (ext, arm_ext_dsp))
+	aeabi_set_attribute_int (Tag_DSP_extension, 1);
+    }
+
   /* Tag_ARM_ISA_use.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v1)
       || arch == 0)
@@ -25811,7 +26256,7 @@ aeabi_set_public_attributes (void)
       int thumb_isa_use;
 
       if (!ARM_CPU_HAS_FEATURE (flags, arm_ext_v8)
-	  && ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m))
+	  && ARM_CPU_HAS_FEATURE (flags, arm_ext_v8m_m_only))
	thumb_isa_use = 3;
      else if (ARM_CPU_HAS_FEATURE (flags, arm_arch_t2))
	thumb_isa_use = 2;
@@ -25857,7 +26302,9 @@ aeabi_set_public_attributes (void)
     aeabi_set_attribute_int (Tag_WMMX_arch, 1);
 
   /* Tag_Advanced_SIMD_arch (formerly Tag_NEON_arch).  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_armv8))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v8_1))
+    aeabi_set_attribute_int (Tag_Advanced_SIMD_arch, 4);
+  else if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_armv8))
     aeabi_set_attribute_int (Tag_Advanced_SIMD_arch, 3);
   else if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v1))
     {
@@ -26035,6 +26482,7 @@ static void
 s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
 {
   const struct arm_option_extension_value_table *opt;
+  const arm_feature_set arm_any = ARM_ANY;
   char saved_char;
   char *name;
   int adding_value = 1;
@@ -26055,7 +26503,18 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
   for (opt = arm_extensions; opt->name != NULL; opt++)
     if (streq (opt->name, name))
       {
-	if (!ARM_CPU_HAS_FEATURE (*mcpu_cpu_opt, opt->allowed_archs))
+	int i, nb_allowed_archs =
+	  sizeof (opt->allowed_archs) / sizeof (opt->allowed_archs[i]);
+	for (i = 0; i < nb_allowed_archs; i++)
+	  {
+	    /* Empty entry.  */
+	    if (ARM_FEATURE_EQUAL (opt->allowed_archs[i], arm_any))
+	      continue;
+	    if (ARM_FSET_CPU_SUBSET (opt->allowed_archs[i], *mcpu_cpu_opt))
+	      break;
+	  }
+
+	if (i == nb_allowed_archs)
	  {
	    as_bad (_("architectural extension `%s' is not allowed for the "
		      "current base architecture"), name);
@@ -26180,6 +26639,7 @@ arm_convert_symbolic_attribute (const char *name)
       T (Tag_conformance),
       T (Tag_T2EE_use),
       T (Tag_Virtualization_use),
+      T (Tag_DSP_extension),
       /* We deliberately do not include Tag_MPextension_use_legacy.  */
 #undef T
     };
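The Tag_DSP_extension hunk above only emits the attribute when DSP instructions come from an extension rather than from the selected architecture itself. A small model of that decision, with plain bitmasks standing in for arm_feature_set; all names and constants here are illustrative and not part of gas:

#include <stdbool.h>
#include <stdio.h>

#define MODEL_DSP_BITS 0x4u	/* stands in for arm_ext_dsp */

static bool
wants_dsp_extension_tag (unsigned flags, unsigned arch_bits)
{
  unsigned ext;

  if (!(flags & MODEL_DSP_BITS))
    return false;		/* no DSP instructions enabled at all */

  /* ARM_CLEAR_FEATURE (ext, flags, arm_arch): keep only what the selected
     architecture does not already provide.  */
  ext = flags & ~arch_bits;
  return (ext & MODEL_DSP_BITS) != 0;	/* DSP must come from an extension */
}

int
main (void)
{
  /* An architecture that already includes DSP gets no tag; a base
     architecture plus the "dsp" extension does.  */
  printf ("%d %d\n",
	  wants_dsp_extension_tag (0x4 | 0x1, 0x4 | 0x1),
	  wants_dsp_extension_tag (0x4 | 0x1, 0x1));
  return 0;
}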