+do_t_tb (void)
+{
+ int half;
+
+ half = (inst.instruction & 0x10) != 0;
+ constraint (current_it_mask && current_it_mask != 0x10, BAD_BRANCH);
+ constraint (inst.operands[0].immisreg,
+ _("instruction requires register index"));
+ constraint (inst.operands[0].imm == 15,
+ _("PC is not a valid index register"));
+ constraint (!half && inst.operands[0].shifted,
+ _("instruction does not allow shifted index"));
+ inst.instruction |= (inst.operands[0].reg << 16) | inst.operands[0].imm;
+}
+
+static void
+do_t_usat (void)
+{
+ inst.instruction |= inst.operands[0].reg << 8;
+ inst.instruction |= inst.operands[1].imm;
+ inst.instruction |= inst.operands[2].reg << 16;
+
+ if (inst.operands[3].present)
+ {
+ constraint (inst.reloc.exp.X_op != O_constant,
+ _("expression too complex"));
+ if (inst.reloc.exp.X_add_number != 0)
+ {
+ if (inst.operands[3].shift_kind == SHIFT_ASR)
+ inst.instruction |= 0x00200000; /* sh bit */
+
+ inst.instruction |= (inst.reloc.exp.X_add_number & 0x1c) << 10;
+ inst.instruction |= (inst.reloc.exp.X_add_number & 0x03) << 6;
+ }
+ inst.reloc.type = BFD_RELOC_UNUSED;
+ }
+}
+
+static void
+do_t_usat16 (void)
+{
+ inst.instruction |= inst.operands[0].reg << 8;
+ inst.instruction |= inst.operands[1].imm;
+ inst.instruction |= inst.operands[2].reg << 16;
+}
+
+/* Neon instruction encoder helpers. */
+
+/* Encodings for the different types for various Neon opcodes. */
+
+/* An "invalid" code for the following tables. */
+#define N_INV -1u
+
+struct neon_tab_entry
+{
+ unsigned integer;
+ unsigned float_or_poly;
+ unsigned scalar_or_imm;
+};
+
+/* Map overloaded Neon opcodes to their respective encodings. */
+#define NEON_ENC_TAB \
+ X(vabd, 0x0000700, 0x1200d00, N_INV), \
+ X(vmax, 0x0000600, 0x0000f00, N_INV), \
+ X(vmin, 0x0000610, 0x0200f00, N_INV), \
+ X(vpadd, 0x0000b10, 0x1000d00, N_INV), \
+ X(vpmax, 0x0000a00, 0x1000f00, N_INV), \
+ X(vpmin, 0x0000a10, 0x1200f00, N_INV), \
+ X(vadd, 0x0000800, 0x0000d00, N_INV), \
+ X(vsub, 0x1000800, 0x0200d00, N_INV), \
+ X(vceq, 0x1000810, 0x0000e00, 0x1b10100), \
+ X(vcge, 0x0000310, 0x1000e00, 0x1b10080), \
+ X(vcgt, 0x0000300, 0x1200e00, 0x1b10000), \
+ /* Register variants of the following two instructions are encoded as
+ vcge / vcgt with the operands reversed. */ \
+ X(vclt, 0x0000300, 0x1200e00, 0x1b10200), \
+ X(vcle, 0x0000310, 0x1000e00, 0x1b10180), \
+ X(vmla, 0x0000900, 0x0000d10, 0x0800040), \
+ X(vmls, 0x1000900, 0x0200d10, 0x0800440), \
+ X(vmul, 0x0000910, 0x1000d10, 0x0800840), \
+ X(vmull, 0x0800c00, 0x0800e00, 0x0800a40), /* polynomial not float. */ \
+ X(vmlal, 0x0800800, N_INV, 0x0800240), \
+ X(vmlsl, 0x0800a00, N_INV, 0x0800640), \
+ X(vqdmlal, 0x0800900, N_INV, 0x0800340), \
+ X(vqdmlsl, 0x0800b00, N_INV, 0x0800740), \
+ X(vqdmull, 0x0800d00, N_INV, 0x0800b40), \
+ X(vqdmulh, 0x0000b00, N_INV, 0x0800c40), \
+ X(vqrdmulh, 0x1000b00, N_INV, 0x0800d40), \
+ X(vshl, 0x0000400, N_INV, 0x0800510), \
+ X(vqshl, 0x0000410, N_INV, 0x0800710), \
+ X(vand, 0x0000110, N_INV, 0x0800030), \
+ X(vbic, 0x0100110, N_INV, 0x0800030), \
+ X(veor, 0x1000110, N_INV, N_INV), \
+ X(vorn, 0x0300110, N_INV, 0x0800010), \
+ X(vorr, 0x0200110, N_INV, 0x0800010), \
+ X(vmvn, 0x1b00580, N_INV, 0x0800030), \
+ X(vshll, 0x1b20300, N_INV, 0x0800a10), /* max shift, immediate. */ \
+ X(vcvt, 0x1b30600, N_INV, 0x0800e10), /* integer, fixed-point. */ \
+ X(vdup, 0xe800b10, N_INV, 0x1b00c00), /* arm, scalar. */ \
+ X(vld1, 0x0200000, 0x0a00000, 0x0a00c00), /* interlv, lane, dup. */ \
+ X(vst1, 0x0000000, 0x0800000, N_INV), \
+ X(vld2, 0x0200100, 0x0a00100, 0x0a00d00), \
+ X(vst2, 0x0000100, 0x0800100, N_INV), \
+ X(vld3, 0x0200200, 0x0a00200, 0x0a00e00), \
+ X(vst3, 0x0000200, 0x0800200, N_INV), \
+ X(vld4, 0x0200300, 0x0a00300, 0x0a00f00), \
+ X(vst4, 0x0000300, 0x0800300, N_INV), \
+ X(vmovn, 0x1b20200, N_INV, N_INV), \
+ X(vtrn, 0x1b20080, N_INV, N_INV), \
+ X(vqmovn, 0x1b20200, N_INV, N_INV), \
+ X(vqmovun, 0x1b20240, N_INV, N_INV), \
+ X(vnmul, 0xe200a40, 0xe200b40, N_INV), \
+ X(vnmla, 0xe000a40, 0xe000b40, N_INV), \
+ X(vnmls, 0xe100a40, 0xe100b40, N_INV), \
+ X(vcmp, 0xeb40a40, 0xeb40b40, N_INV), \
+ X(vcmpz, 0xeb50a40, 0xeb50b40, N_INV), \
+ X(vcmpe, 0xeb40ac0, 0xeb40bc0, N_INV), \
+ X(vcmpez, 0xeb50ac0, 0xeb50bc0, N_INV)
+
+enum neon_opc
+{
+#define X(OPC,I,F,S) N_MNEM_##OPC
+NEON_ENC_TAB
+#undef X
+};
+
+static const struct neon_tab_entry neon_enc_tab[] =
+{
+#define X(OPC,I,F,S) { (I), (F), (S) }
+NEON_ENC_TAB
+#undef X
+};
+
+#define NEON_ENC_INTEGER(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_ARMREG(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_POLY(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_FLOAT(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_SCALAR(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_IMMED(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_INTERLV(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_LANE(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_DUP(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_SINGLE(X) \
+ ((neon_enc_tab[(X) & 0x0fffffff].integer) | ((X) & 0xf0000000))
+#define NEON_ENC_DOUBLE(X) \
+ ((neon_enc_tab[(X) & 0x0fffffff].float_or_poly) | ((X) & 0xf0000000))
+
+/* Define shapes for instruction operands. The following mnemonic characters
+ are used in this table:
+
+ F - VFP S<n> register
+ D - Neon D<n> register
+ Q - Neon Q<n> register
+ I - Immediate
+ S - Scalar
+ R - ARM register
+ L - D<n> register list
+
+ This table is used to generate various data:
+ - enumerations of the form NS_DDR to be used as arguments to
+ neon_select_shape.
+ - a table classifying shapes into single, double, quad, mixed.
+ - a table used to drive neon_select_shape.
+*/
+
+#define NEON_SHAPE_DEF \
+ X(3, (D, D, D), DOUBLE), \
+ X(3, (Q, Q, Q), QUAD), \
+ X(3, (D, D, I), DOUBLE), \
+ X(3, (Q, Q, I), QUAD), \
+ X(3, (D, D, S), DOUBLE), \
+ X(3, (Q, Q, S), QUAD), \
+ X(2, (D, D), DOUBLE), \
+ X(2, (Q, Q), QUAD), \
+ X(2, (D, S), DOUBLE), \
+ X(2, (Q, S), QUAD), \
+ X(2, (D, R), DOUBLE), \
+ X(2, (Q, R), QUAD), \
+ X(2, (D, I), DOUBLE), \
+ X(2, (Q, I), QUAD), \
+ X(3, (D, L, D), DOUBLE), \
+ X(2, (D, Q), MIXED), \
+ X(2, (Q, D), MIXED), \
+ X(3, (D, Q, I), MIXED), \
+ X(3, (Q, D, I), MIXED), \
+ X(3, (Q, D, D), MIXED), \
+ X(3, (D, Q, Q), MIXED), \
+ X(3, (Q, Q, D), MIXED), \
+ X(3, (Q, D, S), MIXED), \
+ X(3, (D, Q, S), MIXED), \
+ X(4, (D, D, D, I), DOUBLE), \
+ X(4, (Q, Q, Q, I), QUAD), \
+ X(2, (F, F), SINGLE), \
+ X(3, (F, F, F), SINGLE), \
+ X(2, (F, I), SINGLE), \
+ X(2, (F, D), MIXED), \
+ X(2, (D, F), MIXED), \
+ X(3, (F, F, I), MIXED), \
+ X(4, (R, R, F, F), SINGLE), \
+ X(4, (F, F, R, R), SINGLE), \
+ X(3, (D, R, R), DOUBLE), \
+ X(3, (R, R, D), DOUBLE), \
+ X(2, (S, R), SINGLE), \
+ X(2, (R, S), SINGLE), \
+ X(2, (F, R), SINGLE), \
+ X(2, (R, F), SINGLE)
+
+#define S2(A,B) NS_##A##B
+#define S3(A,B,C) NS_##A##B##C
+#define S4(A,B,C,D) NS_##A##B##C##D
+
+#define X(N, L, C) S##N L
+
+enum neon_shape
+{
+ NEON_SHAPE_DEF,
+ NS_NULL
+};
+
+#undef X
+#undef S2
+#undef S3
+#undef S4
+
+enum neon_shape_class
+{
+ SC_SINGLE,
+ SC_DOUBLE,
+ SC_QUAD,
+ SC_MIXED
+};
+
+#define X(N, L, C) SC_##C
+
+static enum neon_shape_class neon_shape_class[] =
+{
+ NEON_SHAPE_DEF
+};
+
+#undef X
+
+enum neon_shape_el
+{
+ SE_F,
+ SE_D,
+ SE_Q,
+ SE_I,
+ SE_S,
+ SE_R,
+ SE_L
+};
+
+/* Register widths of above. */
+static unsigned neon_shape_el_size[] =
+{
+ 32,
+ 64,
+ 128,
+ 0,
+ 32,
+ 32,
+ 0
+};
+
+struct neon_shape_info
+{
+ unsigned els;
+ enum neon_shape_el el[NEON_MAX_TYPE_ELS];
+};
+
+#define S2(A,B) { SE_##A, SE_##B }
+#define S3(A,B,C) { SE_##A, SE_##B, SE_##C }
+#define S4(A,B,C,D) { SE_##A, SE_##B, SE_##C, SE_##D }
+
+#define X(N, L, C) { N, S##N L }
+
+static struct neon_shape_info neon_shape_tab[] =
+{
+ NEON_SHAPE_DEF
+};
+
+#undef X
+#undef S2
+#undef S3
+#undef S4
+
+/* Bit masks used in type checking given instructions.
+ 'N_EQK' means the type must be the same as (or based on in some way) the key
+ type, which itself is marked with the 'N_KEY' bit. If the 'N_EQK' bit is
+ set, various other bits can be set as well in order to modify the meaning of
+ the type constraint. */
+
+enum neon_type_mask
+{
+ N_S8 = 0x000001,
+ N_S16 = 0x000002,
+ N_S32 = 0x000004,
+ N_S64 = 0x000008,
+ N_U8 = 0x000010,
+ N_U16 = 0x000020,
+ N_U32 = 0x000040,
+ N_U64 = 0x000080,
+ N_I8 = 0x000100,
+ N_I16 = 0x000200,
+ N_I32 = 0x000400,
+ N_I64 = 0x000800,
+ N_8 = 0x001000,
+ N_16 = 0x002000,
+ N_32 = 0x004000,
+ N_64 = 0x008000,
+ N_P8 = 0x010000,
+ N_P16 = 0x020000,
+ N_F32 = 0x040000,
+ N_F64 = 0x080000,
+ N_KEY = 0x100000, /* key element (main type specifier). */
+ N_EQK = 0x200000, /* given operand has the same type & size as the key. */
+ N_VFP = 0x400000, /* VFP mode: operand size must match register width. */
+ N_DBL = 0x000001, /* if N_EQK, this operand is twice the size. */
+ N_HLF = 0x000002, /* if N_EQK, this operand is half the size. */
+ N_SGN = 0x000004, /* if N_EQK, this operand is forced to be signed. */
+ N_UNS = 0x000008, /* if N_EQK, this operand is forced to be unsigned. */
+ N_INT = 0x000010, /* if N_EQK, this operand is forced to be integer. */
+ N_FLT = 0x000020, /* if N_EQK, this operand is forced to be float. */
+ N_SIZ = 0x000040, /* if N_EQK, this operand is forced to be size-only. */
+ N_UTYP = 0,
+ N_MAX_NONSPECIAL = N_F64
+};
+
+#define N_ALLMODS (N_DBL | N_HLF | N_SGN | N_UNS | N_INT | N_FLT | N_SIZ)
+
+#define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
+#define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
+#define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
+#define N_SUF_32 (N_SU_32 | N_F32)
+#define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64)
+#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32)
+
+/* Pass this as the first type argument to neon_check_type to ignore types
+ altogether. */
+#define N_IGNORE_TYPE (N_KEY | N_EQK)
+
+/* Select a "shape" for the current instruction (describing register types or
+ sizes) from a list of alternatives. Return NS_NULL if the current instruction
+ doesn't fit. For non-polymorphic shapes, checking is usually done as a
+ function of operand parsing, so this function doesn't need to be called.
+ Shapes should be listed in order of decreasing length. */
+
+static enum neon_shape
+neon_select_shape (enum neon_shape shape, ...)
+{
+ va_list ap;
+ enum neon_shape first_shape = shape;
+
+ /* Fix missing optional operands. FIXME: we don't know at this point how
+ many arguments we should have, so this makes the assumption that we have
+ > 1. This is true of all current Neon opcodes, I think, but may not be
+ true in the future. */
+ if (!inst.operands[1].present)
+ inst.operands[1] = inst.operands[0];
+
+ va_start (ap, shape);
+
+ for (; shape != NS_NULL; shape = va_arg (ap, int))
+ {
+ unsigned j;
+ int matches = 1;
+
+ for (j = 0; j < neon_shape_tab[shape].els; j++)
+ {
+ if (!inst.operands[j].present)
+ {
+ matches = 0;
+ break;
+ }
+
+ switch (neon_shape_tab[shape].el[j])
+ {
+ case SE_F:
+ if (!(inst.operands[j].isreg
+ && inst.operands[j].isvec
+ && inst.operands[j].issingle
+ && !inst.operands[j].isquad))
+ matches = 0;
+ break;
+
+ case SE_D:
+ if (!(inst.operands[j].isreg
+ && inst.operands[j].isvec
+ && !inst.operands[j].isquad
+ && !inst.operands[j].issingle))
+ matches = 0;
+ break;
+
+ case SE_R:
+ if (!(inst.operands[j].isreg
+ && !inst.operands[j].isvec))
+ matches = 0;
+ break;
+
+ case SE_Q:
+ if (!(inst.operands[j].isreg
+ && inst.operands[j].isvec
+ && inst.operands[j].isquad
+ && !inst.operands[j].issingle))
+ matches = 0;
+ break;
+
+ case SE_I:
+ if (!(!inst.operands[j].isreg
+ && !inst.operands[j].isscalar))
+ matches = 0;
+ break;
+
+ case SE_S:
+ if (!(!inst.operands[j].isreg
+ && inst.operands[j].isscalar))
+ matches = 0;
+ break;
+
+ case SE_L:
+ break;
+ }
+ }
+ if (matches)
+ break;
+ }
+
+ va_end (ap);
+
+ if (shape == NS_NULL && first_shape != NS_NULL)
+ first_error (_("invalid instruction shape"));
+
+ return shape;
+}
+
+/* True if SHAPE is predominantly a quadword operation (most of the time, this
+ means the Q bit should be set). */
+
+static int
+neon_quad (enum neon_shape shape)
+{
+ return neon_shape_class[shape] == SC_QUAD;
+}
+
+static void
+neon_modify_type_size (unsigned typebits, enum neon_el_type *g_type,
+ unsigned *g_size)
+{
+ /* Allow modification to be made to types which are constrained to be
+ based on the key element, based on bits set alongside N_EQK. */
+ if ((typebits & N_EQK) != 0)
+ {
+ if ((typebits & N_HLF) != 0)
+ *g_size /= 2;
+ else if ((typebits & N_DBL) != 0)
+ *g_size *= 2;
+ if ((typebits & N_SGN) != 0)
+ *g_type = NT_signed;
+ else if ((typebits & N_UNS) != 0)
+ *g_type = NT_unsigned;
+ else if ((typebits & N_INT) != 0)
+ *g_type = NT_integer;
+ else if ((typebits & N_FLT) != 0)
+ *g_type = NT_float;
+ else if ((typebits & N_SIZ) != 0)
+ *g_type = NT_untyped;
+ }
+}
+
+/* Return operand OPNO promoted by bits set in THISARG. KEY should be the "key"
+ operand type, i.e. the single type specified in a Neon instruction when it
+ is the only one given. */
+
+static struct neon_type_el
+neon_type_promote (struct neon_type_el *key, unsigned thisarg)
+{
+ struct neon_type_el dest = *key;
+
+ assert ((thisarg & N_EQK) != 0);
+
+ neon_modify_type_size (thisarg, &dest.type, &dest.size);
+
+ return dest;
+}
+
+/* Convert Neon type and size into compact bitmask representation. */
+
+static enum neon_type_mask
+type_chk_of_el_type (enum neon_el_type type, unsigned size)
+{
+ switch (type)
+ {
+ case NT_untyped:
+ switch (size)
+ {
+ case 8: return N_8;
+ case 16: return N_16;
+ case 32: return N_32;
+ case 64: return N_64;
+ default: ;
+ }
+ break;
+
+ case NT_integer:
+ switch (size)
+ {
+ case 8: return N_I8;
+ case 16: return N_I16;
+ case 32: return N_I32;
+ case 64: return N_I64;
+ default: ;
+ }
+ break;
+
+ case NT_float:
+ switch (size)
+ {
+ case 32: return N_F32;
+ case 64: return N_F64;
+ default: ;
+ }
+ break;
+
+ case NT_poly:
+ switch (size)
+ {
+ case 8: return N_P8;
+ case 16: return N_P16;
+ default: ;
+ }
+ break;
+
+ case NT_signed:
+ switch (size)
+ {
+ case 8: return N_S8;
+ case 16: return N_S16;
+ case 32: return N_S32;
+ case 64: return N_S64;
+ default: ;
+ }
+ break;
+
+ case NT_unsigned:
+ switch (size)
+ {
+ case 8: return N_U8;
+ case 16: return N_U16;
+ case 32: return N_U32;
+ case 64: return N_U64;
+ default: ;
+ }
+ break;
+
+ default: ;
+ }
+
+ return N_UTYP;
+}
+
+/* Convert compact Neon bitmask type representation to a type and size. Only
+ handles the case where a single bit is set in the mask. */
+
+static int
+el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
+ enum neon_type_mask mask)
+{
+ if ((mask & N_EQK) != 0)
+ return FAIL;
+
+ if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
+ *size = 8;
+ else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_P16)) != 0)
+ *size = 16;
+ else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
+ *size = 32;
+ else if ((mask & (N_S64 | N_U64 | N_I64 | N_64 | N_F64)) != 0)
+ *size = 64;
+ else
+ return FAIL;
+
+ if ((mask & (N_S8 | N_S16 | N_S32 | N_S64)) != 0)
+ *type = NT_signed;
+ else if ((mask & (N_U8 | N_U16 | N_U32 | N_U64)) != 0)
+ *type = NT_unsigned;
+ else if ((mask & (N_I8 | N_I16 | N_I32 | N_I64)) != 0)
+ *type = NT_integer;
+ else if ((mask & (N_8 | N_16 | N_32 | N_64)) != 0)
+ *type = NT_untyped;
+ else if ((mask & (N_P8 | N_P16)) != 0)
+ *type = NT_poly;
+ else if ((mask & (N_F32 | N_F64)) != 0)
+ *type = NT_float;
+ else
+ return FAIL;
+
+ return SUCCESS;
+}
+
+/* Modify a bitmask of allowed types. This is only needed for type
+ relaxation. */
+
+static unsigned
+modify_types_allowed (unsigned allowed, unsigned mods)
+{
+ unsigned size;
+ enum neon_el_type type;
+ unsigned destmask;
+ int i;
+
+ destmask = 0;
+
+ for (i = 1; i <= N_MAX_NONSPECIAL; i <<= 1)
+ {
+ if (el_type_of_type_chk (&type, &size, allowed & i) == SUCCESS)
+ {
+ neon_modify_type_size (mods, &type, &size);
+ destmask |= type_chk_of_el_type (type, size);
+ }
+ }
+
+ return destmask;
+}
+
+/* Check type and return type classification.
+ The manual states (paraphrase): If one datatype is given, it indicates the
+ type given in:
+ - the second operand, if there is one
+ - the operand, if there is no second operand
+ - the result, if there are no operands.
+ This isn't quite good enough though, so we use a concept of a "key" datatype
+ which is set on a per-instruction basis, which is the one which matters when
+ only one data type is written.
+ Note: this function has side-effects (e.g. filling in missing operands). All
+ Neon instructions should call it before performing bit encoding. */
+
+static struct neon_type_el
+neon_check_type (unsigned els, enum neon_shape ns, ...)
+{
+ va_list ap;
+ unsigned i, pass, key_el = 0;
+ unsigned types[NEON_MAX_TYPE_ELS];
+ enum neon_el_type k_type = NT_invtype;
+ unsigned k_size = -1u;
+ struct neon_type_el badtype = {NT_invtype, -1};
+ unsigned key_allowed = 0;
+
+ /* Optional registers in Neon instructions are always (not) in operand 1.
+ Fill in the missing operand here, if it was omitted. */
+ if (els > 1 && !inst.operands[1].present)
+ inst.operands[1] = inst.operands[0];
+
+ /* Suck up all the varargs. */
+ va_start (ap, ns);
+ for (i = 0; i < els; i++)
+ {
+ unsigned thisarg = va_arg (ap, unsigned);
+ if (thisarg == N_IGNORE_TYPE)
+ {
+ va_end (ap);
+ return badtype;
+ }
+ types[i] = thisarg;
+ if ((thisarg & N_KEY) != 0)
+ key_el = i;
+ }
+ va_end (ap);
+
+ if (inst.vectype.elems > 0)
+ for (i = 0; i < els; i++)
+ if (inst.operands[i].vectype.type != NT_invtype)
+ {
+ first_error (_("types specified in both the mnemonic and operands"));
+ return badtype;
+ }
+
+ /* Duplicate inst.vectype elements here as necessary.
+ FIXME: No idea if this is exactly the same as the ARM assembler,
+ particularly when an insn takes one register and one non-register
+ operand. */
+ if (inst.vectype.elems == 1 && els > 1)
+ {
+ unsigned j;
+ inst.vectype.elems = els;
+ inst.vectype.el[key_el] = inst.vectype.el[0];
+ for (j = 0; j < els; j++)
+ if (j != key_el)
+ inst.vectype.el[j] = neon_type_promote (&inst.vectype.el[key_el],
+ types[j]);
+ }
+ else if (inst.vectype.elems == 0 && els > 0)
+ {
+ unsigned j;
+ /* No types were given after the mnemonic, so look for types specified
+ after each operand. We allow some flexibility here; as long as the
+ "key" operand has a type, we can infer the others. */
+ for (j = 0; j < els; j++)
+ if (inst.operands[j].vectype.type != NT_invtype)
+ inst.vectype.el[j] = inst.operands[j].vectype;
+
+ if (inst.operands[key_el].vectype.type != NT_invtype)
+ {
+ for (j = 0; j < els; j++)
+ if (inst.operands[j].vectype.type == NT_invtype)
+ inst.vectype.el[j] = neon_type_promote (&inst.vectype.el[key_el],
+ types[j]);
+ }
+ else
+ {
+ first_error (_("operand types can't be inferred"));
+ return badtype;
+ }
+ }
+ else if (inst.vectype.elems != els)
+ {
+ first_error (_("type specifier has the wrong number of parts"));
+ return badtype;
+ }
+
+ for (pass = 0; pass < 2; pass++)
+ {
+ for (i = 0; i < els; i++)
+ {
+ unsigned thisarg = types[i];
+ unsigned types_allowed = ((thisarg & N_EQK) != 0 && pass != 0)
+ ? modify_types_allowed (key_allowed, thisarg) : thisarg;
+ enum neon_el_type g_type = inst.vectype.el[i].type;
+ unsigned g_size = inst.vectype.el[i].size;
+
+ /* Decay more-specific signed & unsigned types to sign-insensitive
+ integer types if sign-specific variants are unavailable. */
+ if ((g_type == NT_signed || g_type == NT_unsigned)
+ && (types_allowed & N_SU_ALL) == 0)
+ g_type = NT_integer;
+
+ /* If only untyped args are allowed, decay any more specific types to
+ them. Some instructions only care about signs for some element
+ sizes, so handle that properly. */
+ if ((g_size == 8 && (types_allowed & N_8) != 0)
+ || (g_size == 16 && (types_allowed & N_16) != 0)
+ || (g_size == 32 && (types_allowed & N_32) != 0)
+ || (g_size == 64 && (types_allowed & N_64) != 0))
+ g_type = NT_untyped;
+
+ if (pass == 0)
+ {
+ if ((thisarg & N_KEY) != 0)
+ {
+ k_type = g_type;
+ k_size = g_size;
+ key_allowed = thisarg & ~N_KEY;
+ }
+ }
+ else
+ {
+ if ((thisarg & N_VFP) != 0)
+ {
+ enum neon_shape_el regshape = neon_shape_tab[ns].el[i];
+ unsigned regwidth = neon_shape_el_size[regshape], match;
+
+ /* In VFP mode, operands must match register widths. If we
+ have a key operand, use its width, else use the width of
+ the current operand. */
+ if (k_size != -1u)
+ match = k_size;
+ else
+ match = g_size;
+
+ if (regwidth != match)
+ {
+ first_error (_("operand size must match register width"));
+ return badtype;
+ }
+ }
+
+ if ((thisarg & N_EQK) == 0)
+ {
+ unsigned given_type = type_chk_of_el_type (g_type, g_size);
+
+ if ((given_type & types_allowed) == 0)
+ {
+ first_error (_("bad type in Neon instruction"));
+ return badtype;
+ }
+ }
+ else
+ {
+ enum neon_el_type mod_k_type = k_type;
+ unsigned mod_k_size = k_size;
+ neon_modify_type_size (thisarg, &mod_k_type, &mod_k_size);
+ if (g_type != mod_k_type || g_size != mod_k_size)
+ {
+ first_error (_("inconsistent types in Neon instruction"));
+ return badtype;
+ }
+ }
+ }
+ }
+ }
+
+ return inst.vectype.el[key_el];
+}
+
+/* Neon-style VFP instruction forwarding. */
+
+/* Thumb VFP instructions have 0xE in the condition field. */
+
+static void
+do_vfp_cond_or_thumb (void)
+{
+ if (thumb_mode)
+ inst.instruction |= 0xe0000000;
+ else
+ inst.instruction |= inst.cond << 28;
+}
+
+/* Look up and encode a simple mnemonic, for use as a helper function for the
+ Neon-style VFP syntax. This avoids duplication of bits of the insns table,
+ etc. It is assumed that operand parsing has already been done, and that the
+ operands are in the form expected by the given opcode (this isn't necessarily
+ the same as the form in which they were parsed, hence some massaging must
+ take place before this function is called).
+ Checks current arch version against that in the looked-up opcode. */
+
+static void
+do_vfp_nsyn_opcode (const char *opname)
+{
+ const struct asm_opcode *opcode;
+
+ opcode = hash_find (arm_ops_hsh, opname);
+
+ if (!opcode)
+ abort ();
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant,
+ thumb_mode ? *opcode->tvariant : *opcode->avariant),
+ _(BAD_FPU));
+
+ if (thumb_mode)
+ {
+ inst.instruction = opcode->tvalue;
+ opcode->tencode ();
+ }
+ else
+ {
+ inst.instruction = (inst.cond << 28) | opcode->avalue;
+ opcode->aencode ();
+ }
+}
+
+static void
+do_vfp_nsyn_add_sub (enum neon_shape rs)
+{
+ int is_add = (inst.instruction & 0x0fffffff) == N_MNEM_vadd;
+
+ if (rs == NS_FFF)
+ {
+ if (is_add)
+ do_vfp_nsyn_opcode ("fadds");
+ else
+ do_vfp_nsyn_opcode ("fsubs");
+ }
+ else
+ {
+ if (is_add)
+ do_vfp_nsyn_opcode ("faddd");
+ else
+ do_vfp_nsyn_opcode ("fsubd");
+ }
+}
+
+/* Check operand types to see if this is a VFP instruction, and if so call
+ PFN (). */
+
+static int
+try_vfp_nsyn (int args, void (*pfn) (enum neon_shape))
+{
+ enum neon_shape rs;
+ struct neon_type_el et;
+
+ switch (args)
+ {
+ case 2:
+ rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
+ et = neon_check_type (2, rs,
+ N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+ break;
+
+ case 3:
+ rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+ et = neon_check_type (3, rs,
+ N_EQK | N_VFP, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (et.type != NT_invtype)
+ {
+ pfn (rs);
+ return SUCCESS;
+ }
+ else
+ inst.error = NULL;
+
+ return FAIL;
+}
+
+static void
+do_vfp_nsyn_mla_mls (enum neon_shape rs)
+{
+ int is_mla = (inst.instruction & 0x0fffffff) == N_MNEM_vmla;
+
+ if (rs == NS_FFF)
+ {
+ if (is_mla)
+ do_vfp_nsyn_opcode ("fmacs");
+ else
+ do_vfp_nsyn_opcode ("fmscs");
+ }
+ else
+ {
+ if (is_mla)
+ do_vfp_nsyn_opcode ("fmacd");
+ else
+ do_vfp_nsyn_opcode ("fmscd");
+ }
+}
+
+static void
+do_vfp_nsyn_mul (enum neon_shape rs)
+{
+ if (rs == NS_FFF)
+ do_vfp_nsyn_opcode ("fmuls");
+ else
+ do_vfp_nsyn_opcode ("fmuld");
+}
+
+static void
+do_vfp_nsyn_abs_neg (enum neon_shape rs)
+{
+ int is_neg = (inst.instruction & 0x80) != 0;
+ neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_VFP | N_KEY);
+
+ if (rs == NS_FF)
+ {
+ if (is_neg)
+ do_vfp_nsyn_opcode ("fnegs");
+ else
+ do_vfp_nsyn_opcode ("fabss");
+ }
+ else
+ {
+ if (is_neg)
+ do_vfp_nsyn_opcode ("fnegd");
+ else
+ do_vfp_nsyn_opcode ("fabsd");
+ }
+}
+
+/* Encode single-precision (only!) VFP fldm/fstm instructions. Double precision
+ insns belong to Neon, and are handled elsewhere. */
+
+static void
+do_vfp_nsyn_ldm_stm (int is_dbmode)
+{
+ int is_ldm = (inst.instruction & (1 << 20)) != 0;
+ if (is_ldm)
+ {
+ if (is_dbmode)
+ do_vfp_nsyn_opcode ("fldmdbs");
+ else
+ do_vfp_nsyn_opcode ("fldmias");
+ }
+ else
+ {
+ if (is_dbmode)
+ do_vfp_nsyn_opcode ("fstmdbs");
+ else
+ do_vfp_nsyn_opcode ("fstmias");
+ }
+}
+
+static void
+do_vfp_nsyn_sqrt (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
+ neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+
+ if (rs == NS_FF)
+ do_vfp_nsyn_opcode ("fsqrts");
+ else
+ do_vfp_nsyn_opcode ("fsqrtd");
+}
+
+static void
+do_vfp_nsyn_div (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+ neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
+ N_F32 | N_F64 | N_KEY | N_VFP);
+
+ if (rs == NS_FFF)
+ do_vfp_nsyn_opcode ("fdivs");
+ else
+ do_vfp_nsyn_opcode ("fdivd");
+}
+
+static void
+do_vfp_nsyn_nmul (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_FFF, NS_DDD, NS_NULL);
+ neon_check_type (3, rs, N_EQK | N_VFP, N_EQK | N_VFP,
+ N_F32 | N_F64 | N_KEY | N_VFP);
+
+ if (rs == NS_FFF)
+ {
+ inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+ do_vfp_sp_dyadic ();
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+ do_vfp_dp_rd_rn_rm ();
+ }
+ do_vfp_cond_or_thumb ();
+}
+
+static void
+do_vfp_nsyn_cmp (void)
+{
+ if (inst.operands[1].isreg)
+ {
+ enum neon_shape rs = neon_select_shape (NS_FF, NS_DD, NS_NULL);
+ neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
+
+ if (rs == NS_FF)
+ {
+ inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+ do_vfp_sp_monadic ();
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+ do_vfp_dp_rd_rm ();
+ }
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_FI, NS_DI, NS_NULL);
+ neon_check_type (2, rs, N_F32 | N_F64 | N_KEY | N_VFP, N_EQK);
+
+ switch (inst.instruction & 0x0fffffff)
+ {
+ case N_MNEM_vcmp:
+ inst.instruction += N_MNEM_vcmpz - N_MNEM_vcmp;
+ break;
+ case N_MNEM_vcmpe:
+ inst.instruction += N_MNEM_vcmpez - N_MNEM_vcmpe;
+ break;
+ default:
+ abort ();
+ }
+
+ if (rs == NS_FI)
+ {
+ inst.instruction = NEON_ENC_SINGLE (inst.instruction);
+ do_vfp_sp_compare_z ();
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_DOUBLE (inst.instruction);
+ do_vfp_dp_rd ();
+ }
+ }
+ do_vfp_cond_or_thumb ();
+}
+
+static void
+nsyn_insert_sp (void)
+{
+ inst.operands[1] = inst.operands[0];
+ memset (&inst.operands[0], '\0', sizeof (inst.operands[0]));
+ inst.operands[0].reg = 13;
+ inst.operands[0].isreg = 1;
+ inst.operands[0].writeback = 1;
+ inst.operands[0].present = 1;
+}
+
+static void
+do_vfp_nsyn_push (void)
+{
+ nsyn_insert_sp ();
+ if (inst.operands[1].issingle)
+ do_vfp_nsyn_opcode ("fstmdbs");
+ else
+ do_vfp_nsyn_opcode ("fstmdbd");
+}
+
+static void
+do_vfp_nsyn_pop (void)
+{
+ nsyn_insert_sp ();
+ if (inst.operands[1].issingle)
+ do_vfp_nsyn_opcode ("fldmias");
+ else
+ do_vfp_nsyn_opcode ("fldmiad");
+}
+
+/* Fix up Neon data-processing instructions, ORing in the correct bits for
+ ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */
+
+static unsigned
+neon_dp_fixup (unsigned i)
+{
+ if (thumb_mode)
+ {
+ /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode. */
+ if (i & (1 << 24))
+ i |= 1 << 28;
+
+ i &= ~(1 << 24);
+
+ i |= 0xef000000;
+ }
+ else
+ i |= 0xf2000000;
+
+ return i;
+}
+
+/* Turn a size (8, 16, 32, 64) into the respective bit number minus 3
+ (0, 1, 2, 3). */
+
+static unsigned
+neon_logbits (unsigned x)
+{
+ return ffs (x) - 4;
+}
+
+#define LOW4(R) ((R) & 0xf)
+#define HI1(R) (((R) >> 4) & 1)
+
+/* Encode insns with bit pattern:
+
+ |28/24|23|22 |21 20|19 16|15 12|11 8|7|6|5|4|3 0|
+ | U |x |D |size | Rn | Rd |x x x x|N|Q|M|x| Rm |
+
+ SIZE is passed in bits. -1 means size field isn't changed, in case it has a
+ different meaning for some instruction. */
+
+static void
+neon_three_same (int isquad, int ubit, int size)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (isquad != 0) << 6;
+ inst.instruction |= (ubit != 0) << 24;
+ if (size != -1)
+ inst.instruction |= neon_logbits (size) << 20;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of the form:
+
+ |28/24|23|22|21 20|19 18|17 16|15 12|11 7|6|5|4|3 0|
+ | U |x |D |x x |size |x x | Rd |x x x x x|Q|M|x| Rm |
+
+ Don't write size if SIZE == -1. */
+
+static void
+neon_two_same (int qbit, int ubit, int size)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (qbit != 0) << 6;
+ inst.instruction |= (ubit != 0) << 24;
+
+ if (size != -1)
+ inst.instruction |= neon_logbits (size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Neon instruction encoders, in approximate order of appearance. */
+
+static void
+do_neon_dyadic_i_su (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_SU_32 | N_KEY);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+}
+
+static void
+do_neon_dyadic_i64_su (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+}
+
+static void
+neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et,
+ unsigned immbits)
+{
+ unsigned size = et.size >> 3;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (isquad != 0) << 6;
+ inst.instruction |= immbits << 16;
+ inst.instruction |= (size >> 3) << 7;
+ inst.instruction |= (size & 0x7) << 19;
+ if (write_ubit)
+ inst.instruction |= (uval != 0) << 24;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_shl_imm (void)
+{
+ if (!inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (FALSE, 0, neon_quad (rs), et, inst.operands[2].imm);
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ unsigned int tmp;
+
+ /* VSHL/VQSHL 3-register variants have syntax such as:
+ vshl.xx Dd, Dm, Dn
+ whereas other 3-register operations encoded by neon_three_same have
+ syntax like:
+ vadd.xx Dd, Dn, Dm
+ (i.e. with Dn & Dm reversed). Swap operands[1].reg and operands[2].reg
+ here. */
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ }
+}
+
+static void
+do_neon_qshl_imm (void)
+{
+ if (!inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
+ inst.operands[2].imm);
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ unsigned int tmp;
+
+ /* See note in do_neon_shl_imm. */
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+ }
+}
+
+static void
+do_neon_rshl (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ unsigned int tmp;
+
+ tmp = inst.operands[2].reg;
+ inst.operands[2].reg = inst.operands[1].reg;
+ inst.operands[1].reg = tmp;
+ neon_three_same (neon_quad (rs), et.type == NT_unsigned, et.size);
+}
+
+static int
+neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
+{
+ /* Handle .I8 pseudo-instructions. */
+ if (size == 8)
+ {
+ /* Unfortunately, this will make everything apart from zero out-of-range.
+ FIXME is this the intended semantics? There doesn't seem much point in
+ accepting .I8 if so. */
+ immediate |= immediate << 8;
+ size = 16;
+ }
+
+ if (size >= 32)
+ {
+ if (immediate == (immediate & 0x000000ff))
+ {
+ *immbits = immediate;
+ return 0x1;
+ }
+ else if (immediate == (immediate & 0x0000ff00))
+ {
+ *immbits = immediate >> 8;
+ return 0x3;
+ }
+ else if (immediate == (immediate & 0x00ff0000))
+ {
+ *immbits = immediate >> 16;
+ return 0x5;
+ }
+ else if (immediate == (immediate & 0xff000000))
+ {
+ *immbits = immediate >> 24;
+ return 0x7;
+ }
+ if ((immediate & 0xffff) != (immediate >> 16))
+ goto bad_immediate;
+ immediate &= 0xffff;
+ }
+
+ if (immediate == (immediate & 0x000000ff))
+ {
+ *immbits = immediate;
+ return 0x9;
+ }
+ else if (immediate == (immediate & 0x0000ff00))
+ {
+ *immbits = immediate >> 8;
+ return 0xb;
+ }
+
+ bad_immediate:
+ first_error (_("immediate value out of range"));
+ return FAIL;
+}
+
+/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits
+ A, B, C, D. */
+
+static int
+neon_bits_same_in_bytes (unsigned imm)
+{
+ return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff)
+ && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00)
+ && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000)
+ && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000);
+}
+
+/* For immediate of above form, return 0bABCD. */
+
+static unsigned
+neon_squash_bits (unsigned imm)
+{
+ return (imm & 0x01) | ((imm & 0x0100) >> 7) | ((imm & 0x010000) >> 14)
+ | ((imm & 0x01000000) >> 21);
+}
+
+/* Compress quarter-float representation to 0b...000 abcdefgh. */
+
+static unsigned
+neon_qfloat_bits (unsigned imm)
+{
+ return ((imm >> 19) & 0x7f) | ((imm >> 24) & 0x80);
+}
+
+/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
+ the instruction. *OP is passed as the initial value of the op field, and
+ may be set to a different value depending on the constant (i.e.
+ "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
+ MVN). If the immediate looks like a repeated parttern then also
+ try smaller element sizes. */
+
+static int
+neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
+ unsigned *immbits, int *op, int size,
+ enum neon_el_type type)
+{
+ /* Only permit float immediates (including 0.0/-0.0) if the operand type is
+ float. */
+ if (type == NT_float && !float_p)
+ return FAIL;
+
+ if (type == NT_float && is_quarter_float (immlo) && immhi == 0)
+ {
+ if (size != 32 || *op == 1)
+ return FAIL;
+ *immbits = neon_qfloat_bits (immlo);
+ return 0xf;
+ }
+
+ if (size == 64)
+ {
+ if (neon_bits_same_in_bytes (immhi)
+ && neon_bits_same_in_bytes (immlo))
+ {
+ if (*op == 1)
+ return FAIL;
+ *immbits = (neon_squash_bits (immhi) << 4)
+ | neon_squash_bits (immlo);
+ *op = 1;
+ return 0xe;
+ }
+
+ if (immhi != immlo)
+ return FAIL;
+ }
+
+ if (size >= 32)
+ {
+ if (immlo == (immlo & 0x000000ff))
+ {
+ *immbits = immlo;
+ return 0x0;
+ }
+ else if (immlo == (immlo & 0x0000ff00))
+ {
+ *immbits = immlo >> 8;
+ return 0x2;
+ }
+ else if (immlo == (immlo & 0x00ff0000))
+ {
+ *immbits = immlo >> 16;
+ return 0x4;
+ }
+ else if (immlo == (immlo & 0xff000000))
+ {
+ *immbits = immlo >> 24;
+ return 0x6;
+ }
+ else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
+ {
+ *immbits = (immlo >> 8) & 0xff;
+ return 0xc;
+ }
+ else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
+ {
+ *immbits = (immlo >> 16) & 0xff;
+ return 0xd;
+ }
+
+ if ((immlo & 0xffff) != (immlo >> 16))
+ return FAIL;
+ immlo &= 0xffff;
+ }
+
+ if (size >= 16)
+ {
+ if (immlo == (immlo & 0x000000ff))
+ {
+ *immbits = immlo;
+ return 0x8;
+ }
+ else if (immlo == (immlo & 0x0000ff00))
+ {
+ *immbits = immlo >> 8;
+ return 0xa;
+ }
+
+ if ((immlo & 0xff) != (immlo >> 8))
+ return FAIL;
+ immlo &= 0xff;
+ }
+
+ if (immlo == (immlo & 0x000000ff))
+ {
+ /* Don't allow MVN with 8-bit immediate. */
+ if (*op == 1)
+ return FAIL;
+ *immbits = immlo;
+ return 0xe;
+ }
+
+ return FAIL;
+}
+
+/* Write immediate bits [7:0] to the following locations:
+
+ |28/24|23 19|18 16|15 4|3 0|
+ | a |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
+
+ This function is used by VMOV/VMVN/VORR/VBIC. */
+
+static void
+neon_write_immbits (unsigned immbits)
+{
+ inst.instruction |= immbits & 0xf;
+ inst.instruction |= ((immbits >> 4) & 0x7) << 16;
+ inst.instruction |= ((immbits >> 7) & 0x1) << 24;
+}
+
+/* Invert low-order SIZE bits of XHI:XLO. */
+
+static void
+neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
+{
+ unsigned immlo = xlo ? *xlo : 0;
+ unsigned immhi = xhi ? *xhi : 0;
+
+ switch (size)
+ {
+ case 8:
+ immlo = (~immlo) & 0xff;
+ break;
+
+ case 16:
+ immlo = (~immlo) & 0xffff;
+ break;
+
+ case 64:
+ immhi = (~immhi) & 0xffffffff;
+ /* fall through. */
+
+ case 32:
+ immlo = (~immlo) & 0xffffffff;
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (xlo)
+ *xlo = immlo;
+
+ if (xhi)
+ *xhi = immhi;
+}
+
+static void
+do_neon_logic (void)
+{
+ if (inst.operands[2].present && inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_IGNORE_TYPE);
+ /* U bit and size field were set as part of the bitmask. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (neon_quad (rs), 0, -1);
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK);
+ enum neon_opc opcode = inst.instruction & 0x0fffffff;
+ unsigned immbits;
+ int cmode;
+
+ if (et.type == NT_invtype)
+ return;
+
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+
+ immbits = inst.operands[1].imm;
+ if (et.size == 64)
+ {
+ /* .i64 is a pseudo-op, so the immediate must be a repeating
+ pattern. */
+ if (immbits != (inst.operands[1].regisimm ?
+ inst.operands[1].reg : 0))
+ {
+ /* Set immbits to an invalid constant. */
+ immbits = 0xdeadbeef;
+ }
+ }
+
+ switch (opcode)
+ {
+ case N_MNEM_vbic:
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ case N_MNEM_vorr:
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ case N_MNEM_vand:
+ /* Pseudo-instruction for VBIC. */
+ neon_invert_size (&immbits, 0, et.size);
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ case N_MNEM_vorn:
+ /* Pseudo-instruction for VORR. */
+ neon_invert_size (&immbits, 0, et.size);
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (cmode == FAIL)
+ return;
+
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= cmode << 8;
+ neon_write_immbits (immbits);
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+}
+
+static void
+do_neon_bitfield (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_IGNORE_TYPE);
+ neon_three_same (neon_quad (rs), 0, -1);
+}
+
+static void
+neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
+ unsigned destbits)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs, N_EQK | destbits, N_EQK,
+ types | N_KEY);
+ if (et.type == NT_float)
+ {
+ inst.instruction = NEON_ENC_FLOAT (inst.instruction);
+ neon_three_same (neon_quad (rs), 0, -1);
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (neon_quad (rs), et.type == ubit_meaning, et.size);
+ }
+}
+
+static void
+do_neon_dyadic_if_su (void)
+{
+ neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
+}
+
+static void
+do_neon_dyadic_if_su_d (void)
+{
+ /* This version only allow D registers, but that constraint is enforced during
+ operand parsing so we don't need to do anything extra here. */
+ neon_dyadic_misc (NT_unsigned, N_SUF_32, 0);
+}
+
+static void
+do_neon_dyadic_if_i_d (void)
+{
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+}
+
+enum vfp_or_neon_is_neon_bits
+{
+ NEON_CHECK_CC = 1,
+ NEON_CHECK_ARCH = 2
+};
+
+/* Call this function if an instruction which may have belonged to the VFP or
+ Neon instruction sets, but turned out to be a Neon instruction (due to the
+ operand types involved, etc.). We have to check and/or fix-up a couple of
+ things:
+
+ - Make sure the user hasn't attempted to make a Neon instruction
+ conditional.
+ - Alter the value in the condition code field if necessary.
+ - Make sure that the arch supports Neon instructions.
+
+ Which of these operations take place depends on bits from enum
+ vfp_or_neon_is_neon_bits.
+
+ WARNING: This function has side effects! If NEON_CHECK_CC is used and the
+ current instruction's condition is COND_ALWAYS, the condition field is
+ changed to inst.uncond_value. This is necessary because instructions shared
+ between VFP and Neon may be conditional for the VFP variants only, and the
+ unconditional Neon version must have, e.g., 0xF in the condition field. */
+
+static int
+vfp_or_neon_is_neon (unsigned check)
+{
+ /* Conditions are always legal in Thumb mode (IT blocks). */
+ if (!thumb_mode && (check & NEON_CHECK_CC))
+ {
+ if (inst.cond != COND_ALWAYS)
+ {
+ first_error (_(BAD_COND));
+ return FAIL;
+ }
+ if (inst.uncond_value != -1)
+ inst.instruction |= inst.uncond_value << 28;
+ }
+
+ if ((check & NEON_CHECK_ARCH)
+ && !ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1))
+ {
+ first_error (_(BAD_FPU));
+ return FAIL;
+ }
+
+ return SUCCESS;
+}
+
+static void
+do_neon_addsub_if_i (void)
+{
+ if (try_vfp_nsyn (3, do_vfp_nsyn_add_sub) == SUCCESS)
+ return;
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic_misc (NT_untyped, N_IF_32 | N_I64, 0);
+}
+
+/* Swaps operands 1 and 2. If operand 1 (optional arg) was omitted, we want the
+ result to be:
+ V<op> A,B (A is operand 0, B is operand 2)
+ to mean:
+ V<op> A,B,A
+ not:
+ V<op> A,B,B
+ so handle that case specially. */
+
+static void
+neon_exchange_operands (void)
+{
+ void *scratch = alloca (sizeof (inst.operands[0]));
+ if (inst.operands[1].present)
+ {
+ /* Swap operands[1] and operands[2]. */
+ memcpy (scratch, &inst.operands[1], sizeof (inst.operands[0]));
+ inst.operands[1] = inst.operands[2];
+ memcpy (&inst.operands[2], scratch, sizeof (inst.operands[0]));
+ }
+ else
+ {
+ inst.operands[1] = inst.operands[2];
+ inst.operands[2] = inst.operands[0];
+ }
+}
+
+static void
+neon_compare (unsigned regtypes, unsigned immtypes, int invert)
+{
+ if (inst.operands[2].isreg)
+ {
+ if (invert)
+ neon_exchange_operands ();
+ neon_dyadic_misc (NT_unsigned, regtypes, N_SIZ);
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_SIZ, immtypes | N_KEY);
+
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= (et.type == NT_float) << 10;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+}
+
+static void
+do_neon_cmp (void)
+{
+ neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+}
+
+static void
+do_neon_cmp_inv (void)
+{
+ neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+}
+
+static void
+do_neon_ceq (void)
+{
+ neon_compare (N_IF_32, N_IF_32, FALSE);
+}
+
+/* For multiply instructions, we have the possibility of 16-bit or 32-bit
+ scalars, which are encoded in 5 bits, M : Rm.
+ For 16-bit scalars, the register is encoded in Rm[2:0] and the index in
+ M:Rm[3], and for 32-bit scalars, the register is encoded in Rm[3:0] and the
+ index in M. */
+
+static unsigned
+neon_scalar_for_mul (unsigned scalar, unsigned elsize)
+{
+ unsigned regno = NEON_SCALAR_REG (scalar);
+ unsigned elno = NEON_SCALAR_INDEX (scalar);
+
+ switch (elsize)
+ {
+ case 16:
+ if (regno > 7 || elno > 3)
+ goto bad_scalar;
+ return regno | (elno << 3);
+
+ case 32:
+ if (regno > 15 || elno > 1)
+ goto bad_scalar;
+ return regno | (elno << 4);
+
+ default:
+ bad_scalar:
+ first_error (_("scalar out of range for multiply instruction"));
+ }
+
+ return 0;
+}
+
+/* Encode multiply / multiply-accumulate scalar instructions. */
+
+static void
+neon_mul_mac (struct neon_type_el et, int ubit)
+{
+ unsigned scalar;
+
+ /* Give a more helpful error message if we have an invalid type. */
+ if (et.type == NT_invtype)
+ return;
+
+ scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (scalar);
+ inst.instruction |= HI1 (scalar) << 5;
+ inst.instruction |= (et.type == NT_float) << 8;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= (ubit != 0) << 24;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_mac_maybe_scalar (void)
+{
+ if (try_vfp_nsyn (3, do_vfp_nsyn_mla_mls) == SUCCESS)
+ return;
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ if (inst.operands[2].isscalar)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, neon_quad (rs));
+ }
+ else
+ {
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+ }
+}
+
+static void
+do_neon_tst (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ neon_three_same (neon_quad (rs), 0, et.size);
+}
+
+/* VMUL with 3 registers allows the P8 type. The scalar version supports the
+ same types as the MAC equivalents. The polynomial type for this instruction
+ is encoded the same as the integer type. */
+
+static void
+do_neon_mul (void)
+{
+ if (try_vfp_nsyn (3, do_vfp_nsyn_mul) == SUCCESS)
+ return;
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ if (inst.operands[2].isscalar)
+ do_neon_mac_maybe_scalar ();
+ else
+ neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0);
+}
+
+static void
+do_neon_qdmulh (void)
+{
+ if (inst.operands[2].isscalar)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, neon_quad (rs));
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ /* The U bit (rounding) comes from bit mask. */
+ neon_three_same (neon_quad (rs), 0, et.size);
+ }
+}
+
+static void
+do_neon_fcmp_absolute (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+ /* Size field comes from bit mask. */
+ neon_three_same (neon_quad (rs), 1, -1);
+}
+
+static void
+do_neon_fcmp_absolute_inv (void)
+{
+ neon_exchange_operands ();
+ do_neon_fcmp_absolute ();
+}
+
+static void
+do_neon_step (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+ neon_three_same (neon_quad (rs), 0, -1);
+}
+
+static void
+do_neon_abs_neg (void)
+{
+ enum neon_shape rs;
+ struct neon_type_el et;
+
+ if (try_vfp_nsyn (2, do_vfp_nsyn_abs_neg) == SUCCESS)
+ return;
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= (et.type == NT_float) << 10;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_sli (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 0 || (unsigned)imm >= et.size,
+ _("immediate out of range for insert"));
+ neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm);
+}
+
+static void
+do_neon_sri (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for insert"));
+ neon_imm_shift (FALSE, 0, neon_quad (rs), et, et.size - imm);
+}
+
+static void
+do_neon_qshlu_imm (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 0 || (unsigned)imm >= et.size,
+ _("immediate out of range for shift"));
+ /* Only encodes the 'U present' variant of the instruction.
+ In this case, signed types have OP (bit 8) set to 0.
+ Unsigned types have OP set to 1. */
+ inst.instruction |= (et.type == NT_unsigned) << 8;
+ /* The rest of the bits are the same as other immediate shifts. */
+ neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm);
+}
+
+static void
+do_neon_qmovn (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+ /* Saturating move where operands can be signed or unsigned, and the
+ destination has the same signedness. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ if (et.type == NT_unsigned)
+ inst.instruction |= 0xc0;
+ else
+ inst.instruction |= 0x80;
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_qmovun (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+ /* Saturating move with unsigned results. Operands must be signed. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_rshift_sat_narrow (void)
+{
+ /* FIXME: Types for narrowing. If operands are signed, results can be signed
+ or unsigned. If operands are unsigned, results must also be unsigned. */
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* VQ{R}SHRN.I<size> <Dd>, <Qm>, #0 is a synonym for
+ VQMOVN.I<size> <Dd>, <Qm>. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vqmovn;
+ do_neon_qmovn ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range"));
+ neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - imm);
+}
+
+static void
+do_neon_rshift_sat_narrow_u (void)
+{
+ /* FIXME: Types for narrowing. If operands are signed, results can be signed
+ or unsigned. If operands are unsigned, results must also be unsigned. */
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* VQSHRUN.I<size> <Dd>, <Qm>, #0 is a synonym for
+ VQMOVUN.I<size> <Dd>, <Qm>. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vqmovun;
+ do_neon_qmovun ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range"));
+ /* FIXME: The manual is kind of unclear about what value U should have in
+ VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it
+ must be 1. */
+ neon_imm_shift (TRUE, 1, 0, et, et.size - imm);
+}
+
+static void
+do_neon_movn (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_rshift_narrow (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* If immediate is zero then we are a pseudo-instruction for
+ VMOVN.I<size> <Dd>, <Qm> */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vmovn;
+ do_neon_movn ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for narrowing operation"));
+ neon_imm_shift (FALSE, 0, 0, et, et.size - imm);
+}
+
+static void
+do_neon_shll (void)
+{
+ /* FIXME: Type checking when lengthening. */
+ struct neon_type_el et = neon_check_type (2, NS_QDI,
+ N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY);
+ unsigned imm = inst.operands[2].imm;
+
+ if (imm == et.size)
+ {
+ /* Maximum shift variant. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ else
+ {
+ /* A more-specific type check for non-max versions. */
+ et = neon_check_type (2, NS_QDI,
+ N_EQK | N_DBL, N_SU_32 | N_KEY);
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm);
+ }
+}
+
+/* Check the various types for the VCVT instruction, and return which version
+ the current instruction is. */
+
+static int
+neon_cvt_flavour (enum neon_shape rs)
+{
+#define CVT_VAR(C,X,Y) \
+ et = neon_check_type (2, rs, whole_reg | (X), whole_reg | (Y)); \
+ if (et.type != NT_invtype) \
+ { \
+ inst.error = NULL; \
+ return (C); \
+ }
+ struct neon_type_el et;
+ unsigned whole_reg = (rs == NS_FFI || rs == NS_FD || rs == NS_DF
+ || rs == NS_FF) ? N_VFP : 0;
+ /* The instruction versions which take an immediate take one register
+ argument, which is extended to the width of the full register. Thus the
+ "source" and "destination" registers must have the same width. Hack that
+ here by making the size equal to the key (wider, in this case) operand. */
+ unsigned key = (rs == NS_QQI || rs == NS_DDI || rs == NS_FFI) ? N_KEY : 0;
+
+ CVT_VAR (0, N_S32, N_F32);
+ CVT_VAR (1, N_U32, N_F32);
+ CVT_VAR (2, N_F32, N_S32);
+ CVT_VAR (3, N_F32, N_U32);
+
+ whole_reg = N_VFP;
+
+ /* VFP instructions. */
+ CVT_VAR (4, N_F32, N_F64);
+ CVT_VAR (5, N_F64, N_F32);
+ CVT_VAR (6, N_S32, N_F64 | key);
+ CVT_VAR (7, N_U32, N_F64 | key);
+ CVT_VAR (8, N_F64 | key, N_S32);
+ CVT_VAR (9, N_F64 | key, N_U32);
+ /* VFP instructions with bitshift. */
+ CVT_VAR (10, N_F32 | key, N_S16);
+ CVT_VAR (11, N_F32 | key, N_U16);
+ CVT_VAR (12, N_F64 | key, N_S16);
+ CVT_VAR (13, N_F64 | key, N_U16);
+ CVT_VAR (14, N_S16, N_F32 | key);
+ CVT_VAR (15, N_U16, N_F32 | key);
+ CVT_VAR (16, N_S16, N_F64 | key);
+ CVT_VAR (17, N_U16, N_F64 | key);
+
+ return -1;
+#undef CVT_VAR
+}
+
+/* Neon-syntax VFP conversions. */
+
+static void
+do_vfp_nsyn_cvt (enum neon_shape rs, int flavour)
+{
+ const char *opname = 0;
+
+ if (rs == NS_DDI || rs == NS_QQI || rs == NS_FFI)
+ {
+ /* Conversions with immediate bitshift. */
+ const char *enc[] =
+ {
+ "ftosls",
+ "ftouls",
+ "fsltos",
+ "fultos",
+ NULL,
+ NULL,
+ "ftosld",
+ "ftould",
+ "fsltod",
+ "fultod",
+ "fshtos",
+ "fuhtos",
+ "fshtod",
+ "fuhtod",
+ "ftoshs",
+ "ftouhs",
+ "ftoshd",
+ "ftouhd"
+ };
+
+ if (flavour >= 0 && flavour < (int) ARRAY_SIZE (enc))
+ {
+ opname = enc[flavour];
+ constraint (inst.operands[0].reg != inst.operands[1].reg,
+ _("operands 0 and 1 must be the same register"));
+ inst.operands[1] = inst.operands[2];
+ memset (&inst.operands[2], '\0', sizeof (inst.operands[2]));
+ }
+ }
+ else
+ {
+ /* Conversions without bitshift. */
+ const char *enc[] =
+ {
+ "ftosis",
+ "ftouis",
+ "fsitos",
+ "fuitos",
+ "fcvtsd",
+ "fcvtds",
+ "ftosid",
+ "ftouid",
+ "fsitod",
+ "fuitod"
+ };
+
+ if (flavour >= 0 && flavour < (int) ARRAY_SIZE (enc))
+ opname = enc[flavour];
+ }
+
+ if (opname)
+ do_vfp_nsyn_opcode (opname);
+}
+
+static void
+do_vfp_nsyn_cvtz (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_NULL);
+ int flavour = neon_cvt_flavour (rs);
+ const char *enc[] =
+ {
+ "ftosizs",
+ "ftouizs",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ "ftosizd",
+ "ftouizd"
+ };
+
+ if (flavour >= 0 && flavour < (int) ARRAY_SIZE (enc) && enc[flavour])
+ do_vfp_nsyn_opcode (enc[flavour]);
+}
+
+static void
+do_neon_cvt (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ,
+ NS_FD, NS_DF, NS_FF, NS_NULL);
+ int flavour = neon_cvt_flavour (rs);
+
+ /* VFP rather than Neon conversions. */
+ if (flavour >= 4)
+ {
+ do_vfp_nsyn_cvt (rs, flavour);
+ return;
+ }
+
+ switch (rs)
+ {
+ case NS_DDI:
+ case NS_QQI:
+ {
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ /* Fixed-point conversion with #0 immediate is encoded as an
+ integer conversion. */
+ if (inst.operands[2].present && inst.operands[2].imm == 0)
+ goto int_encode;
+ unsigned immbits = 32 - inst.operands[2].imm;
+ unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ if (flavour != -1)
+ inst.instruction |= enctab[flavour];
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= 1 << 21;
+ inst.instruction |= immbits << 16;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ break;
+
+ case NS_DD:
+ case NS_QQ:
+ int_encode:
+ {
+ unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
+
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+
+ if (flavour != -1)
+ inst.instruction |= enctab[flavour];
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= 2 << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ break;
+
+ default:
+ /* Some VFP conversions go here (s32 <-> f32, u32 <-> f32). */
+ do_vfp_nsyn_cvt (rs, flavour);
+ }
+}
+
+static void
+neon_move_immediate (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_I8 | N_I16 | N_I32 | N_I64 | N_F32 | N_KEY, N_EQK);
+ unsigned immlo, immhi = 0, immbits;
+ int op, cmode, float_p;
+
+ constraint (et.type == NT_invtype,
+ _("operand size must be specified for immediate VMOV"));
+
+ /* We start out as an MVN instruction if OP = 1, MOV otherwise. */
+ op = (inst.instruction & (1 << 5)) != 0;
+
+ immlo = inst.operands[1].imm;
+ if (inst.operands[1].regisimm)
+ immhi = inst.operands[1].reg;
+
+ constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0,
+ _("immediate has bits set outside the operand size"));
+
+ float_p = inst.operands[1].immisfloat;
+
+ if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits, &op,
+ et.size, et.type)) == FAIL)
+ {
+ /* Invert relevant bits only. */
+ neon_invert_size (&immlo, &immhi, et.size);
+ /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable
+ with one or the other; those cases are caught by
+ neon_cmode_for_move_imm. */
+ op = !op;
+ if ((cmode = neon_cmode_for_move_imm (immlo, immhi, float_p, &immbits,
+ &op, et.size, et.type)) == FAIL)
+ {
+ first_error (_("immediate out of range"));
+ return;
+ }
+ }
+
+ inst.instruction &= ~(1 << 5);
+ inst.instruction |= op << 5;
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= cmode << 8;
+
+ neon_write_immbits (immbits);
+}
+
+static void
+do_neon_mvn (void)
+{
+ if (inst.operands[1].isreg)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_move_immediate ();
+ }
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of form:
+
+ |28/24|23|22|21 20|19 16|15 12|11 8|7|6|5|4|3 0|
+ | U |x |D |size | Rn | Rd |x x x x|N|x|M|x| Rm |
+
+*/
+
+static void
+neon_mixed_length (struct neon_type_el et, unsigned size)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (et.type == NT_unsigned) << 24;
+ inst.instruction |= neon_logbits (size) << 20;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_dyadic_long (void)
+{
+ /* FIXME: Type checking for lengthening op. */
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+do_neon_abal (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes)
+{
+ if (inst.operands[2].isscalar)
+ {
+ struct neon_type_el et = neon_check_type (3, NS_QDS,
+ N_EQK | N_DBL, N_EQK, regtypes | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, et.type == NT_unsigned);
+ }
+ else
+ {
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, scalartypes | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_mixed_length (et, et.size);
+ }
+}
+
+static void
+do_neon_mac_maybe_scalar_long (void)
+{
+ neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32);
+}
+
+static void
+do_neon_dyadic_wide (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QQD,
+ N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+do_neon_dyadic_narrow (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY);
+ /* Operand sign is unimportant, and the U bit is part of the opcode,
+ so force the operand type to integer. */
+ et.type = NT_integer;
+ neon_mixed_length (et, et.size / 2);
+}
+
+static void
+do_neon_mul_sat_scalar_long (void)
+{
+ neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32);
+}
+
+static void
+do_neon_vmull (void)
+{
+ if (inst.operands[2].isscalar)
+ do_neon_mac_maybe_scalar_long ();
+ else
+ {
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_KEY);
+ if (et.type == NT_poly)
+ inst.instruction = NEON_ENC_POLY (inst.instruction);
+ else
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ /* For polynomial encoding, size field must be 0b00 and the U bit must be
+ zero. Should be OK as-is. */
+ neon_mixed_length (et, et.size);
+ }
+}
+
+static void
+do_neon_ext (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ unsigned imm = (inst.operands[3].imm * et.size) / 8;
+ constraint (imm >= (neon_quad (rs) ? 16 : 8), _("shift out of range"));
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= imm << 8;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_rev (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ unsigned op = (inst.instruction >> 7) & 3;
+ /* N (width of reversed regions) is encoded as part of the bitmask. We
+ extract it here to check the elements to be reversed are smaller.
+ Otherwise we'd get a reserved instruction. */
+ unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0;
+ assert (elsize != 0);
+ constraint (et.size >= elsize,
+ _("elements must be smaller than reversal region"));
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_dup (void)
+{
+ if (inst.operands[1].isscalar)
+ {
+ enum neon_shape rs = neon_select_shape (NS_DS, NS_QS, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ unsigned sizebits = et.size >> 3;
+ unsigned dm = NEON_SCALAR_REG (inst.operands[1].reg);
+ int logsize = neon_logbits (et.size);
+ unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg) << logsize;
+
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC) == FAIL)
+ return;
+
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (dm);
+ inst.instruction |= HI1 (dm) << 5;
+ inst.instruction |= neon_quad (rs) << 6;
+ inst.instruction |= x << 17;
+ inst.instruction |= sizebits << 16;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ else
+ {
+ enum neon_shape rs = neon_select_shape (NS_DR, NS_QR, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_8 | N_16 | N_32 | N_KEY, N_EQK);
+ /* Duplicate ARM register to lanes of vector. */
+ inst.instruction = NEON_ENC_ARMREG (inst.instruction);
+ switch (et.size)
+ {
+ case 8: inst.instruction |= 0x400000; break;
+ case 16: inst.instruction |= 0x000020; break;
+ case 32: inst.instruction |= 0x000000; break;
+ default: break;
+ }
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 7;
+ inst.instruction |= neon_quad (rs) << 21;
+ /* The encoding for this instruction is identical for the ARM and Thumb
+ variants, except for the condition field. */
+ do_vfp_cond_or_thumb ();
+ }
+}
+
+/* VMOV has particularly many variations. It can be one of:
+ 0. VMOV<c><q> <Qd>, <Qm>
+ 1. VMOV<c><q> <Dd>, <Dm>
+ (Register operations, which are VORR with Rm = Rn.)
+ 2. VMOV<c><q>.<dt> <Qd>, #<imm>
+ 3. VMOV<c><q>.<dt> <Dd>, #<imm>
+ (Immediate loads.)
+ 4. VMOV<c><q>.<size> <Dn[x]>, <Rd>
+ (ARM register to scalar.)
+ 5. VMOV<c><q> <Dm>, <Rd>, <Rn>
+ (Two ARM registers to vector.)
+ 6. VMOV<c><q>.<dt> <Rd>, <Dn[x]>
+ (Scalar to ARM register.)
+ 7. VMOV<c><q> <Rd>, <Rn>, <Dm>
+ (Vector to two ARM registers.)
+ 8. VMOV.F32 <Sd>, <Sm>
+ 9. VMOV.F64 <Dd>, <Dm>
+ (VFP register moves.)
+ 10. VMOV.F32 <Sd>, #imm
+ 11. VMOV.F64 <Dd>, #imm
+ (VFP float immediate load.)
+ 12. VMOV <Rd>, <Sm>
+ (VFP single to ARM reg.)
+ 13. VMOV <Sd>, <Rm>
+ (ARM reg to VFP single.)
+ 14. VMOV <Rd>, <Re>, <Sn>, <Sm>
+ (Two ARM regs to two VFP singles.)
+ 15. VMOV <Sd>, <Se>, <Rn>, <Rm>
+ (Two VFP singles to two ARM regs.)
+
+ These cases can be disambiguated using neon_select_shape, except cases 1/9
+ and 3/11 which depend on the operand type too.
+
+ All the encoded bits are hardcoded by this function.
+
+ Cases 4, 6 may be used with VFPv1 and above (only 32-bit transfers!).
+ Cases 5, 7 may be used with VFPv2 and above.
+
+ FIXME: Some of the checking may be a bit sloppy (in a couple of cases you
+ can specify a type where it doesn't make sense to, and is ignored).
+*/
+
+static void
+do_neon_mov (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD,
+ NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, NS_RF, NS_FR,
+ NS_NULL);
+ struct neon_type_el et;
+ const char *ldconst = 0;
+
+ switch (rs)
+ {
+ case NS_DD: /* case 1/9. */
+ et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY);
+ /* It is not an error here if no type is given. */
+ inst.error = NULL;
+ if (et.type == NT_float && et.size == 64)
+ {
+ do_vfp_nsyn_opcode ("fcpyd");
+ break;
+ }
+ /* fall through. */
+
+ case NS_QQ: /* case 0/1. */
+ {
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+ /* The architecture manual I have doesn't explicitly state which
+ value the U bit should have for register->register moves, but
+ the equivalent VORR instruction has U = 0, so do that. */
+ inst.instruction = 0x0200110;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= neon_quad (rs) << 6;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ break;
+
+ case NS_DI: /* case 3/11. */
+ et = neon_check_type (2, rs, N_EQK, N_F64 | N_KEY);
+ inst.error = NULL;
+ if (et.type == NT_float && et.size == 64)
+ {
+ /* case 11 (fconstd). */
+ ldconst = "fconstd";
+ goto encode_fconstd;
+ }
+ /* fall through. */
+
+ case NS_QI: /* case 2/3. */
+ if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+ return;
+ inst.instruction = 0x0800010;
+ neon_move_immediate ();
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ break;
+
+ case NS_SR: /* case 4. */
+ {
+ unsigned bcdebits = 0;
+ struct neon_type_el et = neon_check_type (2, NS_NULL,
+ N_8 | N_16 | N_32 | N_KEY, N_EQK);
+ int logsize = neon_logbits (et.size);
+ unsigned dn = NEON_SCALAR_REG (inst.operands[0].reg);
+ unsigned x = NEON_SCALAR_INDEX (inst.operands[0].reg);
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
+ _(BAD_FPU));
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
+ && et.size != 32, _(BAD_FPU));
+ constraint (et.type == NT_invtype, _("bad type for scalar"));
+ constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+ switch (et.size)
+ {
+ case 8: bcdebits = 0x8; break;
+ case 16: bcdebits = 0x1; break;
+ case 32: bcdebits = 0x0; break;
+ default: ;
+ }
+
+ bcdebits |= x << logsize;
+
+ inst.instruction = 0xe000b10;
+ do_vfp_cond_or_thumb ();
+ inst.instruction |= LOW4 (dn) << 16;
+ inst.instruction |= HI1 (dn) << 7;
+ inst.instruction |= inst.operands[1].reg << 12;
+ inst.instruction |= (bcdebits & 3) << 5;
+ inst.instruction |= (bcdebits >> 2) << 21;
+ }
+ break;
+
+ case NS_DRR: /* case 5 (fmdrr). */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2),
+ _(BAD_FPU));
+
+ inst.instruction = 0xc400b10;
+ do_vfp_cond_or_thumb ();
+ inst.instruction |= LOW4 (inst.operands[0].reg);
+ inst.instruction |= HI1 (inst.operands[0].reg) << 5;
+ inst.instruction |= inst.operands[1].reg << 12;
+ inst.instruction |= inst.operands[2].reg << 16;
+ break;
+
+ case NS_RS: /* case 6. */
+ {
+ struct neon_type_el et = neon_check_type (2, NS_NULL,
+ N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY);
+ unsigned logsize = neon_logbits (et.size);
+ unsigned dn = NEON_SCALAR_REG (inst.operands[1].reg);
+ unsigned x = NEON_SCALAR_INDEX (inst.operands[1].reg);
+ unsigned abcdebits = 0;
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
+ _(BAD_FPU));
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1)
+ && et.size != 32, _(BAD_FPU));
+ constraint (et.type == NT_invtype, _("bad type for scalar"));
+ constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+ switch (et.size)
+ {
+ case 8: abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break;
+ case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break;
+ case 32: abcdebits = 0x00; break;
+ default: ;
+ }
+
+ abcdebits |= x << logsize;
+ inst.instruction = 0xe100b10;
+ do_vfp_cond_or_thumb ();
+ inst.instruction |= LOW4 (dn) << 16;
+ inst.instruction |= HI1 (dn) << 7;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= (abcdebits & 3) << 5;
+ inst.instruction |= (abcdebits >> 2) << 21;
+ }
+ break;
+
+ case NS_RRD: /* case 7 (fmrrd). */
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2),
+ _(BAD_FPU));
+
+ inst.instruction = 0xc500b10;
+ do_vfp_cond_or_thumb ();
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= inst.operands[1].reg << 16;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ break;
+
+ case NS_FF: /* case 8 (fcpys). */
+ do_vfp_nsyn_opcode ("fcpys");
+ break;
+
+ case NS_FI: /* case 10 (fconsts). */
+ ldconst = "fconsts";
+ encode_fconstd:
+ if (is_quarter_float (inst.operands[1].imm))
+ {
+ inst.operands[1].imm = neon_qfloat_bits (inst.operands[1].imm);
+ do_vfp_nsyn_opcode (ldconst);
+ }
+ else
+ first_error (_("immediate out of range"));
+ break;
+
+ case NS_RF: /* case 12 (fmrs). */
+ do_vfp_nsyn_opcode ("fmrs");
+ break;
+
+ case NS_FR: /* case 13 (fmsr). */
+ do_vfp_nsyn_opcode ("fmsr");
+ break;
+
+ /* The encoders for the fmrrs and fmsrr instructions expect three operands
+ (one of which is a list), but we have parsed four. Do some fiddling to
+ make the operands what do_vfp_reg2_from_sp2 and do_vfp_sp2_from_reg2
+ expect. */
+ case NS_RRFF: /* case 14 (fmrrs). */
+ constraint (inst.operands[3].reg != inst.operands[2].reg + 1,
+ _("VFP registers must be adjacent"));
+ inst.operands[2].imm = 2;
+ memset (&inst.operands[3], '\0', sizeof (inst.operands[3]));
+ do_vfp_nsyn_opcode ("fmrrs");
+ break;
+
+ case NS_FFRR: /* case 15 (fmsrr). */
+ constraint (inst.operands[1].reg != inst.operands[0].reg + 1,
+ _("VFP registers must be adjacent"));
+ inst.operands[1] = inst.operands[2];
+ inst.operands[2] = inst.operands[3];
+ inst.operands[0].imm = 2;
+ memset (&inst.operands[3], '\0', sizeof (inst.operands[3]));
+ do_vfp_nsyn_opcode ("fmsrr");
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+static void
+do_neon_rshift_round_imm (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ int imm = inst.operands[2].imm;
+
+ /* imm == 0 case is encoded as VMOV for V{R}SHR. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ do_neon_mov ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for shift"));
+ neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
+ et.size - imm);
+}
+
+static void
+do_neon_movl (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_QD,
+ N_EQK | N_DBL, N_SU_32 | N_KEY);
+ unsigned sizebits = et.size >> 3;
+ inst.instruction |= sizebits << 19;
+ neon_two_same (0, et.type == NT_unsigned, -1);
+}
+
+static void
+do_neon_trn (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_zip_uzp (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ if (rs == NS_DD && et.size == 32)
+ {
+ /* Special case: encode as VTRN.32 <Dd>, <Dm>. */
+ inst.instruction = N_MNEM_vtrn;
+ do_neon_trn ();
+ return;
+ }
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_sat_abs_neg (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_pair_long (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY);
+ /* Unsigned is encoded in OP field (bit 7) for these instruction. */
+ inst.instruction |= (et.type == NT_unsigned) << 7;
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_recip_est (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+ inst.instruction |= (et.type == NT_float) << 8;
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_cls (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_clz (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_I8 | N_I16 | N_I32 | N_KEY);
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_cnt (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_INT, N_8 | N_KEY);
+ neon_two_same (neon_quad (rs), 1, et.size);
+}
+
+static void
+do_neon_swp (void)
+{
+ enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
+ neon_two_same (neon_quad (rs), 1, -1);
+}
+
+static void
+do_neon_tbl_tbx (void)
+{
+ unsigned listlenbits;
+ neon_check_type (3, NS_DLD, N_EQK, N_EQK, N_8 | N_KEY);
+
+ if (inst.operands[1].imm < 1 || inst.operands[1].imm > 4)
+ {
+ first_error (_("bad list length for table lookup"));
+ return;
+ }
+
+ listlenbits = inst.operands[1].imm - 1;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= listlenbits << 8;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_ldm_stm (void)
+{
+ /* P, U and L bits are part of bitmask. */
+ int is_dbmode = (inst.instruction & (1 << 24)) != 0;
+ unsigned offsetbits = inst.operands[1].imm * 2;
+
+ if (inst.operands[1].issingle)
+ {
+ do_vfp_nsyn_ldm_stm (is_dbmode);
+ return;
+ }
+
+ constraint (is_dbmode && !inst.operands[0].writeback,
+ _("writeback (!) must be used for VLDMDB and VSTMDB"));
+
+ constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+ _("register list must contain at least 1 and at most 16 "
+ "registers"));
+
+ inst.instruction |= inst.operands[0].reg << 16;
+ inst.instruction |= inst.operands[0].writeback << 21;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 22;
+
+ inst.instruction |= offsetbits;
+
+ do_vfp_cond_or_thumb ();
+}
+
+static void
+do_neon_ldr_str (void)
+{
+ int is_ldr = (inst.instruction & (1 << 20)) != 0;
+
+ if (inst.operands[0].issingle)
+ {
+ if (is_ldr)
+ do_vfp_nsyn_opcode ("flds");
+ else
+ do_vfp_nsyn_opcode ("fsts");
+ }
+ else
+ {
+ if (is_ldr)
+ do_vfp_nsyn_opcode ("fldd");
+ else
+ do_vfp_nsyn_opcode ("fstd");
+ }
+}
+
+/* "interleave" version also handles non-interleaving register VLD1/VST1
+ instructions. */
+
+static void
+do_neon_ld_st_interleave (void)
+{
+ struct neon_type_el et = neon_check_type (1, NS_NULL,
+ N_8 | N_16 | N_32 | N_64);
+ unsigned alignbits = 0;
+ unsigned idx;
+ /* The bits in this table go:
+ 0: register stride of one (0) or two (1)
+ 1,2: register list length, minus one (1, 2, 3, 4).
+ 3,4: <n> in instruction type, minus one (VLD<n> / VST<n>).
+ We use -1 for invalid entries. */
+ const int typetable[] =
+ {
+ 0x7, -1, 0xa, -1, 0x6, -1, 0x2, -1, /* VLD1 / VST1. */
+ -1, -1, 0x8, 0x9, -1, -1, 0x3, -1, /* VLD2 / VST2. */
+ -1, -1, -1, -1, 0x4, 0x5, -1, -1, /* VLD3 / VST3. */
+ -1, -1, -1, -1, -1, -1, 0x0, 0x1 /* VLD4 / VST4. */
+ };
+ int typebits;
+
+ if (et.type == NT_invtype)
+ return;
+
+ if (inst.operands[1].immisalign)
+ switch (inst.operands[1].imm >> 8)
+ {
+ case 64: alignbits = 1; break;
+ case 128:
+ if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+ goto bad_alignment;
+ alignbits = 2;
+ break;
+ case 256:
+ if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+ goto bad_alignment;
+ alignbits = 3;
+ break;
+ default:
+ bad_alignment:
+ first_error (_("bad alignment"));
+ return;
+ }
+
+ inst.instruction |= alignbits << 4;
+ inst.instruction |= neon_logbits (et.size) << 6;
+
+ /* Bits [4:6] of the immediate in a list specifier encode register stride
+ (minus 1) in bit 4, and list length in bits [5:6]. We put the <n> of
+ VLD<n>/VST<n> in bits [9:8] of the initial bitmask. Suck it out here, look
+ up the right value for "type" in a table based on this value and the given
+ list style, then stick it back. */
+ idx = ((inst.operands[0].imm >> 4) & 7)
+ | (((inst.instruction >> 8) & 3) << 3);
+
+ typebits = typetable[idx];
+
+ constraint (typebits == -1, _("bad list type for instruction"));
+
+ inst.instruction &= ~0xf00;
+ inst.instruction |= typebits << 8;
+}
+
+/* Check alignment is valid for do_neon_ld_st_lane and do_neon_ld_dup.
+ *DO_ALIGN is set to 1 if the relevant alignment bit should be set, 0
+ otherwise. The variable arguments are a list of pairs of legal (size, align)
+ values, terminated with -1. */
+
+static int
+neon_alignment_bit (int size, int align, int *do_align, ...)
+{
+ va_list ap;
+ int result = FAIL, thissize, thisalign;
+
+ if (!inst.operands[1].immisalign)
+ {
+ *do_align = 0;
+ return SUCCESS;
+ }
+
+ va_start (ap, do_align);
+
+ do
+ {
+ thissize = va_arg (ap, int);
+ if (thissize == -1)
+ break;
+ thisalign = va_arg (ap, int);
+
+ if (size == thissize && align == thisalign)
+ result = SUCCESS;
+ }
+ while (result != SUCCESS);
+
+ va_end (ap);
+
+ if (result == SUCCESS)
+ *do_align = 1;
+ else
+ first_error (_("unsupported alignment for instruction"));
+
+ return result;
+}
+
+static void
+do_neon_ld_st_lane (void)
+{
+ struct neon_type_el et = neon_check_type (1, NS_NULL, N_8 | N_16 | N_32);
+ int align_good, do_align = 0;
+ int logsize = neon_logbits (et.size);
+ int align = inst.operands[1].imm >> 8;
+ int n = (inst.instruction >> 8) & 3;
+ int max_el = 64 / et.size;
+
+ if (et.type == NT_invtype)
+ return;
+
+ constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1,
+ _("bad list length"));
+ constraint (NEON_LANE (inst.operands[0].imm) >= max_el,
+ _("scalar index out of range"));
+ constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2
+ && et.size == 8,
+ _("stride of 2 unavailable when element size is 8"));
+
+ switch (n)
+ {
+ case 0: /* VLD1 / VST1. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 16, 16,
+ 32, 32, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ {
+ unsigned alignbits = 0;
+ switch (et.size)
+ {
+ case 16: alignbits = 0x1; break;
+ case 32: alignbits = 0x3; break;
+ default: ;
+ }
+ inst.instruction |= alignbits << 4;
+ }
+ break;
+
+ case 1: /* VLD2 / VST2. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 8, 16, 16, 32,
+ 32, 64, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ inst.instruction |= 1 << 4;
+ break;
+
+ case 2: /* VLD3 / VST3. */
+ constraint (inst.operands[1].immisalign,
+ _("can't use alignment with this instruction"));
+ break;
+
+ case 3: /* VLD4 / VST4. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32,
+ 16, 64, 32, 64, 32, 128, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ {
+ unsigned alignbits = 0;
+ switch (et.size)
+ {
+ case 8: alignbits = 0x1; break;
+ case 16: alignbits = 0x1; break;
+ case 32: alignbits = (align == 64) ? 0x1 : 0x2; break;
+ default: ;
+ }
+ inst.instruction |= alignbits << 4;
+ }
+ break;
+
+ default: ;
+ }
+
+ /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32. */
+ if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+ inst.instruction |= 1 << (4 + logsize);
+
+ inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5);
+ inst.instruction |= logsize << 10;
+}
+
+/* Encode single n-element structure to all lanes VLD<n> instructions. */
+
+static void
+do_neon_ld_dup (void)