From 616ce08e1cc98c28c42bc2afed6b92df449c7b00 Mon Sep 17 00:00:00 2001 From: Matthew Malcomson Date: Thu, 7 Nov 2019 17:20:08 +0000 Subject: [PATCH] [Patch][binutils][arm] Armv8.6-A Matrix Multiply extension [9/10] Hi, This patch is part of a series that adds support for Armv8.6-A (Matrix Multiply and BFloat16 extensions) to binutils. This patch introduces the Matrix Multiply (Int8, F32, F64) extensions to the arm backend. The following Matrix Multiply instructions are added: vummla, vsmmla, vusmmla, vusdot, vsudot[1]. [1] https://developer.arm.com/docs/ddi0597/latest/simd-and-floating-point-instructions-alphabetic-order Committed on behalf of Mihail Ionescu. gas/ChangeLog: 2019-11-07 Mihail Ionescu * config/tc-arm.c (arm_ext_i8mm): New feature set. (do_vusdot): New. (do_vsudot): New. (do_vsmmla): New. (do_vummla): New. (insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics. (armv86a_ext_table): Add i8mm extension. (arm_extensions): Move bf16 extension to context sensitive table. (armv82a_ext_table, armv84a_ext_table, armv85a_ext_table): Move bf16 extension to context sensitive table. (armv86a_ext_table): Add i8mm extension. * doc/c-arm.texi: Document i8mm extension. * testsuite/gas/arm/i8mm.s: New test. * testsuite/gas/arm/i8mm.d: New test. * testsuite/gas/arm/bfloat16-cmdline-bad-3.d: Update test. include/ChangeLog: 2019-11-07 Mihail Ionescu * opcode/arm.h (ARM_EXT2_I8MM): New feature macro. opcodes/ChangeLog: 2019-11-07 Mihail Ionescu * arm-dis.c (neon_opcodes): Add i8mm SIMD instructions. Regression tested on arm-none-eabi. Is this ok for trunk? 
Regards, Mihail --- gas/ChangeLog | 18 ++++ gas/config/tc-arm.c | 87 ++++++++++++++++++- gas/doc/c-arm.texi | 1 + .../gas/arm/bfloat16-cmdline-bad-3.d | 2 +- gas/testsuite/gas/arm/i8mm.d | 36 ++++++++ gas/testsuite/gas/arm/i8mm.s | 32 +++++++ include/ChangeLog | 4 + include/opcode/arm.h | 1 + opcodes/ChangeLog | 5 ++ opcodes/arm-dis.c | 14 +++ 10 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 gas/testsuite/gas/arm/i8mm.d create mode 100644 gas/testsuite/gas/arm/i8mm.s diff --git a/gas/ChangeLog b/gas/ChangeLog index 1d835b6fde..2af2901604 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,21 @@ +2019-11-07 Mihail Ionescu + + * config/tc-arm.c (arm_ext_i8mm): New feature set. + (do_vusdot): New. + (do_vsudot): New. + (do_vsmmla): New. + (do_vummla): New. + (insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics. + (armv86a_ext_table): Add i8mm extension. + (arm_extensions): Move bf16 extension to context sensitive table. + (armv82a_ext_table, armv84a_ext_table, armv85a_ext_table): + Move bf16 extension to context sensitive table. + (armv86a_ext_table): Add i8mm extension. + * doc/c-arm.texi: Document i8mm extension. + * testsuite/gas/arm/i8mm.s: New test. + * testsuite/gas/arm/i8mm.d: New test. + * testsuite/gas/arm/bfloat16-cmdline-bad-3.d: Update test. + 2019-11-07 Mihail Ionescu * config/tc-aarch64.c: Add new arch fetures to suppport the mm extension. 
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 1d735a731e..2a884b13dc 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -277,6 +277,8 @@ static const arm_feature_set arm_ext_predres = ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES); static const arm_feature_set arm_ext_bf16 = ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16); +static const arm_feature_set arm_ext_i8mm = + ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM); static const arm_feature_set arm_arch_any = ARM_ANY; #ifdef OBJ_ELF @@ -21483,6 +21485,79 @@ do_neon_dotproduct_u (void) return do_neon_dotproduct (1); } +static void +do_vusdot (void) +{ + enum neon_shape rs; + set_pred_insn_type (OUTSIDE_PRED_INSN); + if (inst.operands[2].isscalar) + { + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); + + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint ((index != 1 && index != 0), _("index must be 0 or 1")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 16), + _("indexed register must be less than 16")); + neon_three_args (rs == NS_QQS); + inst.instruction |= (index << 5); + } + else + { + inst.instruction |= (1 << 21); + rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); + neon_three_args (rs == NS_QQQ); + } +} + +static void +do_vsudot (void) +{ + enum neon_shape rs; + set_pred_insn_type (OUTSIDE_PRED_INSN); + if (inst.operands[2].isscalar) + { + rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY); + + inst.instruction |= (1 << 25); + int index = inst.operands[2].reg & 0xf; + constraint ((index != 1 && index != 0), _("index must be 0 or 1")); + inst.operands[2].reg >>= 4; + constraint (!(inst.operands[2].reg < 16), + _("indexed register must be less than 16")); + neon_three_args (rs == NS_QQS); + inst.instruction |= (index << 5); + } +} + +static void +do_vsmmla (void) +{ + enum neon_shape rs = 
neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY); + + set_pred_insn_type (OUTSIDE_PRED_INSN); + + neon_three_args (1); + +} + +static void +do_vummla (void) +{ + enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL); + neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY); + + set_pred_insn_type (OUTSIDE_PRED_INSN); + + neon_three_args (1); + +} + /* Crypto v1 instructions. */ static void do_crypto_2op_1 (unsigned elttype, int op) @@ -26000,7 +26075,7 @@ static const struct asm_opcode insns[] = #define THUMB_VARIANT &arm_ext_i8mm TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla), TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla), - TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla), + TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla), TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot), TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot), }; @@ -31127,6 +31202,8 @@ static const struct arm_ext_table armv82a_ext_table[] = ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1), ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16), ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML), + ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)), + ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1, ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)), ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8), @@ -31143,6 +31220,8 @@ static const struct arm_ext_table armv84a_ext_table[] = { ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8), ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML), + ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)), + ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4, ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)), @@ -31158,6 +31237,8 @@ static const struct arm_ext_table armv85a_ext_table[] = { 
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8), ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML), + ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)), + ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4, ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)), @@ -31169,6 +31250,7 @@ static const struct arm_ext_table armv85a_ext_table[] = static const struct arm_ext_table armv86a_ext_table[] = { + ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE } }; @@ -31308,9 +31390,6 @@ struct arm_option_extension_value_table use the context sensitive approach using arm_ext_table's. */ static const struct arm_option_extension_value_table arm_extensions[] = { - ARM_EXT_OPT ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16), - ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16), - ARM_ARCH_V8_2A), ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8), ARM_FEATURE_CORE_LOW (ARM_EXT_V8)), ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8, diff --git a/gas/doc/c-arm.texi b/gas/doc/c-arm.texi index 8afee70120..a67bb59899 100644 --- a/gas/doc/c-arm.texi +++ b/gas/doc/c-arm.texi @@ -181,6 +181,7 @@ been added, again in ascending alphabetical order. 
For example, The following extensions are currently supported: @code{bf16} (BFloat16 extensions for v8.6-A architecture), +@code{i8mm} (Int8 Matrix Multiply extensions for v8.6-A architecture), @code{crc} @code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}), @code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}), diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d index 5dfdeb4d6c..ad99cda5dc 100644 --- a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d +++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d @@ -1,4 +1,4 @@ #name: Bfloat 16 bad extension #source: bfloat16-non-neon.s #as: -mno-warn-deprecated -march=armv8.1-a+bf16 -#error: .*Error: extension does not apply to the base architecture.* +#error: .*Error: unknown architectural extension `bf16'* diff --git a/gas/testsuite/gas/arm/i8mm.d b/gas/testsuite/gas/arm/i8mm.d new file mode 100644 index 0000000000..6d7f1d74e4 --- /dev/null +++ b/gas/testsuite/gas/arm/i8mm.d @@ -0,0 +1,36 @@ +#name: Int8 Matrix Multiply extension +#source: i8mm.s +#as: -mno-warn-deprecated -march=armv8.6-a+i8mm+simd -I$srcdir/$subdir +#objdump: -dr --show-raw-insn + +.*: +file format .*arm.* + +Disassembly of section \.text: + +00000000 <\.text>: + *[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, q0 + *[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0 + *[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0 + *[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0 + *[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\] + *[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\] + *[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\] + *[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\] + *[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0 + *[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\] + *[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\] + *[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\] + *[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\] + *[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, 
q0 + *[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0 + *[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0 + *[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0 + *[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\] + *[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\] + *[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\] + *[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\] + *[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0 + *[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\] + *[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\] + *[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\] + *[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\] diff --git a/gas/testsuite/gas/arm/i8mm.s b/gas/testsuite/gas/arm/i8mm.s new file mode 100644 index 0000000000..20d04309ce --- /dev/null +++ b/gas/testsuite/gas/arm/i8mm.s @@ -0,0 +1,32 @@ +vusmmla.s8 q10, q5, q0 +vummla.u8 q10, q5, q0 +vsmmla.s8 q10, q5, q0 + +vusdot.s8 q10, q5, q0 +vsudot.u8 q10, q5, d0[0] +vsudot.u8 q10, q5, d0[1] +vusdot.s8 q10, q5, d0[0] +vusdot.s8 q10, q5, d0[1] + +vusdot.s8 d10, d5, d0 +vusdot.s8 d10, d5, d0[0] +vusdot.s8 d10, d5, d0[1] +vsudot.u8 d10, d5, d0[0] +vsudot.u8 d10, d5, d0[1] + + +vusmmla q10.s8, q5.s8, q0.s8 +vummla q10.u8, q5.u8, q0.u8 +vsmmla q10.s8, q5.s8, q0.s8 + +vusdot q10.s8, q5.s8, q0.s8 +vsudot q10.u8, q5.u8, d0.u8[0] +vsudot q10.u8, q5.u8, d0.u8[1] +vusdot q10.s8, q5.s8, d0.s8[0] +vusdot q10.s8, q5.s8, d0.s8[1] + +vusdot d10.s8, d5.s8, d0.s8 +vusdot d10.s8, d5.s8, d0.s8[0] +vusdot d10.s8, d5.s8, d0.s8[1] +vsudot d10.u8, d5.u8, d0.u8[0] +vsudot d10.u8, d5.u8, d0.u8[1] diff --git a/include/ChangeLog b/include/ChangeLog index 2543e095b8..591ae4e773 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,7 @@ +2019-11-07 Mihail Ionescu + + * opcode/arm.h (ARM_EXT2_I8MM): New feature macro. + 2019-11-07 Mihail Ionescu * opcode/aarch64.h (AARCH64_FEATURE_I8MM): New. 
diff --git a/include/opcode/arm.h b/include/opcode/arm.h index 7aea4d6e56..982da5abbd 100644 --- a/include/opcode/arm.h +++ b/include/opcode/arm.h @@ -75,6 +75,7 @@ #define ARM_EXT2_V8_1M_MAIN 0x00008000 /* ARMv8.1-M Mainline. */ #define ARM_EXT2_V8_6A 0x00010000 /* ARM V8.6A. */ #define ARM_EXT2_BF16 0x00020000 /* ARMv8 bfloat16. */ +#define ARM_EXT2_I8MM 0x00040000 /* ARMv8.6A i8mm. */ /* Co-processor space extensions. */ #define ARM_CEXT_XSCALE 0x00000001 /* Allow MIA etc. */ diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index 21372d4b5c..b51d406511 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,8 @@ +2019-11-07 Mihail Ionescu + + * arm-dis.c (neon_opcodes): Add i8mm SIMD instructions. + + 2019-11-07 Mihail Ionescu * aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve, diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c index 50ae957656..8f82cb24e4 100644 --- a/opcodes/arm-dis.c +++ b/opcodes/arm-dis.c @@ -1471,6 +1471,20 @@ static const struct opcode32 neon_opcodes[] = {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16), 0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"}, + /* Matrix Multiply instructions. */ + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfc200c40, 0xffb00f50, "vsmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfc200c50, 0xffb00f50, "vummla.u8\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfca00c40, 0xffb00f50, "vusmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfca00d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfe800d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"}, + {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM), + 0xfe800d10, 0xffb00f10, "vsudot.u8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"}, + /* Two registers, miscellaneous. 
*/ {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8), 0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"}, -- 2.34.1