From a0a1771e895e6606a2a795c407e20aed73f69bd9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 1 Jul 2019 08:31:14 +0200 Subject: [PATCH] x86: optimize EVEX packed integer logical instructions As long as there's no write mask as well as no broadcast, and as long as the scaled Disp8 wouldn't result in a shorter EVEX encoding, encode VPAND{D,Q}, VPANDN{D,Q}, VPOR{D,Q}, and VPXOR{D,Q} acting on only the lower 16 XMM/YMM registers using their VEX equivalents with -O1. Also take the opportunity and avoid looping twice over all operands when dealing with memory-with-displacement ones. --- gas/ChangeLog | 29 ++++++++ gas/config/tc-i386.c | 30 +++++--- gas/doc/c-i386.texi | 16 ++-- gas/testsuite/gas/i386/optimize-1.d | 48 ++++++++++++ gas/testsuite/gas/i386/optimize-1.s | 54 ++++++++++++++ gas/testsuite/gas/i386/optimize-1a.d | 48 ++++++++++++ gas/testsuite/gas/i386/optimize-2.d | 64 ++++++++++++++++ gas/testsuite/gas/i386/optimize-2.s | 72 ++++++++++++++++++ gas/testsuite/gas/i386/optimize-3.d | 8 ++ gas/testsuite/gas/i386/optimize-3.s | 9 +++ gas/testsuite/gas/i386/optimize-4.d | 48 ++++++++++++ gas/testsuite/gas/i386/optimize-5.d | 56 ++++++++++++++ gas/testsuite/gas/i386/optimize-5.s | 9 +++ gas/testsuite/gas/i386/x86-64-optimize-2.d | 48 ++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-2.s | 54 ++++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-2a.d | 48 ++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-2b.d | 48 ++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-3.d | 72 ++++++++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-3.s | 81 +++++++++++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-4.d | 8 ++ gas/testsuite/gas/i386/x86-64-optimize-4.s | 9 +++ gas/testsuite/gas/i386/x86-64-optimize-5.d | 48 ++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-6.d | 56 ++++++++++++++ gas/testsuite/gas/i386/x86-64-optimize-6.s | 9 +++ opcodes/ChangeLog | 6 ++ opcodes/i386-opc.tbl | 8 +- opcodes/i386-tbl.h | 8 +- 27 files changed, 969 insertions(+), 25 
deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 0a7c2a0934..b969177059 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,32 @@ +2019-07-01 Jan Beulich + + * config/tc-i386.c (optimize_encoding): Make j unsigned. Handle + vpand{d,q}, vpandn{d,q}, vpor{d,q}, and vpxor{d,q}. Also check/ + clear broadcast. Eliminate a loop. + * doc/c-i386.texi: Update -O1 documentation. + * testsuite/gas/i386/optimize-1.s, + testsuite/gas/i386/optimize-2.s, + testsuite/gas/i386/optimize-3.s, + testsuite/gas/i386/optimize-5.s, + testsuite/gas/i386/x86-64-optimize-2.s, + testsuite/gas/i386/x86-64-optimize-3.s, + testsuite/gas/i386/x86-64-optimize-4.s, + testsuite/gas/i386/x86-64-optimize-6.s: Add vpand{d,q}, + vpandn{d,q}, vpor{d,q}, and vpxor{d,q} cases. + * testsuite/gas/i386/optimize-1.d, + testsuite/gas/i386/optimize-1a.d, + testsuite/gas/i386/optimize-2.d, + testsuite/gas/i386/optimize-3.d, + testsuite/gas/i386/optimize-4.d, + testsuite/gas/i386/optimize-5.d, + testsuite/gas/i386/x86-64-optimize-2.d, + testsuite/gas/i386/x86-64-optimize-2a.d, + testsuite/gas/i386/x86-64-optimize-2b.d, + testsuite/gas/i386/x86-64-optimize-3.d, + testsuite/gas/i386/x86-64-optimize-4.d, + testsuite/gas/i386/x86-64-optimize-5.d, + testsuite/gas/i386/x86-64-optimize-6.d: Adjust expectations. 
+ 2019-07-01 Jan Beulich * testsuite/gas/i386/avx512f_vpclmulqdq.s, diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index f6c14c9ac7..bc7d55611a 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -3897,7 +3897,7 @@ check_hle (void) static void optimize_encoding (void) { - int j; + unsigned int j; if (optimize_for_space && i.reg_operands == 1 @@ -4095,10 +4095,13 @@ optimize_encoding (void) && !i.types[0].bitfield.zmmword && !i.types[1].bitfield.zmmword && !i.mask + && !i.broadcast && is_evex_encoding (&i.tm) && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f - || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f) + || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f + || (i.tm.base_opcode & ~4) == 0x66db + || (i.tm.base_opcode & ~4) == 0x66eb) && i.tm.extension_opcode == None) { /* Optimize: -O1: @@ -4116,8 +4119,17 @@ optimize_encoding (void) -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16) EVEX VOP mem, %ymmN -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16) + VOP, one of vpand, vpandn, vpor, vpxor: + EVEX VOP{d,q} %xmmL, %xmmM, %xmmN + -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16) + EVEX VOP{d,q} %ymmL, %ymmM, %ymmN + -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16) + EVEX VOP{d,q} mem, %xmmM, %xmmN + -> VEX VOP mem, %xmmM, %xmmN (M and N < 16) + EVEX VOP{d,q} mem, %ymmM, %ymmN + -> VEX VOP mem, %ymmM, %ymmN (M and N < 16) */ - for (j = 0; j < 2; j++) + for (j = 0; j < i.operands; j++) if (operand_type_check (i.types[j], disp) && i.op[j].disps->X_op == O_constant) { @@ -4147,16 +4159,12 @@ optimize_encoding (void) i.tm.opcode_modifier.vexw = VEXW0; i.tm.opcode_modifier.evex = 0; i.tm.opcode_modifier.masking = 0; + i.tm.opcode_modifier.broadcast = 0; i.tm.opcode_modifier.disp8memshift = 0; i.memshift = 0; - for (j = 0; j < 2; j++) - if (operand_type_check (i.types[j], disp) - && i.op[j].disps->X_op == O_constant) - { - i.types[j].bitfield.disp8 - = fits_in_disp8 (i.op[j].disps->X_add_number); - 
break; - } + if (j < i.operands) + i.types[j].bitfield.disp8 + = fits_in_disp8 (i.op[j].disps->X_add_number); } } diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index 80bbcbe20f..ba20067438 100644 --- a/gas/doc/c-i386.texi +++ b/gas/doc/c-i386.texi @@ -465,13 +465,17 @@ Optimize instruction encoding with smaller instruction size. @samp{-O} and @samp{-O1} encode 64-bit register load instructions with 64-bit immediate as 32-bit register load instructions with 31-bit or 32-bits immediates, encode 64-bit register clearing instructions with 32-bit -register clearing instructions and encode 256-bit/512-bit VEX/EVEX -vector register clearing instructions with 128-bit VEX vector register -clearing instructions as well as encode 128-bit/256-bit EVEX vector +register clearing instructions, encode 256-bit/512-bit VEX/EVEX vector +register clearing instructions with 128-bit VEX vector register +clearing instructions, encode 128-bit/256-bit EVEX vector register load/store instructions with VEX vector register load/store -instructions. @samp{-O2} includes @samp{-O1} optimization plus -encodes 256-bit/512-bit EVEX vector register clearing instructions with -128-bit EVEX vector register clearing instructions. +instructions, and encode 128-bit/256-bit EVEX packed integer logical +instructions with 128-bit/256-bit VEX packed integer logical instructions. + +@samp{-O2} includes @samp{-O1} optimization plus encodes +256-bit/512-bit EVEX vector register clearing instructions with 128-bit +EVEX vector register clearing instructions. + @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit and 64-bit register tests with immediate as 8-bit register test with immediate. @samp{-O0} turns off this optimization. 
diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d index 2f40c72a4e..9c5f423f67 100644 --- a/gas/testsuite/gas/i386/optimize-1.d +++ b/gas/testsuite/gas/i386/optimize-1.d @@ -99,4 +99,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 
0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s index 4c15d16c2a..b41a960350 100644 --- a/gas/testsuite/gas/i386/optimize-1.s +++ b/gas/testsuite/gas/i386/optimize-1.s @@ -116,3 +116,57 @@ _start: vmovdqu64 %ymm1, 128(%eax) vmovdqa32 (%eax), %zmm2 + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm2, %xmm3, %xmm4 + vpandnd %xmm2, %xmm3, %xmm4 + vpandnq %xmm2, %xmm3, %xmm4 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm2, %xmm3, %xmm4 + vpxord %xmm2, %xmm3, %xmm4 + vpxorq %xmm2, %xmm3, %xmm4 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm2, %ymm3, %ymm4 + vpandnd %ymm2, %ymm3, %ymm4 + vpandnq %ymm2, %ymm3, %ymm4 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm2, %ymm3, %ymm4 + vpxord %ymm2, %ymm3, %ymm4 + vpxorq %ymm2, %ymm3, %ymm4 + + vpandd 112(%eax), %xmm2, %xmm3 + vpandq 112(%eax), %xmm2, %xmm3 + vpandnd 112(%eax), %xmm2, %xmm3 + vpandnq 112(%eax), %xmm2, %xmm3 + vpord 112(%eax), %xmm2, %xmm3 + vporq 112(%eax), %xmm2, %xmm3 + vpxord 112(%eax), %xmm2, %xmm3 + vpxorq 112(%eax), %xmm2, %xmm3 + + vpandd 128(%eax), %xmm2, %xmm3 + vpandq 128(%eax), %xmm2, %xmm3 + vpandnd 128(%eax), %xmm2, %xmm3 + vpandnq 128(%eax), %xmm2, %xmm3 + vpord 128(%eax), %xmm2, %xmm3 + 
vporq 128(%eax), %xmm2, %xmm3 + vpxord 128(%eax), %xmm2, %xmm3 + vpxorq 128(%eax), %xmm2, %xmm3 + + vpandd 96(%eax), %ymm2, %ymm3 + vpandq 96(%eax), %ymm2, %ymm3 + vpandnd 96(%eax), %ymm2, %ymm3 + vpandnq 96(%eax), %ymm2, %ymm3 + vpord 96(%eax), %ymm2, %ymm3 + vporq 96(%eax), %ymm2, %ymm3 + vpxord 96(%eax), %ymm2, %ymm3 + vpxorq 96(%eax), %ymm2, %ymm3 + + vpandd 128(%eax), %ymm2, %ymm3 + vpandq 128(%eax), %ymm2, %ymm3 + vpandnd 128(%eax), %ymm2, %ymm3 + vpandnq 128(%eax), %ymm2, %ymm3 + vpord 128(%eax), %ymm2, %ymm3 + vporq 128(%eax), %ymm2, %ymm3 + vpxord 128(%eax), %ymm2, %ymm3 + vpxorq 128(%eax), %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/optimize-1a.d b/gas/testsuite/gas/i386/optimize-1a.d index d7c253a6fa..bdac98561b 100644 --- a/gas/testsuite/gas/i386/optimize-1a.d +++ b/gas/testsuite/gas/i386/optimize-1a.d @@ -100,4 +100,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 
0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index ed61dec6fa..ffc15f65ee 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -89,4 +89,68 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 89 6f d1 vmovdqu16 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 7e 89 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 fe 89 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}\{z\} + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn 
%xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* 
vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandq %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandd \(%eax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq \(%eax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd \(%eax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord \(%eax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 0a4fb23167..c84840cedd 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -97,3 +97,75 @@ _start: vmovdqu16 %xmm1, %xmm2{%k1}{z} vmovdqu32 %xmm1, %xmm2{%k1}{z} vmovdqu64 %xmm1, %xmm2{%k1}{z} + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm2, %xmm3, %xmm4 + vpandnd %xmm2, %xmm3, %xmm4 + vpandnq %xmm2, %xmm3, %xmm4 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm2, %xmm3, %xmm4 + vpxord %xmm2, %xmm3, %xmm4 + vpxorq %xmm2, %xmm3, %xmm4 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm2, %ymm3, %ymm4 + vpandnd %ymm2, %ymm3, %ymm4 + vpandnq %ymm2, %ymm3, %ymm4 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm2, %ymm3, %ymm4 + vpxord %ymm2, %ymm3, %ymm4 + vpxorq %ymm2, %ymm3, %ymm4 + + vpandd 112(%eax), %xmm2, %xmm3 + vpandq 112(%eax), %xmm2, %xmm3 + vpandnd 112(%eax), %xmm2, 
%xmm3 + vpandnq 112(%eax), %xmm2, %xmm3 + vpord 112(%eax), %xmm2, %xmm3 + vporq 112(%eax), %xmm2, %xmm3 + vpxord 112(%eax), %xmm2, %xmm3 + vpxorq 112(%eax), %xmm2, %xmm3 + + vpandd 128(%eax), %xmm2, %xmm3 + vpandq 128(%eax), %xmm2, %xmm3 + vpandnd 128(%eax), %xmm2, %xmm3 + vpandnq 128(%eax), %xmm2, %xmm3 + vpord 128(%eax), %xmm2, %xmm3 + vporq 128(%eax), %xmm2, %xmm3 + vpxord 128(%eax), %xmm2, %xmm3 + vpxorq 128(%eax), %xmm2, %xmm3 + + vpandd 96(%eax), %ymm2, %ymm3 + vpandq 96(%eax), %ymm2, %ymm3 + vpandnd 96(%eax), %ymm2, %ymm3 + vpandnq 96(%eax), %ymm2, %ymm3 + vpord 96(%eax), %ymm2, %ymm3 + vporq 96(%eax), %ymm2, %ymm3 + vpxord 96(%eax), %ymm2, %ymm3 + vpxorq 96(%eax), %ymm2, %ymm3 + + vpandd 128(%eax), %ymm2, %ymm3 + vpandq 128(%eax), %ymm2, %ymm3 + vpandnd 128(%eax), %ymm2, %ymm3 + vpandnq 128(%eax), %ymm2, %ymm3 + vpord 128(%eax), %ymm2, %ymm3 + vporq 128(%eax), %ymm2, %ymm3 + vpxord 128(%eax), %ymm2, %ymm3 + vpxorq 128(%eax), %ymm2, %ymm3 + + vpandd %xmm2, %xmm3, %xmm4{%k5} + vpandq %ymm2, %ymm3, %ymm4{%k5} + vpandnd %ymm2, %ymm3, %ymm4{%k5} + vpandnq %xmm2, %xmm3, %xmm4{%k5} + vpord %xmm2, %xmm3, %xmm4{%k5} + vporq %ymm2, %ymm3, %ymm4{%k5} + vpxord %ymm2, %ymm3, %ymm4{%k5} + vpxorq %xmm2, %xmm3, %xmm4{%k5} + + vpandd (%eax){1to8}, %ymm2, %ymm3 + vpandq (%eax){1to2}, %xmm2, %xmm3 + vpandnd (%eax){1to4}, %xmm2, %xmm3 + vpandnq (%eax){1to4}, %ymm2, %ymm3 + vpord (%eax){1to8}, %ymm2, %ymm3 + vporq (%eax){1to2}, %xmm2, %xmm3 + vpxord (%eax){1to4}, %xmm2, %xmm3 + vpxorq (%eax){1to4}, %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/optimize-3.d b/gas/testsuite/gas/i386/optimize-3.d index cd43243b49..ea8a9b55b4 100644 --- a/gas/testsuite/gas/i386/optimize-3.d +++ b/gas/testsuite/gas/i386/optimize-3.d @@ -15,4 +15,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 
.* vpandq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4 #pass diff --git a/gas/testsuite/gas/i386/optimize-3.s b/gas/testsuite/gas/i386/optimize-3.s index a70893c15d..ec2a5b9de9 100644 --- a/gas/testsuite/gas/i386/optimize-3.s +++ b/gas/testsuite/gas/i386/optimize-3.s @@ -11,3 +11,12 @@ _start: {nooptimize} vmovdqu16 %xmm1, %xmm2 {nooptimize} vmovdqu32 %xmm1, %xmm2 {nooptimize} vmovdqu64 %xmm1, %xmm2 + + {nooptimize} vpandd %xmm2, %xmm3, %xmm4 + {nooptimize} vpandq %ymm2, %ymm3, %ymm4 + {nooptimize} vpandnd %ymm2, %ymm3, %ymm4 + {nooptimize} vpandnq %xmm2, %xmm3, %xmm4 + {nooptimize} vpord %xmm2, %xmm3, %xmm4 + {nooptimize} vporq %ymm2, %ymm3, %ymm4 + {nooptimize} vpxord %ymm2, %ymm3, %ymm4 + {nooptimize} vpxorq %xmm2, %xmm3, %xmm4 diff --git a/gas/testsuite/gas/i386/optimize-4.d b/gas/testsuite/gas/i386/optimize-4.d index f062ad7717..d97718faf9 100644 --- a/gas/testsuite/gas/i386/optimize-4.d +++ b/gas/testsuite/gas/i386/optimize-4.d @@ -99,6 +99,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + 
+[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 #pass diff --git a/gas/testsuite/gas/i386/optimize-5.d b/gas/testsuite/gas/i386/optimize-5.d index 
fdf5561af8..ecab78cb1a 100644 --- a/gas/testsuite/gas/i386/optimize-5.d +++ b/gas/testsuite/gas/i386/optimize-5.d @@ -99,6 +99,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%eax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%eax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 
0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%eax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%eax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 @@ -107,4 +155,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm4 #pass diff --git a/gas/testsuite/gas/i386/optimize-5.s b/gas/testsuite/gas/i386/optimize-5.s index 77d60edb69..e88fab1fa4 100644 --- a/gas/testsuite/gas/i386/optimize-5.s +++ b/gas/testsuite/gas/i386/optimize-5.s @@ -13,3 +13,12 @@ {evex} vmovdqu16 %xmm1, %xmm2 {evex} vmovdqu32 %xmm1, %xmm2 {evex} vmovdqu64 %xmm1, %xmm2 + + {evex} vpandd %xmm2, %xmm3, %xmm4 + {evex} vpandq %ymm2, %ymm3, %ymm4 + {evex} vpandnd %ymm2, %ymm3, %ymm4 + {evex} vpandnq %xmm2, %xmm3, %xmm4 + {evex} vpord %xmm2, %xmm3, %xmm4 + {evex} vporq %ymm2, %ymm3, %ymm4 + {evex} vpxord %ymm2, %ymm3, 
%ymm4 + {evex} vpxorq %xmm2, %xmm3, %xmm4 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.d b/gas/testsuite/gas/i386/x86-64-optimize-2.d index 45b98ae694..0041b0070e 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.d @@ -155,4 +155,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + 
+[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.s b/gas/testsuite/gas/i386/x86-64-optimize-2.s index e5d298225a..22dbd1ce46 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-2.s @@ -172,3 +172,57 @@ _start: vmovdqu64 %ymm1, 128(%rax) vmovdqa32 (%rax), %zmm2 + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm12, %xmm3, %xmm4 + vpandnd %xmm2, %xmm13, %xmm4 + vpandnq %xmm2, %xmm3, %xmm14 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm12, %xmm3, %xmm4 + vpxord %xmm2, %xmm13, %xmm4 + vpxorq %xmm2, %xmm3, %xmm14 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm12, %ymm3, %ymm4 + vpandnd %ymm2, %ymm13, %ymm4 + vpandnq %ymm2, %ymm3, %ymm14 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm12, %ymm3, %ymm4 + vpxord %ymm2, %ymm13, %ymm4 + vpxorq %ymm2, %ymm3, %ymm14 + + vpandd 112(%rax), %xmm2, %xmm3 + vpandq 112(%rax), %xmm2, %xmm3 + vpandnd 112(%rax), %xmm2, %xmm3 + vpandnq 112(%rax), %xmm2, %xmm3 + vpord 112(%rax), %xmm2, %xmm3 + vporq 112(%rax), %xmm2, %xmm3 + vpxord 112(%rax), %xmm2, %xmm3 + vpxorq 112(%rax), %xmm2, %xmm3 + + vpandd 128(%rax), %xmm2, %xmm3 + vpandq 128(%rax), 
%xmm2, %xmm3 + vpandnd 128(%rax), %xmm2, %xmm3 + vpandnq 128(%rax), %xmm2, %xmm3 + vpord 128(%rax), %xmm2, %xmm3 + vporq 128(%rax), %xmm2, %xmm3 + vpxord 128(%rax), %xmm2, %xmm3 + vpxorq 128(%rax), %xmm2, %xmm3 + + vpandd 96(%rax), %ymm2, %ymm3 + vpandq 96(%rax), %ymm2, %ymm3 + vpandnd 96(%rax), %ymm2, %ymm3 + vpandnq 96(%rax), %ymm2, %ymm3 + vpord 96(%rax), %ymm2, %ymm3 + vporq 96(%rax), %ymm2, %ymm3 + vpxord 96(%rax), %ymm2, %ymm3 + vpxorq 96(%rax), %ymm2, %ymm3 + + vpandd 128(%rax), %ymm2, %ymm3 + vpandq 128(%rax), %ymm2, %ymm3 + vpandnd 128(%rax), %ymm2, %ymm3 + vpandnq 128(%rax), %ymm2, %ymm3 + vpord 128(%rax), %ymm2, %ymm3 + vporq 128(%rax), %ymm2, %ymm3 + vpxord 128(%rax), %ymm2, %ymm3 + vpxorq 128(%rax), %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2a.d b/gas/testsuite/gas/i386/x86-64-optimize-2a.d index 39385b96ec..70a8ff3147 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2a.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2a.d @@ -156,4 +156,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 
0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2b.d b/gas/testsuite/gas/i386/x86-64-optimize-2b.d index 3eb3a59eac..b5c6ceaf6e 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-2b.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-2b.d @@ -155,4 +155,52 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) 
+[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 
0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d index 5e2832df4c..fb73b1eab3 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d @@ -115,4 +115,76 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 89 6f d1 vmovdqu16 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 7e 89 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}\{z\} +[a-f0-9]+: 62 f1 fe 89 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}\{z\} + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + 
+[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd %xmm22,%xmm23,%xmm24 + +[a-f0-9]+: 62 .* vpandq %ymm22,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm23,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm24 + +[a-f0-9]+: 62 .* vpord %xmm22,%xmm23,%xmm24 + +[a-f0-9]+: 62 .* vporq %ymm22,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm23,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm24 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14\{%k5\} + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4\{%k5\} + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* 
vpxord %ymm2,%ymm13,%ymm4\{%k5\} + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm14\{%k5\} + +[a-f0-9]+: 62 .* vpandd \(%rax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq \(%rax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd \(%rax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord \(%rax\)\{1to8\},%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s index d9c2eb86cb..56bda5cf87 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s @@ -126,3 +126,84 @@ _start: vmovdqu16 %xmm1, %xmm2{%k1}{z} vmovdqu32 %xmm1, %xmm2{%k1}{z} vmovdqu64 %xmm1, %xmm2{%k1}{z} + + vpandd %xmm2, %xmm3, %xmm4 + vpandq %xmm12, %xmm3, %xmm4 + vpandnd %xmm2, %xmm13, %xmm4 + vpandnq %xmm2, %xmm3, %xmm14 + vpord %xmm2, %xmm3, %xmm4 + vporq %xmm12, %xmm3, %xmm4 + vpxord %xmm2, %xmm13, %xmm4 + vpxorq %xmm2, %xmm3, %xmm14 + + vpandd %ymm2, %ymm3, %ymm4 + vpandq %ymm12, %ymm3, %ymm4 + vpandnd %ymm2, %ymm13, %ymm4 + vpandnq %ymm2, %ymm3, %ymm14 + vpord %ymm2, %ymm3, %ymm4 + vporq %ymm12, %ymm3, %ymm4 + vpxord %ymm2, %ymm13, %ymm4 + vpxorq %ymm2, %ymm3, %ymm14 + + vpandd 112(%rax), %xmm2, %xmm3 + vpandq 112(%rax), %xmm2, %xmm3 + vpandnd 112(%rax), %xmm2, %xmm3 + vpandnq 112(%rax), %xmm2, %xmm3 + vpord 112(%rax), %xmm2, %xmm3 + vporq 112(%rax), %xmm2, %xmm3 + vpxord 112(%rax), %xmm2, %xmm3 + vpxorq 112(%rax), %xmm2, %xmm3 + + vpandd 128(%rax), %xmm2, %xmm3 + vpandq 128(%rax), %xmm2, %xmm3 + vpandnd 128(%rax), %xmm2, %xmm3 + vpandnq 128(%rax), %xmm2, %xmm3 + vpord 128(%rax), %xmm2, %xmm3 + vporq 128(%rax), %xmm2, %xmm3 + vpxord 128(%rax), %xmm2, %xmm3 + vpxorq 128(%rax), %xmm2, %xmm3 + + vpandd 96(%rax), %ymm2, %ymm3 + vpandq 96(%rax), %ymm2, %ymm3 + vpandnd 96(%rax), 
%ymm2, %ymm3 + vpandnq 96(%rax), %ymm2, %ymm3 + vpord 96(%rax), %ymm2, %ymm3 + vporq 96(%rax), %ymm2, %ymm3 + vpxord 96(%rax), %ymm2, %ymm3 + vpxorq 96(%rax), %ymm2, %ymm3 + + vpandd 128(%rax), %ymm2, %ymm3 + vpandq 128(%rax), %ymm2, %ymm3 + vpandnd 128(%rax), %ymm2, %ymm3 + vpandnq 128(%rax), %ymm2, %ymm3 + vpord 128(%rax), %ymm2, %ymm3 + vporq 128(%rax), %ymm2, %ymm3 + vpxord 128(%rax), %ymm2, %ymm3 + vpxorq 128(%rax), %ymm2, %ymm3 + + vpandd %xmm22, %xmm23, %xmm24 + vpandq %ymm22, %ymm3, %ymm4 + vpandnd %ymm2, %ymm23, %ymm4 + vpandnq %xmm2, %xmm3, %xmm24 + vpord %xmm22, %xmm23, %xmm24 + vporq %ymm22, %ymm3, %ymm4 + vpxord %ymm2, %ymm23, %ymm4 + vpxorq %xmm2, %xmm3, %xmm24 + + vpandd %xmm2, %xmm3, %xmm4{%k5} + vpandq %ymm12, %ymm3, %ymm4{%k5} + vpandnd %ymm2, %ymm13, %ymm4{%k5} + vpandnq %xmm2, %xmm3, %xmm14{%k5} + vpord %xmm2, %xmm3, %xmm4{%k5} + vporq %ymm12, %ymm3, %ymm4{%k5} + vpxord %ymm2, %ymm13, %ymm4{%k5} + vpxorq %xmm2, %xmm3, %xmm14{%k5} + + vpandd (%rax){1to8}, %ymm2, %ymm3 + vpandq (%rax){1to2}, %xmm2, %xmm3 + vpandnd (%rax){1to4}, %xmm2, %xmm3 + vpandnq (%rax){1to4}, %ymm2, %ymm3 + vpord (%rax){1to8}, %ymm2, %ymm3 + vporq (%rax){1to2}, %xmm2, %xmm3 + vpxord (%rax){1to4}, %xmm2, %xmm3 + vpxorq (%rax){1to4}, %ymm2, %ymm3 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-4.d b/gas/testsuite/gas/i386/x86-64-optimize-4.d index 18fdeb1442..d25d24d3c6 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-4.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-4.d @@ -15,4 +15,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord 
%ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm14 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-4.s b/gas/testsuite/gas/i386/x86-64-optimize-4.s index b6d872db2c..a3f69a3f77 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-4.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-4.s @@ -11,3 +11,12 @@ _start: {nooptimize} vmovdqu16 %xmm1, %xmm2 {nooptimize} vmovdqu32 %xmm1, %xmm2 {nooptimize} vmovdqu64 %xmm1, %xmm2 + + {nooptimize} vpandd %xmm2, %xmm3, %xmm4 + {nooptimize} vpandq %ymm12, %ymm3, %ymm4 + {nooptimize} vpandnd %ymm2, %ymm13, %ymm4 + {nooptimize} vpandnq %xmm2, %xmm3, %xmm14 + {nooptimize} vpord %xmm2, %xmm3, %xmm4 + {nooptimize} vporq %ymm12, %ymm3, %ymm4 + {nooptimize} vpxord %ymm2, %ymm13, %ymm4 + {nooptimize} vpxorq %xmm2, %xmm3, %xmm14 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-5.d b/gas/testsuite/gas/i386/x86-64-optimize-5.d index 5065d650d4..0fb20b3ab3 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-5.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-5.d @@ -155,6 +155,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 
0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-6.d b/gas/testsuite/gas/i386/x86-64-optimize-6.d index 8ebd9b2475..c9f7da6fe2 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-6.d +++ 
b/gas/testsuite/gas/i386/x86-64-optimize-6.d @@ -155,6 +155,54 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 7e 28 7f 48 04 vmovdqu32 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 fe 28 7f 48 04 vmovdqu64 %ymm1,0x80\(%rax\) +[a-f0-9]+: 62 f1 7d 48 6f 10 vmovdqa32 \(%rax\),%zmm2 + +[a-f0-9]+: c5 .* vpand %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpand %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpandn %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpor %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: c4 .* vpor %xmm12,%xmm3,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm13,%xmm4 + +[a-f0-9]+: c5 .* vpxor %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: c5 .* vpand %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpand %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpandn %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpor %ymm2,%ymm3,%ymm4 + +[a-f0-9]+: c4 .* vpor %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: c5 .* vpxor %ymm2,%ymm3,%ymm14 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpandn 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpxor 0x70\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%xmm2,%xmm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpand 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpandn 
0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: c5 .* vpxor 0x60\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnd 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpandnq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vporq 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxord 0x80\(%rax\),%ymm2,%ymm3 + +[a-f0-9]+: 62 .* vpxorq 0x80\(%rax\),%ymm2,%ymm3 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5 +[a-f0-9]+: 62 f1 7d 28 6f d1 vmovdqa32 %ymm1,%ymm2 @@ -163,4 +211,12 @@ Disassembly of section .text: +[a-f0-9]+: 62 f1 ff 08 6f d1 vmovdqu16 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 7e 08 6f d1 vmovdqu32 %xmm1,%xmm2 +[a-f0-9]+: 62 f1 fe 08 6f d1 vmovdqu64 %xmm1,%xmm2 + +[a-f0-9]+: 62 .* vpandd %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vpandq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpandnd %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpandnq %xmm2,%xmm3,%xmm14 + +[a-f0-9]+: 62 .* vpord %xmm2,%xmm3,%xmm4 + +[a-f0-9]+: 62 .* vporq %ymm12,%ymm3,%ymm4 + +[a-f0-9]+: 62 .* vpxord %ymm2,%ymm13,%ymm4 + +[a-f0-9]+: 62 .* vpxorq %xmm2,%xmm3,%xmm14 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-6.s b/gas/testsuite/gas/i386/x86-64-optimize-6.s index 7c403fcc86..8f775b095b 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-6.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-6.s @@ -13,3 +13,12 @@ {evex} vmovdqu16 %xmm1, %xmm2 {evex} vmovdqu32 %xmm1, %xmm2 {evex} vmovdqu64 %xmm1, %xmm2 + + {evex} vpandd %xmm2, %xmm3, %xmm4 + {evex} vpandq %ymm12, %ymm3, %ymm4 + {evex} vpandnd %ymm2, %ymm13, %ymm4 + {evex} vpandnq %xmm2, %xmm3, %xmm14 + {evex} vpord %xmm2, %xmm3, %xmm4 + {evex} vporq %ymm12, %ymm3, %ymm4 + {evex} vpxord %ymm2, %ymm13, %ymm4 + {evex} 
vpxorq %xmm2, %xmm3, %xmm14 diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index a7322aeb2b..c6a713b39c 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,9 @@ +2019-07-01 Jan Beulich + + * i386-opc.tbl (vpandd, vpandq, vpord, vporq): Add Optimize to + the EVEX templates. + * i386-tbl.h: Re-generate. + 2019-07-01 Jan Beulich * i386-dis-evex-prefix.h: Use PCLMUL for vpclmulqdq. diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index b9c5e32928..abc4155c5a 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -3754,9 +3754,9 @@ vrcp14pd, 2, 0x664C, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=1|VexW=2|Bro vrsqrt14pd, 2, 0x664E, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM } vpaddd, 3, 0x66FE, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vpandd, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpandd, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpandnd, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vpord, 3, 0x66EB,
None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpord, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpsubd, 3, 0x66FA, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckhdq, 3, 0x666A, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckldq, 3, 0x6662, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } @@ -3764,9 +3764,9 @@ vpxord, 3, 0x66EF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|Ve vpaddq, 3, 0x66D4, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpandnq, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } 
-vpandq, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vpandq, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpmuludq, 3, 0x66F4, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } -vporq, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } +vporq, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpsubq, 3, 0x66FB, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpckhqdq, 3, 0x666D, None, 1, CpuAVX512F, Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } vpunpcklqdq, 3, 0x666C, None, 1, CpuAVX512F, 
Modrm|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 3e874a6205..e8c5eda01d 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -60419,7 +60419,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60457,7 +60457,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60514,7 +60514,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, @@ -60533,7 +60533,7 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, - 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0 }, + 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0 }, { { { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 } }, -- 2.34.1