# f16_dq_ifsu reg0 reg1 reg2
# Loops over vabd.f16, vmax.f16 and vmin.f16 and emits each one twice:
# once with d-prefixed operands and once with q-prefixed operands.
# Each argument is pasted directly after the d/q prefix to form the
# register name.
1 .macro f16_dq_ifsu reg0 reg1 reg2
2 .irp op, vabd.f16, vmax.f16, vmin.f16
3 \op d\reg0, d\reg1, d\reg2
4 \op q\reg0, q\reg1, q\reg2
# f16_q_ifsu reg0 reg1 reg2
# Same operation set as f16_dq_ifsu but spelled with the q-suffixed
# mnemonics (vabdq/vmaxq/vminq); only the q-register form is emitted.
8 .macro f16_q_ifsu reg0 reg1 reg2
9 .irp op, vabdq.f16, vmaxq.f16, vminq.f16
10 \op q\reg0, q\reg1, q\reg2
# f16_dq_abs_neg reg0 reg1
# Takes two register-number arguments and loops over vabs.f16 and
# vneg.f16.
14 .macro f16_dq_abs_neg reg0 reg1
15 .irp op, vabs.f16, vneg.f16
# f16_q_abs_neg reg0 reg1
# Takes two register-number arguments and loops over the q-suffixed
# spellings vabsq.f16 and vnegq.f16.
21 .macro f16_q_abs_neg reg0 reg1
22 .irp op, vabsq.f16, vnegq.f16
# f16_dq_fcmp reg0 reg1 reg2
# Loops over the register-register compare mnemonics (vacge, vacgt,
# vaclt, vacle, vceq, vcge, vcgt, vcle, vclt, all .f16) and emits each
# one with d-prefixed operands and again with q-prefixed operands.
27 .macro f16_dq_fcmp reg0 reg1 reg2
28 .irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
29 \op d\reg0, d\reg1, d\reg2
30 \op q\reg0, q\reg1, q\reg2
# f16_dq_fcmp_imm0 reg0 reg1
# Loops over vceq/vcge/vcgt/vcle/vclt (.f16) and emits each one with the
# literal #0 as the third operand, first on d registers and then on
# q registers.  The absolute-compare mnemonics (vac*) are not in this list.
34 .macro f16_dq_fcmp_imm0 reg0 reg1
35 .irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
36 \op d\reg0, d\reg1, #0
37 \op q\reg0, q\reg1, #0
# f16_q_fcmp reg0 reg1 reg2
# q-suffixed counterpart of f16_dq_fcmp: loops over vacgeq ... vcltq
# (.f16) and emits only the q-register form of each.
41 .macro f16_q_fcmp reg0 reg1 reg2
42 .irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
43 \op q\reg0, q\reg1, q\reg2
# f16_dq_addsub reg0 reg1 reg2
# Emits vadd.f16 and vsub.f16, each with d-prefixed operands and again
# with q-prefixed operands built from the three register numbers.
47 .macro f16_dq_addsub reg0 reg1 reg2
48 .irp op, vadd.f16, vsub.f16
49 \op d\reg0, d\reg1, d\reg2
50 \op q\reg0, q\reg1, q\reg2
# f16_q_addsub reg0 reg1 reg2
# Emits the q-suffixed spellings vaddq.f16 and vsubq.f16 on q registers
# only.
54 .macro f16_q_addsub reg0 reg1 reg2
55 .irp op, vaddq.f16, vsubq.f16
56 \op q\reg0, q\reg1, q\reg2
# f16_dq_vmaxnm reg0 reg1 reg2
# Emits vmaxnm.f16 and vminnm.f16, each with d-prefixed operands and
# again with q-prefixed operands.
60 .macro f16_dq_vmaxnm reg0 reg1 reg2
61 .irp op, vmaxnm.f16, vminnm.f16
62 \op d\reg0, d\reg1, d\reg2
63 \op q\reg0, q\reg1, q\reg2
# f16_dq_fmac reg0 reg1 reg2
# Emits vfma.f16 and vfms.f16, each with d-prefixed operands and again
# with q-prefixed operands.
67 .macro f16_dq_fmac reg0 reg1 reg2
68 .irp op, vfma.f16, vfms.f16
69 \op d\reg0, d\reg1, d\reg2
70 \op q\reg0, q\reg1, q\reg2
# f16_dq_fmacmaybe reg0 reg1 reg2
# Emits vmla.f16 and vmls.f16, each with d-prefixed operands and again
# with q-prefixed operands.  The indexed-scalar variant of the same pair
# is produced by f16_dq_fmacmaybe_by_scalar.
74 .macro f16_dq_fmacmaybe reg0 reg1 reg2
75 .irp op, vmla.f16, vmls.f16
76 \op d\reg0, d\reg1, d\reg2
77 \op q\reg0, q\reg1, q\reg2
# f16_dq_vrint reg0 reg1
# Takes two register-number arguments and loops over the rounding
# mnemonics vrintz, vrintx, vrinta, vrintn, vrintp and vrintm (.f16).
81 .macro f16_dq_vrint reg0 reg1
82 .irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
# f16_dq_recip reg0 reg1
# Takes two register-number arguments and loops over vrecpe.f16 and
# vrsqrte.f16.
88 .macro f16_dq_recip reg0 reg1
89 .irp op, vrecpe.f16, vrsqrte.f16
# f16_q_recip reg0 reg1
# Takes two register-number arguments and loops over the q-suffixed
# spellings vrecpeq.f16 and vrsqrteq.f16.
95 .macro f16_q_recip reg0 reg1
96 .irp op, vrecpeq.f16, vrsqrteq.f16
# f16_dq_step reg0 reg1 reg2
# Emits vrecps.f16 and vrsqrts.f16, each with d-prefixed operands and
# again with q-prefixed operands.
101 .macro f16_dq_step reg0 reg1 reg2
102 .irp op, vrecps.f16, vrsqrts.f16
103 \op d\reg0, d\reg1, d\reg2
104 \op q\reg0, q\reg1, q\reg2
# f16_q_step reg0 reg1 reg2
# Emits the q-suffixed spellings vrecpsq.f16 and vrsqrtsq.f16 on
# q registers only.
108 .macro f16_q_step reg0 reg1 reg2
109 .irp op, vrecpsq.f16, vrsqrtsq.f16
110 \op q\reg0, q\reg1, q\reg2
# f16_dq_cvt reg0 reg1
# Takes two register-number arguments and loops over the vcvta/vcvtm/
# vcvtn/vcvtp mnemonics in both .s16.f16 and .u16.f16 flavours.
# NOTE(review): the .irp list ends with a trailing comma - confirm the
# assembler does not turn it into an extra iteration with an empty op.
114 .macro f16_dq_cvt reg0 reg1
115 .irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16,
# f16_dq_cvtz reg0 reg1
# Takes two register-number arguments and loops over the four vcvt
# spellings .s16.f16, .u16.f16, .f16.s16 and .f16.u16.
# NOTE(review): the .irp list ends with a trailing comma - confirm the
# assembler does not turn it into an extra iteration with an empty op.
121 .macro f16_dq_cvtz reg0 reg1
122 .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
# f16_dq_cvtz_fixed reg0 reg1 imm
# Same four vcvt spellings as f16_dq_cvtz, but each instruction gets a
# third operand: the imm argument written as an immediate (#imm).
# Emitted once with d-prefixed registers and once with q-prefixed ones.
# NOTE(review): the .irp list ends with a trailing comma - confirm the
# assembler does not turn it into an extra iteration with an empty op.
128 .macro f16_dq_cvtz_fixed reg0 reg1 imm
129 .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
130 \op d\reg0, d\reg1, #\imm
131 \op q\reg0, q\reg1, #\imm
# f16_dq op reg0 reg1 reg2
# Generic three-operand helper: emits the caller-supplied mnemonic op
# once with d-prefixed operands and once with q-prefixed operands.
135 .macro f16_dq op reg0 reg1 reg2
136 \op d\reg0, d\reg1, d\reg2
137 \op q\reg0, q\reg1, q\reg2
# f16_d op reg0 reg1 reg2
# Generic three-operand helper: emits the caller-supplied mnemonic op
# with d-prefixed operands only.
140 .macro f16_d op reg0 reg1 reg2
141 \op d\reg0, d\reg1, d\reg2
# f16_q op reg0 reg1 reg2
# Generic three-operand helper: emits the caller-supplied mnemonic op
# with q-prefixed operands only.
144 .macro f16_q op reg0 reg1 reg2
145 \op q\reg0, q\reg1, q\reg2
148 .macro f16_dq_2 op reg0 reg1
153 .macro f16_d_2 op reg0 reg1
157 .macro f16_q_2 op reg0 reg1
# Macro invocations with concrete register numbers.  Arguments are
# written both space-separated and comma-separated below.
# Expands to: vabd.f16 d1, d3, d15
165 f16_d vabd.f16 1 3 15
188 # neon_mac_maybe_scalar
# vmla.f16 / vmls.f16 on d2, d4, d14 and on q2, q4, q14
189 f16_dq_fmacmaybe 2 4 14
# Expands to: vpadd.f16 d4, d8, d14 (d form only)
195 f16_d vpadd.f16 4 8 14
205 # neon_dyadic_if_su_d
# Pairwise max/min, d form only
206 f16_d vpmax.f16 4 8 14
207 f16_d vpmin.f16 10 8 2
# Expands to: vmul.f16 d4, d8, d14
210 f16_d vmul.f16 4 8 14
# vcvt with immediate operand #3 on d14, d0 and on q14, q0
221 f16_dq_cvtz_fixed 14, 0, 3
# Compares against #0 on d14, d2 and on q14, q2
224 f16_dq_fcmp_imm0 14, 2
# f16_d_by_scalar op reg0 reg1 reg2 idx
# Emits op with d-prefixed destination and first source; the last
# operand is the indexed element d<reg2>[idx].
226 .macro f16_d_by_scalar op reg0 reg1 reg2 idx
227 \op d\reg0, d\reg1, d\reg2[\idx]
# f16_q_by_scalar op reg0 reg1 reg2 idx
# Emits op with q-prefixed destination and first source; the last
# operand is still a d-prefixed indexed element, d<reg2>[idx].
230 .macro f16_q_by_scalar op reg0 reg1 reg2 idx
231 \op q\reg0, q\reg1, d\reg2[\idx]
# f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx
# Emits vmla.f16 and vmls.f16 in by-scalar form.  Each is produced with
# d-prefixed and then q-prefixed destination/first source; in both cases
# the last operand is the d-prefixed indexed element d<reg2>[idx].
234 .macro f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx
235 .irp op, vmla.f16, vmls.f16
236 \op d\reg0, d\reg1, d\reg2[\idx]
237 \op q\reg0, q\reg1, d\reg2[\idx]
# By-scalar invocations.  Argument order is: [op] reg0 reg1 reg2 idx,
# where reg2/idx select the indexed element d<reg2>[idx].
241 # neon_mul (by scalar)
# d forms: vmul.f16 d7, d0, d1[0] and vmul.f16 d4, d8, d6[2]
242 f16_d_by_scalar vmul.f16 7 0 1 0
243 f16_d_by_scalar vmul.f16 4 8 6 2
# q forms: vmul.f16 q2, q8, d0[1] and vmul.f16 q2, q8, d7[3]
244 f16_q_by_scalar vmul.f16 2 8 0 1
245 f16_q_by_scalar vmul.f16 2 8 7 3
247 # neon_mac_maybe_scalar (by scalar)
# vmla.f16 / vmls.f16 with scalar d1[0], then with scalar d7[3]
248 f16_dq_fmacmaybe_by_scalar 2 4 1 0
249 f16_dq_fmacmaybe_by_scalar 1 8 7 3