.macro f16_dq_ifsu reg0 reg1 reg2 .irp op, vabd.f16, vmax.f16, vmin.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_q_ifsu reg0 reg1 reg2 .irp op, vabdq.f16, vmaxq.f16, vminq.f16 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_abs_neg reg0 reg1 .irp op, vabs.f16, vneg.f16 \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endr .endm .macro f16_q_abs_neg reg0 reg1 .irp op, vabsq.f16, vnegq.f16 \op q\reg0, q\reg1 .endr .endm .macro f16_dq_fcmp reg0 reg1 reg2 .irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_fcmp_imm0 reg0 reg1 .irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16 \op d\reg0, d\reg1, #0 \op q\reg0, q\reg1, #0 .endr .endm .macro f16_q_fcmp reg0 reg1 reg2 .irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_addsub reg0 reg1 reg2 .irp op, vadd.f16, vsub.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_q_addsub reg0 reg1 reg2 .irp op, vaddq.f16, vsubq.f16 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_vmaxnm reg0 reg1 reg2 .irp op, vmaxnm.f16, vminnm.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_fmac reg0 reg1 reg2 .irp op, vfma.f16, vfms.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_fmacmaybe reg0 reg1 reg2 .irp op, vmla.f16, vmls.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_vrint reg0 reg1 .irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16 \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endr .endm .macro f16_dq_recip reg0 reg1 .irp op, vrecpe.f16, vrsqrte.f16 \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endr .endm .macro f16_q_recip reg0 reg1 .irp op, vrecpeq.f16, vrsqrteq.f16 \op q\reg0, q\reg1 .endr .endm .macro f16_dq_step reg0 reg1 reg2 .irp op, vrecps.f16, vrsqrts.f16 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_q_step reg0 reg1 reg2 .irp op, vrecpsq.f16, vrsqrtsq.f16 \op q\reg0, q\reg1, q\reg2 .endr .endm .macro f16_dq_cvt reg0 reg1 .irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16, \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endr .endm .macro f16_dq_cvtz reg0 reg1 .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16, \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endr .endm .macro f16_dq_cvtz_fixed reg0 reg1 imm .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16, \op d\reg0, d\reg1, #\imm \op q\reg0, q\reg1, #\imm .endr .endm .macro f16_dq op reg0 reg1 reg2 \op d\reg0, d\reg1, d\reg2 \op q\reg0, q\reg1, q\reg2 .endm .macro f16_d op reg0 reg1 reg2 \op d\reg0, d\reg1, d\reg2 .endm .macro f16_q op reg0 reg1 reg2 \op q\reg0, q\reg1, q\reg2 .endm .macro f16_dq_2 op reg0 reg1 \op d\reg0, d\reg1 \op q\reg0, q\reg1 .endm .macro f16_d_2 op reg0 reg1 \op d\reg0, d\reg1 .endm .macro f16_q_2 op reg0 reg1 \op q\reg0, q\reg1 .endm func: # neon_dyadic_if_su f16_dq_ifsu 2 4 14 f16_q_ifsu 0 8 14 f16_d vabd.f16 1 3 15 f16_d vabd.f16 0 1 8 # neon_abs_neg f16_dq_abs_neg 0 8 f16_q_abs_neg 2 6 f16_d_2 vabs.f16 7 3 f16_d_2 vneg.f16 9 1 # neon_fcmp f16_dq_fcmp 2 4 14 f16_q_fcmp 0 8 14 # neon_addsub_if_i f16_dq_addsub 2 4 14 f16_q_addsub 0 8 14 # neon_vmaxnm f16_dq_vmaxnm 2 4 14 # neon_fmac f16_dq_fmac 2 4 14 # neon_mac_maybe_scalar f16_dq_fmacmaybe 2 4 14 # vrint f16_dq_vrint 4 14 # neon_dyadic_if_i_d f16_d vpadd.f16 4 8 14 # neon_recip_est f16_dq_recip 4 8 f16_q_recip 0 10 # neon_step f16_dq_step 8 10 12 f16_q_step 2 0 4 # neon_dyadic_if_su_d f16_d vpmax.f16 4 8 14 f16_d vpmin.f16 10 8 2 # neon_mul f16_d vmul.f16 4 8 14 f16_d vmul.f16 7 0 1 f16_q vmul.f16 2 8 0 # neon_cvt f16_dq_cvt 6 12 # neon_cvtz f16_dq_cvtz 14, 0 # neon_cvtz_fixed f16_dq_cvtz_fixed 14, 0, 3 # neon_fcmp_imm0 f16_dq_fcmp_imm0 14, 2 .macro f16_d_by_scalar op reg0 reg1 reg2 idx \op d\reg0, d\reg1, d\reg2[\idx] .endm .macro f16_q_by_scalar op reg0 reg1 reg2 idx \op q\reg0, q\reg1, d\reg2[\idx] .endm .macro f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx .irp op, vmla.f16, vmls.f16 \op d\reg0, d\reg1, d\reg2[\idx] \op q\reg0, q\reg1, d\reg2[\idx] .endr .endm # neon_mul (by scalar) f16_d_by_scalar vmul.f16 7 0 1 0 f16_d_by_scalar vmul.f16 4 8 6 2 f16_q_by_scalar vmul.f16 2 8 0 1 f16_q_by_scalar vmul.f16 2 8 7 3 # neon_mac_maybe_scalar (by scalar) f16_dq_fmacmaybe_by_scalar 2 4 1 0 f16_dq_fmacmaybe_by_scalar 1 8 7 3