# ARMv8.2-A FP16 Advanced SIMD instruction coverage.
# (Annotated source; commit cc933301, JW.)
# Emit vabd/vmax/vmin.f16 in both D- and Q-register form.
.macro f16_dq_ifsu reg0 reg1 reg2
.irp op, vabd.f16, vmax.f16, vmin.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit the q-suffixed vabdq/vmaxq/vminq.f16 spellings (Q registers only).
.macro f16_q_ifsu reg0 reg1 reg2
.irp op, vabdq.f16, vmaxq.f16, vminq.f16
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit vabs/vneg.f16 in both D- and Q-register form.
.macro f16_dq_abs_neg reg0 reg1
.irp op, vabs.f16, vneg.f16
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endr
.endm

# Emit the q-suffixed vabsq/vnegq.f16 spellings (Q registers only).
.macro f16_q_abs_neg reg0 reg1
.irp op, vabsq.f16, vnegq.f16
\op q\reg0, q\reg1
.endr
.endm

# Emit the FP16 absolute-compare and compare ops in D- and Q-register form.
.macro f16_dq_fcmp reg0 reg1 reg2
.irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit the FP16 compare-against-zero (#0 immediate) forms, D then Q.
.macro f16_dq_fcmp_imm0 reg0 reg1
.irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
\op d\reg0, d\reg1, #0
\op q\reg0, q\reg1, #0
.endr
.endm

# Emit the q-suffixed compare spellings (Q registers only).
.macro f16_q_fcmp reg0 reg1 reg2
.irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit vadd/vsub.f16 in both D- and Q-register form.
.macro f16_dq_addsub reg0 reg1 reg2
.irp op, vadd.f16, vsub.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit the q-suffixed vaddq/vsubq.f16 spellings (Q registers only).
.macro f16_q_addsub reg0 reg1 reg2
.irp op, vaddq.f16, vsubq.f16
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit vmaxnm/vminnm.f16 in both D- and Q-register form.
.macro f16_dq_vmaxnm reg0 reg1 reg2
.irp op, vmaxnm.f16, vminnm.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit fused multiply-accumulate vfma/vfms.f16, D then Q.
.macro f16_dq_fmac reg0 reg1 reg2
.irp op, vfma.f16, vfms.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit vmla/vmls.f16 multiply-accumulate, D then Q.
.macro f16_dq_fmacmaybe reg0 reg1 reg2
.irp op, vmla.f16, vmls.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit all six FP16 round-to-integral variants (Z/X/A/N/P/M), D then Q.
.macro f16_dq_vrint reg0 reg1
.irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endr
.endm

# Emit reciprocal/reciprocal-sqrt estimates vrecpe/vrsqrte.f16, D then Q.
.macro f16_dq_recip reg0 reg1
.irp op, vrecpe.f16, vrsqrte.f16
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endr
.endm

# Emit the q-suffixed estimate spellings (Q registers only).
.macro f16_q_recip reg0 reg1
.irp op, vrecpeq.f16, vrsqrteq.f16
\op q\reg0, q\reg1
.endr
.endm

# Emit Newton-Raphson step ops vrecps/vrsqrts.f16, D then Q.
.macro f16_dq_step reg0 reg1 reg2
.irp op, vrecps.f16, vrsqrts.f16
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit the q-suffixed step spellings (Q registers only).
.macro f16_q_step reg0 reg1 reg2
.irp op, vrecpsq.f16, vrsqrtsq.f16
\op q\reg0, q\reg1, q\reg2
.endr
.endm

# Emit rounding FP16->integer conversions (A/M/N/P, signed and unsigned),
# D then Q.
# FIX: removed the trailing comma in the .irp list — it produced an extra
# iteration with an empty \op, i.e. a bare operand line that does not
# assemble.
.macro f16_dq_cvt reg0 reg1
.irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endr
.endm

# Emit truncating vcvt between FP16 and s16/u16 in both directions,
# D then Q.
# FIX: removed the trailing comma in the .irp list — it produced an extra
# iteration with an empty \op (bare operand line; assembly error).
.macro f16_dq_cvtz reg0 reg1
.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endr
.endm

# Emit fixed-point vcvt between FP16 and s16/u16 with an immediate
# fraction-bits count, D then Q.
# FIX: removed the trailing comma in the .irp list — it produced an extra
# iteration with an empty \op (bare operand line; assembly error).
.macro f16_dq_cvtz_fixed reg0 reg1 imm
.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16
\op d\reg0, d\reg1, #\imm
\op q\reg0, q\reg1, #\imm
.endr
.endm

# Emit one three-operand op in both D- and Q-register form.
.macro f16_dq op reg0 reg1 reg2
\op d\reg0, d\reg1, d\reg2
\op q\reg0, q\reg1, q\reg2
.endm

# Emit one three-operand op in D-register form only.
.macro f16_d op reg0 reg1 reg2
\op d\reg0, d\reg1, d\reg2
.endm

# Emit one three-operand op in Q-register form only.
.macro f16_q op reg0 reg1 reg2
\op q\reg0, q\reg1, q\reg2
.endm

# Emit one two-operand op in both D- and Q-register form.
.macro f16_dq_2 op reg0 reg1
\op d\reg0, d\reg1
\op q\reg0, q\reg1
.endm

# Emit one two-operand op in D-register form only.
.macro f16_d_2 op reg0 reg1
\op d\reg0, d\reg1
.endm

# Emit one two-operand op in Q-register form only.
.macro f16_q_2 op reg0 reg1
\op q\reg0, q\reg1
.endm

# Coverage driver: one group of macro invocations per instruction class.
# Invocation arguments normalized to the space-separated style used by the
# rest of the file (GAS macro arguments accept spaces or commas equally).
func:
# neon_dyadic_if_su
f16_dq_ifsu 2 4 14
f16_q_ifsu 0 8 14
f16_d vabd.f16 1 3 15
f16_d vabd.f16 0 1 8

# neon_abs_neg
f16_dq_abs_neg 0 8
f16_q_abs_neg 2 6
f16_d_2 vabs.f16 7 3
f16_d_2 vneg.f16 9 1

# neon_fcmp
f16_dq_fcmp 2 4 14
f16_q_fcmp 0 8 14

# neon_addsub_if_i
f16_dq_addsub 2 4 14
f16_q_addsub 0 8 14

# neon_vmaxnm
f16_dq_vmaxnm 2 4 14

# neon_fmac
f16_dq_fmac 2 4 14

# neon_mac_maybe_scalar
f16_dq_fmacmaybe 2 4 14

# vrint
f16_dq_vrint 4 14

# neon_dyadic_if_i_d
f16_d vpadd.f16 4 8 14

# neon_recip_est
f16_dq_recip 4 8
f16_q_recip 0 10

# neon_step
f16_dq_step 8 10 12
f16_q_step 2 0 4

# neon_dyadic_if_su_d
f16_d vpmax.f16 4 8 14
f16_d vpmin.f16 10 8 2

# neon_mul
f16_d vmul.f16 4 8 14
f16_d vmul.f16 7 0 1
f16_q vmul.f16 2 8 0

# neon_cvt
f16_dq_cvt 6 12

# neon_cvtz
f16_dq_cvtz 14 0

# neon_cvtz_fixed
f16_dq_cvtz_fixed 14 0 3

# neon_fcmp_imm0
f16_dq_fcmp_imm0 14 2
# Commit 589a7d88 (JW): by-scalar multiply and multiply-accumulate forms.

# Emit one op in D-register by-scalar form: Dd, Dn, Dm[idx].
.macro f16_d_by_scalar op reg0 reg1 reg2 idx
\op d\reg0, d\reg1, d\reg2[\idx]
.endm

# Emit one op in Q-register by-scalar form: Qd, Qn, Dm[idx]
# (the scalar operand stays a D register).
.macro f16_q_by_scalar op reg0 reg1 reg2 idx
\op q\reg0, q\reg1, d\reg2[\idx]
.endm

# Emit vmla/vmls.f16 by-scalar forms, D then Q (scalar is always Dm[idx]).
.macro f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx
.irp op, vmla.f16, vmls.f16
\op d\reg0, d\reg1, d\reg2[\idx]
\op q\reg0, q\reg1, d\reg2[\idx]
.endr
.endm

# neon_mul (by scalar)
f16_d_by_scalar vmul.f16 7 0 1 0
f16_d_by_scalar vmul.f16 4 8 6 2
f16_q_by_scalar vmul.f16 2 8 0 1
f16_q_by_scalar vmul.f16 2 8 7 3

# neon_mac_maybe_scalar (by scalar)
f16_dq_fmacmaybe_by_scalar 2 4 1 0
f16_dq_fmacmaybe_by_scalar 1 8 7 3