Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/vfp/vfpdouble.c | |
3 | * | |
4 | * This code is derived in part from John R. Hauser's softfloat library, which | |
5 | * carries the following notice: | |
6 | * | |
7 | * =========================================================================== | |
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | |
9 | * Arithmetic Package, Release 2. | |
10 | * | |
11 | * Written by John R. Hauser. This work was made possible in part by the | |
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | |
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | |
14 | * National Science Foundation under grant MIP-9311980. The original version | |
15 | * of this code was written as part of a project to build a fixed-point vector | |
16 | * processor in collaboration with the University of California at Berkeley, | |
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | |
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | |
19 | * arithmetic/softfloat.html'. | |
20 | * | |
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | |
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | |
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | |
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | |
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | |
26 | * | |
27 | * Derivative works are acceptable, even for commercial purposes, so long as | |
28 | * (1) they include prominent notice that the work is derivative, and (2) they | |
29 | * include prominent notice akin to these three paragraphs for those parts of | |
30 | * this code that are retained. | |
31 | * =========================================================================== | |
32 | */ | |
33 | #include <linux/kernel.h> | |
34 | #include <linux/bitops.h> | |
35 | #include <asm/ptrace.h> | |
36 | #include <asm/vfp.h> | |
37 | ||
38 | #include "vfpinstr.h" | |
39 | #include "vfp.h" | |
40 | ||
41 | static struct vfp_double vfp_double_default_qnan = { | |
42 | .exponent = 2047, | |
43 | .sign = 0, | |
44 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, | |
45 | }; | |
46 | ||
47 | static void vfp_double_dump(const char *str, struct vfp_double *d) | |
48 | { | |
49 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | |
50 | str, d->sign != 0, d->exponent, d->significand); | |
51 | } | |
52 | ||
53 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | |
54 | { | |
55 | int bits = 31 - fls(vd->significand >> 32); | |
56 | if (bits == 31) | |
57 | bits = 62 - fls(vd->significand); | |
58 | ||
59 | vfp_double_dump("normalise_denormal: in", vd); | |
60 | ||
61 | if (bits) { | |
62 | vd->exponent -= bits - 1; | |
63 | vd->significand <<= bits; | |
64 | } | |
65 | ||
66 | vfp_double_dump("normalise_denormal: out", vd); | |
67 | } | |
68 | ||
/*
 * Normalise, round and pack the unpacked value 'vd', then store it in
 * double register 'dd'.
 *
 *  dd         - destination register index, passed to vfp_put_double()
 *  vd         - unpacked value; modified in place
 *  fpscr      - only the rounding mode (FPSCR_RMODE_MASK) is read here
 *  exceptions - flags accumulated by the caller; inexact, overflow and
 *               underflow may be added to them
 *  func       - operation name, used only in the debug trace
 *
 * Returns the exception flags with VFP_NAN_FLAG cleared.
 */
u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
	u64 significand, incr;
	int exponent, shift, underflow;
	u32 rmode;

	vfp_double_dump("pack: in", vd);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vd->significand == 0) {
		vd->exponent = 0;
		goto pack;
	}

	exponent = vd->exponent;
	significand = vd->significand;

	/*
	 * Normalise: shift left until bit 63 is set, lowering the
	 * exponent by the same amount.
	 */
	shift = 32 - fls(significand >> 32);
	if (shift == 32)
		shift = 64 - fls(significand);
	if (shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vd->exponent = exponent;
	vd->significand = significand;
	vfp_double_dump("pack: normalised", vd);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright64jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: tiny number", vd);
#endif
		/* No discarded low bits set: the tiny result is exact. */
		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
		/* Kept LSB is even: a tie must not round up. */
		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		/* Shift right by one, keeping the lost bit sticky. */
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: overflow", vd);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 2046) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			/* Nothing rounds away from zero: clamp to exponent 2045 with an all-ones significand. */
			vd->exponent = 2045;
			vd->significand = 0x7fffffffffffffffULL;
		} else {
			vd->exponent = 2047;		/* infinity */
			vd->significand = 0;
		}
	} else {
		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x8000000000000000ULL)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vd->exponent = exponent;
		vd->significand = significand >> 1;
	}

 pack:
	vfp_double_dump("pack: final", vd);
	{
		s64 d = vfp_double_pack(vd);
		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
			 dd, d, exceptions);
		vfp_put_double(dd, d);
	}
	/* VFP_NAN_FLAG is internal bookkeeping; it is never returned. */
	return exceptions & ~VFP_NAN_FLAG;
}
200 | ||
201 | /* | |
202 | * Propagate the NaN, setting exceptions if it is signalling. | |
203 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | |
204 | */ | |
205 | static u32 | |
206 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | |
207 | struct vfp_double *vdm, u32 fpscr) | |
208 | { | |
209 | struct vfp_double *nan; | |
210 | int tn, tm = 0; | |
211 | ||
212 | tn = vfp_double_type(vdn); | |
213 | ||
214 | if (vdm) | |
215 | tm = vfp_double_type(vdm); | |
216 | ||
217 | if (fpscr & FPSCR_DEFAULT_NAN) | |
218 | /* | |
219 | * Default NaN mode - always returns a quiet NaN | |
220 | */ | |
221 | nan = &vfp_double_default_qnan; | |
222 | else { | |
223 | /* | |
224 | * Contemporary mode - select the first signalling | |
225 | * NAN, or if neither are signalling, the first | |
226 | * quiet NAN. | |
227 | */ | |
228 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | |
229 | nan = vdn; | |
230 | else | |
231 | nan = vdm; | |
232 | /* | |
233 | * Make the NaN quiet. | |
234 | */ | |
235 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | |
236 | } | |
237 | ||
238 | *vdd = *nan; | |
239 | ||
240 | /* | |
241 | * If one was a signalling NAN, raise invalid operation. | |
242 | */ | |
243 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | |
244 | } | |
245 | ||
246 | /* | |
247 | * Extended operations | |
248 | */ | |
249 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | |
250 | { | |
251 | vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm))); | |
252 | return 0; | |
253 | } | |
254 | ||
255 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | |
256 | { | |
257 | vfp_put_double(dd, vfp_get_double(dm)); | |
258 | return 0; | |
259 | } | |
260 | ||
261 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | |
262 | { | |
263 | vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm))); | |
264 | return 0; | |
265 | } | |
266 | ||
/*
 * dd = sqrt(dm)
 *
 * NaN and infinity operands, zeros and negative numbers are handled up
 * front; everything else goes through an estimate-and-correct sequence
 * and is finished by vfp_double_normaliseround().
 *
 * Returns the exception flags raised by the operation.
 */
static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm, vdd;
	int ret, tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	tm = vfp_double_type(&vdm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_double *vdp = &vdd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
		else if (vdm.sign == 0) {
			/*
			 * Positive infinity; also the target for the zero
			 * operand case below.  The operand is returned as is.
			 */
 sqrt_copy:
			vdp = &vdm;
			ret = 0;
		} else {
			/*
			 * Negative infinity; also the target for negative
			 * numbers below.  Default NaN, invalid operation.
			 */
 sqrt_invalid:
			vdp = &vfp_double_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_double(dd, vfp_double_pack(vdp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vdm.sign)
		goto sqrt_invalid;

	vfp_double_dump("sqrt", &vdm);

	/*
	 * Estimate the square root.
	 */
	vdd.sign = 0;
	/* Halve the unbiased exponent, then re-apply the bias. */
	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

	vfp_double_dump("sqrt estimate1", &vdd);

	/* Odd exponents take one extra right shift. */
	vdm.significand >>= 1 + (vdm.exponent & 1);
	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

	vfp_double_dump("sqrt estimate2", &vdd);

	/*
	 * And now adjust.
	 */
	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
		if (vdd.significand < 2) {
			vdd.significand = ~0ULL;
		} else {
			u64 termh, terml, remh, reml;
			vdm.significand <<= 2;
			/* rem = operand - estimate^2, as a 128-bit quantity. */
			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
			/* Estimate too large: step it down until rem is non-negative. */
			while ((s64)remh < 0) {
				vdd.significand -= 1;
				shift64left(&termh, &terml, vdd.significand);
				terml |= 1;
				add128(&remh, &reml, remh, reml, termh, terml);
			}
			/* Non-zero remainder: set the sticky bit. */
			vdd.significand |= (remh | reml) != 0;
		}
	}
	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}
350 | ||
351 | /* | |
352 | * Equal := ZC | |
353 | * Less than := N | |
354 | * Greater than := C | |
355 | * Unordered := CV | |
356 | */ | |
357 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | |
358 | { | |
359 | s64 d, m; | |
360 | u32 ret = 0; | |
361 | ||
362 | m = vfp_get_double(dm); | |
363 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | |
364 | ret |= FPSCR_C | FPSCR_V; | |
365 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
366 | /* | |
367 | * Signalling NaN, or signalling on quiet NaN | |
368 | */ | |
369 | ret |= FPSCR_IOC; | |
370 | } | |
371 | ||
372 | d = vfp_get_double(dd); | |
373 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | |
374 | ret |= FPSCR_C | FPSCR_V; | |
375 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
376 | /* | |
377 | * Signalling NaN, or signalling on quiet NaN | |
378 | */ | |
379 | ret |= FPSCR_IOC; | |
380 | } | |
381 | ||
382 | if (ret == 0) { | |
383 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | |
384 | /* | |
385 | * equal | |
386 | */ | |
387 | ret |= FPSCR_Z | FPSCR_C; | |
388 | } else if (vfp_double_packed_sign(d ^ m)) { | |
389 | /* | |
390 | * different signs | |
391 | */ | |
392 | if (vfp_double_packed_sign(d)) | |
393 | /* | |
394 | * d is negative, so d < m | |
395 | */ | |
396 | ret |= FPSCR_N; | |
397 | else | |
398 | /* | |
399 | * d is positive, so d > m | |
400 | */ | |
401 | ret |= FPSCR_C; | |
402 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | |
403 | /* | |
404 | * d < m | |
405 | */ | |
406 | ret |= FPSCR_N; | |
407 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | |
408 | /* | |
409 | * d > m | |
410 | */ | |
411 | ret |= FPSCR_C; | |
412 | } | |
413 | } | |
414 | ||
415 | return ret; | |
416 | } | |
417 | ||
418 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | |
419 | { | |
420 | return vfp_compare(dd, 0, dm, fpscr); | |
421 | } | |
422 | ||
423 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | |
424 | { | |
425 | return vfp_compare(dd, 1, dm, fpscr); | |
426 | } | |
427 | ||
428 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | |
429 | { | |
430 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | |
431 | } | |
432 | ||
433 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | |
434 | { | |
435 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | |
436 | } | |
437 | ||
438 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | |
439 | { | |
440 | struct vfp_double vdm; | |
441 | struct vfp_single vsd; | |
442 | int tm; | |
443 | u32 exceptions = 0; | |
444 | ||
445 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
446 | ||
447 | tm = vfp_double_type(&vdm); | |
448 | ||
449 | /* | |
450 | * If we have a signalling NaN, signal invalid operation. | |
451 | */ | |
452 | if (tm == VFP_SNAN) | |
453 | exceptions = FPSCR_IOC; | |
454 | ||
455 | if (tm & VFP_DENORMAL) | |
456 | vfp_double_normalise_denormal(&vdm); | |
457 | ||
458 | vsd.sign = vdm.sign; | |
459 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | |
460 | ||
461 | /* | |
462 | * If we have an infinity or a NaN, the exponent must be 255 | |
463 | */ | |
464 | if (tm & (VFP_INFINITY|VFP_NAN)) { | |
465 | vsd.exponent = 255; | |
466 | if (tm & VFP_NAN) | |
467 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | |
468 | goto pack_nan; | |
469 | } else if (tm & VFP_ZERO) | |
470 | vsd.exponent = 0; | |
471 | else | |
472 | vsd.exponent = vdm.exponent - (1023 - 127); | |
473 | ||
474 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | |
475 | ||
476 | pack_nan: | |
477 | vfp_put_float(sd, vfp_single_pack(&vsd)); | |
478 | return exceptions; | |
479 | } | |
480 | ||
481 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | |
482 | { | |
483 | struct vfp_double vdm; | |
484 | u32 m = vfp_get_float(dm); | |
485 | ||
486 | vdm.sign = 0; | |
487 | vdm.exponent = 1023 + 63 - 1; | |
488 | vdm.significand = (u64)m; | |
489 | ||
490 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | |
491 | } | |
492 | ||
493 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | |
494 | { | |
495 | struct vfp_double vdm; | |
496 | u32 m = vfp_get_float(dm); | |
497 | ||
498 | vdm.sign = (m & 0x80000000) >> 16; | |
499 | vdm.exponent = 1023 + 63 - 1; | |
500 | vdm.significand = vdm.sign ? -m : m; | |
501 | ||
502 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | |
503 | } | |
504 | ||
505 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | |
506 | { | |
507 | struct vfp_double vdm; | |
508 | u32 d, exceptions = 0; | |
509 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
510 | int tm; | |
511 | ||
512 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
513 | ||
514 | /* | |
515 | * Do we have a denormalised number? | |
516 | */ | |
517 | tm = vfp_double_type(&vdm); | |
518 | if (tm & VFP_DENORMAL) | |
519 | exceptions |= FPSCR_IDC; | |
520 | ||
521 | if (tm & VFP_NAN) | |
522 | vdm.sign = 0; | |
523 | ||
524 | if (vdm.exponent >= 1023 + 32) { | |
525 | d = vdm.sign ? 0 : 0xffffffff; | |
526 | exceptions = FPSCR_IOC; | |
527 | } else if (vdm.exponent >= 1023 - 1) { | |
528 | int shift = 1023 + 63 - vdm.exponent; | |
529 | u64 rem, incr = 0; | |
530 | ||
531 | /* | |
532 | * 2^0 <= m < 2^32-2^8 | |
533 | */ | |
534 | d = (vdm.significand << 1) >> shift; | |
535 | rem = vdm.significand << (65 - shift); | |
536 | ||
537 | if (rmode == FPSCR_ROUND_NEAREST) { | |
538 | incr = 0x8000000000000000ULL; | |
539 | if ((d & 1) == 0) | |
540 | incr -= 1; | |
541 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
542 | incr = 0; | |
543 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
544 | incr = ~0ULL; | |
545 | } | |
546 | ||
547 | if ((rem + incr) < rem) { | |
548 | if (d < 0xffffffff) | |
549 | d += 1; | |
550 | else | |
551 | exceptions |= FPSCR_IOC; | |
552 | } | |
553 | ||
554 | if (d && vdm.sign) { | |
555 | d = 0; | |
556 | exceptions |= FPSCR_IOC; | |
557 | } else if (rem) | |
558 | exceptions |= FPSCR_IXC; | |
559 | } else { | |
560 | d = 0; | |
561 | if (vdm.exponent | vdm.significand) { | |
562 | exceptions |= FPSCR_IXC; | |
563 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
564 | d = 1; | |
565 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | |
566 | d = 0; | |
567 | exceptions |= FPSCR_IOC; | |
568 | } | |
569 | } | |
570 | } | |
571 | ||
572 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
573 | ||
574 | vfp_put_float(sd, d); | |
575 | ||
576 | return exceptions; | |
577 | } | |
578 | ||
579 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | |
580 | { | |
581 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | |
582 | } | |
583 | ||
584 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | |
585 | { | |
586 | struct vfp_double vdm; | |
587 | u32 d, exceptions = 0; | |
588 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
589 | ||
590 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
591 | vfp_double_dump("VDM", &vdm); | |
592 | ||
593 | /* | |
594 | * Do we have denormalised number? | |
595 | */ | |
596 | if (vfp_double_type(&vdm) & VFP_DENORMAL) | |
597 | exceptions |= FPSCR_IDC; | |
598 | ||
599 | if (vdm.exponent >= 1023 + 32) { | |
600 | d = 0x7fffffff; | |
601 | if (vdm.sign) | |
602 | d = ~d; | |
603 | exceptions |= FPSCR_IOC; | |
604 | } else if (vdm.exponent >= 1023 - 1) { | |
605 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ | |
606 | u64 rem, incr = 0; | |
607 | ||
608 | d = (vdm.significand << 1) >> shift; | |
609 | rem = vdm.significand << (65 - shift); | |
610 | ||
611 | if (rmode == FPSCR_ROUND_NEAREST) { | |
612 | incr = 0x8000000000000000ULL; | |
613 | if ((d & 1) == 0) | |
614 | incr -= 1; | |
615 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
616 | incr = 0; | |
617 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
618 | incr = ~0ULL; | |
619 | } | |
620 | ||
621 | if ((rem + incr) < rem && d < 0xffffffff) | |
622 | d += 1; | |
623 | if (d > 0x7fffffff + (vdm.sign != 0)) { | |
624 | d = 0x7fffffff + (vdm.sign != 0); | |
625 | exceptions |= FPSCR_IOC; | |
626 | } else if (rem) | |
627 | exceptions |= FPSCR_IXC; | |
628 | ||
629 | if (vdm.sign) | |
630 | d = -d; | |
631 | } else { | |
632 | d = 0; | |
633 | if (vdm.exponent | vdm.significand) { | |
634 | exceptions |= FPSCR_IXC; | |
635 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
636 | d = 1; | |
637 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | |
638 | d = -1; | |
639 | } | |
640 | } | |
641 | ||
642 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
643 | ||
644 | vfp_put_float(sd, (s32)d); | |
645 | ||
646 | return exceptions; | |
647 | } | |
648 | ||
649 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | |
650 | { | |
651 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | |
652 | } | |
653 | ||
654 | ||
/*
 * Dispatch table for the extended operations, indexed by
 * FEXT_TO_IDX(<opcode>).  Slots without an initialiser are NULL.
 */
static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_double_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_double_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_double_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_double_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_double_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_double_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_double_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_double_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_double_fcvts,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_double_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_double_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_double_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_double_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_double_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_double_ftosiz,
};
672 | ||
673 | ||
674 | ||
675 | ||
676 | static u32 | |
677 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | |
678 | struct vfp_double *vdm, u32 fpscr) | |
679 | { | |
680 | struct vfp_double *vdp; | |
681 | u32 exceptions = 0; | |
682 | int tn, tm; | |
683 | ||
684 | tn = vfp_double_type(vdn); | |
685 | tm = vfp_double_type(vdm); | |
686 | ||
687 | if (tn & tm & VFP_INFINITY) { | |
688 | /* | |
689 | * Two infinities. Are they different signs? | |
690 | */ | |
691 | if (vdn->sign ^ vdm->sign) { | |
692 | /* | |
693 | * different signs -> invalid | |
694 | */ | |
695 | exceptions = FPSCR_IOC; | |
696 | vdp = &vfp_double_default_qnan; | |
697 | } else { | |
698 | /* | |
699 | * same signs -> valid | |
700 | */ | |
701 | vdp = vdn; | |
702 | } | |
703 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | |
704 | /* | |
705 | * One infinity and one number -> infinity | |
706 | */ | |
707 | vdp = vdn; | |
708 | } else { | |
709 | /* | |
710 | * 'n' is a NaN of some type | |
711 | */ | |
712 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
713 | } | |
714 | *vdd = *vdp; | |
715 | return exceptions; | |
716 | } | |
717 | ||
718 | static u32 | |
719 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | |
720 | struct vfp_double *vdm, u32 fpscr) | |
721 | { | |
722 | u32 exp_diff; | |
723 | u64 m_sig; | |
724 | ||
725 | if (vdn->significand & (1ULL << 63) || | |
726 | vdm->significand & (1ULL << 63)) { | |
727 | pr_info("VFP: bad FP values in %s\n", __func__); | |
728 | vfp_double_dump("VDN", vdn); | |
729 | vfp_double_dump("VDM", vdm); | |
730 | } | |
731 | ||
732 | /* | |
733 | * Ensure that 'n' is the largest magnitude number. Note that | |
734 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
735 | * This ensures that NaN propagation works correctly. | |
736 | */ | |
737 | if (vdn->exponent < vdm->exponent) { | |
738 | struct vfp_double *t = vdn; | |
739 | vdn = vdm; | |
740 | vdm = t; | |
741 | } | |
742 | ||
743 | /* | |
744 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | |
745 | * infinity or a NaN here. | |
746 | */ | |
747 | if (vdn->exponent == 2047) | |
748 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | |
749 | ||
750 | /* | |
751 | * We have two proper numbers, where 'vdn' is the larger magnitude. | |
752 | * | |
753 | * Copy 'n' to 'd' before doing the arithmetic. | |
754 | */ | |
755 | *vdd = *vdn; | |
756 | ||
757 | /* | |
758 | * Align 'm' with the result. | |
759 | */ | |
760 | exp_diff = vdn->exponent - vdm->exponent; | |
761 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | |
762 | ||
763 | /* | |
764 | * If the signs are different, we are really subtracting. | |
765 | */ | |
766 | if (vdn->sign ^ vdm->sign) { | |
767 | m_sig = vdn->significand - m_sig; | |
768 | if ((s64)m_sig < 0) { | |
769 | vdd->sign = vfp_sign_negate(vdd->sign); | |
770 | m_sig = -m_sig; | |
771 | } | |
772 | } else { | |
773 | m_sig += vdn->significand; | |
774 | } | |
775 | vdd->significand = m_sig; | |
776 | ||
777 | return 0; | |
778 | } | |
779 | ||
780 | static u32 | |
781 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | |
782 | struct vfp_double *vdm, u32 fpscr) | |
783 | { | |
784 | vfp_double_dump("VDN", vdn); | |
785 | vfp_double_dump("VDM", vdm); | |
786 | ||
787 | /* | |
788 | * Ensure that 'n' is the largest magnitude number. Note that | |
789 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
790 | * This ensures that NaN propagation works correctly. | |
791 | */ | |
792 | if (vdn->exponent < vdm->exponent) { | |
793 | struct vfp_double *t = vdn; | |
794 | vdn = vdm; | |
795 | vdm = t; | |
796 | pr_debug("VFP: swapping M <-> N\n"); | |
797 | } | |
798 | ||
799 | vdd->sign = vdn->sign ^ vdm->sign; | |
800 | ||
801 | /* | |
802 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | |
803 | */ | |
804 | if (vdn->exponent == 2047) { | |
805 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | |
806 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
807 | if ((vdm->exponent | vdm->significand) == 0) { | |
808 | *vdd = vfp_double_default_qnan; | |
809 | return FPSCR_IOC; | |
810 | } | |
811 | vdd->exponent = vdn->exponent; | |
812 | vdd->significand = 0; | |
813 | return 0; | |
814 | } | |
815 | ||
816 | /* | |
817 | * If 'm' is zero, the result is always zero. In this case, | |
818 | * 'n' may be zero or a number, but it doesn't matter which. | |
819 | */ | |
820 | if ((vdm->exponent | vdm->significand) == 0) { | |
821 | vdd->exponent = 0; | |
822 | vdd->significand = 0; | |
823 | return 0; | |
824 | } | |
825 | ||
826 | /* | |
827 | * We add 2 to the destination exponent for the same reason | |
828 | * as the addition case - though this time we have +1 from | |
829 | * each input operand. | |
830 | */ | |
831 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | |
832 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | |
833 | ||
834 | vfp_double_dump("VDD", vdd); | |
835 | return 0; | |
836 | } | |
837 | ||
/*
 * Flag bits for the 'negate' argument of
 * vfp_double_multiply_accumulate().
 */
#define NEG_MULTIPLY	(1 << 0)	/* negate the product */
#define NEG_SUBTRACT	(1 << 1)	/* negate the accumulator operand */
840 | ||
841 | static u32 | |
842 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | |
843 | { | |
844 | struct vfp_double vdd, vdp, vdn, vdm; | |
845 | u32 exceptions; | |
846 | ||
847 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
848 | if (vdn.exponent == 0 && vdn.significand) | |
849 | vfp_double_normalise_denormal(&vdn); | |
850 | ||
851 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
852 | if (vdm.exponent == 0 && vdm.significand) | |
853 | vfp_double_normalise_denormal(&vdm); | |
854 | ||
855 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | |
856 | if (negate & NEG_MULTIPLY) | |
857 | vdp.sign = vfp_sign_negate(vdp.sign); | |
858 | ||
859 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | |
860 | if (negate & NEG_SUBTRACT) | |
861 | vdn.sign = vfp_sign_negate(vdn.sign); | |
862 | ||
863 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | |
864 | ||
865 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | |
866 | } | |
867 | ||
868 | /* | |
869 | * Standard operations | |
870 | */ | |
871 | ||
872 | /* | |
873 | * sd = sd + (sn * sm) | |
874 | */ | |
875 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | |
876 | { | |
877 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | |
878 | } | |
879 | ||
880 | /* | |
881 | * sd = sd - (sn * sm) | |
882 | */ | |
883 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | |
884 | { | |
885 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | |
886 | } | |
887 | ||
888 | /* | |
889 | * sd = -sd + (sn * sm) | |
890 | */ | |
891 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | |
892 | { | |
893 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | |
894 | } | |
895 | ||
896 | /* | |
897 | * sd = -sd - (sn * sm) | |
898 | */ | |
899 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | |
900 | { | |
901 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | |
902 | } | |
903 | ||
904 | /* | |
905 | * sd = sn * sm | |
906 | */ | |
907 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | |
908 | { | |
909 | struct vfp_double vdd, vdn, vdm; | |
910 | u32 exceptions; | |
911 | ||
912 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
913 | if (vdn.exponent == 0 && vdn.significand) | |
914 | vfp_double_normalise_denormal(&vdn); | |
915 | ||
916 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
917 | if (vdm.exponent == 0 && vdm.significand) | |
918 | vfp_double_normalise_denormal(&vdm); | |
919 | ||
920 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
921 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | |
922 | } | |
923 | ||
924 | /* | |
925 | * sd = -(sn * sm) | |
926 | */ | |
927 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | |
928 | { | |
929 | struct vfp_double vdd, vdn, vdm; | |
930 | u32 exceptions; | |
931 | ||
932 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
933 | if (vdn.exponent == 0 && vdn.significand) | |
934 | vfp_double_normalise_denormal(&vdn); | |
935 | ||
936 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
937 | if (vdm.exponent == 0 && vdm.significand) | |
938 | vfp_double_normalise_denormal(&vdm); | |
939 | ||
940 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
941 | vdd.sign = vfp_sign_negate(vdd.sign); | |
942 | ||
943 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | |
944 | } | |
945 | ||
946 | /* | |
947 | * sd = sn + sm | |
948 | */ | |
949 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | |
950 | { | |
951 | struct vfp_double vdd, vdn, vdm; | |
952 | u32 exceptions; | |
953 | ||
954 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
955 | if (vdn.exponent == 0 && vdn.significand) | |
956 | vfp_double_normalise_denormal(&vdn); | |
957 | ||
958 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
959 | if (vdm.exponent == 0 && vdm.significand) | |
960 | vfp_double_normalise_denormal(&vdm); | |
961 | ||
962 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
963 | ||
964 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | |
965 | } | |
966 | ||
967 | /* | |
968 | * sd = sn - sm | |
969 | */ | |
970 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | |
971 | { | |
972 | struct vfp_double vdd, vdn, vdm; | |
973 | u32 exceptions; | |
974 | ||
975 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
976 | if (vdn.exponent == 0 && vdn.significand) | |
977 | vfp_double_normalise_denormal(&vdn); | |
978 | ||
979 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
980 | if (vdm.exponent == 0 && vdm.significand) | |
981 | vfp_double_normalise_denormal(&vdm); | |
982 | ||
983 | /* | |
984 | * Subtraction is like addition, but with a negated operand. | |
985 | */ | |
986 | vdm.sign = vfp_sign_negate(vdm.sign); | |
987 | ||
988 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
989 | ||
990 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | |
991 | } | |
992 | ||
993 | /* | |
994 | * sd = sn / sm | |
995 | */ | |
996 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | |
997 | { | |
998 | struct vfp_double vdd, vdn, vdm; | |
999 | u32 exceptions = 0; | |
1000 | int tm, tn; | |
1001 | ||
1002 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
1003 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
1004 | ||
1005 | vdd.sign = vdn.sign ^ vdm.sign; | |
1006 | ||
1007 | tn = vfp_double_type(&vdn); | |
1008 | tm = vfp_double_type(&vdm); | |
1009 | ||
1010 | /* | |
1011 | * Is n a NAN? | |
1012 | */ | |
1013 | if (tn & VFP_NAN) | |
1014 | goto vdn_nan; | |
1015 | ||
1016 | /* | |
1017 | * Is m a NAN? | |
1018 | */ | |
1019 | if (tm & VFP_NAN) | |
1020 | goto vdm_nan; | |
1021 | ||
1022 | /* | |
1023 | * If n and m are infinity, the result is invalid | |
1024 | * If n and m are zero, the result is invalid | |
1025 | */ | |
1026 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | |
1027 | goto invalid; | |
1028 | ||
1029 | /* | |
1030 | * If n is infinity, the result is infinity | |
1031 | */ | |
1032 | if (tn & VFP_INFINITY) | |
1033 | goto infinity; | |
1034 | ||
1035 | /* | |
1036 | * If m is zero, raise div0 exceptions | |
1037 | */ | |
1038 | if (tm & VFP_ZERO) | |
1039 | goto divzero; | |
1040 | ||
1041 | /* | |
1042 | * If m is infinity, or n is zero, the result is zero | |
1043 | */ | |
1044 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | |
1045 | goto zero; | |
1046 | ||
1047 | if (tn & VFP_DENORMAL) | |
1048 | vfp_double_normalise_denormal(&vdn); | |
1049 | if (tm & VFP_DENORMAL) | |
1050 | vfp_double_normalise_denormal(&vdm); | |
1051 | ||
1052 | /* | |
1053 | * Ok, we have two numbers, we can perform division. | |
1054 | */ | |
1055 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | |
1056 | vdm.significand <<= 1; | |
1057 | if (vdm.significand <= (2 * vdn.significand)) { | |
1058 | vdn.significand >>= 1; | |
1059 | vdd.exponent++; | |
1060 | } | |
1061 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | |
1062 | if ((vdd.significand & 0x1ff) <= 2) { | |
1063 | u64 termh, terml, remh, reml; | |
1064 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | |
1065 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | |
1066 | while ((s64)remh < 0) { | |
1067 | vdd.significand -= 1; | |
1068 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | |
1069 | } | |
1070 | vdd.significand |= (reml != 0); | |
1071 | } | |
1072 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | |
1073 | ||
1074 | vdn_nan: | |
1075 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | |
1076 | pack: | |
1077 | vfp_put_double(dd, vfp_double_pack(&vdd)); | |
1078 | return exceptions; | |
1079 | ||
1080 | vdm_nan: | |
1081 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | |
1082 | goto pack; | |
1083 | ||
1084 | zero: | |
1085 | vdd.exponent = 0; | |
1086 | vdd.significand = 0; | |
1087 | goto pack; | |
1088 | ||
1089 | divzero: | |
1090 | exceptions = FPSCR_DZC; | |
1091 | infinity: | |
1092 | vdd.exponent = 2047; | |
1093 | vdd.significand = 0; | |
1094 | goto pack; | |
1095 | ||
1096 | invalid: | |
1097 | vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan)); | |
1098 | return FPSCR_IOC; | |
1099 | } | |
1100 | ||
1101 | static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { | |
1102 | [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac, | |
1103 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac, | |
1104 | [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc, | |
1105 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc, | |
1106 | [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul, | |
1107 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul, | |
1108 | [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd, | |
1109 | [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub, | |
1110 | [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv, | |
1111 | }; | |
1112 | ||
/*
 * Split a register number into its bank part (bits 2-3, left in place)
 * and its index within the bank (bits 0-1).
 */
#define FREG_BANK(x)	((x) & 0xc)
#define FREG_IDX(x)	((x) & 0x3)
1115 | ||
1116 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | |
1117 | { | |
1118 | u32 op = inst & FOP_MASK; | |
1119 | u32 exceptions = 0; | |
1120 | unsigned int dd = vfp_get_sd(inst); | |
1121 | unsigned int dn = vfp_get_sn(inst); | |
1122 | unsigned int dm = vfp_get_sm(inst); | |
1123 | unsigned int vecitr, veclen, vecstride; | |
1124 | u32 (*fop)(int, int, s32, u32); | |
1125 | ||
1126 | veclen = fpscr & FPSCR_LENGTH_MASK; | |
1127 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; | |
1128 | ||
1129 | /* | |
1130 | * If destination bank is zero, vector length is always '1'. | |
1131 | * ARM DDI0100F C5.1.3, C5.3.2. | |
1132 | */ | |
1133 | if (FREG_BANK(dd) == 0) | |
1134 | veclen = 0; | |
1135 | ||
1136 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | |
1137 | (veclen >> FPSCR_LENGTH_BIT) + 1); | |
1138 | ||
1139 | fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)]; | |
1140 | if (!fop) | |
1141 | goto invalid; | |
1142 | ||
1143 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | |
1144 | u32 except; | |
1145 | ||
1146 | if (op == FOP_EXT) | |
1147 | pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n", | |
1148 | vecitr >> FPSCR_LENGTH_BIT, | |
1149 | dd >> 1, dd & 1, dn, | |
1150 | dm >> 1, dm & 1); | |
1151 | else | |
1152 | pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n", | |
1153 | vecitr >> FPSCR_LENGTH_BIT, | |
1154 | dd >> 1, dd & 1, | |
1155 | dn >> 1, dn & 1, | |
1156 | FOP_TO_IDX(op), | |
1157 | dm >> 1, dm & 1); | |
1158 | ||
1159 | except = fop(dd, dn, dm, fpscr); | |
1160 | pr_debug("VFP: itr%d: exceptions=%08x\n", | |
1161 | vecitr >> FPSCR_LENGTH_BIT, except); | |
1162 | ||
1163 | exceptions |= except; | |
1164 | ||
1165 | /* | |
1166 | * This ensures that comparisons only operate on scalars; | |
1167 | * comparisons always return with one FPSCR status bit set. | |
1168 | */ | |
1169 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | |
1170 | break; | |
1171 | ||
1172 | /* | |
1173 | * CHECK: It appears to be undefined whether we stop when | |
1174 | * we encounter an exception. We continue. | |
1175 | */ | |
1176 | ||
1177 | dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6); | |
1178 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); | |
1179 | if (FREG_BANK(dm) != 0) | |
1180 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); | |
1181 | } | |
1182 | return exceptions; | |
1183 | ||
1184 | invalid: | |
1185 | return ~0; | |
1186 | } |