Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/vfp/vfpdouble.c | |
3 | * | |
4 | * This code is derived in part from John R. Housers softfloat library, which | |
5 | * carries the following notice: | |
6 | * | |
7 | * =========================================================================== | |
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | |
9 | * Arithmetic Package, Release 2. | |
10 | * | |
11 | * Written by John R. Hauser. This work was made possible in part by the | |
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | |
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | |
14 | * National Science Foundation under grant MIP-9311980. The original version | |
15 | * of this code was written as part of a project to build a fixed-point vector | |
16 | * processor in collaboration with the University of California at Berkeley, | |
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | |
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | |
19 | * arithmetic/softfloat.html'. | |
20 | * | |
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | |
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | |
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | |
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | |
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | |
26 | * | |
27 | * Derivative works are acceptable, even for commercial purposes, so long as | |
28 | * (1) they include prominent notice that the work is derivative, and (2) they | |
29 | * include prominent notice akin to these three paragraphs for those parts of | |
30 | * this code that are retained. | |
31 | * =========================================================================== | |
32 | */ | |
33 | #include <linux/kernel.h> | |
34 | #include <linux/bitops.h> | |
438a7616 RK |
35 | |
36 | #include <asm/div64.h> | |
1da177e4 LT |
37 | #include <asm/vfp.h> |
38 | ||
39 | #include "vfpinstr.h" | |
40 | #include "vfp.h" | |
41 | ||
42 | static struct vfp_double vfp_double_default_qnan = { | |
43 | .exponent = 2047, | |
44 | .sign = 0, | |
45 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, | |
46 | }; | |
47 | ||
48 | static void vfp_double_dump(const char *str, struct vfp_double *d) | |
49 | { | |
50 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | |
51 | str, d->sign != 0, d->exponent, d->significand); | |
52 | } | |
53 | ||
54 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | |
55 | { | |
56 | int bits = 31 - fls(vd->significand >> 32); | |
57 | if (bits == 31) | |
e816d71a | 58 | bits = 63 - fls(vd->significand); |
1da177e4 LT |
59 | |
60 | vfp_double_dump("normalise_denormal: in", vd); | |
61 | ||
62 | if (bits) { | |
63 | vd->exponent -= bits - 1; | |
64 | vd->significand <<= bits; | |
65 | } | |
66 | ||
67 | vfp_double_dump("normalise_denormal: out", vd); | |
68 | } | |
69 | ||
70 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | |
71 | { | |
72 | u64 significand, incr; | |
73 | int exponent, shift, underflow; | |
74 | u32 rmode; | |
75 | ||
76 | vfp_double_dump("pack: in", vd); | |
77 | ||
78 | /* | |
79 | * Infinities and NaNs are a special case. | |
80 | */ | |
81 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | |
82 | goto pack; | |
83 | ||
84 | /* | |
85 | * Special-case zero. | |
86 | */ | |
87 | if (vd->significand == 0) { | |
88 | vd->exponent = 0; | |
89 | goto pack; | |
90 | } | |
91 | ||
92 | exponent = vd->exponent; | |
93 | significand = vd->significand; | |
94 | ||
95 | shift = 32 - fls(significand >> 32); | |
96 | if (shift == 32) | |
97 | shift = 64 - fls(significand); | |
98 | if (shift) { | |
99 | exponent -= shift; | |
100 | significand <<= shift; | |
101 | } | |
102 | ||
103 | #ifdef DEBUG | |
104 | vd->exponent = exponent; | |
105 | vd->significand = significand; | |
106 | vfp_double_dump("pack: normalised", vd); | |
107 | #endif | |
108 | ||
109 | /* | |
110 | * Tiny number? | |
111 | */ | |
112 | underflow = exponent < 0; | |
113 | if (underflow) { | |
114 | significand = vfp_shiftright64jamming(significand, -exponent); | |
115 | exponent = 0; | |
116 | #ifdef DEBUG | |
117 | vd->exponent = exponent; | |
118 | vd->significand = significand; | |
119 | vfp_double_dump("pack: tiny number", vd); | |
120 | #endif | |
121 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | |
122 | underflow = 0; | |
123 | } | |
124 | ||
125 | /* | |
126 | * Select rounding increment. | |
127 | */ | |
128 | incr = 0; | |
129 | rmode = fpscr & FPSCR_RMODE_MASK; | |
130 | ||
131 | if (rmode == FPSCR_ROUND_NEAREST) { | |
132 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | |
133 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | |
134 | incr -= 1; | |
135 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
136 | incr = 0; | |
137 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | |
138 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | |
139 | ||
140 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); | |
141 | ||
142 | /* | |
143 | * Is our rounding going to overflow? | |
144 | */ | |
145 | if ((significand + incr) < significand) { | |
146 | exponent += 1; | |
147 | significand = (significand >> 1) | (significand & 1); | |
148 | incr >>= 1; | |
149 | #ifdef DEBUG | |
150 | vd->exponent = exponent; | |
151 | vd->significand = significand; | |
152 | vfp_double_dump("pack: overflow", vd); | |
153 | #endif | |
154 | } | |
155 | ||
156 | /* | |
157 | * If any of the low bits (which will be shifted out of the | |
158 | * number) are non-zero, the result is inexact. | |
159 | */ | |
160 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | |
161 | exceptions |= FPSCR_IXC; | |
162 | ||
163 | /* | |
164 | * Do our rounding. | |
165 | */ | |
166 | significand += incr; | |
167 | ||
168 | /* | |
169 | * Infinity? | |
170 | */ | |
171 | if (exponent >= 2046) { | |
172 | exceptions |= FPSCR_OFC | FPSCR_IXC; | |
173 | if (incr == 0) { | |
174 | vd->exponent = 2045; | |
175 | vd->significand = 0x7fffffffffffffffULL; | |
176 | } else { | |
177 | vd->exponent = 2047; /* infinity */ | |
178 | vd->significand = 0; | |
179 | } | |
180 | } else { | |
181 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | |
182 | exponent = 0; | |
183 | if (exponent || significand > 0x8000000000000000ULL) | |
184 | underflow = 0; | |
185 | if (underflow) | |
186 | exceptions |= FPSCR_UFC; | |
187 | vd->exponent = exponent; | |
188 | vd->significand = significand >> 1; | |
189 | } | |
190 | ||
191 | pack: | |
192 | vfp_double_dump("pack: final", vd); | |
193 | { | |
194 | s64 d = vfp_double_pack(vd); | |
195 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, | |
196 | dd, d, exceptions); | |
0355b3e0 | 197 | vfp_put_double(d, dd); |
1da177e4 | 198 | } |
928bd1b4 | 199 | return exceptions; |
1da177e4 LT |
200 | } |
201 | ||
202 | /* | |
203 | * Propagate the NaN, setting exceptions if it is signalling. | |
204 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | |
205 | */ | |
206 | static u32 | |
207 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | |
208 | struct vfp_double *vdm, u32 fpscr) | |
209 | { | |
210 | struct vfp_double *nan; | |
211 | int tn, tm = 0; | |
212 | ||
213 | tn = vfp_double_type(vdn); | |
214 | ||
215 | if (vdm) | |
216 | tm = vfp_double_type(vdm); | |
217 | ||
218 | if (fpscr & FPSCR_DEFAULT_NAN) | |
219 | /* | |
220 | * Default NaN mode - always returns a quiet NaN | |
221 | */ | |
222 | nan = &vfp_double_default_qnan; | |
223 | else { | |
224 | /* | |
225 | * Contemporary mode - select the first signalling | |
226 | * NAN, or if neither are signalling, the first | |
227 | * quiet NAN. | |
228 | */ | |
229 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | |
230 | nan = vdn; | |
231 | else | |
232 | nan = vdm; | |
233 | /* | |
234 | * Make the NaN quiet. | |
235 | */ | |
236 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | |
237 | } | |
238 | ||
239 | *vdd = *nan; | |
240 | ||
241 | /* | |
242 | * If one was a signalling NAN, raise invalid operation. | |
243 | */ | |
244 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | |
245 | } | |
246 | ||
247 | /* | |
248 | * Extended operations | |
249 | */ | |
250 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | |
251 | { | |
0355b3e0 | 252 | vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd); |
1da177e4 LT |
253 | return 0; |
254 | } | |
255 | ||
256 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | |
257 | { | |
0355b3e0 | 258 | vfp_put_double(vfp_get_double(dm), dd); |
1da177e4 LT |
259 | return 0; |
260 | } | |
261 | ||
262 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | |
263 | { | |
0355b3e0 | 264 | vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd); |
1da177e4 LT |
265 | return 0; |
266 | } | |
267 | ||
268 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) | |
269 | { | |
270 | struct vfp_double vdm, vdd; | |
271 | int ret, tm; | |
272 | ||
273 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
274 | tm = vfp_double_type(&vdm); | |
275 | if (tm & (VFP_NAN|VFP_INFINITY)) { | |
276 | struct vfp_double *vdp = &vdd; | |
277 | ||
278 | if (tm & VFP_NAN) | |
279 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); | |
280 | else if (vdm.sign == 0) { | |
281 | sqrt_copy: | |
282 | vdp = &vdm; | |
283 | ret = 0; | |
284 | } else { | |
285 | sqrt_invalid: | |
286 | vdp = &vfp_double_default_qnan; | |
287 | ret = FPSCR_IOC; | |
288 | } | |
0355b3e0 | 289 | vfp_put_double(vfp_double_pack(vdp), dd); |
1da177e4 LT |
290 | return ret; |
291 | } | |
292 | ||
293 | /* | |
294 | * sqrt(+/- 0) == +/- 0 | |
295 | */ | |
296 | if (tm & VFP_ZERO) | |
297 | goto sqrt_copy; | |
298 | ||
299 | /* | |
300 | * Normalise a denormalised number | |
301 | */ | |
302 | if (tm & VFP_DENORMAL) | |
303 | vfp_double_normalise_denormal(&vdm); | |
304 | ||
305 | /* | |
306 | * sqrt(<0) = invalid | |
307 | */ | |
308 | if (vdm.sign) | |
309 | goto sqrt_invalid; | |
310 | ||
311 | vfp_double_dump("sqrt", &vdm); | |
312 | ||
313 | /* | |
314 | * Estimate the square root. | |
315 | */ | |
316 | vdd.sign = 0; | |
317 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; | |
318 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; | |
319 | ||
320 | vfp_double_dump("sqrt estimate1", &vdd); | |
321 | ||
322 | vdm.significand >>= 1 + (vdm.exponent & 1); | |
323 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); | |
324 | ||
325 | vfp_double_dump("sqrt estimate2", &vdd); | |
326 | ||
327 | /* | |
328 | * And now adjust. | |
329 | */ | |
330 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { | |
331 | if (vdd.significand < 2) { | |
332 | vdd.significand = ~0ULL; | |
333 | } else { | |
334 | u64 termh, terml, remh, reml; | |
335 | vdm.significand <<= 2; | |
336 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); | |
337 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); | |
338 | while ((s64)remh < 0) { | |
339 | vdd.significand -= 1; | |
340 | shift64left(&termh, &terml, vdd.significand); | |
341 | terml |= 1; | |
342 | add128(&remh, &reml, remh, reml, termh, terml); | |
343 | } | |
344 | vdd.significand |= (remh | reml) != 0; | |
345 | } | |
346 | } | |
347 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); | |
348 | ||
349 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); | |
350 | } | |
351 | ||
352 | /* | |
353 | * Equal := ZC | |
354 | * Less than := N | |
355 | * Greater than := C | |
356 | * Unordered := CV | |
357 | */ | |
358 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | |
359 | { | |
360 | s64 d, m; | |
361 | u32 ret = 0; | |
362 | ||
363 | m = vfp_get_double(dm); | |
364 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | |
365 | ret |= FPSCR_C | FPSCR_V; | |
366 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
367 | /* | |
368 | * Signalling NaN, or signalling on quiet NaN | |
369 | */ | |
370 | ret |= FPSCR_IOC; | |
371 | } | |
372 | ||
373 | d = vfp_get_double(dd); | |
374 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | |
375 | ret |= FPSCR_C | FPSCR_V; | |
376 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
377 | /* | |
378 | * Signalling NaN, or signalling on quiet NaN | |
379 | */ | |
380 | ret |= FPSCR_IOC; | |
381 | } | |
382 | ||
383 | if (ret == 0) { | |
384 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | |
385 | /* | |
386 | * equal | |
387 | */ | |
388 | ret |= FPSCR_Z | FPSCR_C; | |
389 | } else if (vfp_double_packed_sign(d ^ m)) { | |
390 | /* | |
391 | * different signs | |
392 | */ | |
393 | if (vfp_double_packed_sign(d)) | |
394 | /* | |
395 | * d is negative, so d < m | |
396 | */ | |
397 | ret |= FPSCR_N; | |
398 | else | |
399 | /* | |
400 | * d is positive, so d > m | |
401 | */ | |
402 | ret |= FPSCR_C; | |
403 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | |
404 | /* | |
405 | * d < m | |
406 | */ | |
407 | ret |= FPSCR_N; | |
408 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | |
409 | /* | |
410 | * d > m | |
411 | */ | |
412 | ret |= FPSCR_C; | |
413 | } | |
414 | } | |
415 | ||
416 | return ret; | |
417 | } | |
418 | ||
419 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | |
420 | { | |
421 | return vfp_compare(dd, 0, dm, fpscr); | |
422 | } | |
423 | ||
424 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | |
425 | { | |
426 | return vfp_compare(dd, 1, dm, fpscr); | |
427 | } | |
428 | ||
429 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | |
430 | { | |
431 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | |
432 | } | |
433 | ||
434 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | |
435 | { | |
436 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | |
437 | } | |
438 | ||
439 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | |
440 | { | |
441 | struct vfp_double vdm; | |
442 | struct vfp_single vsd; | |
443 | int tm; | |
444 | u32 exceptions = 0; | |
445 | ||
446 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
447 | ||
448 | tm = vfp_double_type(&vdm); | |
449 | ||
450 | /* | |
451 | * If we have a signalling NaN, signal invalid operation. | |
452 | */ | |
453 | if (tm == VFP_SNAN) | |
454 | exceptions = FPSCR_IOC; | |
455 | ||
456 | if (tm & VFP_DENORMAL) | |
457 | vfp_double_normalise_denormal(&vdm); | |
458 | ||
459 | vsd.sign = vdm.sign; | |
460 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | |
461 | ||
462 | /* | |
463 | * If we have an infinity or a NaN, the exponent must be 255 | |
464 | */ | |
465 | if (tm & (VFP_INFINITY|VFP_NAN)) { | |
466 | vsd.exponent = 255; | |
b53a2b41 | 467 | if (tm == VFP_QNAN) |
1da177e4 LT |
468 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; |
469 | goto pack_nan; | |
470 | } else if (tm & VFP_ZERO) | |
471 | vsd.exponent = 0; | |
472 | else | |
473 | vsd.exponent = vdm.exponent - (1023 - 127); | |
474 | ||
475 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | |
476 | ||
477 | pack_nan: | |
0355b3e0 | 478 | vfp_put_float(vfp_single_pack(&vsd), sd); |
1da177e4 LT |
479 | return exceptions; |
480 | } | |
481 | ||
482 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | |
483 | { | |
484 | struct vfp_double vdm; | |
485 | u32 m = vfp_get_float(dm); | |
486 | ||
487 | vdm.sign = 0; | |
488 | vdm.exponent = 1023 + 63 - 1; | |
489 | vdm.significand = (u64)m; | |
490 | ||
491 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | |
492 | } | |
493 | ||
494 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | |
495 | { | |
496 | struct vfp_double vdm; | |
497 | u32 m = vfp_get_float(dm); | |
498 | ||
499 | vdm.sign = (m & 0x80000000) >> 16; | |
500 | vdm.exponent = 1023 + 63 - 1; | |
501 | vdm.significand = vdm.sign ? -m : m; | |
502 | ||
503 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | |
504 | } | |
505 | ||
506 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | |
507 | { | |
508 | struct vfp_double vdm; | |
509 | u32 d, exceptions = 0; | |
510 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
511 | int tm; | |
512 | ||
513 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
514 | ||
515 | /* | |
516 | * Do we have a denormalised number? | |
517 | */ | |
518 | tm = vfp_double_type(&vdm); | |
519 | if (tm & VFP_DENORMAL) | |
520 | exceptions |= FPSCR_IDC; | |
521 | ||
522 | if (tm & VFP_NAN) | |
523 | vdm.sign = 0; | |
524 | ||
525 | if (vdm.exponent >= 1023 + 32) { | |
526 | d = vdm.sign ? 0 : 0xffffffff; | |
527 | exceptions = FPSCR_IOC; | |
528 | } else if (vdm.exponent >= 1023 - 1) { | |
529 | int shift = 1023 + 63 - vdm.exponent; | |
530 | u64 rem, incr = 0; | |
531 | ||
532 | /* | |
533 | * 2^0 <= m < 2^32-2^8 | |
534 | */ | |
535 | d = (vdm.significand << 1) >> shift; | |
536 | rem = vdm.significand << (65 - shift); | |
537 | ||
538 | if (rmode == FPSCR_ROUND_NEAREST) { | |
539 | incr = 0x8000000000000000ULL; | |
540 | if ((d & 1) == 0) | |
541 | incr -= 1; | |
542 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
543 | incr = 0; | |
544 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
545 | incr = ~0ULL; | |
546 | } | |
547 | ||
548 | if ((rem + incr) < rem) { | |
549 | if (d < 0xffffffff) | |
550 | d += 1; | |
551 | else | |
552 | exceptions |= FPSCR_IOC; | |
553 | } | |
554 | ||
555 | if (d && vdm.sign) { | |
556 | d = 0; | |
557 | exceptions |= FPSCR_IOC; | |
558 | } else if (rem) | |
559 | exceptions |= FPSCR_IXC; | |
560 | } else { | |
561 | d = 0; | |
562 | if (vdm.exponent | vdm.significand) { | |
563 | exceptions |= FPSCR_IXC; | |
564 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
565 | d = 1; | |
566 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | |
567 | d = 0; | |
568 | exceptions |= FPSCR_IOC; | |
569 | } | |
570 | } | |
571 | } | |
572 | ||
573 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
574 | ||
0355b3e0 | 575 | vfp_put_float(d, sd); |
1da177e4 LT |
576 | |
577 | return exceptions; | |
578 | } | |
579 | ||
580 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | |
581 | { | |
582 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | |
583 | } | |
584 | ||
585 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | |
586 | { | |
587 | struct vfp_double vdm; | |
588 | u32 d, exceptions = 0; | |
589 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
1320a80d | 590 | int tm; |
1da177e4 LT |
591 | |
592 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
593 | vfp_double_dump("VDM", &vdm); | |
594 | ||
595 | /* | |
596 | * Do we have denormalised number? | |
597 | */ | |
1320a80d CM |
598 | tm = vfp_double_type(&vdm); |
599 | if (tm & VFP_DENORMAL) | |
1da177e4 LT |
600 | exceptions |= FPSCR_IDC; |
601 | ||
1320a80d CM |
602 | if (tm & VFP_NAN) { |
603 | d = 0; | |
604 | exceptions |= FPSCR_IOC; | |
605 | } else if (vdm.exponent >= 1023 + 32) { | |
1da177e4 LT |
606 | d = 0x7fffffff; |
607 | if (vdm.sign) | |
608 | d = ~d; | |
609 | exceptions |= FPSCR_IOC; | |
610 | } else if (vdm.exponent >= 1023 - 1) { | |
611 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ | |
612 | u64 rem, incr = 0; | |
613 | ||
614 | d = (vdm.significand << 1) >> shift; | |
615 | rem = vdm.significand << (65 - shift); | |
616 | ||
617 | if (rmode == FPSCR_ROUND_NEAREST) { | |
618 | incr = 0x8000000000000000ULL; | |
619 | if ((d & 1) == 0) | |
620 | incr -= 1; | |
621 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
622 | incr = 0; | |
623 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
624 | incr = ~0ULL; | |
625 | } | |
626 | ||
627 | if ((rem + incr) < rem && d < 0xffffffff) | |
628 | d += 1; | |
629 | if (d > 0x7fffffff + (vdm.sign != 0)) { | |
630 | d = 0x7fffffff + (vdm.sign != 0); | |
631 | exceptions |= FPSCR_IOC; | |
632 | } else if (rem) | |
633 | exceptions |= FPSCR_IXC; | |
634 | ||
635 | if (vdm.sign) | |
636 | d = -d; | |
637 | } else { | |
638 | d = 0; | |
639 | if (vdm.exponent | vdm.significand) { | |
640 | exceptions |= FPSCR_IXC; | |
641 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
642 | d = 1; | |
643 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | |
644 | d = -1; | |
645 | } | |
646 | } | |
647 | ||
648 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
649 | ||
0355b3e0 | 650 | vfp_put_float((s32)d, sd); |
1da177e4 LT |
651 | |
652 | return exceptions; | |
653 | } | |
654 | ||
655 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | |
656 | { | |
657 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | |
658 | } | |
659 | ||
660 | ||
4cc9bd2e | 661 | static struct op fops_ext[32] = { |
baf97ce6 RK |
662 | [FEXT_TO_IDX(FEXT_FCPY)] = { vfp_double_fcpy, 0 }, |
663 | [FEXT_TO_IDX(FEXT_FABS)] = { vfp_double_fabs, 0 }, | |
664 | [FEXT_TO_IDX(FEXT_FNEG)] = { vfp_double_fneg, 0 }, | |
665 | [FEXT_TO_IDX(FEXT_FSQRT)] = { vfp_double_fsqrt, 0 }, | |
666 | [FEXT_TO_IDX(FEXT_FCMP)] = { vfp_double_fcmp, OP_SCALAR }, | |
667 | [FEXT_TO_IDX(FEXT_FCMPE)] = { vfp_double_fcmpe, OP_SCALAR }, | |
668 | [FEXT_TO_IDX(FEXT_FCMPZ)] = { vfp_double_fcmpz, OP_SCALAR }, | |
669 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = { vfp_double_fcmpez, OP_SCALAR }, | |
670 | [FEXT_TO_IDX(FEXT_FCVT)] = { vfp_double_fcvts, OP_SCALAR|OP_SD }, | |
90e6b048 RK |
671 | [FEXT_TO_IDX(FEXT_FUITO)] = { vfp_double_fuito, OP_SCALAR|OP_SM }, |
672 | [FEXT_TO_IDX(FEXT_FSITO)] = { vfp_double_fsito, OP_SCALAR|OP_SM }, | |
baf97ce6 RK |
673 | [FEXT_TO_IDX(FEXT_FTOUI)] = { vfp_double_ftoui, OP_SCALAR|OP_SD }, |
674 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = { vfp_double_ftouiz, OP_SCALAR|OP_SD }, | |
675 | [FEXT_TO_IDX(FEXT_FTOSI)] = { vfp_double_ftosi, OP_SCALAR|OP_SD }, | |
676 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = { vfp_double_ftosiz, OP_SCALAR|OP_SD }, | |
1da177e4 LT |
677 | }; |
678 | ||
679 | ||
680 | ||
681 | ||
682 | static u32 | |
683 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | |
684 | struct vfp_double *vdm, u32 fpscr) | |
685 | { | |
686 | struct vfp_double *vdp; | |
687 | u32 exceptions = 0; | |
688 | int tn, tm; | |
689 | ||
690 | tn = vfp_double_type(vdn); | |
691 | tm = vfp_double_type(vdm); | |
692 | ||
693 | if (tn & tm & VFP_INFINITY) { | |
694 | /* | |
695 | * Two infinities. Are they different signs? | |
696 | */ | |
697 | if (vdn->sign ^ vdm->sign) { | |
698 | /* | |
699 | * different signs -> invalid | |
700 | */ | |
701 | exceptions = FPSCR_IOC; | |
702 | vdp = &vfp_double_default_qnan; | |
703 | } else { | |
704 | /* | |
705 | * same signs -> valid | |
706 | */ | |
707 | vdp = vdn; | |
708 | } | |
709 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | |
710 | /* | |
711 | * One infinity and one number -> infinity | |
712 | */ | |
713 | vdp = vdn; | |
714 | } else { | |
715 | /* | |
716 | * 'n' is a NaN of some type | |
717 | */ | |
718 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
719 | } | |
720 | *vdd = *vdp; | |
721 | return exceptions; | |
722 | } | |
723 | ||
724 | static u32 | |
725 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | |
726 | struct vfp_double *vdm, u32 fpscr) | |
727 | { | |
728 | u32 exp_diff; | |
729 | u64 m_sig; | |
730 | ||
731 | if (vdn->significand & (1ULL << 63) || | |
732 | vdm->significand & (1ULL << 63)) { | |
733 | pr_info("VFP: bad FP values in %s\n", __func__); | |
734 | vfp_double_dump("VDN", vdn); | |
735 | vfp_double_dump("VDM", vdm); | |
736 | } | |
737 | ||
738 | /* | |
739 | * Ensure that 'n' is the largest magnitude number. Note that | |
740 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
741 | * This ensures that NaN propagation works correctly. | |
742 | */ | |
743 | if (vdn->exponent < vdm->exponent) { | |
744 | struct vfp_double *t = vdn; | |
745 | vdn = vdm; | |
746 | vdm = t; | |
747 | } | |
748 | ||
749 | /* | |
750 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | |
751 | * infinity or a NaN here. | |
752 | */ | |
753 | if (vdn->exponent == 2047) | |
754 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | |
755 | ||
756 | /* | |
757 | * We have two proper numbers, where 'vdn' is the larger magnitude. | |
758 | * | |
759 | * Copy 'n' to 'd' before doing the arithmetic. | |
760 | */ | |
761 | *vdd = *vdn; | |
762 | ||
763 | /* | |
764 | * Align 'm' with the result. | |
765 | */ | |
766 | exp_diff = vdn->exponent - vdm->exponent; | |
767 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | |
768 | ||
769 | /* | |
770 | * If the signs are different, we are really subtracting. | |
771 | */ | |
772 | if (vdn->sign ^ vdm->sign) { | |
773 | m_sig = vdn->significand - m_sig; | |
774 | if ((s64)m_sig < 0) { | |
775 | vdd->sign = vfp_sign_negate(vdd->sign); | |
776 | m_sig = -m_sig; | |
7b1fbf29 CM |
777 | } else if (m_sig == 0) { |
778 | vdd->sign = (fpscr & FPSCR_RMODE_MASK) == | |
779 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; | |
1da177e4 LT |
780 | } |
781 | } else { | |
782 | m_sig += vdn->significand; | |
783 | } | |
784 | vdd->significand = m_sig; | |
785 | ||
786 | return 0; | |
787 | } | |
788 | ||
789 | static u32 | |
790 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | |
791 | struct vfp_double *vdm, u32 fpscr) | |
792 | { | |
793 | vfp_double_dump("VDN", vdn); | |
794 | vfp_double_dump("VDM", vdm); | |
795 | ||
796 | /* | |
797 | * Ensure that 'n' is the largest magnitude number. Note that | |
798 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
799 | * This ensures that NaN propagation works correctly. | |
800 | */ | |
801 | if (vdn->exponent < vdm->exponent) { | |
802 | struct vfp_double *t = vdn; | |
803 | vdn = vdm; | |
804 | vdm = t; | |
805 | pr_debug("VFP: swapping M <-> N\n"); | |
806 | } | |
807 | ||
808 | vdd->sign = vdn->sign ^ vdm->sign; | |
809 | ||
810 | /* | |
811 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | |
812 | */ | |
813 | if (vdn->exponent == 2047) { | |
814 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | |
815 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
816 | if ((vdm->exponent | vdm->significand) == 0) { | |
817 | *vdd = vfp_double_default_qnan; | |
818 | return FPSCR_IOC; | |
819 | } | |
820 | vdd->exponent = vdn->exponent; | |
821 | vdd->significand = 0; | |
822 | return 0; | |
823 | } | |
824 | ||
825 | /* | |
826 | * If 'm' is zero, the result is always zero. In this case, | |
827 | * 'n' may be zero or a number, but it doesn't matter which. | |
828 | */ | |
829 | if ((vdm->exponent | vdm->significand) == 0) { | |
830 | vdd->exponent = 0; | |
831 | vdd->significand = 0; | |
832 | return 0; | |
833 | } | |
834 | ||
835 | /* | |
836 | * We add 2 to the destination exponent for the same reason | |
837 | * as the addition case - though this time we have +1 from | |
838 | * each input operand. | |
839 | */ | |
840 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | |
841 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | |
842 | ||
843 | vfp_double_dump("VDD", vdd); | |
844 | return 0; | |
845 | } | |
846 | ||
847 | #define NEG_MULTIPLY (1 << 0) | |
848 | #define NEG_SUBTRACT (1 << 1) | |
849 | ||
850 | static u32 | |
851 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | |
852 | { | |
853 | struct vfp_double vdd, vdp, vdn, vdm; | |
854 | u32 exceptions; | |
855 | ||
856 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
857 | if (vdn.exponent == 0 && vdn.significand) | |
858 | vfp_double_normalise_denormal(&vdn); | |
859 | ||
860 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
861 | if (vdm.exponent == 0 && vdm.significand) | |
862 | vfp_double_normalise_denormal(&vdm); | |
863 | ||
864 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | |
865 | if (negate & NEG_MULTIPLY) | |
866 | vdp.sign = vfp_sign_negate(vdp.sign); | |
867 | ||
868 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | |
244b4783 JF |
869 | if (vdn.exponent == 0 && vdn.significand) |
870 | vfp_double_normalise_denormal(&vdn); | |
1da177e4 LT |
871 | if (negate & NEG_SUBTRACT) |
872 | vdn.sign = vfp_sign_negate(vdn.sign); | |
873 | ||
874 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | |
875 | ||
876 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | |
877 | } | |
878 | ||
879 | /* | |
880 | * Standard operations | |
881 | */ | |
882 | ||
883 | /* | |
884 | * sd = sd + (sn * sm) | |
885 | */ | |
886 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | |
887 | { | |
888 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | |
889 | } | |
890 | ||
891 | /* | |
892 | * sd = sd - (sn * sm) | |
893 | */ | |
894 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | |
895 | { | |
896 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | |
897 | } | |
898 | ||
899 | /* | |
900 | * sd = -sd + (sn * sm) | |
901 | */ | |
902 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | |
903 | { | |
904 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | |
905 | } | |
906 | ||
907 | /* | |
908 | * sd = -sd - (sn * sm) | |
909 | */ | |
910 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | |
911 | { | |
912 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | |
913 | } | |
914 | ||
915 | /* | |
916 | * sd = sn * sm | |
917 | */ | |
918 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | |
919 | { | |
920 | struct vfp_double vdd, vdn, vdm; | |
921 | u32 exceptions; | |
922 | ||
923 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
924 | if (vdn.exponent == 0 && vdn.significand) | |
925 | vfp_double_normalise_denormal(&vdn); | |
926 | ||
927 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
928 | if (vdm.exponent == 0 && vdm.significand) | |
929 | vfp_double_normalise_denormal(&vdm); | |
930 | ||
931 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
932 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | |
933 | } | |
934 | ||
935 | /* | |
936 | * sd = -(sn * sm) | |
937 | */ | |
938 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | |
939 | { | |
940 | struct vfp_double vdd, vdn, vdm; | |
941 | u32 exceptions; | |
942 | ||
943 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
944 | if (vdn.exponent == 0 && vdn.significand) | |
945 | vfp_double_normalise_denormal(&vdn); | |
946 | ||
947 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
948 | if (vdm.exponent == 0 && vdm.significand) | |
949 | vfp_double_normalise_denormal(&vdm); | |
950 | ||
951 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
952 | vdd.sign = vfp_sign_negate(vdd.sign); | |
953 | ||
954 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | |
955 | } | |
956 | ||
957 | /* | |
958 | * sd = sn + sm | |
959 | */ | |
960 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | |
961 | { | |
962 | struct vfp_double vdd, vdn, vdm; | |
963 | u32 exceptions; | |
964 | ||
965 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
966 | if (vdn.exponent == 0 && vdn.significand) | |
967 | vfp_double_normalise_denormal(&vdn); | |
968 | ||
969 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
970 | if (vdm.exponent == 0 && vdm.significand) | |
971 | vfp_double_normalise_denormal(&vdm); | |
972 | ||
973 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
974 | ||
975 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | |
976 | } | |
977 | ||
978 | /* | |
979 | * sd = sn - sm | |
980 | */ | |
981 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | |
982 | { | |
983 | struct vfp_double vdd, vdn, vdm; | |
984 | u32 exceptions; | |
985 | ||
986 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
987 | if (vdn.exponent == 0 && vdn.significand) | |
988 | vfp_double_normalise_denormal(&vdn); | |
989 | ||
990 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
991 | if (vdm.exponent == 0 && vdm.significand) | |
992 | vfp_double_normalise_denormal(&vdm); | |
993 | ||
994 | /* | |
995 | * Subtraction is like addition, but with a negated operand. | |
996 | */ | |
997 | vdm.sign = vfp_sign_negate(vdm.sign); | |
998 | ||
999 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
1000 | ||
1001 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | |
1002 | } | |
1003 | ||
1004 | /* | |
1005 | * sd = sn / sm | |
1006 | */ | |
1007 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | |
1008 | { | |
1009 | struct vfp_double vdd, vdn, vdm; | |
1010 | u32 exceptions = 0; | |
1011 | int tm, tn; | |
1012 | ||
1013 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
1014 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
1015 | ||
1016 | vdd.sign = vdn.sign ^ vdm.sign; | |
1017 | ||
1018 | tn = vfp_double_type(&vdn); | |
1019 | tm = vfp_double_type(&vdm); | |
1020 | ||
1021 | /* | |
1022 | * Is n a NAN? | |
1023 | */ | |
1024 | if (tn & VFP_NAN) | |
1025 | goto vdn_nan; | |
1026 | ||
1027 | /* | |
1028 | * Is m a NAN? | |
1029 | */ | |
1030 | if (tm & VFP_NAN) | |
1031 | goto vdm_nan; | |
1032 | ||
1033 | /* | |
1034 | * If n and m are infinity, the result is invalid | |
1035 | * If n and m are zero, the result is invalid | |
1036 | */ | |
1037 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | |
1038 | goto invalid; | |
1039 | ||
1040 | /* | |
1041 | * If n is infinity, the result is infinity | |
1042 | */ | |
1043 | if (tn & VFP_INFINITY) | |
1044 | goto infinity; | |
1045 | ||
1046 | /* | |
1047 | * If m is zero, raise div0 exceptions | |
1048 | */ | |
1049 | if (tm & VFP_ZERO) | |
1050 | goto divzero; | |
1051 | ||
1052 | /* | |
1053 | * If m is infinity, or n is zero, the result is zero | |
1054 | */ | |
1055 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | |
1056 | goto zero; | |
1057 | ||
1058 | if (tn & VFP_DENORMAL) | |
1059 | vfp_double_normalise_denormal(&vdn); | |
1060 | if (tm & VFP_DENORMAL) | |
1061 | vfp_double_normalise_denormal(&vdm); | |
1062 | ||
1063 | /* | |
1064 | * Ok, we have two numbers, we can perform division. | |
1065 | */ | |
1066 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | |
1067 | vdm.significand <<= 1; | |
1068 | if (vdm.significand <= (2 * vdn.significand)) { | |
1069 | vdn.significand >>= 1; | |
1070 | vdd.exponent++; | |
1071 | } | |
1072 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | |
1073 | if ((vdd.significand & 0x1ff) <= 2) { | |
1074 | u64 termh, terml, remh, reml; | |
1075 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | |
1076 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | |
1077 | while ((s64)remh < 0) { | |
1078 | vdd.significand -= 1; | |
1079 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | |
1080 | } | |
1081 | vdd.significand |= (reml != 0); | |
1082 | } | |
1083 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | |
1084 | ||
1085 | vdn_nan: | |
1086 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | |
1087 | pack: | |
0355b3e0 | 1088 | vfp_put_double(vfp_double_pack(&vdd), dd); |
1da177e4 LT |
1089 | return exceptions; |
1090 | ||
1091 | vdm_nan: | |
1092 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | |
1093 | goto pack; | |
1094 | ||
1095 | zero: | |
1096 | vdd.exponent = 0; | |
1097 | vdd.significand = 0; | |
1098 | goto pack; | |
1099 | ||
1100 | divzero: | |
1101 | exceptions = FPSCR_DZC; | |
1102 | infinity: | |
1103 | vdd.exponent = 2047; | |
1104 | vdd.significand = 0; | |
1105 | goto pack; | |
1106 | ||
1107 | invalid: | |
0355b3e0 | 1108 | vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd); |
1da177e4 LT |
1109 | return FPSCR_IOC; |
1110 | } | |
1111 | ||
4cc9bd2e | 1112 | static struct op fops[16] = { |
baf97ce6 RK |
1113 | [FOP_TO_IDX(FOP_FMAC)] = { vfp_double_fmac, 0 }, |
1114 | [FOP_TO_IDX(FOP_FNMAC)] = { vfp_double_fnmac, 0 }, | |
1115 | [FOP_TO_IDX(FOP_FMSC)] = { vfp_double_fmsc, 0 }, | |
1116 | [FOP_TO_IDX(FOP_FNMSC)] = { vfp_double_fnmsc, 0 }, | |
1117 | [FOP_TO_IDX(FOP_FMUL)] = { vfp_double_fmul, 0 }, | |
1118 | [FOP_TO_IDX(FOP_FNMUL)] = { vfp_double_fnmul, 0 }, | |
1119 | [FOP_TO_IDX(FOP_FADD)] = { vfp_double_fadd, 0 }, | |
1120 | [FOP_TO_IDX(FOP_FSUB)] = { vfp_double_fsub, 0 }, | |
1121 | [FOP_TO_IDX(FOP_FDIV)] = { vfp_double_fdiv, 0 }, | |
1da177e4 LT |
1122 | }; |
1123 | ||
1124 | #define FREG_BANK(x) ((x) & 0x0c) | |
1125 | #define FREG_IDX(x) ((x) & 3) | |
1126 | ||
1127 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | |
1128 | { | |
1129 | u32 op = inst & FOP_MASK; | |
1130 | u32 exceptions = 0; | |
c29ecac1 | 1131 | unsigned int dest; |
1356c194 | 1132 | unsigned int dn = vfp_get_dn(inst); |
90e6b048 | 1133 | unsigned int dm; |
1da177e4 | 1134 | unsigned int vecitr, veclen, vecstride; |
4cc9bd2e | 1135 | struct op *fop; |
1da177e4 | 1136 | |
b9a5ce3c | 1137 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)); |
1da177e4 | 1138 | |
4cc9bd2e | 1139 | fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; |
baf97ce6 | 1140 | |
c29ecac1 DJ |
1141 | /* |
1142 | * fcvtds takes an sN register number as destination, not dN. | |
1143 | * It also always operates on scalars. | |
1144 | */ | |
4cc9bd2e | 1145 | if (fop->flags & OP_SD) |
c29ecac1 | 1146 | dest = vfp_get_sd(inst); |
4cc9bd2e | 1147 | else |
c29ecac1 DJ |
1148 | dest = vfp_get_dd(inst); |
1149 | ||
90e6b048 RK |
1150 | /* |
1151 | * f[us]ito takes a sN operand, not a dN operand. | |
1152 | */ | |
1153 | if (fop->flags & OP_SM) | |
1154 | dm = vfp_get_sm(inst); | |
1155 | else | |
1156 | dm = vfp_get_dm(inst); | |
1157 | ||
1da177e4 LT |
1158 | /* |
1159 | * If destination bank is zero, vector length is always '1'. | |
1160 | * ARM DDI0100F C5.1.3, C5.3.2. | |
1161 | */ | |
4cc9bd2e | 1162 | if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0)) |
1da177e4 | 1163 | veclen = 0; |
4cc9bd2e GF |
1164 | else |
1165 | veclen = fpscr & FPSCR_LENGTH_MASK; | |
1da177e4 LT |
1166 | |
1167 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | |
1168 | (veclen >> FPSCR_LENGTH_BIT) + 1); | |
1169 | ||
4cc9bd2e | 1170 | if (!fop->fn) |
1da177e4 LT |
1171 | goto invalid; |
1172 | ||
1173 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | |
1174 | u32 except; | |
baf97ce6 | 1175 | char type; |
1da177e4 | 1176 | |
baf97ce6 RK |
1177 | type = fop->flags & OP_SD ? 's' : 'd'; |
1178 | if (op == FOP_EXT) | |
1179 | pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n", | |
1da177e4 | 1180 | vecitr >> FPSCR_LENGTH_BIT, |
baf97ce6 | 1181 | type, dest, dn, dm); |
1da177e4 | 1182 | else |
baf97ce6 | 1183 | pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n", |
1da177e4 | 1184 | vecitr >> FPSCR_LENGTH_BIT, |
baf97ce6 | 1185 | type, dest, dn, FOP_TO_IDX(op), dm); |
1da177e4 | 1186 | |
4cc9bd2e | 1187 | except = fop->fn(dest, dn, dm, fpscr); |
1da177e4 LT |
1188 | pr_debug("VFP: itr%d: exceptions=%08x\n", |
1189 | vecitr >> FPSCR_LENGTH_BIT, except); | |
1190 | ||
1191 | exceptions |= except; | |
1192 | ||
1da177e4 LT |
1193 | /* |
1194 | * CHECK: It appears to be undefined whether we stop when | |
1195 | * we encounter an exception. We continue. | |
1196 | */ | |
b9a5ce3c TO |
1197 | dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3); |
1198 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3); | |
1da177e4 | 1199 | if (FREG_BANK(dm) != 0) |
b9a5ce3c | 1200 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3); |
1da177e4 LT |
1201 | } |
1202 | return exceptions; | |
1203 | ||
1204 | invalid: | |
1205 | return ~0; | |
1206 | } |